1 | //===--- Forest.cpp - Parse forest ------------------------------*- C++-*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang-pseudo/Forest.h" |
10 | #include "clang-pseudo/Token.h" |
11 | #include "llvm/ADT/ArrayRef.h" |
12 | #include "llvm/ADT/STLExtras.h" |
13 | #include "llvm/Support/FormatVariadic.h" |
14 | #include <optional> |
15 | |
16 | namespace clang { |
17 | namespace pseudo { |
18 | |
19 | void ForestNode::RecursiveIterator::operator++() { |
20 | auto C = Cur->children(); |
21 | // Try to find a child of the current node to descend into. |
22 | for (unsigned I = 0; I < C.size(); ++I) { |
23 | if (Seen.insert(V: C[I]).second) { |
24 | Stack.push_back(x: {.Parent: Cur, .ChildIndex: I}); |
25 | Cur = C[I]; |
26 | return; |
27 | } |
28 | } |
29 | // Try to find a sibling af an ancestor to advance to. |
30 | for (; !Stack.empty(); Stack.pop_back()) { |
31 | C = Stack.back().Parent->children(); |
32 | unsigned &Index = Stack.back().ChildIndex; |
33 | while (++Index < C.size()) { |
34 | if (Seen.insert(V: C[Index]).second) { |
35 | Cur = C[Index]; |
36 | return; |
37 | } |
38 | } |
39 | } |
40 | Cur = nullptr; |
41 | } |
42 | |
43 | llvm::iterator_range<ForestNode::RecursiveIterator> |
44 | ForestNode::descendants() const { |
45 | return {RecursiveIterator(this), RecursiveIterator()}; |
46 | } |
47 | |
48 | std::string ForestNode::dump(const Grammar &G) const { |
49 | switch (kind()) { |
50 | case Ambiguous: |
51 | return llvm::formatv(Fmt: "{0} := <ambiguous>" , Vals: G.symbolName(symbol())); |
52 | case Terminal: |
53 | return llvm::formatv(Fmt: "{0} := tok[{1}]" , Vals: G.symbolName(symbol()), |
54 | Vals: startTokenIndex()); |
55 | case Sequence: |
56 | return G.dumpRule(rule()); |
57 | case Opaque: |
58 | return llvm::formatv(Fmt: "{0} := <opaque>" , Vals: G.symbolName(symbol())); |
59 | } |
60 | llvm_unreachable("Unhandled node kind!" ); |
61 | } |
62 | |
63 | std::string ForestNode::dumpRecursive(const Grammar &G, |
64 | bool Abbreviated) const { |
65 | using llvm::formatv; |
66 | Token::Index MaxToken = 0; |
67 | // Count visits of nodes so we can mark those seen multiple times. |
68 | llvm::DenseMap<const ForestNode *, /*VisitCount*/ unsigned> VisitCounts; |
69 | std::function<void(const ForestNode *)> CountVisits = |
70 | [&](const ForestNode *P) { |
71 | MaxToken = std::max(a: MaxToken, b: P->startTokenIndex()); |
72 | if (VisitCounts[P]++ > 0) |
73 | return; // Don't count children as multiply visited. |
74 | if (P->kind() == Ambiguous) |
75 | llvm::for_each(Range: P->alternatives(), F: CountVisits); |
76 | else if (P->kind() == Sequence) |
77 | llvm::for_each(Range: P->elements(), F: CountVisits); |
78 | }; |
79 | CountVisits(this); |
80 | |
81 | unsigned IndexWidth = std::max(a: 3, b: (int)std::to_string(val: MaxToken).size()); |
82 | // e.g. "[{0,4}, {1,4})" if MaxToken is 5742. |
83 | std::string RangeFormat = formatv(Fmt: "[{{0,{0}}, {{1,{0}}) " , Vals&: IndexWidth); |
84 | |
85 | // The box-drawing characters that should be added as a child is rendered. |
86 | struct LineDecoration { |
87 | std::string Prefix; // Prepended to every line. |
88 | llvm::StringRef First; // added to the child's line. |
89 | llvm::StringRef Subsequent; // added to descendants' lines. |
90 | }; |
91 | |
92 | // We print a "#<id>" for nonterminal forest nodes that are being dumped |
93 | // multiple times. |
94 | llvm::DenseMap<const ForestNode *, size_t> ReferenceIds; |
95 | std::string Result; |
96 | constexpr Token::Index KEnd = std::numeric_limits<Token::Index>::max(); |
97 | std::function<void(const ForestNode *, Token::Index, std::optional<SymbolID>, |
98 | LineDecoration &LineDec)> |
99 | Dump = [&](const ForestNode *P, Token::Index End, |
100 | std::optional<SymbolID> ElidedParent, LineDecoration LineDec) { |
101 | bool SharedNode = VisitCounts.find(Val: P)->getSecond() > 1; |
102 | llvm::ArrayRef<const ForestNode *> Children; |
103 | auto EndOfElement = [&](size_t ChildIndex) { |
104 | return ChildIndex + 1 == Children.size() |
105 | ? End |
106 | : Children[ChildIndex + 1]->startTokenIndex(); |
107 | }; |
108 | if (P->kind() == Ambiguous) { |
109 | Children = P->alternatives(); |
110 | } else if (P->kind() == Sequence) { |
111 | Children = P->elements(); |
112 | if (Abbreviated) { |
113 | // Abbreviate chains of trivial sequence nodes. |
114 | // A := B, B := C, C := D, D := X Y Z |
115 | // becomes |
116 | // A~D := X Y Z |
117 | // |
118 | // We can't hide nodes that appear multiple times in the tree, |
119 | // because we need to call out their identity with IDs. |
120 | if (Children.size() == 1 && !SharedNode) { |
121 | assert(Children[0]->startTokenIndex() == P->startTokenIndex() && |
122 | EndOfElement(0) == End); |
123 | return Dump(Children[0], End, |
124 | /*ElidedParent=*/ElidedParent.value_or(u: P->symbol()), |
125 | LineDec); |
126 | } |
127 | } |
128 | } |
129 | |
130 | if (End == KEnd) |
131 | Result += formatv(Fmt: RangeFormat.c_str(), Vals: P->startTokenIndex(), Vals: "end" ); |
132 | else |
133 | Result += formatv(Fmt: RangeFormat.c_str(), Vals: P->startTokenIndex(), Vals&: End); |
134 | Result += LineDec.Prefix; |
135 | Result += LineDec.First; |
136 | if (ElidedParent) { |
137 | Result += G.symbolName(*ElidedParent); |
138 | Result += "~" ; |
139 | } |
140 | |
141 | if (SharedNode && P->kind() != ForestNode::Terminal) { |
142 | auto It = ReferenceIds.try_emplace(Key: P, Args: ReferenceIds.size() + 1); |
143 | bool First = It.second; |
144 | unsigned ID = It.first->second; |
145 | |
146 | // The first time, print as #1. Later, =#1. |
147 | if (First) { |
148 | Result += formatv(Fmt: "{0} #{1}" , Vals: P->dump(G), Vals&: ID); |
149 | } else { |
150 | Result += formatv(Fmt: "{0} =#{1}" , Vals: G.symbolName(P->symbol()), Vals&: ID); |
151 | Children = {}; // Don't walk the children again. |
152 | } |
153 | } else { |
154 | Result.append(str: P->dump(G)); |
155 | } |
156 | Result.push_back(c: '\n'); |
157 | |
158 | auto OldPrefixSize = LineDec.Prefix.size(); |
159 | LineDec.Prefix += LineDec.Subsequent; |
160 | for (size_t I = 0; I < Children.size(); ++I) { |
161 | if (I == Children.size() - 1) { |
162 | LineDec.First = "└─" ; |
163 | LineDec.Subsequent = " " ; |
164 | } else { |
165 | LineDec.First = "├─" ; |
166 | LineDec.Subsequent = "│ " ; |
167 | } |
168 | Dump(Children[I], P->kind() == Sequence ? EndOfElement(I) : End, |
169 | std::nullopt, LineDec); |
170 | } |
171 | LineDec.Prefix.resize(n: OldPrefixSize); |
172 | }; |
173 | LineDecoration LineDec; |
174 | Dump(this, KEnd, std::nullopt, LineDec); |
175 | return Result; |
176 | } |
177 | |
178 | llvm::ArrayRef<ForestNode> |
179 | ForestArena::createTerminals(const TokenStream &Code) { |
180 | ForestNode *Terminals = Arena.Allocate<ForestNode>(Num: Code.tokens().size() + 1); |
181 | size_t Index = 0; |
182 | for (const auto &T : Code.tokens()) { |
183 | new (&Terminals[Index]) |
184 | ForestNode(ForestNode::Terminal, tokenSymbol(TK: T.Kind), |
185 | /*Start=*/Index, /*TerminalData*/ 0); |
186 | ++Index; |
187 | } |
188 | // Include an `eof` terminal. |
189 | // This is important to drive the final shift/recover/reduce loop. |
190 | new (&Terminals[Index]) |
191 | ForestNode(ForestNode::Terminal, tokenSymbol(TK: tok::eof), |
192 | /*Start=*/Index, /*TerminalData*/ 0); |
193 | ++Index; |
194 | NodeCount = Index; |
195 | return llvm::ArrayRef(Terminals, Index); |
196 | } |
197 | |
198 | } // namespace pseudo |
199 | } // namespace clang |
200 | |