1//===--- Forest.cpp - Parse forest ------------------------------*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang-pseudo/Forest.h"
10#include "clang-pseudo/Token.h"
11#include "llvm/ADT/ArrayRef.h"
12#include "llvm/ADT/STLExtras.h"
13#include "llvm/Support/FormatVariadic.h"
14#include <optional>
15
16namespace clang {
17namespace pseudo {
18
19void ForestNode::RecursiveIterator::operator++() {
20 auto C = Cur->children();
21 // Try to find a child of the current node to descend into.
22 for (unsigned I = 0; I < C.size(); ++I) {
23 if (Seen.insert(V: C[I]).second) {
24 Stack.push_back(x: {.Parent: Cur, .ChildIndex: I});
25 Cur = C[I];
26 return;
27 }
28 }
29 // Try to find a sibling af an ancestor to advance to.
30 for (; !Stack.empty(); Stack.pop_back()) {
31 C = Stack.back().Parent->children();
32 unsigned &Index = Stack.back().ChildIndex;
33 while (++Index < C.size()) {
34 if (Seen.insert(V: C[Index]).second) {
35 Cur = C[Index];
36 return;
37 }
38 }
39 }
40 Cur = nullptr;
41}
42
43llvm::iterator_range<ForestNode::RecursiveIterator>
44ForestNode::descendants() const {
45 return {RecursiveIterator(this), RecursiveIterator()};
46}
47
48std::string ForestNode::dump(const Grammar &G) const {
49 switch (kind()) {
50 case Ambiguous:
51 return llvm::formatv(Fmt: "{0} := <ambiguous>", Vals: G.symbolName(symbol()));
52 case Terminal:
53 return llvm::formatv(Fmt: "{0} := tok[{1}]", Vals: G.symbolName(symbol()),
54 Vals: startTokenIndex());
55 case Sequence:
56 return G.dumpRule(rule());
57 case Opaque:
58 return llvm::formatv(Fmt: "{0} := <opaque>", Vals: G.symbolName(symbol()));
59 }
60 llvm_unreachable("Unhandled node kind!");
61}
62
63std::string ForestNode::dumpRecursive(const Grammar &G,
64 bool Abbreviated) const {
65 using llvm::formatv;
66 Token::Index MaxToken = 0;
67 // Count visits of nodes so we can mark those seen multiple times.
68 llvm::DenseMap<const ForestNode *, /*VisitCount*/ unsigned> VisitCounts;
69 std::function<void(const ForestNode *)> CountVisits =
70 [&](const ForestNode *P) {
71 MaxToken = std::max(a: MaxToken, b: P->startTokenIndex());
72 if (VisitCounts[P]++ > 0)
73 return; // Don't count children as multiply visited.
74 if (P->kind() == Ambiguous)
75 llvm::for_each(Range: P->alternatives(), F: CountVisits);
76 else if (P->kind() == Sequence)
77 llvm::for_each(Range: P->elements(), F: CountVisits);
78 };
79 CountVisits(this);
80
81 unsigned IndexWidth = std::max(a: 3, b: (int)std::to_string(val: MaxToken).size());
82 // e.g. "[{0,4}, {1,4})" if MaxToken is 5742.
83 std::string RangeFormat = formatv(Fmt: "[{{0,{0}}, {{1,{0}}) ", Vals&: IndexWidth);
84
85 // The box-drawing characters that should be added as a child is rendered.
86 struct LineDecoration {
87 std::string Prefix; // Prepended to every line.
88 llvm::StringRef First; // added to the child's line.
89 llvm::StringRef Subsequent; // added to descendants' lines.
90 };
91
92 // We print a "#<id>" for nonterminal forest nodes that are being dumped
93 // multiple times.
94 llvm::DenseMap<const ForestNode *, size_t> ReferenceIds;
95 std::string Result;
96 constexpr Token::Index KEnd = std::numeric_limits<Token::Index>::max();
97 std::function<void(const ForestNode *, Token::Index, std::optional<SymbolID>,
98 LineDecoration &LineDec)>
99 Dump = [&](const ForestNode *P, Token::Index End,
100 std::optional<SymbolID> ElidedParent, LineDecoration LineDec) {
101 bool SharedNode = VisitCounts.find(Val: P)->getSecond() > 1;
102 llvm::ArrayRef<const ForestNode *> Children;
103 auto EndOfElement = [&](size_t ChildIndex) {
104 return ChildIndex + 1 == Children.size()
105 ? End
106 : Children[ChildIndex + 1]->startTokenIndex();
107 };
108 if (P->kind() == Ambiguous) {
109 Children = P->alternatives();
110 } else if (P->kind() == Sequence) {
111 Children = P->elements();
112 if (Abbreviated) {
113 // Abbreviate chains of trivial sequence nodes.
114 // A := B, B := C, C := D, D := X Y Z
115 // becomes
116 // A~D := X Y Z
117 //
118 // We can't hide nodes that appear multiple times in the tree,
119 // because we need to call out their identity with IDs.
120 if (Children.size() == 1 && !SharedNode) {
121 assert(Children[0]->startTokenIndex() == P->startTokenIndex() &&
122 EndOfElement(0) == End);
123 return Dump(Children[0], End,
124 /*ElidedParent=*/ElidedParent.value_or(u: P->symbol()),
125 LineDec);
126 }
127 }
128 }
129
130 if (End == KEnd)
131 Result += formatv(Fmt: RangeFormat.c_str(), Vals: P->startTokenIndex(), Vals: "end");
132 else
133 Result += formatv(Fmt: RangeFormat.c_str(), Vals: P->startTokenIndex(), Vals&: End);
134 Result += LineDec.Prefix;
135 Result += LineDec.First;
136 if (ElidedParent) {
137 Result += G.symbolName(*ElidedParent);
138 Result += "~";
139 }
140
141 if (SharedNode && P->kind() != ForestNode::Terminal) {
142 auto It = ReferenceIds.try_emplace(Key: P, Args: ReferenceIds.size() + 1);
143 bool First = It.second;
144 unsigned ID = It.first->second;
145
146 // The first time, print as #1. Later, =#1.
147 if (First) {
148 Result += formatv(Fmt: "{0} #{1}", Vals: P->dump(G), Vals&: ID);
149 } else {
150 Result += formatv(Fmt: "{0} =#{1}", Vals: G.symbolName(P->symbol()), Vals&: ID);
151 Children = {}; // Don't walk the children again.
152 }
153 } else {
154 Result.append(str: P->dump(G));
155 }
156 Result.push_back(c: '\n');
157
158 auto OldPrefixSize = LineDec.Prefix.size();
159 LineDec.Prefix += LineDec.Subsequent;
160 for (size_t I = 0; I < Children.size(); ++I) {
161 if (I == Children.size() - 1) {
162 LineDec.First = "└─";
163 LineDec.Subsequent = " ";
164 } else {
165 LineDec.First = "├─";
166 LineDec.Subsequent = "│ ";
167 }
168 Dump(Children[I], P->kind() == Sequence ? EndOfElement(I) : End,
169 std::nullopt, LineDec);
170 }
171 LineDec.Prefix.resize(n: OldPrefixSize);
172 };
173 LineDecoration LineDec;
174 Dump(this, KEnd, std::nullopt, LineDec);
175 return Result;
176}
177
178llvm::ArrayRef<ForestNode>
179ForestArena::createTerminals(const TokenStream &Code) {
180 ForestNode *Terminals = Arena.Allocate<ForestNode>(Num: Code.tokens().size() + 1);
181 size_t Index = 0;
182 for (const auto &T : Code.tokens()) {
183 new (&Terminals[Index])
184 ForestNode(ForestNode::Terminal, tokenSymbol(TK: T.Kind),
185 /*Start=*/Index, /*TerminalData*/ 0);
186 ++Index;
187 }
188 // Include an `eof` terminal.
189 // This is important to drive the final shift/recover/reduce loop.
190 new (&Terminals[Index])
191 ForestNode(ForestNode::Terminal, tokenSymbol(TK: tok::eof),
192 /*Start=*/Index, /*TerminalData*/ 0);
193 ++Index;
194 NodeCount = Index;
195 return llvm::ArrayRef(Terminals, Index);
196}
197
198} // namespace pseudo
199} // namespace clang
200

source code of clang-tools-extra/pseudo/lib/Forest.cpp