1 | //===-- HTMLForest.cpp - browser-based parse forest explorer |
2 | //---------------===// |
3 | // |
4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
5 | // See https://llvm.org/LICENSE.txt for license information. |
6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | // |
10 | // The plain text forest node dump (clang-pseudo -print-forest) is useful but |
11 | // hard to reconcile with the code being examined, especially when it is large. |
12 | // |
13 | // HTMLForest produces a self-contained HTML file containing both the code and |
14 | // the forest representation, linking them interactively with javascript. |
15 | // At any given time, a single parse tree is shown (ambiguities resolved). |
16 | // The user can switch between ambiguous alternatives. |
17 | // |
//   +-------+---------------+
//   |       |        +-----+|
//   | #tree |  #code |#info||
//   |       |        +-----+|
//   |       |               |
//   +-------+---------------+
24 | // |
25 | // #tree is a hierarchical view of the nodes (nested <ul>s), like -print-forest. |
26 | // (It is a simple tree, not a DAG, because ambiguities have been resolved). |
27 | // Like -print-forest, trivial sequences are collapsed (expression~IDENTIFIER). |
28 | // |
29 | // #code is the source code, annotated with <span>s marking the node ranges. |
30 | // These spans are usually invisible (exception: ambiguities are marked), but |
31 | // they are used to show and change the selection. |
32 | // |
33 | // #info is a floating box that shows details of the currently selected node: |
34 | // - rule (for sequence nodes). Abbreviated rules are also shown. |
35 | // - alternatives (for ambiguous nodes). The user can choose an alternative. |
36 | // - ancestors. The parent nodes show how this node fits in translation-unit. |
37 | // |
38 | // There are two types of 'active' node: |
39 | // - *highlight* is what the cursor is over, and is colored blue. |
40 | // Near ancestors are shaded faintly (onion-skin) to show local structure. |
41 | // - *selection* is set by clicking. |
42 | // The #info box shows the selection, and selected nodes have a dashed ring. |
43 | // |
44 | //===----------------------------------------------------------------------===// |
45 | |
46 | #include "clang-pseudo/Disambiguate.h" |
47 | #include "clang-pseudo/Forest.h" |
48 | #include "clang-pseudo/grammar/Grammar.h" |
49 | #include "llvm/ADT/StringExtras.h" |
50 | #include "llvm/Support/JSON.h" |
51 | #include "llvm/Support/raw_ostream.h" |
52 | namespace clang { |
53 | namespace pseudo { |
54 | namespace { |
55 | |
56 | // Defines const char HTMLForest_css[] = "...contents of HTMLForest.css..."; etc |
57 | #include "HTMLForestResources.inc" |
58 | |
59 | struct Writer { |
60 | llvm::raw_ostream &Out; |
61 | const Grammar &G; |
62 | const ForestNode &Root; |
63 | const TokenStream &Stream; |
64 | const Disambiguation &Disambig; |
65 | |
66 | void write() { |
67 | Out << "<!doctype html>\n" ; |
68 | tag("html" , [&] { |
69 | tag("head" , [&] { |
70 | tag("title" , [&] { Out << "HTMLForest" ; }); |
71 | tag("script" , [&] { Out << HTMLForest_js; }); |
72 | tag("style" , [&] { Out << HTMLForest_css; }); |
73 | tag("script" , [&] { |
74 | Out << "var forest=" ; |
75 | writeForestJSON(); |
76 | Out << ";" ; |
77 | }); |
78 | tag("pre id='hidden-code' hidden" , [&] { writeCode(); }); |
79 | }); |
80 | tag("body" , [&] { Out << HTMLForest_html; }); |
81 | }); |
82 | } |
83 | |
84 | void writeCode(); |
85 | void writeForestJSON(); |
86 | void tag(llvm::StringRef Opener, llvm::function_ref<void()> Body) { |
87 | Out << "<" << Opener << ">" ; |
88 | Body(); |
89 | Out << "</" << Opener.split(' ').first << ">\n" ; |
90 | } |
91 | }; |
92 | |
93 | void Writer::writeCode() { |
94 | // This loop (whitespace logic) is cribbed from TokenStream::Print. |
95 | bool FirstToken = true; |
96 | unsigned LastLine = -1; |
97 | StringRef LastText; |
98 | for (const auto &T : Stream.tokens()) { |
99 | StringRef Text = T.text(); |
100 | if (FirstToken) { |
101 | FirstToken = false; |
102 | } else if (T.Line == LastLine) { |
103 | if (LastText.data() + LastText.size() != Text.data()) |
104 | Out << ' '; |
105 | } else { |
106 | Out << " \n" ; // Extra space aids selection. |
107 | Out.indent(NumSpaces: T.Indent); |
108 | } |
109 | Out << "<span class='token' id='t" << Stream.index(T) << "'>" ; |
110 | llvm::printHTMLEscaped(String: Text, Out); |
111 | Out << "</span>" ; |
112 | LastLine = T.Line; |
113 | LastText = Text; |
114 | } |
115 | if (!FirstToken) |
116 | Out << '\n'; |
117 | } |
118 | |
119 | // Writes a JSON array of forest nodes. Items are e.g.: |
120 | // {kind:'sequence', symbol:'compound-stmt', children:[5,8,33], |
121 | // rule:'compound-stmt := ...'} {kind:'terminal', symbol:'VOID', token:'t52'} |
122 | // {kind:'ambiguous', symbol:'type-specifier', children:[3,100] selected:3} |
123 | // {kind:'opaque', symbol:'statement-seq', firstToken:'t5', lastToken:'t6'} |
124 | void Writer::writeForestJSON() { |
125 | // This is the flat array of nodes: the index into this array is the node ID. |
126 | std::vector<std::pair<const ForestNode *, /*End*/ Token::Index>> Sequence; |
127 | llvm::DenseMap<const ForestNode *, unsigned> Index; |
128 | auto AssignID = [&](const ForestNode *N, Token::Index End) -> unsigned { |
129 | auto R = Index.try_emplace(N, Sequence.size()); |
130 | if (R.second) |
131 | Sequence.push_back({N, End}); |
132 | return R.first->second; |
133 | }; |
134 | AssignID(&Root, Stream.tokens().size()); |
135 | auto TokenID = [](Token::Index I) { return ("t" + llvm::Twine(I)).str(); }; |
136 | |
137 | llvm::json::OStream Out(this->Out, 2); |
138 | Out.array([&] { |
139 | for (unsigned I = 0; I < Sequence.size(); ++I) { |
140 | const ForestNode *N = Sequence[I].first; |
141 | Token::Index End = Sequence[I].second; |
142 | Out.object([&] { |
143 | Out.attribute(Key: "symbol" , Contents: G.symbolName(N->symbol())); |
144 | switch (N->kind()) { |
145 | case ForestNode::Terminal: |
146 | Out.attribute(Key: "kind" , Contents: "terminal" ); |
147 | Out.attribute(Key: "token" , Contents: TokenID(N->startTokenIndex())); |
148 | break; |
149 | case ForestNode::Sequence: |
150 | Out.attribute(Key: "kind" , Contents: "sequence" ); |
151 | Out.attribute(Key: "rule" , Contents: G.dumpRule(N->rule())); |
152 | break; |
153 | case ForestNode::Ambiguous: |
154 | Out.attribute(Key: "kind" , Contents: "ambiguous" ); |
155 | Out.attribute(Key: "selected" , |
156 | Contents: AssignID(N->children()[Disambig.lookup(N)], End)); |
157 | break; |
158 | case ForestNode::Opaque: |
159 | Out.attribute(Key: "kind" , Contents: "opaque" ); |
160 | Out.attribute(Key: "firstToken" , Contents: TokenID(N->startTokenIndex())); |
161 | // [firstToken, lastToken] is a closed range. |
162 | // If empty, lastToken is omitted. |
163 | if (N->startTokenIndex() != End) |
164 | Out.attribute(Key: "lastToken" , Contents: TokenID(End - 1)); |
165 | break; |
166 | } |
167 | auto Children = N->children(); |
168 | if (!Children.empty()) |
169 | Out.attributeArray("children" , [&] { |
170 | for (unsigned I = 0; I < Children.size(); ++I) |
171 | Out.value(V: AssignID(Children[I], |
172 | I + 1 == Children.size() |
173 | ? End |
174 | : Children[I + 1]->startTokenIndex())); |
175 | }); |
176 | }); |
177 | } |
178 | }); |
179 | } |
180 | |
181 | } // namespace |
182 | |
183 | // We only accept the derived stream here. |
184 | // FIXME: allow the original stream instead? |
185 | void writeHTMLForest(llvm::raw_ostream &OS, const Grammar &G, |
186 | const ForestNode &Root, const Disambiguation &Disambig, |
187 | const TokenStream &Stream) { |
188 | Writer{.Out: OS, .G: G, .Root: Root, .Stream: Stream, .Disambig: Disambig}.write(); |
189 | } |
190 | |
191 | } // namespace pseudo |
192 | } // namespace clang |
193 | |