1//===-- HTMLForest.cpp - browser-based parse forest explorer
2//---------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9//
10// The plain text forest node dump (clang-pseudo -print-forest) is useful but
11// hard to reconcile with the code being examined, especially when it is large.
12//
13// HTMLForest produces a self-contained HTML file containing both the code and
14// the forest representation, linking them interactively with javascript.
15// At any given time, a single parse tree is shown (ambiguities resolved).
16// The user can switch between ambiguous alternatives.
17//
//   +-------+---------------+
//   |       |        +-----+|
//   | #tree |  #code |#info||
//   |       |        +-----+|
//   |       |               |
//   +-------+---------------+
24//
25// #tree is a hierarchical view of the nodes (nested <ul>s), like -print-forest.
26// (It is a simple tree, not a DAG, because ambiguities have been resolved).
27// Like -print-forest, trivial sequences are collapsed (expression~IDENTIFIER).
28//
29// #code is the source code, annotated with <span>s marking the node ranges.
30// These spans are usually invisible (exception: ambiguities are marked), but
31// they are used to show and change the selection.
32//
33// #info is a floating box that shows details of the currently selected node:
34// - rule (for sequence nodes). Abbreviated rules are also shown.
35// - alternatives (for ambiguous nodes). The user can choose an alternative.
36// - ancestors. The parent nodes show how this node fits in translation-unit.
37//
38// There are two types of 'active' node:
39// - *highlight* is what the cursor is over, and is colored blue.
40// Near ancestors are shaded faintly (onion-skin) to show local structure.
41// - *selection* is set by clicking.
42// The #info box shows the selection, and selected nodes have a dashed ring.
43//
44//===----------------------------------------------------------------------===//
45
46#include "clang-pseudo/Disambiguate.h"
47#include "clang-pseudo/Forest.h"
48#include "clang-pseudo/grammar/Grammar.h"
49#include "llvm/ADT/StringExtras.h"
50#include "llvm/Support/JSON.h"
51#include "llvm/Support/raw_ostream.h"
52namespace clang {
53namespace pseudo {
54namespace {
55
56// Defines const char HTMLForest_css[] = "...contents of HTMLForest.css..."; etc
57#include "HTMLForestResources.inc"
58
59struct Writer {
60 llvm::raw_ostream &Out;
61 const Grammar &G;
62 const ForestNode &Root;
63 const TokenStream &Stream;
64 const Disambiguation &Disambig;
65
66 void write() {
67 Out << "<!doctype html>\n";
68 tag("html", [&] {
69 tag("head", [&] {
70 tag("title", [&] { Out << "HTMLForest"; });
71 tag("script", [&] { Out << HTMLForest_js; });
72 tag("style", [&] { Out << HTMLForest_css; });
73 tag("script", [&] {
74 Out << "var forest=";
75 writeForestJSON();
76 Out << ";";
77 });
78 tag("pre id='hidden-code' hidden", [&] { writeCode(); });
79 });
80 tag("body", [&] { Out << HTMLForest_html; });
81 });
82 }
83
84 void writeCode();
85 void writeForestJSON();
86 void tag(llvm::StringRef Opener, llvm::function_ref<void()> Body) {
87 Out << "<" << Opener << ">";
88 Body();
89 Out << "</" << Opener.split(' ').first << ">\n";
90 }
91};
92
93void Writer::writeCode() {
94 // This loop (whitespace logic) is cribbed from TokenStream::Print.
95 bool FirstToken = true;
96 unsigned LastLine = -1;
97 StringRef LastText;
98 for (const auto &T : Stream.tokens()) {
99 StringRef Text = T.text();
100 if (FirstToken) {
101 FirstToken = false;
102 } else if (T.Line == LastLine) {
103 if (LastText.data() + LastText.size() != Text.data())
104 Out << ' ';
105 } else {
106 Out << " \n"; // Extra space aids selection.
107 Out.indent(NumSpaces: T.Indent);
108 }
109 Out << "<span class='token' id='t" << Stream.index(T) << "'>";
110 llvm::printHTMLEscaped(String: Text, Out);
111 Out << "</span>";
112 LastLine = T.Line;
113 LastText = Text;
114 }
115 if (!FirstToken)
116 Out << '\n';
117}
118
119// Writes a JSON array of forest nodes. Items are e.g.:
120// {kind:'sequence', symbol:'compound-stmt', children:[5,8,33],
121// rule:'compound-stmt := ...'} {kind:'terminal', symbol:'VOID', token:'t52'}
122// {kind:'ambiguous', symbol:'type-specifier', children:[3,100] selected:3}
123// {kind:'opaque', symbol:'statement-seq', firstToken:'t5', lastToken:'t6'}
124void Writer::writeForestJSON() {
125 // This is the flat array of nodes: the index into this array is the node ID.
126 std::vector<std::pair<const ForestNode *, /*End*/ Token::Index>> Sequence;
127 llvm::DenseMap<const ForestNode *, unsigned> Index;
128 auto AssignID = [&](const ForestNode *N, Token::Index End) -> unsigned {
129 auto R = Index.try_emplace(N, Sequence.size());
130 if (R.second)
131 Sequence.push_back({N, End});
132 return R.first->second;
133 };
134 AssignID(&Root, Stream.tokens().size());
135 auto TokenID = [](Token::Index I) { return ("t" + llvm::Twine(I)).str(); };
136
137 llvm::json::OStream Out(this->Out, 2);
138 Out.array([&] {
139 for (unsigned I = 0; I < Sequence.size(); ++I) {
140 const ForestNode *N = Sequence[I].first;
141 Token::Index End = Sequence[I].second;
142 Out.object([&] {
143 Out.attribute(Key: "symbol", Contents: G.symbolName(N->symbol()));
144 switch (N->kind()) {
145 case ForestNode::Terminal:
146 Out.attribute(Key: "kind", Contents: "terminal");
147 Out.attribute(Key: "token", Contents: TokenID(N->startTokenIndex()));
148 break;
149 case ForestNode::Sequence:
150 Out.attribute(Key: "kind", Contents: "sequence");
151 Out.attribute(Key: "rule", Contents: G.dumpRule(N->rule()));
152 break;
153 case ForestNode::Ambiguous:
154 Out.attribute(Key: "kind", Contents: "ambiguous");
155 Out.attribute(Key: "selected",
156 Contents: AssignID(N->children()[Disambig.lookup(N)], End));
157 break;
158 case ForestNode::Opaque:
159 Out.attribute(Key: "kind", Contents: "opaque");
160 Out.attribute(Key: "firstToken", Contents: TokenID(N->startTokenIndex()));
161 // [firstToken, lastToken] is a closed range.
162 // If empty, lastToken is omitted.
163 if (N->startTokenIndex() != End)
164 Out.attribute(Key: "lastToken", Contents: TokenID(End - 1));
165 break;
166 }
167 auto Children = N->children();
168 if (!Children.empty())
169 Out.attributeArray("children", [&] {
170 for (unsigned I = 0; I < Children.size(); ++I)
171 Out.value(V: AssignID(Children[I],
172 I + 1 == Children.size()
173 ? End
174 : Children[I + 1]->startTokenIndex()));
175 });
176 });
177 }
178 });
179}
180
181} // namespace
182
183// We only accept the derived stream here.
184// FIXME: allow the original stream instead?
185void writeHTMLForest(llvm::raw_ostream &OS, const Grammar &G,
186 const ForestNode &Root, const Disambiguation &Disambig,
187 const TokenStream &Stream) {
188 Writer{.Out: OS, .G: G, .Root: Root, .Stream: Stream, .Disambig: Disambig}.write();
189}
190
191} // namespace pseudo
192} // namespace clang
193

// Source: clang-tools-extra/pseudo/tool/HTMLForest.cpp