1//===-- HTMLLogger.cpp ----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the HTML logger. Given a directory dir/, we write
10// dir/0.html for the first analysis, etc.
11// These files contain a visualization that allows inspecting the CFG and the
12// state of the analysis at each point.
13// Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded
14// so each output file is self-contained.
15//
16// VIEWS
17//
18// The timeline and function view are always shown. These allow selecting basic
19// blocks, statements within them, and processing iterations (BBs are visited
20// multiple times when e.g. loops are involved).
21// These are written directly into the HTML body.
22//
23// There are also listings of particular basic blocks, and dumps of the state
24// at particular analysis points (i.e. BB2 iteration 3 statement 2).
25// These are only shown when the relevant BB/analysis point is *selected*.
26//
27// DATA AND TEMPLATES
28//
29// The HTML proper is mostly static.
30// The analysis data is in a JSON object HTMLLoggerData which is embedded as
31// a <script> in the <head>.
32// This gets rendered into DOM by a simple template processor which substitutes
33// the data into <template> tags embedded in the HTML. (see inflate() in JS).
34//
35// SELECTION
36//
37// This is the only real interactive mechanism.
38//
39// At any given time, there are several named selections, e.g.:
40// bb: B2 (basic block 0 is selected)
41// elt: B2.4 (statement 4 is selected)
42// iter: B2:1 (iteration 1 of the basic block is selected)
43// hover: B3 (hovering over basic block 3)
44//
45// The selection is updated by mouse events: hover by moving the mouse and
46// others by clicking. Elements that are click targets generally have attributes
47// (id or data-foo) that define what they should select.
48// See watchSelection() in JS for the exact logic.
49//
50// When the "bb" selection is set to "B2":
51// - sections <section data-selection="bb"> get shown
52// - templates under such sections get re-rendered
53// - elements with class/id "B2" get class "bb-select"
54//
55//===----------------------------------------------------------------------===//
56
57#include "clang/Analysis/FlowSensitive/AdornedCFG.h"
58#include "clang/Analysis/FlowSensitive/DebugSupport.h"
59#include "clang/Analysis/FlowSensitive/Logger.h"
60#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
61#include "clang/Analysis/FlowSensitive/Value.h"
62#include "clang/Basic/SourceManager.h"
63#include "clang/Lex/Lexer.h"
64#include "llvm/ADT/DenseMap.h"
65#include "llvm/ADT/ScopeExit.h"
66#include "llvm/Support/Error.h"
67#include "llvm/Support/FormatVariadic.h"
68#include "llvm/Support/JSON.h"
69#include "llvm/Support/Program.h"
70#include "llvm/Support/ScopedPrinter.h"
71#include "llvm/Support/raw_ostream.h"
72// Defines assets: HTMLLogger_{html_js,css}
73#include "HTMLLogger.inc"
74
75namespace clang::dataflow {
76namespace {
77
78// Render a graphviz graph specification to SVG using the `dot` tool.
79llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph);
80
81using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>;
82
83// Recursively dumps Values/StorageLocations as JSON
84class ModelDumper {
85public:
86 ModelDumper(llvm::json::OStream &JOS, const Environment &Env)
87 : JOS(JOS), Env(Env) {}
88
89 void dump(Value &V) {
90 JOS.attribute(Key: "value_id", Contents: llvm::to_string(&V));
91 if (!Visited.insert(&V).second)
92 return;
93
94 JOS.attribute(Key: "kind", Contents: debugString(Kind: V.getKind()));
95
96 switch (V.getKind()) {
97 case Value::Kind::Integer:
98 case Value::Kind::TopBool:
99 case Value::Kind::AtomicBool:
100 case Value::Kind::FormulaBool:
101 break;
102 case Value::Kind::Pointer:
103 JOS.attributeObject(
104 "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); });
105 break;
106 }
107
108 for (const auto& Prop : V.properties())
109 JOS.attributeObject(("p:" + Prop.first()).str(),
110 [&] { dump(*Prop.second); });
111
112 // Running the SAT solver is expensive, but knowing which booleans are
113 // guaranteed true/false here is valuable and hard to determine by hand.
114 if (auto *B = llvm::dyn_cast<BoolValue>(&V)) {
115 JOS.attribute(Key: "formula", Contents: llvm::to_string(B->formula()));
116 JOS.attribute(Key: "truth", Contents: Env.proves(B->formula()) ? "true"
117 : Env.proves(Env.arena().makeNot(Val: B->formula()))
118 ? "false"
119 : "unknown");
120 }
121 }
122 void dump(const StorageLocation &L) {
123 JOS.attribute(Key: "location", Contents: llvm::to_string(&L));
124 if (!Visited.insert(&L).second)
125 return;
126
127 JOS.attribute(Key: "type", Contents: L.getType().getAsString());
128 if (!L.getType()->isRecordType())
129 if (auto *V = Env.getValue(Loc: L))
130 dump(V&: *V);
131
132 if (auto *RLoc = dyn_cast<RecordStorageLocation>(&L)) {
133 for (const auto &Child : RLoc->children())
134 JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] {
135 if (Child.second)
136 if (Value *Val = Env.getValue(*Child.second))
137 dump(*Val);
138 });
139
140 for (const auto &SyntheticField : RLoc->synthetic_fields())
141 JOS.attributeObject(("sf:" + SyntheticField.first()).str(),
142 [&] { dump(*SyntheticField.second); });
143 }
144 }
145
146 llvm::DenseSet<const void*> Visited;
147 llvm::json::OStream &JOS;
148 const Environment &Env;
149};
150
151class HTMLLogger : public Logger {
152 struct Iteration {
153 const CFGBlock *Block;
154 unsigned Iter;
155 bool PostVisit;
156 bool Converged;
157 };
158
159 StreamFactory Streams;
160 std::unique_ptr<llvm::raw_ostream> OS;
161 std::string JSON;
162 llvm::raw_string_ostream JStringStream{JSON};
163 llvm::json::OStream JOS{JStringStream, /*Indent=*/2};
164
165 const AdornedCFG *ACFG;
166 // Timeline of iterations of CFG block visitation.
167 std::vector<Iteration> Iters;
168 // Indexes in `Iters` of the iterations for each block.
169 llvm::DenseMap<const CFGBlock *, llvm::SmallVector<size_t>> BlockIters;
170 // For a given block ID, did the block converge (on the last iteration)?
171 llvm::BitVector BlockConverged;
172 // The messages logged in the current context but not yet written.
173 std::string ContextLogs;
174 // The number of elements we have visited within the current CFG block.
175 unsigned ElementIndex;
176
177public:
178 explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {}
179 void beginAnalysis(const AdornedCFG &ACFG,
180 TypeErasedDataflowAnalysis &A) override {
181 OS = Streams();
182 this->ACFG = &ACFG;
183 *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first;
184
185 BlockConverged.resize(N: ACFG.getCFG().getNumBlockIDs());
186
187 const auto &D = ACFG.getDecl();
188 const auto &SM = A.getASTContext().getSourceManager();
189 *OS << "<title>";
190 if (const auto *ND = dyn_cast<NamedDecl>(&D))
191 *OS << ND->getNameAsString() << " at ";
192 *OS << SM.getFilename(D.getLocation()) << ":"
193 << SM.getSpellingLineNumber(D.getLocation());
194 *OS << "</title>\n";
195
196 *OS << "<style>" << HTMLLogger_css << "</style>\n";
197 *OS << "<script>" << HTMLLogger_js << "</script>\n";
198
199 writeCode();
200 JOS.objectBegin();
201 JOS.attributeBegin(Key: "states");
202 JOS.objectBegin();
203 }
204 // Between beginAnalysis() and endAnalysis() we write all the states for
205 // particular analysis points into the `timeline` array.
206 void endAnalysis() override {
207 JOS.objectEnd();
208 JOS.attributeEnd();
209
210 JOS.attributeArray("timeline", [&] {
211 for (const auto &E : Iters) {
212 JOS.object([&] {
213 JOS.attribute("block", blockID(E.Block->getBlockID()));
214 JOS.attribute("iter", E.Iter);
215 JOS.attribute("post_visit", E.PostVisit);
216 JOS.attribute("converged", E.Converged);
217 });
218 }
219 });
220 JOS.attributeObject("cfg", [&] {
221 for (const auto &E : BlockIters)
222 writeBlock(*E.first, E.second);
223 });
224
225 JOS.objectEnd();
226
227 writeCFG();
228
229 *OS << "<script>var HTMLLoggerData = \n";
230 *OS << JSON;
231 *OS << ";\n</script>\n";
232 *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second;
233 }
234
235 void enterBlock(const CFGBlock &B, bool PostVisit) override {
236 llvm::SmallVector<size_t> &BIter = BlockIters[&B];
237 unsigned IterNum = BIter.size() + 1;
238 BIter.push_back(Iters.size());
239 Iters.push_back({&B, IterNum, PostVisit, /*Converged=*/false});
240 if (!PostVisit)
241 BlockConverged[B.getBlockID()] = false;
242 ElementIndex = 0;
243 }
244 void enterElement(const CFGElement &E) override {
245 ++ElementIndex;
246 }
247
248 static std::string blockID(unsigned Block) {
249 return llvm::formatv("B{0}", Block);
250 }
251 static std::string eltID(unsigned Block, unsigned Element) {
252 return llvm::formatv("B{0}.{1}", Block, Element);
253 }
254 static std::string iterID(unsigned Block, unsigned Iter) {
255 return llvm::formatv("B{0}:{1}", Block, Iter);
256 }
257 static std::string elementIterID(unsigned Block, unsigned Iter,
258 unsigned Element) {
259 return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element);
260 }
261
262 // Write the analysis state associated with a particular analysis point.
263 // FIXME: this dump is fairly opaque. We should show:
264 // - values associated with the current Stmt
265 // - values associated with its children
266 // - meaningful names for values
267 // - which boolean values are implied true/false by the flow condition
268 void recordState(TypeErasedDataflowAnalysisState &State) override {
269 unsigned Block = Iters.back().Block->getBlockID();
270 unsigned Iter = Iters.back().Iter;
271 bool PostVisit = Iters.back().PostVisit;
272 JOS.attributeObject(elementIterID(Block, Iter, Element: ElementIndex), [&] {
273 JOS.attribute(Key: "block", Contents: blockID(Block));
274 JOS.attribute("iter", Iter);
275 JOS.attribute("post_visit", PostVisit);
276 JOS.attribute("element", ElementIndex);
277
278 // If this state immediately follows an Expr, show its built-in model.
279 if (ElementIndex > 0) {
280 auto S =
281 Iters.back().Block->Elements[ElementIndex - 1].getAs<CFGStmt>();
282 if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) {
283 if (E->isPRValue()) {
284 if (!E->getType()->isRecordType())
285 if (auto *V = State.Env.getValue(*E))
286 JOS.attributeObject(
287 "value", [&] { ModelDumper(JOS, State.Env).dump(*V); });
288 } else {
289 if (auto *Loc = State.Env.getStorageLocation(*E))
290 JOS.attributeObject(
291 "value", [&] { ModelDumper(JOS, State.Env).dump(*Loc); });
292 }
293 }
294 }
295 if (!ContextLogs.empty()) {
296 JOS.attribute(Key: "logs", Contents: ContextLogs);
297 ContextLogs.clear();
298 }
299 {
300 std::string BuiltinLattice;
301 llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice);
302 State.Env.dump(OS&: BuiltinLatticeS);
303 JOS.attribute(Key: "builtinLattice", Contents: BuiltinLattice);
304 }
305 });
306 }
307 void blockConverged() override {
308 Iters.back().Converged = true;
309 BlockConverged[Iters.back().Block->getBlockID()] = true;
310 }
311
312 void logText(llvm::StringRef S) override {
313 ContextLogs.append(S.begin(), S.end());
314 ContextLogs.push_back(c: '\n');
315 }
316
317private:
318 // Write the CFG block details.
319 // Currently this is just the list of elements in execution order.
320 // FIXME: an AST dump would be a useful view, too.
321 void writeBlock(const CFGBlock &B, llvm::ArrayRef<size_t> ItersForB) {
322 JOS.attributeObject(blockID(Block: B.getBlockID()), [&] {
323 JOS.attributeArray("iters", [&] {
324 for (size_t IterIdx : ItersForB) {
325 const Iteration &Iter = Iters[IterIdx];
326 JOS.object([&] {
327 JOS.attribute("iter", Iter.Iter);
328 JOS.attribute("post_visit", Iter.PostVisit);
329 JOS.attribute("converged", Iter.Converged);
330 });
331 }
332 });
333 JOS.attributeArray("elements", [&] {
334 for (const auto &Elt : B.Elements) {
335 std::string Dump;
336 llvm::raw_string_ostream DumpS(Dump);
337 Elt.dumpToStream(DumpS);
338 JOS.value(Dump);
339 }
340 });
341 });
342 }
343
344 // Write the code of function being examined.
345 // We want to overlay the code with <span>s that mark which BB particular
346 // tokens are associated with, and even which BB element (so that clicking
347 // can select the right element).
348 void writeCode() {
349 const auto &AST = ACFG->getDecl().getASTContext();
350 bool Invalid = false;
351
352 // Extract the source code from the original file.
353 // Pretty-printing from the AST would probably be nicer (no macros or
354 // indentation to worry about), but we need the boundaries of particular
355 // AST nodes and the printer doesn't provide this.
356 auto Range = clang::Lexer::makeFileCharRange(
357 Range: CharSourceRange::getTokenRange(R: ACFG->getDecl().getSourceRange()),
358 SM: AST.getSourceManager(), LangOpts: AST.getLangOpts());
359 if (Range.isInvalid())
360 return;
361 llvm::StringRef Code = clang::Lexer::getSourceText(
362 Range, SM: AST.getSourceManager(), LangOpts: AST.getLangOpts(), Invalid: &Invalid);
363 if (Invalid)
364 return;
365
366 // TokenInfo stores the BB and set of elements that a token is part of.
367 struct TokenInfo {
368 enum : unsigned { Missing = static_cast<unsigned>(-1) };
369
370 // The basic block this is part of.
371 // This is the BB of the stmt with the smallest containing range.
372 unsigned BB = Missing;
373 unsigned BBPriority = 0;
374 // The most specific stmt this is part of (smallest range).
375 unsigned Elt = Missing;
376 unsigned EltPriority = 0;
377 // All stmts this is part of.
378 SmallVector<unsigned> Elts;
379
380 // Mark this token as being part of BB.Elt.
381 // RangeLen is the character length of the element's range, used to
382 // distinguish inner vs outer statements.
383 // For example in `a==0`, token "a" is part of the stmts "a" and "a==0".
384 // However "a" has a smaller range, so is more specific. Clicking on the
385 // token "a" should select the stmt "a".
386 void assign(unsigned BB, unsigned Elt, unsigned RangeLen) {
387 // A worse BB (larger range) => ignore.
388 if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen)
389 return;
390 if (BB != this->BB) {
391 this->BB = BB;
392 Elts.clear();
393 BBPriority = RangeLen;
394 }
395 BBPriority = std::min(BBPriority, RangeLen);
396 Elts.push_back(Elt);
397 if (this->Elt == Missing || EltPriority > RangeLen)
398 this->Elt = Elt;
399 }
400 bool operator==(const TokenInfo &Other) const {
401 return std::tie(BB, Elt, Elts) ==
402 std::tie(Other.BB, Other.Elt, Other.Elts);
403 }
404 // Write the attributes for the <span> on this token.
405 void write(llvm::raw_ostream &OS) const {
406 OS << "class='c";
407 if (BB != Missing)
408 OS << " " << blockID(Block: BB);
409 for (unsigned Elt : Elts)
410 OS << " " << eltID(BB, Elt);
411 OS << "'";
412
413 if (Elt != Missing)
414 OS << " data-elt='" << eltID(Block: BB, Element: Elt) << "'";
415 if (BB != Missing)
416 OS << " data-bb='" << blockID(Block: BB) << "'";
417 }
418 };
419
420 // Construct one TokenInfo per character in a flat array.
421 // This is inefficient (chars in a token all have the same info) but simple.
422 std::vector<TokenInfo> State(Code.size());
423 for (const auto *Block : ACFG->getCFG()) {
424 unsigned EltIndex = 0;
425 for (const auto& Elt : *Block) {
426 ++EltIndex;
427 if (const auto S = Elt.getAs<CFGStmt>()) {
428 auto EltRange = clang::Lexer::makeFileCharRange(
429 CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()),
430 AST.getSourceManager(), AST.getLangOpts());
431 if (EltRange.isInvalid())
432 continue;
433 if (EltRange.getBegin() < Range.getBegin() ||
434 EltRange.getEnd() >= Range.getEnd() ||
435 EltRange.getEnd() < Range.getBegin() ||
436 EltRange.getEnd() >= Range.getEnd())
437 continue;
438 unsigned Off = EltRange.getBegin().getRawEncoding() -
439 Range.getBegin().getRawEncoding();
440 unsigned Len = EltRange.getEnd().getRawEncoding() -
441 EltRange.getBegin().getRawEncoding();
442 for (unsigned I = 0; I < Len; ++I)
443 State[Off + I].assign(Block->getBlockID(), EltIndex, Len);
444 }
445 }
446 }
447
448 // Finally, write the code with the correct <span>s.
449 unsigned Line =
450 AST.getSourceManager().getSpellingLineNumber(Loc: Range.getBegin());
451 *OS << "<template data-copy='code'>\n";
452 *OS << "<code class='filename'>";
453 llvm::printHTMLEscaped(
454 llvm::sys::path::filename(
455 AST.getSourceManager().getFilename(Range.getBegin())),
456 *OS);
457 *OS << "</code>";
458 *OS << "<code class='line' data-line='" << Line++ << "'>";
459 for (unsigned I = 0; I < Code.size(); ++I) {
460 // Don't actually write a <span> around each character, only break spans
461 // when the TokenInfo changes.
462 bool NeedOpen = I == 0 || !(State[I] == State[I-1]);
463 bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]);
464 if (NeedOpen) {
465 *OS << "<span ";
466 State[I].write(*OS);
467 *OS << ">";
468 }
469 if (Code[I] == '\n')
470 *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>";
471 else
472 llvm::printHTMLEscaped(Code.substr(I, 1), *OS);
473 if (NeedClose) *OS << "</span>";
474 }
475 *OS << "</code>\n";
476 *OS << "</template>";
477 }
478
479 // Write the CFG diagram, a graph of basic blocks.
480 // Laying out graphs is hard, so we construct a graphviz description and shell
481 // out to `dot` to turn it into an SVG.
482 void writeCFG() {
483 *OS << "<template data-copy='cfg'>\n";
484 if (auto SVG = renderSVG(buildCFGDot(ACFG->getCFG())))
485 *OS << *SVG;
486 else
487 *OS << "Can't draw CFG: " << toString(SVG.takeError());
488 *OS << "</template>\n";
489 }
490
491 // Produce a graphviz description of a CFG.
492 std::string buildCFGDot(const clang::CFG &CFG) {
493 std::string Graph;
494 llvm::raw_string_ostream GraphS(Graph);
495 // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses.
496 GraphS << R"(digraph {
497 tooltip=" "
498 node[class=bb, shape=square, fontname="sans-serif", tooltip=" "]
499 edge[tooltip = " "]
500)";
501 for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) {
502 std::string Name = blockID(Block: I);
503 // Rightwards arrow, vertical line
504 const char *ConvergenceMarker = (const char *)u8"\\n\u2192\u007c";
505 if (BlockConverged[I])
506 Name += ConvergenceMarker;
507 GraphS << " " << blockID(Block: I) << " [id=" << blockID(Block: I) << " label=\""
508 << Name << "\"]\n";
509 }
510 for (const auto *Block : CFG) {
511 for (const auto &Succ : Block->succs()) {
512 if (Succ.getReachableBlock())
513 GraphS << " " << blockID(Block->getBlockID()) << " -> "
514 << blockID(Succ.getReachableBlock()->getBlockID()) << "\n";
515 }
516 }
517 GraphS << "}\n";
518 return Graph;
519 }
520};
521
522// Nothing interesting here, just subprocess/temp-file plumbing.
523llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) {
524 std::string DotPath;
525 if (const auto *FromEnv = ::getenv(name: "GRAPHVIZ_DOT"))
526 DotPath = FromEnv;
527 else {
528 auto FromPath = llvm::sys::findProgramByName("dot");
529 if (!FromPath)
530 return llvm::createStringError(FromPath.getError(),
531 "'dot' not found on PATH");
532 DotPath = FromPath.get();
533 }
534
535 // Create input and output files for `dot` subprocess.
536 // (We create the output file as empty, to reserve the temp filename).
537 llvm::SmallString<256> Input, Output;
538 int InputFD;
539 if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD,
540 Input))
541 return llvm::createStringError(EC, "failed to create `dot` temp input");
542 llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph;
543 auto DeleteInput =
544 llvm::make_scope_exit([&] { llvm::sys::fs::remove(path: Input); });
545 if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output))
546 return llvm::createStringError(EC, "failed to create `dot` temp output");
547 auto DeleteOutput =
548 llvm::make_scope_exit([&] { llvm::sys::fs::remove(path: Output); });
549
550 std::vector<std::optional<llvm::StringRef>> Redirects = {
551 Input, Output,
552 /*stderr=*/std::nullopt};
553 std::string ErrMsg;
554 int Code = llvm::sys::ExecuteAndWait(
555 Program: DotPath, Args: {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects: Redirects,
556 /*SecondsToWait=*/0, /*MemoryLimit=*/0, ErrMsg: &ErrMsg);
557 if (!ErrMsg.empty())
558 return llvm::createStringError(llvm::inconvertibleErrorCode(),
559 "'dot' failed: " + ErrMsg);
560 if (Code != 0)
561 return llvm::createStringError(EC: llvm::inconvertibleErrorCode(),
562 S: "'dot' failed (" + llvm::Twine(Code) + ")");
563
564 auto Buf = llvm::MemoryBuffer::getFile(Filename: Output);
565 if (!Buf)
566 return llvm::createStringError(Buf.getError(), "Can't read `dot` output");
567
568 // Output has <?xml> prefix we don't want. Skip to <svg> tag.
569 llvm::StringRef Result = Buf.get()->getBuffer();
570 auto Pos = Result.find(Str: "<svg");
571 if (Pos == llvm::StringRef::npos)
572 return llvm::createStringError(EC: llvm::inconvertibleErrorCode(),
573 Msg: "Can't find <svg> tag in `dot` output");
574 return Result.substr(Start: Pos).str();
575}
576
577} // namespace
578
579std::unique_ptr<Logger>
580Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) {
581 return std::make_unique<HTMLLogger>(std::move(Streams));
582}
583
584} // namespace clang::dataflow
585

source code of clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp