1 | //== HTMLRewrite.cpp - Translate source code into prettified HTML --*- C++ -*-// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the HTMLRewriter class, which is used to translate the |
10 | // text of a source file into prettified HTML. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "clang/Rewrite/Core/HTMLRewrite.h" |
15 | #include "clang/Basic/SourceManager.h" |
16 | #include "clang/Lex/Preprocessor.h" |
17 | #include "clang/Lex/TokenConcatenation.h" |
18 | #include "clang/Rewrite/Core/Rewriter.h" |
19 | #include "llvm/ADT/RewriteBuffer.h" |
20 | #include "llvm/ADT/SmallString.h" |
21 | #include "llvm/Support/ErrorHandling.h" |
22 | #include "llvm/Support/MemoryBuffer.h" |
23 | #include "llvm/Support/raw_ostream.h" |
24 | #include <memory> |
25 | |
26 | using namespace clang; |
27 | using namespace llvm; |
28 | using namespace html; |
29 | |
30 | /// HighlightRange - Highlight a range in the source code with the specified |
31 | /// start/end tags. B/E must be in the same file. This ensures that |
32 | /// start/end tags are placed at the start/end of each line if the range is |
33 | /// multiline. |
34 | void html::HighlightRange(Rewriter &R, SourceLocation B, SourceLocation E, |
35 | const char *StartTag, const char *EndTag, |
36 | bool IsTokenRange) { |
37 | SourceManager &SM = R.getSourceMgr(); |
38 | B = SM.getExpansionLoc(Loc: B); |
39 | E = SM.getExpansionLoc(Loc: E); |
40 | FileID FID = SM.getFileID(SpellingLoc: B); |
41 | assert(SM.getFileID(E) == FID && "B/E not in the same file!" ); |
42 | |
43 | unsigned BOffset = SM.getFileOffset(SpellingLoc: B); |
44 | unsigned EOffset = SM.getFileOffset(SpellingLoc: E); |
45 | |
46 | // Include the whole end token in the range. |
47 | if (IsTokenRange) |
48 | EOffset += Lexer::MeasureTokenLength(Loc: E, SM: R.getSourceMgr(), LangOpts: R.getLangOpts()); |
49 | |
50 | bool Invalid = false; |
51 | const char *BufferStart = SM.getBufferData(FID, Invalid: &Invalid).data(); |
52 | if (Invalid) |
53 | return; |
54 | |
55 | HighlightRange(RB&: R.getEditBuffer(FID), B: BOffset, E: EOffset, |
56 | BufferStart, StartTag, EndTag); |
57 | } |
58 | |
59 | /// HighlightRange - This is the same as the above method, but takes |
60 | /// decomposed file locations. |
61 | void html::HighlightRange(RewriteBuffer &RB, unsigned B, unsigned E, |
62 | const char *BufferStart, |
63 | const char *StartTag, const char *EndTag) { |
64 | // Insert the tag at the absolute start/end of the range. |
65 | RB.InsertTextAfter(OrigOffset: B, Str: StartTag); |
66 | RB.InsertTextBefore(OrigOffset: E, Str: EndTag); |
67 | |
68 | // Scan the range to see if there is a \r or \n. If so, and if the line is |
69 | // not blank, insert tags on that line as well. |
70 | bool HadOpenTag = true; |
71 | |
72 | unsigned LastNonWhiteSpace = B; |
73 | for (unsigned i = B; i != E; ++i) { |
74 | switch (BufferStart[i]) { |
75 | case '\r': |
76 | case '\n': |
77 | // Okay, we found a newline in the range. If we have an open tag, we need |
78 | // to insert a close tag at the first non-whitespace before the newline. |
79 | if (HadOpenTag) |
80 | RB.InsertTextBefore(OrigOffset: LastNonWhiteSpace+1, Str: EndTag); |
81 | |
82 | // Instead of inserting an open tag immediately after the newline, we |
83 | // wait until we see a non-whitespace character. This prevents us from |
84 | // inserting tags around blank lines, and also allows the open tag to |
85 | // be put *after* whitespace on a non-blank line. |
86 | HadOpenTag = false; |
87 | break; |
88 | case '\0': |
89 | case ' ': |
90 | case '\t': |
91 | case '\f': |
92 | case '\v': |
93 | // Ignore whitespace. |
94 | break; |
95 | |
96 | default: |
97 | // If there is no tag open, do it now. |
98 | if (!HadOpenTag) { |
99 | RB.InsertTextAfter(OrigOffset: i, Str: StartTag); |
100 | HadOpenTag = true; |
101 | } |
102 | |
103 | // Remember this character. |
104 | LastNonWhiteSpace = i; |
105 | break; |
106 | } |
107 | } |
108 | } |
109 | |
110 | namespace clang::html { |
111 | struct RelexRewriteCache { |
112 | // These structs mimic input arguments of HighlightRange(). |
113 | struct Highlight { |
114 | SourceLocation B, E; |
115 | std::string StartTag, EndTag; |
116 | bool IsTokenRange; |
117 | }; |
118 | struct RawHighlight { |
119 | unsigned B, E; |
120 | std::string StartTag, EndTag; |
121 | }; |
122 | |
123 | // SmallVector isn't appropriate because these vectors are almost never small. |
124 | using HighlightList = std::vector<Highlight>; |
125 | using RawHighlightList = std::vector<RawHighlight>; |
126 | |
127 | DenseMap<FileID, RawHighlightList> SyntaxHighlights; |
128 | DenseMap<FileID, HighlightList> MacroHighlights; |
129 | }; |
130 | } // namespace clang::html |
131 | |
132 | html::RelexRewriteCacheRef html::instantiateRelexRewriteCache() { |
133 | return std::make_shared<RelexRewriteCache>(); |
134 | } |
135 | |
136 | void html::EscapeText(Rewriter &R, FileID FID, |
137 | bool EscapeSpaces, bool ReplaceTabs) { |
138 | |
139 | llvm::MemoryBufferRef Buf = R.getSourceMgr().getBufferOrFake(FID); |
140 | const char* C = Buf.getBufferStart(); |
141 | const char* FileEnd = Buf.getBufferEnd(); |
142 | |
143 | assert (C <= FileEnd); |
144 | |
145 | RewriteBuffer &RB = R.getEditBuffer(FID); |
146 | |
147 | unsigned ColNo = 0; |
148 | for (unsigned FilePos = 0; C != FileEnd ; ++C, ++FilePos) { |
149 | switch (*C) { |
150 | default: ++ColNo; break; |
151 | case '\n': |
152 | case '\r': |
153 | ColNo = 0; |
154 | break; |
155 | |
156 | case ' ': |
157 | if (EscapeSpaces) |
158 | RB.ReplaceText(OrigOffset: FilePos, OrigLength: 1, NewStr: " " ); |
159 | ++ColNo; |
160 | break; |
161 | case '\f': |
162 | RB.ReplaceText(OrigOffset: FilePos, OrigLength: 1, NewStr: "<hr>" ); |
163 | ColNo = 0; |
164 | break; |
165 | |
166 | case '\t': { |
167 | if (!ReplaceTabs) |
168 | break; |
169 | unsigned NumSpaces = 8-(ColNo&7); |
170 | if (EscapeSpaces) |
171 | RB.ReplaceText(OrigOffset: FilePos, OrigLength: 1, |
172 | NewStr: StringRef(" " |
173 | " " , 6*NumSpaces)); |
174 | else |
175 | RB.ReplaceText(OrigOffset: FilePos, OrigLength: 1, NewStr: StringRef(" " , NumSpaces)); |
176 | ColNo += NumSpaces; |
177 | break; |
178 | } |
179 | case '<': |
180 | RB.ReplaceText(OrigOffset: FilePos, OrigLength: 1, NewStr: "<" ); |
181 | ++ColNo; |
182 | break; |
183 | |
184 | case '>': |
185 | RB.ReplaceText(OrigOffset: FilePos, OrigLength: 1, NewStr: ">" ); |
186 | ++ColNo; |
187 | break; |
188 | |
189 | case '&': |
190 | RB.ReplaceText(OrigOffset: FilePos, OrigLength: 1, NewStr: "&" ); |
191 | ++ColNo; |
192 | break; |
193 | } |
194 | } |
195 | } |
196 | |
197 | std::string html::EscapeText(StringRef s, bool EscapeSpaces, bool ReplaceTabs) { |
198 | |
199 | unsigned len = s.size(); |
200 | std::string Str; |
201 | llvm::raw_string_ostream os(Str); |
202 | |
203 | for (unsigned i = 0 ; i < len; ++i) { |
204 | |
205 | char c = s[i]; |
206 | switch (c) { |
207 | default: |
208 | os << c; break; |
209 | |
210 | case ' ': |
211 | if (EscapeSpaces) os << " " ; |
212 | else os << ' '; |
213 | break; |
214 | |
215 | case '\t': |
216 | if (ReplaceTabs) { |
217 | if (EscapeSpaces) |
218 | for (unsigned i = 0; i < 4; ++i) |
219 | os << " " ; |
220 | else |
221 | for (unsigned i = 0; i < 4; ++i) |
222 | os << " " ; |
223 | } |
224 | else |
225 | os << c; |
226 | |
227 | break; |
228 | |
229 | case '<': os << "<" ; break; |
230 | case '>': os << ">" ; break; |
231 | case '&': os << "&" ; break; |
232 | } |
233 | } |
234 | |
235 | return Str; |
236 | } |
237 | |
238 | static void AddLineNumber(RewriteBuffer &RB, unsigned LineNo, |
239 | unsigned B, unsigned E) { |
240 | SmallString<256> Str; |
241 | llvm::raw_svector_ostream OS(Str); |
242 | |
243 | OS << "<tr class=\"codeline\" data-linenumber=\"" << LineNo << "\">" |
244 | << "<td class=\"num\" id=\"LN" << LineNo << "\">" << LineNo |
245 | << "</td><td class=\"line\">" ; |
246 | |
247 | if (B == E) { // Handle empty lines. |
248 | OS << " </td></tr>" ; |
249 | RB.InsertTextBefore(OrigOffset: B, Str: OS.str()); |
250 | } else { |
251 | RB.InsertTextBefore(OrigOffset: B, Str: OS.str()); |
252 | RB.InsertTextBefore(OrigOffset: E, Str: "</td></tr>" ); |
253 | } |
254 | } |
255 | |
256 | void html::AddLineNumbers(Rewriter& R, FileID FID) { |
257 | |
258 | llvm::MemoryBufferRef Buf = R.getSourceMgr().getBufferOrFake(FID); |
259 | const char* FileBeg = Buf.getBufferStart(); |
260 | const char* FileEnd = Buf.getBufferEnd(); |
261 | const char* C = FileBeg; |
262 | RewriteBuffer &RB = R.getEditBuffer(FID); |
263 | |
264 | assert (C <= FileEnd); |
265 | |
266 | unsigned LineNo = 0; |
267 | unsigned FilePos = 0; |
268 | |
269 | while (C != FileEnd) { |
270 | |
271 | ++LineNo; |
272 | unsigned LineStartPos = FilePos; |
273 | unsigned LineEndPos = FileEnd - FileBeg; |
274 | |
275 | assert (FilePos <= LineEndPos); |
276 | assert (C < FileEnd); |
277 | |
278 | // Scan until the newline (or end-of-file). |
279 | |
280 | while (C != FileEnd) { |
281 | char c = *C; |
282 | ++C; |
283 | |
284 | if (c == '\n') { |
285 | LineEndPos = FilePos++; |
286 | break; |
287 | } |
288 | |
289 | ++FilePos; |
290 | } |
291 | |
292 | AddLineNumber(RB, LineNo, B: LineStartPos, E: LineEndPos); |
293 | } |
294 | |
295 | // Add one big table tag that surrounds all of the code. |
296 | std::string s; |
297 | llvm::raw_string_ostream os(s); |
298 | os << "<table class=\"code\" data-fileid=\"" << FID.getHashValue() << "\">\n" ; |
299 | RB.InsertTextBefore(OrigOffset: 0, Str: os.str()); |
300 | RB.InsertTextAfter(OrigOffset: FileEnd - FileBeg, Str: "</table>" ); |
301 | } |
302 | |
303 | void html::(Rewriter &R, FileID FID, |
304 | StringRef title) { |
305 | |
306 | llvm::MemoryBufferRef Buf = R.getSourceMgr().getBufferOrFake(FID); |
307 | const char* FileStart = Buf.getBufferStart(); |
308 | const char* FileEnd = Buf.getBufferEnd(); |
309 | |
310 | SourceLocation StartLoc = R.getSourceMgr().getLocForStartOfFile(FID); |
311 | SourceLocation EndLoc = StartLoc.getLocWithOffset(Offset: FileEnd-FileStart); |
312 | |
313 | std::string s; |
314 | llvm::raw_string_ostream os(s); |
315 | os << "<!doctype html>\n" // Use HTML 5 doctype |
316 | "<html>\n<head>\n" ; |
317 | |
318 | if (!title.empty()) |
319 | os << "<title>" << html::EscapeText(s: title) << "</title>\n" ; |
320 | |
321 | os << R"<<<( |
322 | <style type="text/css"> |
323 | body { color:#000000; background-color:#ffffff } |
324 | body { font-family:Helvetica, sans-serif; font-size:10pt } |
325 | h1 { font-size:14pt } |
326 | .FileName { margin-top: 5px; margin-bottom: 5px; display: inline; } |
327 | .FileNav { margin-left: 5px; margin-right: 5px; display: inline; } |
328 | .FileNav a { text-decoration:none; font-size: larger; } |
329 | .divider { margin-top: 30px; margin-bottom: 30px; height: 15px; } |
330 | .divider { background-color: gray; } |
331 | .code { border-collapse:collapse; width:100%; } |
332 | .code { font-family: "Monospace", monospace; font-size:10pt } |
333 | .code { line-height: 1.2em } |
334 | .comment { color: green; font-style: oblique } |
335 | .keyword { color: blue } |
336 | .string_literal { color: red } |
337 | .directive { color: darkmagenta } |
338 | |
339 | /* Macros and variables could have pop-up notes hidden by default. |
340 | - Macro pop-up: expansion of the macro |
341 | - Variable pop-up: value (table) of the variable */ |
342 | .macro_popup, .variable_popup { display: none; } |
343 | |
344 | /* Pop-up appears on mouse-hover event. */ |
345 | .macro:hover .macro_popup, .variable:hover .variable_popup { |
346 | display: block; |
347 | padding: 2px; |
348 | -webkit-border-radius:5px; |
349 | -webkit-box-shadow:1px 1px 7px #000; |
350 | border-radius:5px; |
351 | box-shadow:1px 1px 7px #000; |
352 | position: absolute; |
353 | top: -1em; |
354 | left:10em; |
355 | z-index: 1 |
356 | } |
357 | |
358 | .macro_popup { |
359 | border: 2px solid red; |
360 | background-color:#FFF0F0; |
361 | font-weight: normal; |
362 | } |
363 | |
364 | .variable_popup { |
365 | border: 2px solid blue; |
366 | background-color:#F0F0FF; |
367 | font-weight: bold; |
368 | font-family: Helvetica, sans-serif; |
369 | font-size: 9pt; |
370 | } |
371 | |
372 | /* Pop-up notes needs a relative position as a base where they pops up. */ |
373 | .macro, .variable { |
374 | background-color: PaleGoldenRod; |
375 | position: relative; |
376 | } |
377 | .macro { color: DarkMagenta; } |
378 | |
379 | #tooltiphint { |
380 | position: fixed; |
381 | width: 50em; |
382 | margin-left: -25em; |
383 | left: 50%; |
384 | padding: 10px; |
385 | border: 1px solid #b0b0b0; |
386 | border-radius: 2px; |
387 | box-shadow: 1px 1px 7px black; |
388 | background-color: #c0c0c0; |
389 | z-index: 2; |
390 | } |
391 | |
392 | .num { width:2.5em; padding-right:2ex; background-color:#eeeeee } |
393 | .num { text-align:right; font-size:8pt } |
394 | .num { color:#444444 } |
395 | .line { padding-left: 1ex; border-left: 3px solid #ccc } |
396 | .line { white-space: pre } |
397 | .msg { -webkit-box-shadow:1px 1px 7px #000 } |
398 | .msg { box-shadow:1px 1px 7px #000 } |
399 | .msg { -webkit-border-radius:5px } |
400 | .msg { border-radius:5px } |
401 | .msg { font-family:Helvetica, sans-serif; font-size:8pt } |
402 | .msg { float:left } |
403 | .msg { position:relative } |
404 | .msg { padding:0.25em 1ex 0.25em 1ex } |
405 | .msg { margin-top:10px; margin-bottom:10px } |
406 | .msg { font-weight:bold } |
407 | .msg { max-width:60em; word-wrap: break-word; white-space: pre-wrap } |
408 | .msgT { padding:0x; spacing:0x } |
409 | .msgEvent { background-color:#fff8b4; color:#000000 } |
410 | .msgControl { background-color:#bbbbbb; color:#000000 } |
411 | .msgNote { background-color:#ddeeff; color:#000000 } |
412 | .mrange { background-color:#dfddf3 } |
413 | .mrange { border-bottom:1px solid #6F9DBE } |
414 | .PathIndex { font-weight: bold; padding:0px 5px; margin-right:5px; } |
415 | .PathIndex { -webkit-border-radius:8px } |
416 | .PathIndex { border-radius:8px } |
417 | .PathIndexEvent { background-color:#bfba87 } |
418 | .PathIndexControl { background-color:#8c8c8c } |
419 | .PathIndexPopUp { background-color: #879abc; } |
420 | .PathNav a { text-decoration:none; font-size: larger } |
421 | .CodeInsertionHint { font-weight: bold; background-color: #10dd10 } |
422 | .CodeRemovalHint { background-color:#de1010 } |
423 | .CodeRemovalHint { border-bottom:1px solid #6F9DBE } |
424 | .msg.selected{ background-color:orange !important; } |
425 | |
426 | table.simpletable { |
427 | padding: 5px; |
428 | font-size:12pt; |
429 | margin:20px; |
430 | border-collapse: collapse; border-spacing: 0px; |
431 | } |
432 | td.rowname { |
433 | text-align: right; |
434 | vertical-align: top; |
435 | font-weight: bold; |
436 | color:#444444; |
437 | padding-right:2ex; |
438 | } |
439 | |
440 | /* Hidden text. */ |
441 | input.spoilerhider + label { |
442 | cursor: pointer; |
443 | text-decoration: underline; |
444 | display: block; |
445 | } |
446 | input.spoilerhider { |
447 | display: none; |
448 | } |
449 | input.spoilerhider ~ .spoiler { |
450 | overflow: hidden; |
451 | margin: 10px auto 0; |
452 | height: 0; |
453 | opacity: 0; |
454 | } |
455 | input.spoilerhider:checked + label + .spoiler{ |
456 | height: auto; |
457 | opacity: 1; |
458 | } |
459 | </style> |
460 | </head> |
461 | <body>)<<<" ; |
462 | |
463 | // Generate header |
464 | R.InsertTextBefore(Loc: StartLoc, Str: os.str()); |
465 | // Generate footer |
466 | |
467 | R.InsertTextAfter(Loc: EndLoc, Str: "</body></html>\n" ); |
468 | } |
469 | |
470 | /// SyntaxHighlight - Relex the specified FileID and annotate the HTML with |
471 | /// information about keywords, macro expansions etc. This uses the macro |
472 | /// table state from the end of the file, so it won't be perfectly perfect, |
473 | /// but it will be reasonably close. |
474 | static void SyntaxHighlightImpl( |
475 | Rewriter &R, FileID FID, const Preprocessor &PP, |
476 | llvm::function_ref<void(RewriteBuffer &, unsigned, unsigned, const char *, |
477 | const char *, const char *)> |
478 | HighlightRangeCallback) { |
479 | |
480 | RewriteBuffer &RB = R.getEditBuffer(FID); |
481 | const SourceManager &SM = PP.getSourceManager(); |
482 | llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID); |
483 | const char *BufferStart = FromFile.getBuffer().data(); |
484 | |
485 | Lexer L(FID, FromFile, SM, PP.getLangOpts()); |
486 | |
487 | // Inform the preprocessor that we want to retain comments as tokens, so we |
488 | // can highlight them. |
489 | L.SetCommentRetentionState(true); |
490 | |
491 | // Lex all the tokens in raw mode, to avoid entering #includes or expanding |
492 | // macros. |
493 | Token Tok; |
494 | L.LexFromRawLexer(Result&: Tok); |
495 | |
496 | while (Tok.isNot(K: tok::eof)) { |
497 | // Since we are lexing unexpanded tokens, all tokens are from the main |
498 | // FileID. |
499 | unsigned TokOffs = SM.getFileOffset(SpellingLoc: Tok.getLocation()); |
500 | unsigned TokLen = Tok.getLength(); |
501 | switch (Tok.getKind()) { |
502 | default: break; |
503 | case tok::identifier: |
504 | llvm_unreachable("tok::identifier in raw lexing mode!" ); |
505 | case tok::raw_identifier: { |
506 | // Fill in Result.IdentifierInfo and update the token kind, |
507 | // looking up the identifier in the identifier table. |
508 | PP.LookUpIdentifierInfo(Identifier&: Tok); |
509 | |
510 | // If this is a pp-identifier, for a keyword, highlight it as such. |
511 | if (Tok.isNot(K: tok::identifier)) |
512 | HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart, |
513 | "<span class='keyword'>" , "</span>" ); |
514 | break; |
515 | } |
516 | case tok::comment: |
517 | HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart, |
518 | "<span class='comment'>" , "</span>" ); |
519 | break; |
520 | case tok::utf8_string_literal: |
521 | // Chop off the u part of u8 prefix |
522 | ++TokOffs; |
523 | --TokLen; |
524 | // FALL THROUGH to chop the 8 |
525 | [[fallthrough]]; |
526 | case tok::wide_string_literal: |
527 | case tok::utf16_string_literal: |
528 | case tok::utf32_string_literal: |
529 | // Chop off the L, u, U or 8 prefix |
530 | ++TokOffs; |
531 | --TokLen; |
532 | [[fallthrough]]; |
533 | case tok::string_literal: |
534 | // FIXME: Exclude the optional ud-suffix from the highlighted range. |
535 | HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart, |
536 | "<span class='string_literal'>" , "</span>" ); |
537 | break; |
538 | case tok::hash: { |
539 | // If this is a preprocessor directive, all tokens to end of line are too. |
540 | if (!Tok.isAtStartOfLine()) |
541 | break; |
542 | |
543 | // Eat all of the tokens until we get to the next one at the start of |
544 | // line. |
545 | unsigned TokEnd = TokOffs+TokLen; |
546 | L.LexFromRawLexer(Result&: Tok); |
547 | while (!Tok.isAtStartOfLine() && Tok.isNot(K: tok::eof)) { |
548 | TokEnd = SM.getFileOffset(SpellingLoc: Tok.getLocation())+Tok.getLength(); |
549 | L.LexFromRawLexer(Result&: Tok); |
550 | } |
551 | |
552 | // Find end of line. This is a hack. |
553 | HighlightRangeCallback(RB, TokOffs, TokEnd, BufferStart, |
554 | "<span class='directive'>" , "</span>" ); |
555 | |
556 | // Don't skip the next token. |
557 | continue; |
558 | } |
559 | } |
560 | |
561 | L.LexFromRawLexer(Result&: Tok); |
562 | } |
563 | } |
564 | void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP, |
565 | RelexRewriteCacheRef Cache) { |
566 | RewriteBuffer &RB = R.getEditBuffer(FID); |
567 | const SourceManager &SM = PP.getSourceManager(); |
568 | llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID); |
569 | const char *BufferStart = FromFile.getBuffer().data(); |
570 | |
571 | if (Cache) { |
572 | auto CacheIt = Cache->SyntaxHighlights.find(Val: FID); |
573 | if (CacheIt != Cache->SyntaxHighlights.end()) { |
574 | for (const RelexRewriteCache::RawHighlight &H : CacheIt->second) { |
575 | HighlightRange(RB, B: H.B, E: H.E, BufferStart, StartTag: H.StartTag.data(), |
576 | EndTag: H.EndTag.data()); |
577 | } |
578 | return; |
579 | } |
580 | } |
581 | |
582 | // "Every time you would call HighlightRange, cache the inputs as well." |
583 | auto HighlightRangeCallback = [&](RewriteBuffer &RB, unsigned B, unsigned E, |
584 | const char *BufferStart, |
585 | const char *StartTag, const char *EndTag) { |
586 | HighlightRange(RB, B, E, BufferStart, StartTag, EndTag); |
587 | |
588 | if (Cache) |
589 | Cache->SyntaxHighlights[FID].push_back(x: {.B: B, .E: E, .StartTag: StartTag, .EndTag: EndTag}); |
590 | }; |
591 | |
592 | SyntaxHighlightImpl(R, FID, PP, HighlightRangeCallback); |
593 | } |
594 | |
595 | static void HighlightMacrosImpl( |
596 | Rewriter &R, FileID FID, const Preprocessor &PP, |
597 | llvm::function_ref<void(Rewriter &, SourceLocation, SourceLocation, |
598 | const char *, const char *, bool)> |
599 | HighlightRangeCallback) { |
600 | |
601 | // Re-lex the raw token stream into a token buffer. |
602 | const SourceManager &SM = PP.getSourceManager(); |
603 | std::vector<Token> TokenStream; |
604 | |
605 | llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID); |
606 | Lexer L(FID, FromFile, SM, PP.getLangOpts()); |
607 | |
608 | // Lex all the tokens in raw mode, to avoid entering #includes or expanding |
609 | // macros. |
610 | while (true) { |
611 | Token Tok; |
612 | L.LexFromRawLexer(Result&: Tok); |
613 | |
614 | // If this is a # at the start of a line, discard it from the token stream. |
615 | // We don't want the re-preprocess step to see #defines, #includes or other |
616 | // preprocessor directives. |
617 | if (Tok.is(K: tok::hash) && Tok.isAtStartOfLine()) |
618 | continue; |
619 | |
620 | // If this is a ## token, change its kind to unknown so that repreprocessing |
621 | // it will not produce an error. |
622 | if (Tok.is(K: tok::hashhash)) |
623 | Tok.setKind(tok::unknown); |
624 | |
625 | // If this raw token is an identifier, the raw lexer won't have looked up |
626 | // the corresponding identifier info for it. Do this now so that it will be |
627 | // macro expanded when we re-preprocess it. |
628 | if (Tok.is(K: tok::raw_identifier)) |
629 | PP.LookUpIdentifierInfo(Identifier&: Tok); |
630 | |
631 | TokenStream.push_back(x: Tok); |
632 | |
633 | if (Tok.is(K: tok::eof)) break; |
634 | } |
635 | |
636 | // Temporarily change the diagnostics object so that we ignore any generated |
637 | // diagnostics from this pass. |
638 | DiagnosticsEngine TmpDiags(PP.getDiagnostics().getDiagnosticIDs(), |
639 | PP.getDiagnostics().getDiagnosticOptions(), |
640 | new IgnoringDiagConsumer); |
641 | |
642 | // FIXME: This is a huge hack; we reuse the input preprocessor because we want |
643 | // its state, but we aren't actually changing it (we hope). This should really |
644 | // construct a copy of the preprocessor. |
645 | Preprocessor &TmpPP = const_cast<Preprocessor&>(PP); |
646 | DiagnosticsEngine *OldDiags = &TmpPP.getDiagnostics(); |
647 | TmpPP.setDiagnostics(TmpDiags); |
648 | |
649 | // Inform the preprocessor that we don't want comments. |
650 | TmpPP.SetCommentRetentionState(KeepComments: false, KeepMacroComments: false); |
651 | |
652 | // We don't want pragmas either. Although we filtered out #pragma, removing |
653 | // _Pragma and __pragma is much harder. |
654 | bool PragmasPreviouslyEnabled = TmpPP.getPragmasEnabled(); |
655 | TmpPP.setPragmasEnabled(false); |
656 | |
657 | // Enter the tokens we just lexed. This will cause them to be macro expanded |
658 | // but won't enter sub-files (because we removed #'s). |
659 | TmpPP.EnterTokenStream(Toks: TokenStream, DisableMacroExpansion: false, /*IsReinject=*/false); |
660 | |
661 | TokenConcatenation ConcatInfo(TmpPP); |
662 | |
663 | // Lex all the tokens. |
664 | Token Tok; |
665 | TmpPP.Lex(Result&: Tok); |
666 | while (Tok.isNot(K: tok::eof)) { |
667 | // Ignore non-macro tokens. |
668 | if (!Tok.getLocation().isMacroID()) { |
669 | TmpPP.Lex(Result&: Tok); |
670 | continue; |
671 | } |
672 | |
673 | // Okay, we have the first token of a macro expansion: highlight the |
674 | // expansion by inserting a start tag before the macro expansion and |
675 | // end tag after it. |
676 | CharSourceRange LLoc = SM.getExpansionRange(Loc: Tok.getLocation()); |
677 | |
678 | // Ignore tokens whose instantiation location was not the main file. |
679 | if (SM.getFileID(SpellingLoc: LLoc.getBegin()) != FID) { |
680 | TmpPP.Lex(Result&: Tok); |
681 | continue; |
682 | } |
683 | |
684 | assert(SM.getFileID(LLoc.getEnd()) == FID && |
685 | "Start and end of expansion must be in the same ultimate file!" ); |
686 | |
687 | std::string Expansion = EscapeText(s: TmpPP.getSpelling(Tok)); |
688 | unsigned LineLen = Expansion.size(); |
689 | |
690 | Token PrevPrevTok; |
691 | Token PrevTok = Tok; |
692 | // Okay, eat this token, getting the next one. |
693 | TmpPP.Lex(Result&: Tok); |
694 | |
695 | // Skip all the rest of the tokens that are part of this macro |
696 | // instantiation. It would be really nice to pop up a window with all the |
697 | // spelling of the tokens or something. |
698 | while (!Tok.is(K: tok::eof) && |
699 | SM.getExpansionLoc(Loc: Tok.getLocation()) == LLoc.getBegin()) { |
700 | // Insert a newline if the macro expansion is getting large. |
701 | if (LineLen > 60) { |
702 | Expansion += "<br>" ; |
703 | LineLen = 0; |
704 | } |
705 | |
706 | LineLen -= Expansion.size(); |
707 | |
708 | // If the tokens were already space separated, or if they must be to avoid |
709 | // them being implicitly pasted, add a space between them. |
710 | if (Tok.hasLeadingSpace() || |
711 | ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok)) |
712 | Expansion += ' '; |
713 | |
714 | // Escape any special characters in the token text. |
715 | Expansion += EscapeText(s: TmpPP.getSpelling(Tok)); |
716 | LineLen += Expansion.size(); |
717 | |
718 | PrevPrevTok = PrevTok; |
719 | PrevTok = Tok; |
720 | TmpPP.Lex(Result&: Tok); |
721 | } |
722 | |
723 | // Insert the 'macro_popup' as the end tag, so that multi-line macros all |
724 | // get highlighted. |
725 | Expansion = "<span class='macro_popup'>" + Expansion + "</span></span>" ; |
726 | |
727 | HighlightRangeCallback(R, LLoc.getBegin(), LLoc.getEnd(), |
728 | "<span class='macro'>" , Expansion.c_str(), |
729 | LLoc.isTokenRange()); |
730 | } |
731 | |
732 | // Restore the preprocessor's old state. |
733 | TmpPP.setDiagnostics(*OldDiags); |
734 | TmpPP.setPragmasEnabled(PragmasPreviouslyEnabled); |
735 | } |
736 | |
737 | /// HighlightMacros - This uses the macro table state from the end of the |
738 | /// file, to re-expand macros and insert (into the HTML) information about the |
739 | /// macro expansions. This won't be perfectly perfect, but it will be |
740 | /// reasonably close. |
741 | void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor &PP, |
742 | RelexRewriteCacheRef Cache) { |
743 | if (Cache) { |
744 | auto CacheIt = Cache->MacroHighlights.find(Val: FID); |
745 | if (CacheIt != Cache->MacroHighlights.end()) { |
746 | for (const RelexRewriteCache::Highlight &H : CacheIt->second) { |
747 | HighlightRange(R, B: H.B, E: H.E, StartTag: H.StartTag.data(), EndTag: H.EndTag.data(), |
748 | IsTokenRange: H.IsTokenRange); |
749 | } |
750 | return; |
751 | } |
752 | } |
753 | |
754 | // "Every time you would call HighlightRange, cache the inputs as well." |
755 | auto HighlightRangeCallback = [&](Rewriter &R, SourceLocation B, |
756 | SourceLocation E, const char *StartTag, |
757 | const char *EndTag, bool isTokenRange) { |
758 | HighlightRange(R, B, E, StartTag, EndTag, IsTokenRange: isTokenRange); |
759 | |
760 | if (Cache) { |
761 | Cache->MacroHighlights[FID].push_back( |
762 | x: {.B: B, .E: E, .StartTag: StartTag, .EndTag: EndTag, .IsTokenRange: isTokenRange}); |
763 | } |
764 | }; |
765 | |
766 | HighlightMacrosImpl(R, FID, PP, HighlightRangeCallback); |
767 | } |
768 | |