//===-- ClangHighlighter.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ClangHighlighter.h"

#include "lldb/Host/FileSystem.h"
#include "lldb/Target/Language.h"
#include "lldb/Utility/AnsiTerminal.h"
#include "lldb/Utility/StreamString.h"

#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cassert>
#include <optional>

using namespace lldb_private;

25bool ClangHighlighter::isKeyword(llvm::StringRef token) const {
26 return keywords.contains(key: token);
27}
28
29ClangHighlighter::ClangHighlighter() {
30#define KEYWORD(X, N) keywords.insert(#X);
31#include "clang/Basic/TokenKinds.def"
32}
33
34/// Determines which style should be applied to the given token.
35/// \param highlighter
36/// The current highlighter that should use the style.
37/// \param token
38/// The current token.
39/// \param tok_str
40/// The string in the source code the token represents.
41/// \param options
42/// The style we use for coloring the source code.
43/// \param in_pp_directive
44/// If we are currently in a preprocessor directive. NOTE: This is
45/// passed by reference and will be updated if the current token starts
46/// or ends a preprocessor directive.
47/// \return
48/// The ColorStyle that should be applied to the token.
49static HighlightStyle::ColorStyle
50determineClangStyle(const ClangHighlighter &highlighter,
51 const clang::Token &token, llvm::StringRef tok_str,
52 const HighlightStyle &options, bool &in_pp_directive) {
53 using namespace clang;
54
55 if (token.is(K: tok::comment)) {
56 // If we were in a preprocessor directive before, we now left it.
57 in_pp_directive = false;
58 return options.comment;
59 } else if (in_pp_directive || token.getKind() == tok::hash) {
60 // Let's assume that the rest of the line is a PP directive.
61 in_pp_directive = true;
62 // Preprocessor directives are hard to match, so we have to hack this in.
63 return options.pp_directive;
64 } else if (tok::isStringLiteral(K: token.getKind()))
65 return options.string_literal;
66 else if (tok::isLiteral(K: token.getKind()))
67 return options.scalar_literal;
68 else if (highlighter.isKeyword(token: tok_str))
69 return options.keyword;
70 else
71 switch (token.getKind()) {
72 case tok::raw_identifier:
73 case tok::identifier:
74 return options.identifier;
75 case tok::l_brace:
76 case tok::r_brace:
77 return options.braces;
78 case tok::l_square:
79 case tok::r_square:
80 return options.square_brackets;
81 case tok::l_paren:
82 case tok::r_paren:
83 return options.parentheses;
84 case tok::comma:
85 return options.comma;
86 case tok::coloncolon:
87 case tok::colon:
88 return options.colon;
89
90 case tok::amp:
91 case tok::ampamp:
92 case tok::ampequal:
93 case tok::star:
94 case tok::starequal:
95 case tok::plus:
96 case tok::plusplus:
97 case tok::plusequal:
98 case tok::minus:
99 case tok::arrow:
100 case tok::minusminus:
101 case tok::minusequal:
102 case tok::tilde:
103 case tok::exclaim:
104 case tok::exclaimequal:
105 case tok::slash:
106 case tok::slashequal:
107 case tok::percent:
108 case tok::percentequal:
109 case tok::less:
110 case tok::lessless:
111 case tok::lessequal:
112 case tok::lesslessequal:
113 case tok::spaceship:
114 case tok::greater:
115 case tok::greatergreater:
116 case tok::greaterequal:
117 case tok::greatergreaterequal:
118 case tok::caret:
119 case tok::caretequal:
120 case tok::pipe:
121 case tok::pipepipe:
122 case tok::pipeequal:
123 case tok::question:
124 case tok::equal:
125 case tok::equalequal:
126 return options.operators;
127 default:
128 break;
129 }
130 return HighlightStyle::ColorStyle();
131}
132
133void ClangHighlighter::Highlight(const HighlightStyle &options,
134 llvm::StringRef line,
135 std::optional<size_t> cursor_pos,
136 llvm::StringRef previous_lines,
137 Stream &result) const {
138 using namespace clang;
139
140 FileSystemOptions file_opts;
141 FileManager file_mgr(file_opts,
142 FileSystem::Instance().GetVirtualFileSystem());
143
144 // The line might end in a backslash which would cause Clang to drop the
145 // backslash and the terminating new line. This makes sense when parsing C++,
146 // but when highlighting we care about preserving the backslash/newline. To
147 // not lose this information we remove the new line here so that Clang knows
148 // this is just a single line we are highlighting. We add back the newline
149 // after tokenizing.
150 llvm::StringRef line_ending = "";
151 // There are a few legal line endings Clang recognizes and we need to
152 // temporarily remove from the string.
153 if (line.consume_back(Suffix: "\r\n"))
154 line_ending = "\r\n";
155 else if (line.consume_back(Suffix: "\n"))
156 line_ending = "\n";
157 else if (line.consume_back(Suffix: "\r"))
158 line_ending = "\r";
159
160 unsigned line_number = previous_lines.count(C: '\n') + 1U;
161
162 // Let's build the actual source code Clang needs and setup some utility
163 // objects.
164 std::string full_source = previous_lines.str() + line.str();
165 llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs());
166 DiagnosticOptions diags_opts;
167 DiagnosticsEngine diags(diag_ids, diags_opts);
168 clang::SourceManager SM(diags, file_mgr);
169 auto buf = llvm::MemoryBuffer::getMemBuffer(InputData: full_source);
170
171 FileID FID = SM.createFileID(Buffer: buf->getMemBufferRef());
172
173 // Let's just enable the latest ObjC and C++ which should get most tokens
174 // right.
175 LangOptions Opts;
176 Opts.ObjC = true;
177 // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too
178 Opts.CPlusPlus17 = true;
179 Opts.LineComment = true;
180
181 Lexer lex(FID, buf->getMemBufferRef(), SM, Opts);
182 // The lexer should keep whitespace around.
183 lex.SetKeepWhitespaceMode(true);
184
185 // Keeps track if we have entered a PP directive.
186 bool in_pp_directive = false;
187
188 // True once we actually lexed the user provided line.
189 bool found_user_line = false;
190
191 // True if we already highlighted the token under the cursor, false otherwise.
192 bool highlighted_cursor = false;
193 Token token;
194 bool exit = false;
195 while (!exit) {
196 // Returns true if this is the last token we get from the lexer.
197 exit = lex.LexFromRawLexer(Result&: token);
198
199 bool invalid = false;
200 unsigned current_line_number =
201 SM.getSpellingLineNumber(Loc: token.getLocation(), Invalid: &invalid);
202 if (current_line_number != line_number)
203 continue;
204 found_user_line = true;
205
206 // We don't need to print any tokens without a spelling line number.
207 if (invalid)
208 continue;
209
210 // Same as above but with the column number.
211 invalid = false;
212 unsigned start = SM.getSpellingColumnNumber(Loc: token.getLocation(), Invalid: &invalid);
213 if (invalid)
214 continue;
215 // Column numbers start at 1, but indexes in our string start at 0.
216 --start;
217
218 // Annotations don't have a length, so let's skip them.
219 if (token.isAnnotation())
220 continue;
221
222 // Extract the token string from our source code.
223 llvm::StringRef tok_str = line.substr(Start: start, N: token.getLength());
224
225 // If the token is just an empty string, we can skip all the work below.
226 if (tok_str.empty())
227 continue;
228
229 // If the cursor is inside this token, we have to apply the 'selected'
230 // highlight style before applying the actual token color.
231 llvm::StringRef to_print = tok_str;
232 StreamString storage;
233 auto end = start + token.getLength();
234 if (cursor_pos && end > *cursor_pos && !highlighted_cursor) {
235 highlighted_cursor = true;
236 options.selected.Apply(s&: storage, value: tok_str);
237 to_print = storage.GetString();
238 }
239
240 // See how we are supposed to highlight this token.
241 HighlightStyle::ColorStyle color =
242 determineClangStyle(highlighter: *this, token, tok_str, options, in_pp_directive);
243
244 color.Apply(s&: result, value: to_print);
245 }
246
247 // Add the line ending we trimmed before tokenizing.
248 result << line_ending;
249
250 // If we went over the whole file but couldn't find our own file, then
251 // somehow our setup was wrong. When we're in release mode we just give the
252 // user the normal line and pretend we don't know how to highlight it. In
253 // debug mode we bail out with an assert as this should never happen.
254 if (!found_user_line) {
255 result << line;
256 assert(false && "We couldn't find the user line in the input file?");
257 }
258}
259

Provided by KDAB

Privacy Policy
Improve your Profiling and Debugging skills
Find out more

source code of lldb/source/Plugins/Language/ClangCommon/ClangHighlighter.cpp