ClangHighlighter.cpp source code [lldb/source/Plugins/Language/ClangCommon/ClangHighlighter.cpp]

1	//===-- ClangHighlighter.cpp ----------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "ClangHighlighter.h"
10
11	#include "lldb/Host/FileSystem.h"
12	#include "lldb/Target/Language.h"
13	#include "lldb/Utility/AnsiTerminal.h"
14	#include "lldb/Utility/StreamString.h"
15
16	#include "clang/Basic/FileManager.h"
17	#include "clang/Basic/SourceManager.h"
18	#include "clang/Lex/Lexer.h"
19	#include "llvm/ADT/StringSet.h"
20	#include "llvm/Support/MemoryBuffer.h"
21	#include <optional>
22
23	using namespace lldb_private;
24
25	bool ClangHighlighter::isKeyword(llvm::StringRef token) const {
26	return keywords.contains(key: token);
27	}
28
29	ClangHighlighter::ClangHighlighter() {
30	#define KEYWORD(X, N) keywords.insert(#X);
31	#include "clang/Basic/TokenKinds.def"
32	}
33
34	/// Determines which style should be applied to the given token.
35	/// \param highlighter
36	/// The current highlighter that should use the style.
37	/// \param token
38	/// The current token.
39	/// \param tok_str
40	/// The string in the source code the token represents.
41	/// \param options
42	/// The style we use for coloring the source code.
43	/// \param in_pp_directive
44	/// If we are currently in a preprocessor directive. NOTE: This is
45	/// passed by reference and will be updated if the current token starts
46	/// or ends a preprocessor directive.
47	/// \return
48	/// The ColorStyle that should be applied to the token.
49	static HighlightStyle::ColorStyle
50	determineClangStyle(const ClangHighlighter &highlighter,
51	const clang::Token &token, llvm::StringRef tok_str,
52	const HighlightStyle &options, bool &in_pp_directive) {
53	using namespace clang;
54
55	if (token.is(K: tok::comment)) {
56	// If we were in a preprocessor directive before, we now left it.
57	in_pp_directive = false;
58	return options.comment;
59	} else if (in_pp_directive \|\| token.getKind() == tok::hash) {
60	// Let's assume that the rest of the line is a PP directive.
61	in_pp_directive = true;
62	// Preprocessor directives are hard to match, so we have to hack this in.
63	return options.pp_directive;
64	} else if (tok::isStringLiteral(K: token.getKind()))
65	return options.string_literal;
66	else if (tok::isLiteral(K: token.getKind()))
67	return options.scalar_literal;
68	else if (highlighter.isKeyword(token: tok_str))
69	return options.keyword;
70	else
71	switch (token.getKind()) {
72	case tok::raw_identifier:
73	case tok::identifier:
74	return options.identifier;
75	case tok::l_brace:
76	case tok::r_brace:
77	return options.braces;
78	case tok::l_square:
79	case tok::r_square:
80	return options.square_brackets;
81	case tok::l_paren:
82	case tok::r_paren:
83	return options.parentheses;
84	case tok::comma:
85	return options.comma;
86	case tok::coloncolon:
87	case tok::colon:
88	return options.colon;
89
90	case tok::amp:
91	case tok::ampamp:
92	case tok::ampequal:
93	case tok::star:
94	case tok::starequal:
95	case tok::plus:
96	case tok::plusplus:
97	case tok::plusequal:
98	case tok::minus:
99	case tok::arrow:
100	case tok::minusminus:
101	case tok::minusequal:
102	case tok::tilde:
103	case tok::exclaim:
104	case tok::exclaimequal:
105	case tok::slash:
106	case tok::slashequal:
107	case tok::percent:
108	case tok::percentequal:
109	case tok::less:
110	case tok::lessless:
111	case tok::lessequal:
112	case tok::lesslessequal:
113	case tok::spaceship:
114	case tok::greater:
115	case tok::greatergreater:
116	case tok::greaterequal:
117	case tok::greatergreaterequal:
118	case tok::caret:
119	case tok::caretequal:
120	case tok::pipe:
121	case tok::pipepipe:
122	case tok::pipeequal:
123	case tok::question:
124	case tok::equal:
125	case tok::equalequal:
126	return options.operators;
127	default:
128	break;
129	}
130	return HighlightStyle::ColorStyle ();
131	}
132
133	void ClangHighlighter::Highlight(const HighlightStyle &options,
134	llvm::StringRef line,
135	std::optional<size_t> cursor_pos,
136	llvm::StringRef previous_lines,
137	Stream &result) const {
138	using namespace clang;
139
140	FileSystemOptions file_opts;
141	FileManager file_mgr(file_opts,
142	FileSystem::Instance().GetVirtualFileSystem());
143
144	// The line might end in a backslash which would cause Clang to drop the
145	// backslash and the terminating new line. This makes sense when parsing C++,
146	// but when highlighting we care about preserving the backslash/newline. To
147	// not lose this information we remove the new line here so that Clang knows
148	// this is just a single line we are highlighting. We add back the newline
149	// after tokenizing.
150	llvm::StringRef line_ending = "";
151	// There are a few legal line endings Clang recognizes and we need to
152	// temporarily remove from the string.
153	if (line.consume_back(Suffix: "\r\n"))
154	line_ending = "\r\n";
155	else if (line.consume_back(Suffix: "\n"))
156	line_ending = "\n";
157	else if (line.consume_back(Suffix: "\r"))
158	line_ending = "\r";
159
160	unsigned line_number = previous_lines.count(C: `'\n'`) + `1U`;
161
162	// Let's build the actual source code Clang needs and setup some utility
163	// objects.
164	std::string full_source = previous_lines.str() + line.str();
165	llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs ());
166	DiagnosticOptions diags_opts;
167	DiagnosticsEngine diags(diag_ids, diags_opts);
168	clang::SourceManager SM(diags, file_mgr);
169	auto buf = llvm::MemoryBuffer::getMemBuffer(InputData: full_source);
170
171	FileID FID = SM.createFileID(Buffer: buf ->getMemBufferRef());
172
173	// Let's just enable the latest ObjC and C++ which should get most tokens
174	// right.
175	LangOptions Opts;
176	Opts.ObjC = true;
177	// FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too
178	Opts.CPlusPlus17 = true;
179	Opts.LineComment = true;
180
181	Lexer lex(FID, buf ->getMemBufferRef(), SM, Opts);
182	// The lexer should keep whitespace around.
183	lex.SetKeepWhitespaceMode(true);
184
185	// Keeps track if we have entered a PP directive.
186	bool in_pp_directive = false;
187
188	// True once we actually lexed the user provided line.
189	bool found_user_line = false;
190
191	// True if we already highlighted the token under the cursor, false otherwise.
192	bool highlighted_cursor = false;
193	Token token;
194	bool exit = false;
195	while (!exit) {
196	// Returns true if this is the last token we get from the lexer.
197	exit = lex.LexFromRawLexer(Result&: token);
198
199	bool invalid = false;
200	unsigned current_line_number =
201	SM.getSpellingLineNumber(Loc: token.getLocation(), Invalid: &invalid);
202	if (current_line_number != line_number)
203	continue;
204	found_user_line = true;
205
206	// We don't need to print any tokens without a spelling line number.
207	if (invalid)
208	continue;
209
210	// Same as above but with the column number.
211	invalid = false;
212	unsigned start = SM.getSpellingColumnNumber(Loc: token.getLocation(), Invalid: &invalid);
213	if (invalid)
214	continue;
215	// Column numbers start at 1, but indexes in our string start at 0.
216	--start;
217
218	// Annotations don't have a length, so let's skip them.
219	if (token.isAnnotation())
220	continue;
221
222	// Extract the token string from our source code.
223	llvm::StringRef tok_str = line.substr(Start: start, N: token.getLength());
224
225	// If the token is just an empty string, we can skip all the work below.
226	if (tok_str.empty())
227	continue;
228
229	// If the cursor is inside this token, we have to apply the 'selected'
230	// highlight style before applying the actual token color.
231	llvm::StringRef to_print = tok_str;
232	StreamString storage;
233	auto end = start + token.getLength();
234	if (cursor_pos && end > *cursor_pos && !highlighted_cursor) {
235	highlighted_cursor = true;
236	options.selected.Apply(s&: storage, value: tok_str);
237	to_print = storage.GetString();
238	}
239
240	// See how we are supposed to highlight this token.
241	HighlightStyle::ColorStyle color =
242	determineClangStyle(highlighter: *this, token, tok_str, options, in_pp_directive);
243
244	color.Apply(s&: result, value: to_print);
245	}
246
247	// Add the line ending we trimmed before tokenizing.
248	result << line_ending;
249
250	// If we went over the whole file but couldn't find our own file, then
251	// somehow our setup was wrong. When we're in release mode we just give the
252	// user the normal line and pretend we don't know how to highlight it. In
253	// debug mode we bail out with an assert as this should never happen.
254	if (!found_user_line) {
255	result << line;
256	assert(false && "We couldn't find the user line in the input file?");
257	}
258	}
259

Provided by KDAB

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of lldb/source/Plugins/Language/ClangCommon/ClangHighlighter.cpp