HeaderIncludes.cpp source code [clang/lib/Tooling/Inclusions/HeaderIncludes.cpp]

1	//===--- HeaderIncludes.cpp - Insert/Delete #includes --- C++ -----------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "clang/Tooling/Inclusions/HeaderIncludes.h"
10	#include "clang/Basic/SourceManager.h"
11	#include "clang/Lex/Lexer.h"
12	#include "llvm/Support/FormatVariadic.h"
13	#include "llvm/Support/Path.h"
14	#include <optional>
15
16	namespace clang {
17	namespace tooling {
18	namespace {
19
20	LangOptions createLangOpts() {
21	LangOptions LangOpts;
22	LangOpts.CPlusPlus = `1`;
23	LangOpts.CPlusPlus11 = `1`;
24	LangOpts.CPlusPlus14 = `1`;
25	LangOpts.LineComment = `1`;
26	LangOpts.CXXOperatorNames = `1`;
27	LangOpts.Bool = `1`;
28	LangOpts.ObjC = `1`;
29	LangOpts.MicrosoftExt = `1`; // To get kw___try, kw___finally.
30	LangOpts.DeclSpecKeyword = `1`; // To get __declspec.
31	LangOpts.WChar = `1`; // To get wchar_t
32	return LangOpts;
33	}
34
35	// Returns the offset after skipping a sequence of tokens, matched by \p
36	// GetOffsetAfterSequence, from the start of the code.
37	// \p GetOffsetAfterSequence should be a function that matches a sequence of
38	// tokens and returns an offset after the sequence.
39	unsigned getOffsetAfterTokenSequence(
40	StringRef FileName, StringRef Code, const IncludeStyle &Style,
41	llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)>
42	GetOffsetAfterSequence) {
43	SourceManagerForFile VirtualSM(FileName, Code);
44	SourceManager &SM = VirtualSM.get();
45	LangOptions LangOpts = createLangOpts();
46	Lexer Lex(SM.getMainFileID(), SM.getBufferOrFake(FID: SM.getMainFileID()), SM,
47	LangOpts);
48	Token Tok;
49	// Get the first token.
50	Lex.LexFromRawLexer(Result&: Tok);
51	return GetOffsetAfterSequence (SM, Lex, Tok);
52	}
53
54	// Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
55	// \p Tok will be the token after this directive; otherwise, it can be any token
56	// after the given \p Tok (including \p Tok). If \p RawIDName is provided, the
57	// (second) raw_identifier name is checked.
58	bool checkAndConsumeDirectiveWithName(
59	Lexer &Lex, StringRef Name, Token &Tok,
60	std::optional<StringRef> RawIDName = std::nullopt) {
61	bool Matched = Tok.is(K: tok::hash) && !Lex.LexFromRawLexer(Result&: Tok) &&
62	Tok.is(K: tok::raw_identifier) &&
63	Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Result&: Tok) &&
64	Tok.is(K: tok::raw_identifier) &&
65	(!RawIDName \|\| Tok.getRawIdentifier() == *RawIDName);
66	if (Matched)
67	Lex.LexFromRawLexer(Result&: Tok);
68	return Matched;
69	}
70
71	void skipComments(Lexer &Lex, Token &Tok) {
72	while (Tok.is(K: tok::comment))
73	if (Lex.LexFromRawLexer(Result&: Tok))
74	return;
75	}
76
77	// Returns the offset after header guard directives and any comments
78	// before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no
79	// header guard is present in the code, this will return the offset after
80	// skipping all comments from the start of the code.
81	unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
82	StringRef Code,
83	const IncludeStyle &Style) {
84	// \p Consume returns location after header guard or 0 if no header guard is
85	// found.
86	auto ConsumeHeaderGuardAndComment =
87	[&](std::function<unsigned(const SourceManager &SM, Lexer &Lex,
88	Token Tok)>
89	Consume) {
90	return getOffsetAfterTokenSequence(
91	FileName, Code, Style,
92	GetOffsetAfterSequence: [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) {
93	skipComments(Lex, Tok);
94	unsigned InitialOffset = SM.getFileOffset(SpellingLoc: Tok.getLocation());
95	return std::max(a: InitialOffset, b: Consume (SM, Lex, Tok));
96	});
97	};
98	return std::max(
99	// #ifndef/#define
100	a: ConsumeHeaderGuardAndComment (
101	[](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
102	if (checkAndConsumeDirectiveWithName(Lex, Name: "ifndef", Tok)) {
103	skipComments(Lex, Tok);
104	if (checkAndConsumeDirectiveWithName(Lex, Name: "define", Tok) &&
105	Tok.isAtStartOfLine())
106	return SM.getFileOffset(SpellingLoc: Tok.getLocation());
107	}
108	return `0`;
109	}),
110	// #pragma once
111	b: ConsumeHeaderGuardAndComment (
112	[](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
113	if (checkAndConsumeDirectiveWithName(Lex, Name: "pragma", Tok,
114	RawIDName: StringRef("once")))
115	return SM.getFileOffset(SpellingLoc: Tok.getLocation());
116	return `0`;
117	}));
118	}
119
120	// Check if a sequence of tokens is like
121	// "#include ("header.h" \| <header.h>)".
122	// If it is, \p Tok will be the token after this directive; otherwise, it can be
123	// any token after the given \p Tok (including \p Tok).
124	bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) {
125	auto Matched = [&]() {
126	Lex.LexFromRawLexer(Result&: Tok);
127	return true;
128	};
129	if (Tok.is(K: tok::hash) && !Lex.LexFromRawLexer(Result&: Tok) &&
130	Tok.is(K: tok::raw_identifier) && Tok.getRawIdentifier() == "include") {
131	if (Lex.LexFromRawLexer(Result&: Tok))
132	return false;
133	if (Tok.is(K: tok::string_literal))
134	return Matched ();
135	if (Tok.is(K: tok::less)) {
136	while (!Lex.LexFromRawLexer(Result&: Tok) && Tok.isNot(K: tok::greater)) {
137	}
138	if (Tok.is(K: tok::greater))
139	return Matched ();
140	}
141	}
142	return false;
143	}
144
145	// Returns the offset of the last #include directive after which a new
146	// #include can be inserted. This ignores #include's after the #include block(s)
147	// in the beginning of a file to avoid inserting headers into code sections
148	// where new #include's should not be added by default.
149	// These code sections include:
150	// - raw string literals (containing #include).
151	// - #if blocks.
152	// - Special #include's among declarations (e.g. functions).
153	//
154	// If no #include after which a new #include can be inserted, this returns the
155	// offset after skipping all comments from the start of the code.
156	// Inserting after an #include is not allowed if it comes after code that is not
157	// #include (e.g. pre-processing directive that is not #include, declarations).
158	unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
159	const IncludeStyle &Style) {
160	return getOffsetAfterTokenSequence(
161	FileName, Code, Style,
162	GetOffsetAfterSequence: [](const SourceManager &SM, Lexer &Lex, Token Tok) {
163	skipComments(Lex, Tok);
164	unsigned MaxOffset = SM.getFileOffset(SpellingLoc: Tok.getLocation());
165	while (checkAndConsumeInclusiveDirective(Lex, Tok))
166	MaxOffset = SM.getFileOffset(SpellingLoc: Tok.getLocation());
167	return MaxOffset;
168	});
169	}
170
171	inline StringRef trimInclude(StringRef IncludeName) {
172	return IncludeName.trim(Chars: "\"<>");
173	}
174
// Pattern recognising an #include / #import line.
//  - Capture group 1 is the directive keyword ("import" or "include").
//  - Capture group 2 is the header name together with its surrounding quotes
//    or angle brackets.
// The '*' quantifiers are essential: without them only a header name made of a
// single character, with no optional whitespace, could match.
const char IncludeRegexPattern[] =
    R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
177
178	// The filename of Path excluding extension.
179	// Used to match implementation with headers, this differs from sys::path::stem:
180	// - in names with multiple dots (foo.cu.cc) it terminates at the first
181	// - an empty stem is never returned: /foo/.bar.x => .bar
182	// - we don't bother to handle . and .. specially
183	StringRef matchingStem(llvm::StringRef Path) {
184	StringRef Name = llvm::sys::path::filename(path: Path);
185	return Name.substr(Start: `0`, N: Name.find(C: `'.'`, From: `1`));
186	}
187
188	} // anonymous namespace
189
190	IncludeCategoryManager::IncludeCategoryManager(const IncludeStyle &Style,
191	StringRef FileName)
192	: Style (Style), FileName (FileName) {
193	for (const auto &Category : Style.IncludeCategories) {
194	CategoryRegexs.emplace_back(Args: Category.Regex, Args: Category.RegexIsCaseSensitive
195	? llvm::Regex::NoFlags
196	: llvm::Regex::IgnoreCase);
197	}
198	IsMainFile = FileName.ends_with(Suffix: ".c") \|\| FileName.ends_with(Suffix: ".cc") \|\|
199	FileName.ends_with(Suffix: ".cpp") \|\| FileName.ends_with(Suffix: ".c++") \|\|
200	FileName.ends_with(Suffix: ".cxx") \|\| FileName.ends_with(Suffix: ".m") \|\|
201	FileName.ends_with(Suffix: ".mm");
202	if (!Style.IncludeIsMainSourceRegex.empty()) {
203	llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex);
204	IsMainFile \|= MainFileRegex.match(String: FileName);
205	}
206	}
207
208	int IncludeCategoryManager::getIncludePriority(StringRef IncludeName,
209	bool CheckMainHeader) const {
210	int Ret = INT_MAX;
211	for (unsigned i = `0`, e = CategoryRegexs.size(); i != e; ++i)
212	if (CategoryRegexs [i].match(String: IncludeName)) {
213	Ret = Style.IncludeCategories [i].Priority;
214	break;
215	}
216	if (CheckMainHeader && IsMainFile && Ret > `0` && isMainHeader(IncludeName))
217	Ret = `0`;
218	return Ret;
219	}
220
221	int IncludeCategoryManager::getSortIncludePriority(StringRef IncludeName,
222	bool CheckMainHeader) const {
223	int Ret = INT_MAX;
224	for (unsigned i = `0`, e = CategoryRegexs.size(); i != e; ++i)
225	if (CategoryRegexs [i].match(String: IncludeName)) {
226	Ret = Style.IncludeCategories [i].SortPriority;
227	if (Ret == `0`)
228	Ret = Style.IncludeCategories [i].Priority;
229	break;
230	}
231	if (CheckMainHeader && IsMainFile && Ret > `0` && isMainHeader(IncludeName))
232	Ret = `0`;
233	return Ret;
234	}
235	bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const {
236	switch (Style.MainIncludeChar) {
237	case IncludeStyle::MICD_Quote:
238	if (!IncludeName.starts_with(Prefix: "\""))
239	return false;
240	break;
241	case IncludeStyle::MICD_AngleBracket:
242	if (!IncludeName.starts_with(Prefix: "<"))
243	return false;
244	break;
245	case IncludeStyle::MICD_Any:
246	break;
247	}
248
249	IncludeName =
250	IncludeName.drop_front(N: `1`).drop_back(N: `1`); // remove the surrounding "" or <>
251	// Not matchingStem: implementation files may have compound extensions but
252	// headers may not.
253	StringRef HeaderStem = llvm::sys::path::stem(path: IncludeName);
254	StringRef FileStem = llvm::sys::path::stem(path: FileName); // foo.cu for foo.cu.cc
255	StringRef MatchingFileStem = matchingStem(Path: FileName); // foo for foo.cu.cc
256	// main-header examples:
257	// 1) foo.h => foo.cc
258	// 2) foo.h => foo.cu.cc
259	// 3) foo.proto.h => foo.proto.cc
260	//
261	// non-main-header examples:
262	// 1) foo.h => bar.cc
263	// 2) foo.proto.h => foo.cc
264	StringRef Matching;
265	if (MatchingFileStem.starts_with_insensitive(Prefix: HeaderStem))
266	Matching = MatchingFileStem; // example 1), 2)
267	else if (FileStem.equals_insensitive(RHS: HeaderStem))
268	Matching = FileStem; // example 3)
269	if (!Matching.empty()) {
270	llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex,
271	llvm::Regex::IgnoreCase);
272	if (MainIncludeRegex.match(String: Matching))
273	return true;
274	}
275	return false;
276	}
277
278	const llvm::Regex HeaderIncludes::IncludeRegex(IncludeRegexPattern);
279
280	HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
281	const IncludeStyle &Style)
282	: FileName (FileName), Code (Code), FirstIncludeOffset(-`1`),
283	MinInsertOffset(
284	getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)),
285	MaxInsertOffset(MinInsertOffset +
286	getMaxHeaderInsertionOffset(
287	FileName, Code: Code.drop_front(N: MinInsertOffset), Style)),
288	MainIncludeFound(false),
289	Categories (Style, FileName) {
290	// Add 0 for main header and INT_MAX for headers that are not in any
291	// category.
292	Priorities = {`0`, INT_MAX};
293	for (const auto &Category : Style.IncludeCategories)
294	Priorities.insert(x: Category.Priority);
295	SmallVector<StringRef, `32`> Lines;
296	Code.drop_front(N: MinInsertOffset).split(A&: Lines, Separator: "\n");
297
298	unsigned Offset = MinInsertOffset;
299	unsigned NextLineOffset;
300	SmallVector<StringRef, `4`> Matches;
301	for (auto Line : Lines) {
302	NextLineOffset = std::min(a: Code.size(), b: Offset + Line.size() + `1`);
303	if (IncludeRegex.match(String: Line, Matches: &Matches)) {
304	// If this is the last line without trailing newline, we need to make
305	// sure we don't delete across the file boundary.
306	addExistingInclude(
307	IncludeToAdd: Include (Matches [`2`],
308	tooling::Range (
309	Offset, std::min(a: Line.size() + `1`, b: Code.size() - Offset)),
310	Matches [`1`] == "import" ? tooling::IncludeDirective::Import
311	: tooling::IncludeDirective::Include),
312	NextLineOffset);
313	}
314	Offset = NextLineOffset;
315	}
316
317	// Populate CategoryEndOfssets:
318	// - Ensure that CategoryEndOffset[Highest] is always populated.
319	// - If CategoryEndOffset[Priority] isn't set, use the next higher value
320	// that is set, up to CategoryEndOffset[Highest].
321	auto Highest = Priorities.begin();
322	auto [It, Inserted] = CategoryEndOffsets.try_emplace(k: *Highest);
323	if (Inserted)
324	It ->second = FirstIncludeOffset >= `0` ? FirstIncludeOffset : MinInsertOffset;
325	// By this point, CategoryEndOffset[Highest] is always set appropriately:
326	// - to an appropriate location before/after existing #includes, or
327	// - to right after the header guard, or
328	// - to the beginning of the file.
329	for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
330	if (CategoryEndOffsets.find(x: *I) == CategoryEndOffsets.end())
331	CategoryEndOffsets [I] = CategoryEndOffsets [std::prev(x: I)];
332	}
333
334	// \p Offset: the start of the line following this include directive.
335	void HeaderIncludes::addExistingInclude(Include IncludeToAdd,
336	unsigned NextLineOffset) {
337	auto &Incs = ExistingIncludes [trimInclude(IncludeName: IncludeToAdd.Name)];
338	Incs.push_back(x: std::move(IncludeToAdd));
339	auto &CurInclude = Incs.back();
340	// The header name with quotes or angle brackets.
341	// Only record the offset of current #include if we can insert after it.
342	if (CurInclude.R.getOffset() <= MaxInsertOffset) {
343	int Priority = Categories.getIncludePriority(
344	IncludeName: CurInclude.Name, /CheckMainHeader=/!MainIncludeFound);
345	if (Priority == `0`)
346	MainIncludeFound = true;
347	CategoryEndOffsets [Priority] = NextLineOffset;
348	IncludesByPriority [Priority].push_back(Elt: &CurInclude);
349	if (FirstIncludeOffset < `0`)
350	FirstIncludeOffset = CurInclude.R.getOffset();
351	}
352	}
353
354	std::optional<tooling::Replacement>
355	HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled,
356	IncludeDirective Directive) const {
357	assert(IncludeName == trimInclude(IncludeName));
358	// If a <header> ("header") already exists in code, "header" (<header>) with
359	// different quotation and/or directive will still be inserted.
360	// FIXME: figure out if this is the best behavior.
361	auto It = ExistingIncludes.find(Key: IncludeName);
362	if (It != ExistingIncludes.end()) {
363	for (const auto &Inc : It ->second)
364	if (Inc.Directive == Directive &&
365	((IsAngled && StringRef(Inc.Name).starts_with(Prefix: "<")) \|\|
366	(!IsAngled && StringRef(Inc.Name).starts_with(Prefix: "\""))))
367	return std::nullopt;
368	}
369	std::string Quoted =
370	std::string(llvm::formatv(Fmt: IsAngled ? "<{0}>" : "\"{0}\"", Vals&: IncludeName));
371	StringRef QuotedName = Quoted;
372	int Priority = Categories.getIncludePriority(
373	IncludeName: QuotedName, /CheckMainHeader=/!MainIncludeFound);
374	auto CatOffset = CategoryEndOffsets.find(x: Priority);
375	assert(CatOffset != CategoryEndOffsets.end());
376	unsigned InsertOffset = CatOffset ->second; // Fall back offset
377	auto Iter = IncludesByPriority.find(x: Priority);
378	if (Iter != IncludesByPriority.end()) {
379	for (const auto *Inc : Iter ->second) {
380	if (QuotedName < Inc->Name) {
381	InsertOffset = Inc->R.getOffset();
382	break;
383	}
384	}
385	}
386	assert(InsertOffset <= Code.size());
387	llvm::StringRef DirectiveSpelling =
388	Directive == IncludeDirective::Include ? "include" : "import";
389	std::string NewInclude =
390	llvm::formatv(Fmt: "#{0} {1}\n", Vals&: DirectiveSpelling, Vals&: QuotedName);
391	// When inserting headers at end of the code, also append '\n' to the code
392	// if it does not end with '\n'.
393	// FIXME: when inserting multiple #includes at the end of code, only one
394	// newline should be added.
395	if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != `'\n'`))
396	NewInclude = "\n" + NewInclude;
397	return tooling::Replacement (FileName, InsertOffset, `0`, NewInclude);
398	}
399
400	tooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName,
401	bool IsAngled) const {
402	assert(IncludeName == trimInclude(IncludeName));
403	tooling::Replacements Result;
404	auto Iter = ExistingIncludes.find(Key: IncludeName);
405	if (Iter == ExistingIncludes.end())
406	return Result;
407	for (const auto &Inc : Iter ->second) {
408	if ((IsAngled && StringRef(Inc.Name).starts_with(Prefix: "\"")) \|\|
409	(!IsAngled && StringRef(Inc.Name).starts_with(Prefix: "<")))
410	continue;
411	llvm::Error Err = Result.add(R: tooling::Replacement (
412	FileName, Inc.R.getOffset(), Inc.R.getLength(), ""));
413	if (Err) {
414	auto ErrMsg = "Unexpected conflicts in #include deletions: " +
415	llvm::toString(E: std::move(Err));
416	llvm_unreachable(ErrMsg.c_str());
417	}
418	}
419	return Result;
420	}
421
422	} // namespace tooling
423	} // namespace clang
424

Provided by KDAB

Definitions

createLangOpts
getOffsetAfterTokenSequence
checkAndConsumeDirectiveWithName
skipComments
getOffsetAfterHeaderGuardsAndComments
checkAndConsumeInclusiveDirective
getMaxHeaderInsertionOffset
trimInclude
IncludeRegexPattern
matchingStem
IncludeCategoryManager
getIncludePriority
getSortIncludePriority
isMainHeader
IncludeRegex
HeaderIncludes
addExistingInclude
insert

Learn to use CMake with our Intro Training

Find out more

Definitions

source code of clang/lib/Tooling/Inclusions/HeaderIncludes.cpp