1//===--- HeaderIncludes.cpp - Insert/Delete #includes --*- C++ -*----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Tooling/Inclusions/HeaderIncludes.h"
10#include "clang/Basic/SourceManager.h"
11#include "clang/Lex/Lexer.h"
12#include "llvm/Support/FormatVariadic.h"
13#include "llvm/Support/Path.h"
14#include <optional>
15
16namespace clang {
17namespace tooling {
18namespace {
19
20LangOptions createLangOpts() {
21 LangOptions LangOpts;
22 LangOpts.CPlusPlus = 1;
23 LangOpts.CPlusPlus11 = 1;
24 LangOpts.CPlusPlus14 = 1;
25 LangOpts.LineComment = 1;
26 LangOpts.CXXOperatorNames = 1;
27 LangOpts.Bool = 1;
28 LangOpts.ObjC = 1;
29 LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
30 LangOpts.DeclSpecKeyword = 1; // To get __declspec.
31 LangOpts.WChar = 1; // To get wchar_t
32 return LangOpts;
33}
34
35// Returns the offset after skipping a sequence of tokens, matched by \p
36// GetOffsetAfterSequence, from the start of the code.
37// \p GetOffsetAfterSequence should be a function that matches a sequence of
38// tokens and returns an offset after the sequence.
39unsigned getOffsetAfterTokenSequence(
40 StringRef FileName, StringRef Code, const IncludeStyle &Style,
41 llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)>
42 GetOffsetAfterSequence) {
43 SourceManagerForFile VirtualSM(FileName, Code);
44 SourceManager &SM = VirtualSM.get();
45 LangOptions LangOpts = createLangOpts();
46 Lexer Lex(SM.getMainFileID(), SM.getBufferOrFake(FID: SM.getMainFileID()), SM,
47 LangOpts);
48 Token Tok;
49 // Get the first token.
50 Lex.LexFromRawLexer(Result&: Tok);
51 return GetOffsetAfterSequence(SM, Lex, Tok);
52}
53
54// Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
55// \p Tok will be the token after this directive; otherwise, it can be any token
56// after the given \p Tok (including \p Tok). If \p RawIDName is provided, the
57// (second) raw_identifier name is checked.
58bool checkAndConsumeDirectiveWithName(
59 Lexer &Lex, StringRef Name, Token &Tok,
60 std::optional<StringRef> RawIDName = std::nullopt) {
61 bool Matched = Tok.is(K: tok::hash) && !Lex.LexFromRawLexer(Result&: Tok) &&
62 Tok.is(K: tok::raw_identifier) &&
63 Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Result&: Tok) &&
64 Tok.is(K: tok::raw_identifier) &&
65 (!RawIDName || Tok.getRawIdentifier() == *RawIDName);
66 if (Matched)
67 Lex.LexFromRawLexer(Result&: Tok);
68 return Matched;
69}
70
71void skipComments(Lexer &Lex, Token &Tok) {
72 while (Tok.is(K: tok::comment))
73 if (Lex.LexFromRawLexer(Result&: Tok))
74 return;
75}
76
77// Returns the offset after header guard directives and any comments
78// before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no
79// header guard is present in the code, this will return the offset after
80// skipping all comments from the start of the code.
81unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
82 StringRef Code,
83 const IncludeStyle &Style) {
84 // \p Consume returns location after header guard or 0 if no header guard is
85 // found.
86 auto ConsumeHeaderGuardAndComment =
87 [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex,
88 Token Tok)>
89 Consume) {
90 return getOffsetAfterTokenSequence(
91 FileName, Code, Style,
92 GetOffsetAfterSequence: [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) {
93 skipComments(Lex, Tok);
94 unsigned InitialOffset = SM.getFileOffset(SpellingLoc: Tok.getLocation());
95 return std::max(a: InitialOffset, b: Consume(SM, Lex, Tok));
96 });
97 };
98 return std::max(
99 // #ifndef/#define
100 a: ConsumeHeaderGuardAndComment(
101 [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
102 if (checkAndConsumeDirectiveWithName(Lex, Name: "ifndef", Tok)) {
103 skipComments(Lex, Tok);
104 if (checkAndConsumeDirectiveWithName(Lex, Name: "define", Tok) &&
105 Tok.isAtStartOfLine())
106 return SM.getFileOffset(SpellingLoc: Tok.getLocation());
107 }
108 return 0;
109 }),
110 // #pragma once
111 b: ConsumeHeaderGuardAndComment(
112 [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
113 if (checkAndConsumeDirectiveWithName(Lex, Name: "pragma", Tok,
114 RawIDName: StringRef("once")))
115 return SM.getFileOffset(SpellingLoc: Tok.getLocation());
116 return 0;
117 }));
118}
119
120// Check if a sequence of tokens is like
121// "#include ("header.h" | <header.h>)".
122// If it is, \p Tok will be the token after this directive; otherwise, it can be
123// any token after the given \p Tok (including \p Tok).
124bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) {
125 auto Matched = [&]() {
126 Lex.LexFromRawLexer(Result&: Tok);
127 return true;
128 };
129 if (Tok.is(K: tok::hash) && !Lex.LexFromRawLexer(Result&: Tok) &&
130 Tok.is(K: tok::raw_identifier) && Tok.getRawIdentifier() == "include") {
131 if (Lex.LexFromRawLexer(Result&: Tok))
132 return false;
133 if (Tok.is(K: tok::string_literal))
134 return Matched();
135 if (Tok.is(K: tok::less)) {
136 while (!Lex.LexFromRawLexer(Result&: Tok) && Tok.isNot(K: tok::greater)) {
137 }
138 if (Tok.is(K: tok::greater))
139 return Matched();
140 }
141 }
142 return false;
143}
144
145// Returns the offset of the last #include directive after which a new
146// #include can be inserted. This ignores #include's after the #include block(s)
147// in the beginning of a file to avoid inserting headers into code sections
148// where new #include's should not be added by default.
149// These code sections include:
150// - raw string literals (containing #include).
151// - #if blocks.
152// - Special #include's among declarations (e.g. functions).
153//
154// If no #include after which a new #include can be inserted, this returns the
155// offset after skipping all comments from the start of the code.
156// Inserting after an #include is not allowed if it comes after code that is not
157// #include (e.g. pre-processing directive that is not #include, declarations).
158unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
159 const IncludeStyle &Style) {
160 return getOffsetAfterTokenSequence(
161 FileName, Code, Style,
162 GetOffsetAfterSequence: [](const SourceManager &SM, Lexer &Lex, Token Tok) {
163 skipComments(Lex, Tok);
164 unsigned MaxOffset = SM.getFileOffset(SpellingLoc: Tok.getLocation());
165 while (checkAndConsumeInclusiveDirective(Lex, Tok))
166 MaxOffset = SM.getFileOffset(SpellingLoc: Tok.getLocation());
167 return MaxOffset;
168 });
169}
170
171inline StringRef trimInclude(StringRef IncludeName) {
172 return IncludeName.trim(Chars: "\"<>");
173}
174
// Pattern for a single #include / #import line.
//   group 1: the directive keyword ("import" or "include")
//   group 2: the header name including its surrounding quotes or brackets
constexpr char IncludeRegexPattern[] =
    R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
177
178// The filename of Path excluding extension.
179// Used to match implementation with headers, this differs from sys::path::stem:
180// - in names with multiple dots (foo.cu.cc) it terminates at the *first*
181// - an empty stem is never returned: /foo/.bar.x => .bar
182// - we don't bother to handle . and .. specially
183StringRef matchingStem(llvm::StringRef Path) {
184 StringRef Name = llvm::sys::path::filename(path: Path);
185 return Name.substr(Start: 0, N: Name.find(C: '.', From: 1));
186}
187
188} // anonymous namespace
189
190IncludeCategoryManager::IncludeCategoryManager(const IncludeStyle &Style,
191 StringRef FileName)
192 : Style(Style), FileName(FileName) {
193 for (const auto &Category : Style.IncludeCategories) {
194 CategoryRegexs.emplace_back(Args: Category.Regex, Args: Category.RegexIsCaseSensitive
195 ? llvm::Regex::NoFlags
196 : llvm::Regex::IgnoreCase);
197 }
198 IsMainFile = FileName.ends_with(Suffix: ".c") || FileName.ends_with(Suffix: ".cc") ||
199 FileName.ends_with(Suffix: ".cpp") || FileName.ends_with(Suffix: ".c++") ||
200 FileName.ends_with(Suffix: ".cxx") || FileName.ends_with(Suffix: ".m") ||
201 FileName.ends_with(Suffix: ".mm");
202 if (!Style.IncludeIsMainSourceRegex.empty()) {
203 llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex);
204 IsMainFile |= MainFileRegex.match(String: FileName);
205 }
206}
207
208int IncludeCategoryManager::getIncludePriority(StringRef IncludeName,
209 bool CheckMainHeader) const {
210 int Ret = INT_MAX;
211 for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
212 if (CategoryRegexs[i].match(String: IncludeName)) {
213 Ret = Style.IncludeCategories[i].Priority;
214 break;
215 }
216 if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
217 Ret = 0;
218 return Ret;
219}
220
221int IncludeCategoryManager::getSortIncludePriority(StringRef IncludeName,
222 bool CheckMainHeader) const {
223 int Ret = INT_MAX;
224 for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
225 if (CategoryRegexs[i].match(String: IncludeName)) {
226 Ret = Style.IncludeCategories[i].SortPriority;
227 if (Ret == 0)
228 Ret = Style.IncludeCategories[i].Priority;
229 break;
230 }
231 if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
232 Ret = 0;
233 return Ret;
234}
235bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const {
236 switch (Style.MainIncludeChar) {
237 case IncludeStyle::MICD_Quote:
238 if (!IncludeName.starts_with(Prefix: "\""))
239 return false;
240 break;
241 case IncludeStyle::MICD_AngleBracket:
242 if (!IncludeName.starts_with(Prefix: "<"))
243 return false;
244 break;
245 case IncludeStyle::MICD_Any:
246 break;
247 }
248
249 IncludeName =
250 IncludeName.drop_front(N: 1).drop_back(N: 1); // remove the surrounding "" or <>
251 // Not matchingStem: implementation files may have compound extensions but
252 // headers may not.
253 StringRef HeaderStem = llvm::sys::path::stem(path: IncludeName);
254 StringRef FileStem = llvm::sys::path::stem(path: FileName); // foo.cu for foo.cu.cc
255 StringRef MatchingFileStem = matchingStem(Path: FileName); // foo for foo.cu.cc
256 // main-header examples:
257 // 1) foo.h => foo.cc
258 // 2) foo.h => foo.cu.cc
259 // 3) foo.proto.h => foo.proto.cc
260 //
261 // non-main-header examples:
262 // 1) foo.h => bar.cc
263 // 2) foo.proto.h => foo.cc
264 StringRef Matching;
265 if (MatchingFileStem.starts_with_insensitive(Prefix: HeaderStem))
266 Matching = MatchingFileStem; // example 1), 2)
267 else if (FileStem.equals_insensitive(RHS: HeaderStem))
268 Matching = FileStem; // example 3)
269 if (!Matching.empty()) {
270 llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex,
271 llvm::Regex::IgnoreCase);
272 if (MainIncludeRegex.match(String: Matching))
273 return true;
274 }
275 return false;
276}
277
278const llvm::Regex HeaderIncludes::IncludeRegex(IncludeRegexPattern);
279
280HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
281 const IncludeStyle &Style)
282 : FileName(FileName), Code(Code), FirstIncludeOffset(-1),
283 MinInsertOffset(
284 getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)),
285 MaxInsertOffset(MinInsertOffset +
286 getMaxHeaderInsertionOffset(
287 FileName, Code: Code.drop_front(N: MinInsertOffset), Style)),
288 MainIncludeFound(false),
289 Categories(Style, FileName) {
290 // Add 0 for main header and INT_MAX for headers that are not in any
291 // category.
292 Priorities = {0, INT_MAX};
293 for (const auto &Category : Style.IncludeCategories)
294 Priorities.insert(x: Category.Priority);
295 SmallVector<StringRef, 32> Lines;
296 Code.drop_front(N: MinInsertOffset).split(A&: Lines, Separator: "\n");
297
298 unsigned Offset = MinInsertOffset;
299 unsigned NextLineOffset;
300 SmallVector<StringRef, 4> Matches;
301 for (auto Line : Lines) {
302 NextLineOffset = std::min(a: Code.size(), b: Offset + Line.size() + 1);
303 if (IncludeRegex.match(String: Line, Matches: &Matches)) {
304 // If this is the last line without trailing newline, we need to make
305 // sure we don't delete across the file boundary.
306 addExistingInclude(
307 IncludeToAdd: Include(Matches[2],
308 tooling::Range(
309 Offset, std::min(a: Line.size() + 1, b: Code.size() - Offset)),
310 Matches[1] == "import" ? tooling::IncludeDirective::Import
311 : tooling::IncludeDirective::Include),
312 NextLineOffset);
313 }
314 Offset = NextLineOffset;
315 }
316
317 // Populate CategoryEndOfssets:
318 // - Ensure that CategoryEndOffset[Highest] is always populated.
319 // - If CategoryEndOffset[Priority] isn't set, use the next higher value
320 // that is set, up to CategoryEndOffset[Highest].
321 auto Highest = Priorities.begin();
322 auto [It, Inserted] = CategoryEndOffsets.try_emplace(k: *Highest);
323 if (Inserted)
324 It->second = FirstIncludeOffset >= 0 ? FirstIncludeOffset : MinInsertOffset;
325 // By this point, CategoryEndOffset[Highest] is always set appropriately:
326 // - to an appropriate location before/after existing #includes, or
327 // - to right after the header guard, or
328 // - to the beginning of the file.
329 for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
330 if (CategoryEndOffsets.find(x: *I) == CategoryEndOffsets.end())
331 CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(x: I)];
332}
333
334// \p Offset: the start of the line following this include directive.
335void HeaderIncludes::addExistingInclude(Include IncludeToAdd,
336 unsigned NextLineOffset) {
337 auto &Incs = ExistingIncludes[trimInclude(IncludeName: IncludeToAdd.Name)];
338 Incs.push_back(x: std::move(IncludeToAdd));
339 auto &CurInclude = Incs.back();
340 // The header name with quotes or angle brackets.
341 // Only record the offset of current #include if we can insert after it.
342 if (CurInclude.R.getOffset() <= MaxInsertOffset) {
343 int Priority = Categories.getIncludePriority(
344 IncludeName: CurInclude.Name, /*CheckMainHeader=*/!MainIncludeFound);
345 if (Priority == 0)
346 MainIncludeFound = true;
347 CategoryEndOffsets[Priority] = NextLineOffset;
348 IncludesByPriority[Priority].push_back(Elt: &CurInclude);
349 if (FirstIncludeOffset < 0)
350 FirstIncludeOffset = CurInclude.R.getOffset();
351 }
352}
353
354std::optional<tooling::Replacement>
355HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled,
356 IncludeDirective Directive) const {
357 assert(IncludeName == trimInclude(IncludeName));
358 // If a <header> ("header") already exists in code, "header" (<header>) with
359 // different quotation and/or directive will still be inserted.
360 // FIXME: figure out if this is the best behavior.
361 auto It = ExistingIncludes.find(Key: IncludeName);
362 if (It != ExistingIncludes.end()) {
363 for (const auto &Inc : It->second)
364 if (Inc.Directive == Directive &&
365 ((IsAngled && StringRef(Inc.Name).starts_with(Prefix: "<")) ||
366 (!IsAngled && StringRef(Inc.Name).starts_with(Prefix: "\""))))
367 return std::nullopt;
368 }
369 std::string Quoted =
370 std::string(llvm::formatv(Fmt: IsAngled ? "<{0}>" : "\"{0}\"", Vals&: IncludeName));
371 StringRef QuotedName = Quoted;
372 int Priority = Categories.getIncludePriority(
373 IncludeName: QuotedName, /*CheckMainHeader=*/!MainIncludeFound);
374 auto CatOffset = CategoryEndOffsets.find(x: Priority);
375 assert(CatOffset != CategoryEndOffsets.end());
376 unsigned InsertOffset = CatOffset->second; // Fall back offset
377 auto Iter = IncludesByPriority.find(x: Priority);
378 if (Iter != IncludesByPriority.end()) {
379 for (const auto *Inc : Iter->second) {
380 if (QuotedName < Inc->Name) {
381 InsertOffset = Inc->R.getOffset();
382 break;
383 }
384 }
385 }
386 assert(InsertOffset <= Code.size());
387 llvm::StringRef DirectiveSpelling =
388 Directive == IncludeDirective::Include ? "include" : "import";
389 std::string NewInclude =
390 llvm::formatv(Fmt: "#{0} {1}\n", Vals&: DirectiveSpelling, Vals&: QuotedName);
391 // When inserting headers at end of the code, also append '\n' to the code
392 // if it does not end with '\n'.
393 // FIXME: when inserting multiple #includes at the end of code, only one
394 // newline should be added.
395 if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n'))
396 NewInclude = "\n" + NewInclude;
397 return tooling::Replacement(FileName, InsertOffset, 0, NewInclude);
398}
399
400tooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName,
401 bool IsAngled) const {
402 assert(IncludeName == trimInclude(IncludeName));
403 tooling::Replacements Result;
404 auto Iter = ExistingIncludes.find(Key: IncludeName);
405 if (Iter == ExistingIncludes.end())
406 return Result;
407 for (const auto &Inc : Iter->second) {
408 if ((IsAngled && StringRef(Inc.Name).starts_with(Prefix: "\"")) ||
409 (!IsAngled && StringRef(Inc.Name).starts_with(Prefix: "<")))
410 continue;
411 llvm::Error Err = Result.add(R: tooling::Replacement(
412 FileName, Inc.R.getOffset(), Inc.R.getLength(), ""));
413 if (Err) {
414 auto ErrMsg = "Unexpected conflicts in #include deletions: " +
415 llvm::toString(E: std::move(Err));
416 llvm_unreachable(ErrMsg.c_str());
417 }
418 }
419 return Result;
420}
421
422} // namespace tooling
423} // namespace clang
424

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of clang/lib/Tooling/Inclusions/HeaderIncludes.cpp