1 | //===--- HeaderIncludes.cpp - Insert/Delete #includes --*- C++ -*----------===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang/Tooling/Inclusions/HeaderIncludes.h" |
10 | #include "clang/Basic/SourceManager.h" |
11 | #include "clang/Lex/Lexer.h" |
12 | #include "llvm/Support/FormatVariadic.h" |
13 | #include "llvm/Support/Path.h" |
14 | #include <optional> |
15 | |
16 | namespace clang { |
17 | namespace tooling { |
18 | namespace { |
19 | |
20 | LangOptions createLangOpts() { |
21 | LangOptions LangOpts; |
22 | LangOpts.CPlusPlus = 1; |
23 | LangOpts.CPlusPlus11 = 1; |
24 | LangOpts.CPlusPlus14 = 1; |
25 | LangOpts.LineComment = 1; |
26 | LangOpts.CXXOperatorNames = 1; |
27 | LangOpts.Bool = 1; |
28 | LangOpts.ObjC = 1; |
29 | LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. |
30 | LangOpts.DeclSpecKeyword = 1; // To get __declspec. |
31 | LangOpts.WChar = 1; // To get wchar_t |
32 | return LangOpts; |
33 | } |
34 | |
35 | // Returns the offset after skipping a sequence of tokens, matched by \p |
36 | // GetOffsetAfterSequence, from the start of the code. |
37 | // \p GetOffsetAfterSequence should be a function that matches a sequence of |
38 | // tokens and returns an offset after the sequence. |
39 | unsigned getOffsetAfterTokenSequence( |
40 | StringRef FileName, StringRef Code, const IncludeStyle &Style, |
41 | llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)> |
42 | GetOffsetAfterSequence) { |
43 | SourceManagerForFile VirtualSM(FileName, Code); |
44 | SourceManager &SM = VirtualSM.get(); |
45 | LangOptions LangOpts = createLangOpts(); |
46 | Lexer Lex(SM.getMainFileID(), SM.getBufferOrFake(FID: SM.getMainFileID()), SM, |
47 | LangOpts); |
48 | Token Tok; |
49 | // Get the first token. |
50 | Lex.LexFromRawLexer(Result&: Tok); |
51 | return GetOffsetAfterSequence(SM, Lex, Tok); |
52 | } |
53 | |
54 | // Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is, |
55 | // \p Tok will be the token after this directive; otherwise, it can be any token |
56 | // after the given \p Tok (including \p Tok). If \p RawIDName is provided, the |
57 | // (second) raw_identifier name is checked. |
58 | bool checkAndConsumeDirectiveWithName( |
59 | Lexer &Lex, StringRef Name, Token &Tok, |
60 | std::optional<StringRef> RawIDName = std::nullopt) { |
61 | bool Matched = Tok.is(K: tok::hash) && !Lex.LexFromRawLexer(Result&: Tok) && |
62 | Tok.is(K: tok::raw_identifier) && |
63 | Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Result&: Tok) && |
64 | Tok.is(K: tok::raw_identifier) && |
65 | (!RawIDName || Tok.getRawIdentifier() == *RawIDName); |
66 | if (Matched) |
67 | Lex.LexFromRawLexer(Result&: Tok); |
68 | return Matched; |
69 | } |
70 | |
71 | void skipComments(Lexer &Lex, Token &Tok) { |
72 | while (Tok.is(K: tok::comment)) |
73 | if (Lex.LexFromRawLexer(Result&: Tok)) |
74 | return; |
75 | } |
76 | |
77 | // Returns the offset after header guard directives and any comments |
78 | // before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no |
79 | // header guard is present in the code, this will return the offset after |
80 | // skipping all comments from the start of the code. |
81 | unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, |
82 | StringRef Code, |
83 | const IncludeStyle &Style) { |
84 | // \p Consume returns location after header guard or 0 if no header guard is |
85 | // found. |
86 | auto ConsumeHeaderGuardAndComment = |
87 | [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex, |
88 | Token Tok)> |
89 | Consume) { |
90 | return getOffsetAfterTokenSequence( |
91 | FileName, Code, Style, |
92 | GetOffsetAfterSequence: [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) { |
93 | skipComments(Lex, Tok); |
94 | unsigned InitialOffset = SM.getFileOffset(SpellingLoc: Tok.getLocation()); |
95 | return std::max(a: InitialOffset, b: Consume(SM, Lex, Tok)); |
96 | }); |
97 | }; |
98 | return std::max( |
99 | // #ifndef/#define |
100 | a: ConsumeHeaderGuardAndComment( |
101 | [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { |
102 | if (checkAndConsumeDirectiveWithName(Lex, Name: "ifndef", Tok)) { |
103 | skipComments(Lex, Tok); |
104 | if (checkAndConsumeDirectiveWithName(Lex, Name: "define", Tok) && |
105 | Tok.isAtStartOfLine()) |
106 | return SM.getFileOffset(SpellingLoc: Tok.getLocation()); |
107 | } |
108 | return 0; |
109 | }), |
110 | // #pragma once |
111 | b: ConsumeHeaderGuardAndComment( |
112 | [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { |
113 | if (checkAndConsumeDirectiveWithName(Lex, Name: "pragma", Tok, |
114 | RawIDName: StringRef("once"))) |
115 | return SM.getFileOffset(SpellingLoc: Tok.getLocation()); |
116 | return 0; |
117 | })); |
118 | } |
119 | |
120 | // Check if a sequence of tokens is like |
121 | // "#include ("header.h" | <header.h>)". |
122 | // If it is, \p Tok will be the token after this directive; otherwise, it can be |
123 | // any token after the given \p Tok (including \p Tok). |
124 | bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) { |
125 | auto Matched = [&]() { |
126 | Lex.LexFromRawLexer(Result&: Tok); |
127 | return true; |
128 | }; |
129 | if (Tok.is(K: tok::hash) && !Lex.LexFromRawLexer(Result&: Tok) && |
130 | Tok.is(K: tok::raw_identifier) && Tok.getRawIdentifier() == "include") { |
131 | if (Lex.LexFromRawLexer(Result&: Tok)) |
132 | return false; |
133 | if (Tok.is(K: tok::string_literal)) |
134 | return Matched(); |
135 | if (Tok.is(K: tok::less)) { |
136 | while (!Lex.LexFromRawLexer(Result&: Tok) && Tok.isNot(K: tok::greater)) { |
137 | } |
138 | if (Tok.is(K: tok::greater)) |
139 | return Matched(); |
140 | } |
141 | } |
142 | return false; |
143 | } |
144 | |
145 | // Returns the offset of the last #include directive after which a new |
146 | // #include can be inserted. This ignores #include's after the #include block(s) |
147 | // in the beginning of a file to avoid inserting headers into code sections |
148 | // where new #include's should not be added by default. |
149 | // These code sections include: |
150 | // - raw string literals (containing #include). |
151 | // - #if blocks. |
152 | // - Special #include's among declarations (e.g. functions). |
153 | // |
154 | // If no #include after which a new #include can be inserted, this returns the |
155 | // offset after skipping all comments from the start of the code. |
156 | // Inserting after an #include is not allowed if it comes after code that is not |
157 | // #include (e.g. pre-processing directive that is not #include, declarations). |
158 | unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code, |
159 | const IncludeStyle &Style) { |
160 | return getOffsetAfterTokenSequence( |
161 | FileName, Code, Style, |
162 | GetOffsetAfterSequence: [](const SourceManager &SM, Lexer &Lex, Token Tok) { |
163 | skipComments(Lex, Tok); |
164 | unsigned MaxOffset = SM.getFileOffset(SpellingLoc: Tok.getLocation()); |
165 | while (checkAndConsumeInclusiveDirective(Lex, Tok)) |
166 | MaxOffset = SM.getFileOffset(SpellingLoc: Tok.getLocation()); |
167 | return MaxOffset; |
168 | }); |
169 | } |
170 | |
171 | inline StringRef trimInclude(StringRef IncludeName) { |
172 | return IncludeName.trim(Chars: "\"<>"); |
173 | } |
174 | |
// Matches an #include or #import line. Capture group 1 is the directive
// keyword ("import" or "include"); capture group 2 is the header name together
// with its surrounding quotes or angle brackets.
const char IncludeRegexPattern[] =
    R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
177 | |
178 | // The filename of Path excluding extension. |
179 | // Used to match implementation with headers, this differs from sys::path::stem: |
180 | // - in names with multiple dots (foo.cu.cc) it terminates at the *first* |
181 | // - an empty stem is never returned: /foo/.bar.x => .bar |
182 | // - we don't bother to handle . and .. specially |
183 | StringRef matchingStem(llvm::StringRef Path) { |
184 | StringRef Name = llvm::sys::path::filename(path: Path); |
185 | return Name.substr(Start: 0, N: Name.find(C: '.', From: 1)); |
186 | } |
187 | |
188 | } // anonymous namespace |
189 | |
190 | IncludeCategoryManager::IncludeCategoryManager(const IncludeStyle &Style, |
191 | StringRef FileName) |
192 | : Style(Style), FileName(FileName) { |
193 | for (const auto &Category : Style.IncludeCategories) { |
194 | CategoryRegexs.emplace_back(Args: Category.Regex, Args: Category.RegexIsCaseSensitive |
195 | ? llvm::Regex::NoFlags |
196 | : llvm::Regex::IgnoreCase); |
197 | } |
198 | IsMainFile = FileName.ends_with(Suffix: ".c") || FileName.ends_with(Suffix: ".cc") || |
199 | FileName.ends_with(Suffix: ".cpp") || FileName.ends_with(Suffix: ".c++") || |
200 | FileName.ends_with(Suffix: ".cxx") || FileName.ends_with(Suffix: ".m") || |
201 | FileName.ends_with(Suffix: ".mm"); |
202 | if (!Style.IncludeIsMainSourceRegex.empty()) { |
203 | llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex); |
204 | IsMainFile |= MainFileRegex.match(String: FileName); |
205 | } |
206 | } |
207 | |
208 | int IncludeCategoryManager::getIncludePriority(StringRef IncludeName, |
209 | bool CheckMainHeader) const { |
210 | int Ret = INT_MAX; |
211 | for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) |
212 | if (CategoryRegexs[i].match(String: IncludeName)) { |
213 | Ret = Style.IncludeCategories[i].Priority; |
214 | break; |
215 | } |
216 | if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName)) |
217 | Ret = 0; |
218 | return Ret; |
219 | } |
220 | |
221 | int IncludeCategoryManager::getSortIncludePriority(StringRef IncludeName, |
222 | bool CheckMainHeader) const { |
223 | int Ret = INT_MAX; |
224 | for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) |
225 | if (CategoryRegexs[i].match(String: IncludeName)) { |
226 | Ret = Style.IncludeCategories[i].SortPriority; |
227 | if (Ret == 0) |
228 | Ret = Style.IncludeCategories[i].Priority; |
229 | break; |
230 | } |
231 | if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName)) |
232 | Ret = 0; |
233 | return Ret; |
234 | } |
235 | bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const { |
236 | switch (Style.MainIncludeChar) { |
237 | case IncludeStyle::MICD_Quote: |
238 | if (!IncludeName.starts_with(Prefix: "\"")) |
239 | return false; |
240 | break; |
241 | case IncludeStyle::MICD_AngleBracket: |
242 | if (!IncludeName.starts_with(Prefix: "<")) |
243 | return false; |
244 | break; |
245 | case IncludeStyle::MICD_Any: |
246 | break; |
247 | } |
248 | |
249 | IncludeName = |
250 | IncludeName.drop_front(N: 1).drop_back(N: 1); // remove the surrounding "" or <> |
251 | // Not matchingStem: implementation files may have compound extensions but |
252 | // headers may not. |
253 | StringRef HeaderStem = llvm::sys::path::stem(path: IncludeName); |
254 | StringRef FileStem = llvm::sys::path::stem(path: FileName); // foo.cu for foo.cu.cc |
255 | StringRef MatchingFileStem = matchingStem(Path: FileName); // foo for foo.cu.cc |
256 | // main-header examples: |
257 | // 1) foo.h => foo.cc |
258 | // 2) foo.h => foo.cu.cc |
259 | // 3) foo.proto.h => foo.proto.cc |
260 | // |
261 | // non-main-header examples: |
262 | // 1) foo.h => bar.cc |
263 | // 2) foo.proto.h => foo.cc |
264 | StringRef Matching; |
265 | if (MatchingFileStem.starts_with_insensitive(Prefix: HeaderStem)) |
266 | Matching = MatchingFileStem; // example 1), 2) |
267 | else if (FileStem.equals_insensitive(RHS: HeaderStem)) |
268 | Matching = FileStem; // example 3) |
269 | if (!Matching.empty()) { |
270 | llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex, |
271 | llvm::Regex::IgnoreCase); |
272 | if (MainIncludeRegex.match(String: Matching)) |
273 | return true; |
274 | } |
275 | return false; |
276 | } |
277 | |
278 | const llvm::Regex HeaderIncludes::IncludeRegex(IncludeRegexPattern); |
279 | |
280 | HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code, |
281 | const IncludeStyle &Style) |
282 | : FileName(FileName), Code(Code), FirstIncludeOffset(-1), |
283 | MinInsertOffset( |
284 | getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)), |
285 | MaxInsertOffset(MinInsertOffset + |
286 | getMaxHeaderInsertionOffset( |
287 | FileName, Code: Code.drop_front(N: MinInsertOffset), Style)), |
288 | MainIncludeFound(false), |
289 | Categories(Style, FileName) { |
290 | // Add 0 for main header and INT_MAX for headers that are not in any |
291 | // category. |
292 | Priorities = {0, INT_MAX}; |
293 | for (const auto &Category : Style.IncludeCategories) |
294 | Priorities.insert(x: Category.Priority); |
295 | SmallVector<StringRef, 32> Lines; |
296 | Code.drop_front(N: MinInsertOffset).split(A&: Lines, Separator: "\n"); |
297 | |
298 | unsigned Offset = MinInsertOffset; |
299 | unsigned NextLineOffset; |
300 | SmallVector<StringRef, 4> Matches; |
301 | for (auto Line : Lines) { |
302 | NextLineOffset = std::min(a: Code.size(), b: Offset + Line.size() + 1); |
303 | if (IncludeRegex.match(String: Line, Matches: &Matches)) { |
304 | // If this is the last line without trailing newline, we need to make |
305 | // sure we don't delete across the file boundary. |
306 | addExistingInclude( |
307 | IncludeToAdd: Include(Matches[2], |
308 | tooling::Range( |
309 | Offset, std::min(a: Line.size() + 1, b: Code.size() - Offset)), |
310 | Matches[1] == "import"? tooling::IncludeDirective::Import |
311 | : tooling::IncludeDirective::Include), |
312 | NextLineOffset); |
313 | } |
314 | Offset = NextLineOffset; |
315 | } |
316 | |
317 | // Populate CategoryEndOfssets: |
318 | // - Ensure that CategoryEndOffset[Highest] is always populated. |
319 | // - If CategoryEndOffset[Priority] isn't set, use the next higher value |
320 | // that is set, up to CategoryEndOffset[Highest]. |
321 | auto Highest = Priorities.begin(); |
322 | auto [It, Inserted] = CategoryEndOffsets.try_emplace(k: *Highest); |
323 | if (Inserted) |
324 | It->second = FirstIncludeOffset >= 0 ? FirstIncludeOffset : MinInsertOffset; |
325 | // By this point, CategoryEndOffset[Highest] is always set appropriately: |
326 | // - to an appropriate location before/after existing #includes, or |
327 | // - to right after the header guard, or |
328 | // - to the beginning of the file. |
329 | for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I) |
330 | if (CategoryEndOffsets.find(x: *I) == CategoryEndOffsets.end()) |
331 | CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(x: I)]; |
332 | } |
333 | |
334 | // \p Offset: the start of the line following this include directive. |
335 | void HeaderIncludes::addExistingInclude(Include IncludeToAdd, |
336 | unsigned NextLineOffset) { |
337 | auto &Incs = ExistingIncludes[trimInclude(IncludeName: IncludeToAdd.Name)]; |
338 | Incs.push_back(x: std::move(IncludeToAdd)); |
339 | auto &CurInclude = Incs.back(); |
340 | // The header name with quotes or angle brackets. |
341 | // Only record the offset of current #include if we can insert after it. |
342 | if (CurInclude.R.getOffset() <= MaxInsertOffset) { |
343 | int Priority = Categories.getIncludePriority( |
344 | IncludeName: CurInclude.Name, /*CheckMainHeader=*/!MainIncludeFound); |
345 | if (Priority == 0) |
346 | MainIncludeFound = true; |
347 | CategoryEndOffsets[Priority] = NextLineOffset; |
348 | IncludesByPriority[Priority].push_back(Elt: &CurInclude); |
349 | if (FirstIncludeOffset < 0) |
350 | FirstIncludeOffset = CurInclude.R.getOffset(); |
351 | } |
352 | } |
353 | |
354 | std::optional<tooling::Replacement> |
355 | HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled, |
356 | IncludeDirective Directive) const { |
357 | assert(IncludeName == trimInclude(IncludeName)); |
358 | // If a <header> ("header") already exists in code, "header" (<header>) with |
359 | // different quotation and/or directive will still be inserted. |
360 | // FIXME: figure out if this is the best behavior. |
361 | auto It = ExistingIncludes.find(Key: IncludeName); |
362 | if (It != ExistingIncludes.end()) { |
363 | for (const auto &Inc : It->second) |
364 | if (Inc.Directive == Directive && |
365 | ((IsAngled && StringRef(Inc.Name).starts_with(Prefix: "<")) || |
366 | (!IsAngled && StringRef(Inc.Name).starts_with(Prefix: "\"")))) |
367 | return std::nullopt; |
368 | } |
369 | std::string Quoted = |
370 | std::string(llvm::formatv(Fmt: IsAngled ? "<{0}>": "\"{0}\"", Vals&: IncludeName)); |
371 | StringRef QuotedName = Quoted; |
372 | int Priority = Categories.getIncludePriority( |
373 | IncludeName: QuotedName, /*CheckMainHeader=*/!MainIncludeFound); |
374 | auto CatOffset = CategoryEndOffsets.find(x: Priority); |
375 | assert(CatOffset != CategoryEndOffsets.end()); |
376 | unsigned InsertOffset = CatOffset->second; // Fall back offset |
377 | auto Iter = IncludesByPriority.find(x: Priority); |
378 | if (Iter != IncludesByPriority.end()) { |
379 | for (const auto *Inc : Iter->second) { |
380 | if (QuotedName < Inc->Name) { |
381 | InsertOffset = Inc->R.getOffset(); |
382 | break; |
383 | } |
384 | } |
385 | } |
386 | assert(InsertOffset <= Code.size()); |
387 | llvm::StringRef DirectiveSpelling = |
388 | Directive == IncludeDirective::Include ? "include": "import"; |
389 | std::string NewInclude = |
390 | llvm::formatv(Fmt: "#{0} {1}\n", Vals&: DirectiveSpelling, Vals&: QuotedName); |
391 | // When inserting headers at end of the code, also append '\n' to the code |
392 | // if it does not end with '\n'. |
393 | // FIXME: when inserting multiple #includes at the end of code, only one |
394 | // newline should be added. |
395 | if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n')) |
396 | NewInclude = "\n"+ NewInclude; |
397 | return tooling::Replacement(FileName, InsertOffset, 0, NewInclude); |
398 | } |
399 | |
400 | tooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName, |
401 | bool IsAngled) const { |
402 | assert(IncludeName == trimInclude(IncludeName)); |
403 | tooling::Replacements Result; |
404 | auto Iter = ExistingIncludes.find(Key: IncludeName); |
405 | if (Iter == ExistingIncludes.end()) |
406 | return Result; |
407 | for (const auto &Inc : Iter->second) { |
408 | if ((IsAngled && StringRef(Inc.Name).starts_with(Prefix: "\"")) || |
409 | (!IsAngled && StringRef(Inc.Name).starts_with(Prefix: "<"))) |
410 | continue; |
411 | llvm::Error Err = Result.add(R: tooling::Replacement( |
412 | FileName, Inc.R.getOffset(), Inc.R.getLength(), "")); |
413 | if (Err) { |
414 | auto ErrMsg = "Unexpected conflicts in #include deletions: "+ |
415 | llvm::toString(E: std::move(Err)); |
416 | llvm_unreachable(ErrMsg.c_str()); |
417 | } |
418 | } |
419 | return Result; |
420 | } |
421 | |
422 | } // namespace tooling |
423 | } // namespace clang |
424 |
Definitions
- createLangOpts
- getOffsetAfterTokenSequence
- checkAndConsumeDirectiveWithName
- skipComments
- getOffsetAfterHeaderGuardsAndComments
- checkAndConsumeInclusiveDirective
- getMaxHeaderInsertionOffset
- trimInclude
- IncludeRegexPattern
- matchingStem
- IncludeCategoryManager
- getIncludePriority
- getSortIncludePriority
- isMainHeader
- IncludeRegex
- HeaderIncludes
- addExistingInclude
- insert
Learn to use CMake with our Intro Training
Find out more