1 | //===--- HeaderIncludes.cpp - Insert/Delete #includes --*- C++ -*----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang/Tooling/Inclusions/HeaderIncludes.h" |
10 | #include "clang/Basic/FileManager.h" |
11 | #include "clang/Basic/SourceManager.h" |
12 | #include "clang/Lex/Lexer.h" |
13 | #include "llvm/Support/FormatVariadic.h" |
14 | #include "llvm/Support/Path.h" |
15 | #include <optional> |
16 | |
17 | namespace clang { |
18 | namespace tooling { |
19 | namespace { |
20 | |
21 | LangOptions createLangOpts() { |
22 | LangOptions LangOpts; |
23 | LangOpts.CPlusPlus = 1; |
24 | LangOpts.CPlusPlus11 = 1; |
25 | LangOpts.CPlusPlus14 = 1; |
26 | LangOpts.LineComment = 1; |
27 | LangOpts.CXXOperatorNames = 1; |
28 | LangOpts.Bool = 1; |
29 | LangOpts.ObjC = 1; |
30 | LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. |
31 | LangOpts.DeclSpecKeyword = 1; // To get __declspec. |
32 | LangOpts.WChar = 1; // To get wchar_t |
33 | return LangOpts; |
34 | } |
35 | |
36 | // Returns the offset after skipping a sequence of tokens, matched by \p |
37 | // GetOffsetAfterSequence, from the start of the code. |
38 | // \p GetOffsetAfterSequence should be a function that matches a sequence of |
39 | // tokens and returns an offset after the sequence. |
40 | unsigned getOffsetAfterTokenSequence( |
41 | StringRef FileName, StringRef Code, const IncludeStyle &Style, |
42 | llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)> |
43 | GetOffsetAfterSequence) { |
44 | SourceManagerForFile VirtualSM(FileName, Code); |
45 | SourceManager &SM = VirtualSM.get(); |
46 | LangOptions LangOpts = createLangOpts(); |
47 | Lexer Lex(SM.getMainFileID(), SM.getBufferOrFake(FID: SM.getMainFileID()), SM, |
48 | LangOpts); |
49 | Token Tok; |
50 | // Get the first token. |
51 | Lex.LexFromRawLexer(Result&: Tok); |
52 | return GetOffsetAfterSequence(SM, Lex, Tok); |
53 | } |
54 | |
55 | // Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is, |
56 | // \p Tok will be the token after this directive; otherwise, it can be any token |
57 | // after the given \p Tok (including \p Tok). If \p RawIDName is provided, the |
58 | // (second) raw_identifier name is checked. |
59 | bool checkAndConsumeDirectiveWithName( |
60 | Lexer &Lex, StringRef Name, Token &Tok, |
61 | std::optional<StringRef> RawIDName = std::nullopt) { |
62 | bool Matched = Tok.is(K: tok::hash) && !Lex.LexFromRawLexer(Result&: Tok) && |
63 | Tok.is(K: tok::raw_identifier) && |
64 | Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Result&: Tok) && |
65 | Tok.is(K: tok::raw_identifier) && |
66 | (!RawIDName || Tok.getRawIdentifier() == *RawIDName); |
67 | if (Matched) |
68 | Lex.LexFromRawLexer(Result&: Tok); |
69 | return Matched; |
70 | } |
71 | |
72 | void (Lexer &Lex, Token &Tok) { |
73 | while (Tok.is(K: tok::comment)) |
74 | if (Lex.LexFromRawLexer(Result&: Tok)) |
75 | return; |
76 | } |
77 | |
78 | // Returns the offset after header guard directives and any comments |
79 | // before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no |
80 | // header guard is present in the code, this will return the offset after |
81 | // skipping all comments from the start of the code. |
82 | unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, |
83 | StringRef Code, |
84 | const IncludeStyle &Style) { |
85 | // \p Consume returns location after header guard or 0 if no header guard is |
86 | // found. |
87 | auto ConsumeHeaderGuardAndComment = |
88 | [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex, |
89 | Token Tok)> |
90 | Consume) { |
91 | return getOffsetAfterTokenSequence( |
92 | FileName, Code, Style, |
93 | GetOffsetAfterSequence: [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) { |
94 | skipComments(Lex, Tok); |
95 | unsigned InitialOffset = SM.getFileOffset(SpellingLoc: Tok.getLocation()); |
96 | return std::max(a: InitialOffset, b: Consume(SM, Lex, Tok)); |
97 | }); |
98 | }; |
99 | return std::max( |
100 | // #ifndef/#define |
101 | a: ConsumeHeaderGuardAndComment( |
102 | [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { |
103 | if (checkAndConsumeDirectiveWithName(Lex, Name: "ifndef" , Tok)) { |
104 | skipComments(Lex, Tok); |
105 | if (checkAndConsumeDirectiveWithName(Lex, Name: "define" , Tok) && |
106 | Tok.isAtStartOfLine()) |
107 | return SM.getFileOffset(SpellingLoc: Tok.getLocation()); |
108 | } |
109 | return 0; |
110 | }), |
111 | // #pragma once |
112 | b: ConsumeHeaderGuardAndComment( |
113 | [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { |
114 | if (checkAndConsumeDirectiveWithName(Lex, Name: "pragma" , Tok, |
115 | RawIDName: StringRef("once" ))) |
116 | return SM.getFileOffset(SpellingLoc: Tok.getLocation()); |
117 | return 0; |
118 | })); |
119 | } |
120 | |
121 | // Check if a sequence of tokens is like |
122 | // "#include ("header.h" | <header.h>)". |
123 | // If it is, \p Tok will be the token after this directive; otherwise, it can be |
124 | // any token after the given \p Tok (including \p Tok). |
125 | bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) { |
126 | auto Matched = [&]() { |
127 | Lex.LexFromRawLexer(Result&: Tok); |
128 | return true; |
129 | }; |
130 | if (Tok.is(K: tok::hash) && !Lex.LexFromRawLexer(Result&: Tok) && |
131 | Tok.is(K: tok::raw_identifier) && Tok.getRawIdentifier() == "include" ) { |
132 | if (Lex.LexFromRawLexer(Result&: Tok)) |
133 | return false; |
134 | if (Tok.is(K: tok::string_literal)) |
135 | return Matched(); |
136 | if (Tok.is(K: tok::less)) { |
137 | while (!Lex.LexFromRawLexer(Result&: Tok) && Tok.isNot(K: tok::greater)) { |
138 | } |
139 | if (Tok.is(K: tok::greater)) |
140 | return Matched(); |
141 | } |
142 | } |
143 | return false; |
144 | } |
145 | |
146 | // Returns the offset of the last #include directive after which a new |
147 | // #include can be inserted. This ignores #include's after the #include block(s) |
148 | // in the beginning of a file to avoid inserting headers into code sections |
149 | // where new #include's should not be added by default. |
150 | // These code sections include: |
151 | // - raw string literals (containing #include). |
152 | // - #if blocks. |
153 | // - Special #include's among declarations (e.g. functions). |
154 | // |
155 | // If no #include after which a new #include can be inserted, this returns the |
156 | // offset after skipping all comments from the start of the code. |
157 | // Inserting after an #include is not allowed if it comes after code that is not |
158 | // #include (e.g. pre-processing directive that is not #include, declarations). |
159 | unsigned (StringRef FileName, StringRef Code, |
160 | const IncludeStyle &Style) { |
161 | return getOffsetAfterTokenSequence( |
162 | FileName, Code, Style, |
163 | GetOffsetAfterSequence: [](const SourceManager &SM, Lexer &Lex, Token Tok) { |
164 | skipComments(Lex, Tok); |
165 | unsigned MaxOffset = SM.getFileOffset(SpellingLoc: Tok.getLocation()); |
166 | while (checkAndConsumeInclusiveDirective(Lex, Tok)) |
167 | MaxOffset = SM.getFileOffset(SpellingLoc: Tok.getLocation()); |
168 | return MaxOffset; |
169 | }); |
170 | } |
171 | |
172 | inline StringRef trimInclude(StringRef IncludeName) { |
173 | return IncludeName.trim(Chars: "\"<>" ); |
174 | } |
175 | |
176 | const char IncludeRegexPattern[] = |
177 | R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))" ; |
178 | |
179 | // The filename of Path excluding extension. |
180 | // Used to match implementation with headers, this differs from sys::path::stem: |
181 | // - in names with multiple dots (foo.cu.cc) it terminates at the *first* |
182 | // - an empty stem is never returned: /foo/.bar.x => .bar |
183 | // - we don't bother to handle . and .. specially |
184 | StringRef matchingStem(llvm::StringRef Path) { |
185 | StringRef Name = llvm::sys::path::filename(path: Path); |
186 | return Name.substr(Start: 0, N: Name.find(C: '.', From: 1)); |
187 | } |
188 | |
189 | } // anonymous namespace |
190 | |
191 | IncludeCategoryManager::IncludeCategoryManager(const IncludeStyle &Style, |
192 | StringRef FileName) |
193 | : Style(Style), FileName(FileName) { |
194 | for (const auto &Category : Style.IncludeCategories) { |
195 | CategoryRegexs.emplace_back(Args: Category.Regex, Args: Category.RegexIsCaseSensitive |
196 | ? llvm::Regex::NoFlags |
197 | : llvm::Regex::IgnoreCase); |
198 | } |
199 | IsMainFile = FileName.ends_with(Suffix: ".c" ) || FileName.ends_with(Suffix: ".cc" ) || |
200 | FileName.ends_with(Suffix: ".cpp" ) || FileName.ends_with(Suffix: ".c++" ) || |
201 | FileName.ends_with(Suffix: ".cxx" ) || FileName.ends_with(Suffix: ".m" ) || |
202 | FileName.ends_with(Suffix: ".mm" ); |
203 | if (!Style.IncludeIsMainSourceRegex.empty()) { |
204 | llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex); |
205 | IsMainFile |= MainFileRegex.match(String: FileName); |
206 | } |
207 | } |
208 | |
209 | int IncludeCategoryManager::getIncludePriority(StringRef IncludeName, |
210 | bool CheckMainHeader) const { |
211 | int Ret = INT_MAX; |
212 | for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) |
213 | if (CategoryRegexs[i].match(String: IncludeName)) { |
214 | Ret = Style.IncludeCategories[i].Priority; |
215 | break; |
216 | } |
217 | if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName)) |
218 | Ret = 0; |
219 | return Ret; |
220 | } |
221 | |
222 | int IncludeCategoryManager::getSortIncludePriority(StringRef IncludeName, |
223 | bool CheckMainHeader) const { |
224 | int Ret = INT_MAX; |
225 | for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) |
226 | if (CategoryRegexs[i].match(String: IncludeName)) { |
227 | Ret = Style.IncludeCategories[i].SortPriority; |
228 | if (Ret == 0) |
229 | Ret = Style.IncludeCategories[i].Priority; |
230 | break; |
231 | } |
232 | if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName)) |
233 | Ret = 0; |
234 | return Ret; |
235 | } |
236 | bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const { |
237 | switch (Style.MainIncludeChar) { |
238 | case IncludeStyle::MICD_Quote: |
239 | if (!IncludeName.starts_with(Prefix: "\"" )) |
240 | return false; |
241 | break; |
242 | case IncludeStyle::MICD_AngleBracket: |
243 | if (!IncludeName.starts_with(Prefix: "<" )) |
244 | return false; |
245 | break; |
246 | case IncludeStyle::MICD_Any: |
247 | break; |
248 | } |
249 | |
250 | IncludeName = |
251 | IncludeName.drop_front(N: 1).drop_back(N: 1); // remove the surrounding "" or <> |
252 | // Not matchingStem: implementation files may have compound extensions but |
253 | // headers may not. |
254 | StringRef = llvm::sys::path::stem(path: IncludeName); |
255 | StringRef FileStem = llvm::sys::path::stem(path: FileName); // foo.cu for foo.cu.cc |
256 | StringRef MatchingFileStem = matchingStem(Path: FileName); // foo for foo.cu.cc |
257 | // main-header examples: |
258 | // 1) foo.h => foo.cc |
259 | // 2) foo.h => foo.cu.cc |
260 | // 3) foo.proto.h => foo.proto.cc |
261 | // |
262 | // non-main-header examples: |
263 | // 1) foo.h => bar.cc |
264 | // 2) foo.proto.h => foo.cc |
265 | StringRef Matching; |
266 | if (MatchingFileStem.starts_with_insensitive(Prefix: HeaderStem)) |
267 | Matching = MatchingFileStem; // example 1), 2) |
268 | else if (FileStem.equals_insensitive(RHS: HeaderStem)) |
269 | Matching = FileStem; // example 3) |
270 | if (!Matching.empty()) { |
271 | llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex, |
272 | llvm::Regex::IgnoreCase); |
273 | if (MainIncludeRegex.match(String: Matching)) |
274 | return true; |
275 | } |
276 | return false; |
277 | } |
278 | |
279 | const llvm::Regex HeaderIncludes::(IncludeRegexPattern); |
280 | |
281 | HeaderIncludes::(StringRef FileName, StringRef Code, |
282 | const IncludeStyle &Style) |
283 | : FileName(FileName), Code(Code), FirstIncludeOffset(-1), |
284 | MinInsertOffset( |
285 | getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)), |
286 | MaxInsertOffset(MinInsertOffset + |
287 | getMaxHeaderInsertionOffset( |
288 | FileName, Code: Code.drop_front(N: MinInsertOffset), Style)), |
289 | MainIncludeFound(false), |
290 | Categories(Style, FileName) { |
291 | // Add 0 for main header and INT_MAX for headers that are not in any |
292 | // category. |
293 | Priorities = {0, INT_MAX}; |
294 | for (const auto &Category : Style.IncludeCategories) |
295 | Priorities.insert(x: Category.Priority); |
296 | SmallVector<StringRef, 32> Lines; |
297 | Code.drop_front(N: MinInsertOffset).split(A&: Lines, Separator: "\n" ); |
298 | |
299 | unsigned Offset = MinInsertOffset; |
300 | unsigned NextLineOffset; |
301 | SmallVector<StringRef, 4> Matches; |
302 | for (auto Line : Lines) { |
303 | NextLineOffset = std::min(a: Code.size(), b: Offset + Line.size() + 1); |
304 | if (IncludeRegex.match(String: Line, Matches: &Matches)) { |
305 | // If this is the last line without trailing newline, we need to make |
306 | // sure we don't delete across the file boundary. |
307 | addExistingInclude( |
308 | IncludeToAdd: Include(Matches[2], |
309 | tooling::Range( |
310 | Offset, std::min(a: Line.size() + 1, b: Code.size() - Offset)), |
311 | Matches[1] == "import" ? tooling::IncludeDirective::Import |
312 | : tooling::IncludeDirective::Include), |
313 | NextLineOffset); |
314 | } |
315 | Offset = NextLineOffset; |
316 | } |
317 | |
318 | // Populate CategoryEndOfssets: |
319 | // - Ensure that CategoryEndOffset[Highest] is always populated. |
320 | // - If CategoryEndOffset[Priority] isn't set, use the next higher value |
321 | // that is set, up to CategoryEndOffset[Highest]. |
322 | auto Highest = Priorities.begin(); |
323 | if (CategoryEndOffsets.find(x: *Highest) == CategoryEndOffsets.end()) { |
324 | if (FirstIncludeOffset >= 0) |
325 | CategoryEndOffsets[*Highest] = FirstIncludeOffset; |
326 | else |
327 | CategoryEndOffsets[*Highest] = MinInsertOffset; |
328 | } |
329 | // By this point, CategoryEndOffset[Highest] is always set appropriately: |
330 | // - to an appropriate location before/after existing #includes, or |
331 | // - to right after the header guard, or |
332 | // - to the beginning of the file. |
333 | for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I) |
334 | if (CategoryEndOffsets.find(x: *I) == CategoryEndOffsets.end()) |
335 | CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(x: I)]; |
336 | } |
337 | |
338 | // \p Offset: the start of the line following this include directive. |
339 | void HeaderIncludes::(Include IncludeToAdd, |
340 | unsigned NextLineOffset) { |
341 | auto Iter = |
342 | ExistingIncludes.try_emplace(Key: trimInclude(IncludeName: IncludeToAdd.Name)).first; |
343 | Iter->second.push_back(x: std::move(IncludeToAdd)); |
344 | auto &CurInclude = Iter->second.back(); |
345 | // The header name with quotes or angle brackets. |
346 | // Only record the offset of current #include if we can insert after it. |
347 | if (CurInclude.R.getOffset() <= MaxInsertOffset) { |
348 | int Priority = Categories.getIncludePriority( |
349 | IncludeName: CurInclude.Name, /*CheckMainHeader=*/!MainIncludeFound); |
350 | if (Priority == 0) |
351 | MainIncludeFound = true; |
352 | CategoryEndOffsets[Priority] = NextLineOffset; |
353 | IncludesByPriority[Priority].push_back(Elt: &CurInclude); |
354 | if (FirstIncludeOffset < 0) |
355 | FirstIncludeOffset = CurInclude.R.getOffset(); |
356 | } |
357 | } |
358 | |
359 | std::optional<tooling::Replacement> |
360 | HeaderIncludes::(llvm::StringRef IncludeName, bool IsAngled, |
361 | IncludeDirective Directive) const { |
362 | assert(IncludeName == trimInclude(IncludeName)); |
363 | // If a <header> ("header") already exists in code, "header" (<header>) with |
364 | // different quotation and/or directive will still be inserted. |
365 | // FIXME: figure out if this is the best behavior. |
366 | auto It = ExistingIncludes.find(Key: IncludeName); |
367 | if (It != ExistingIncludes.end()) { |
368 | for (const auto &Inc : It->second) |
369 | if (Inc.Directive == Directive && |
370 | ((IsAngled && StringRef(Inc.Name).starts_with(Prefix: "<" )) || |
371 | (!IsAngled && StringRef(Inc.Name).starts_with(Prefix: "\"" )))) |
372 | return std::nullopt; |
373 | } |
374 | std::string Quoted = |
375 | std::string(llvm::formatv(Fmt: IsAngled ? "<{0}>" : "\"{0}\"" , Vals&: IncludeName)); |
376 | StringRef QuotedName = Quoted; |
377 | int Priority = Categories.getIncludePriority( |
378 | IncludeName: QuotedName, /*CheckMainHeader=*/!MainIncludeFound); |
379 | auto CatOffset = CategoryEndOffsets.find(x: Priority); |
380 | assert(CatOffset != CategoryEndOffsets.end()); |
381 | unsigned InsertOffset = CatOffset->second; // Fall back offset |
382 | auto Iter = IncludesByPriority.find(x: Priority); |
383 | if (Iter != IncludesByPriority.end()) { |
384 | for (const auto *Inc : Iter->second) { |
385 | if (QuotedName < Inc->Name) { |
386 | InsertOffset = Inc->R.getOffset(); |
387 | break; |
388 | } |
389 | } |
390 | } |
391 | assert(InsertOffset <= Code.size()); |
392 | llvm::StringRef DirectiveSpelling = |
393 | Directive == IncludeDirective::Include ? "include" : "import" ; |
394 | std::string NewInclude = |
395 | llvm::formatv(Fmt: "#{0} {1}\n" , Vals&: DirectiveSpelling, Vals&: QuotedName); |
396 | // When inserting headers at end of the code, also append '\n' to the code |
397 | // if it does not end with '\n'. |
398 | // FIXME: when inserting multiple #includes at the end of code, only one |
399 | // newline should be added. |
400 | if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n')) |
401 | NewInclude = "\n" + NewInclude; |
402 | return tooling::Replacement(FileName, InsertOffset, 0, NewInclude); |
403 | } |
404 | |
405 | tooling::Replacements HeaderIncludes::(llvm::StringRef IncludeName, |
406 | bool IsAngled) const { |
407 | assert(IncludeName == trimInclude(IncludeName)); |
408 | tooling::Replacements Result; |
409 | auto Iter = ExistingIncludes.find(Key: IncludeName); |
410 | if (Iter == ExistingIncludes.end()) |
411 | return Result; |
412 | for (const auto &Inc : Iter->second) { |
413 | if ((IsAngled && StringRef(Inc.Name).starts_with(Prefix: "\"" )) || |
414 | (!IsAngled && StringRef(Inc.Name).starts_with(Prefix: "<" ))) |
415 | continue; |
416 | llvm::Error Err = Result.add(R: tooling::Replacement( |
417 | FileName, Inc.R.getOffset(), Inc.R.getLength(), "" )); |
418 | if (Err) { |
419 | auto ErrMsg = "Unexpected conflicts in #include deletions: " + |
420 | llvm::toString(E: std::move(Err)); |
421 | llvm_unreachable(ErrMsg.c_str()); |
422 | } |
423 | } |
424 | return Result; |
425 | } |
426 | |
427 | } // namespace tooling |
428 | } // namespace clang |
429 | |