1 | //===-- IncludeFixer.cpp - Include inserter based on sema callbacks -------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "IncludeFixer.h" |
10 | #include "clang/Format/Format.h" |
11 | #include "clang/Frontend/CompilerInstance.h" |
12 | #include "clang/Lex/HeaderSearch.h" |
13 | #include "clang/Lex/Preprocessor.h" |
14 | #include "clang/Parse/ParseAST.h" |
15 | #include "clang/Sema/Sema.h" |
16 | #include "llvm/Support/Debug.h" |
17 | #include "llvm/Support/raw_ostream.h" |
18 | |
19 | #define DEBUG_TYPE "clang-include-fixer" |
20 | |
21 | using namespace clang; |
22 | |
23 | namespace clang { |
24 | namespace include_fixer { |
25 | namespace { |
26 | /// Manages the parse, gathers include suggestions. |
27 | class Action : public clang::ASTFrontendAction { |
28 | public: |
29 | explicit Action(SymbolIndexManager &SymbolIndexMgr, bool MinimizeIncludePaths) |
30 | : SemaSource(new IncludeFixerSemaSource(SymbolIndexMgr, |
31 | MinimizeIncludePaths, |
32 | /*GenerateDiagnostics=*/false)) {} |
33 | |
34 | std::unique_ptr<clang::ASTConsumer> |
35 | CreateASTConsumer(clang::CompilerInstance &Compiler, |
36 | StringRef InFile) override { |
37 | SemaSource->setFilePath(InFile); |
38 | return std::make_unique<clang::ASTConsumer>(); |
39 | } |
40 | |
41 | void ExecuteAction() override { |
42 | clang::CompilerInstance *Compiler = &getCompilerInstance(); |
43 | assert(!Compiler->hasSema() && "CI already has Sema" ); |
44 | |
45 | // Set up our hooks into sema and parse the AST. |
46 | if (hasCodeCompletionSupport() && |
47 | !Compiler->getFrontendOpts().CodeCompletionAt.FileName.empty()) |
48 | Compiler->createCodeCompletionConsumer(); |
49 | |
50 | clang::CodeCompleteConsumer *CompletionConsumer = nullptr; |
51 | if (Compiler->hasCodeCompletionConsumer()) |
52 | CompletionConsumer = &Compiler->getCodeCompletionConsumer(); |
53 | |
54 | Compiler->createSema(TUKind: getTranslationUnitKind(), CompletionConsumer); |
55 | SemaSource->setCompilerInstance(Compiler); |
56 | Compiler->getSema().addExternalSource(E: SemaSource.get()); |
57 | |
58 | clang::ParseAST(S&: Compiler->getSema(), PrintStats: Compiler->getFrontendOpts().ShowStats, |
59 | SkipFunctionBodies: Compiler->getFrontendOpts().SkipFunctionBodies); |
60 | } |
61 | |
62 | IncludeFixerContext |
63 | (const clang::SourceManager &SourceManager, |
64 | clang::HeaderSearch &) const { |
65 | return SemaSource->getIncludeFixerContext(SourceManager, HeaderSearch, |
66 | MatchedSymbols: SemaSource->getMatchedSymbols()); |
67 | } |
68 | |
69 | private: |
70 | IntrusiveRefCntPtr<IncludeFixerSemaSource> SemaSource; |
71 | }; |
72 | |
73 | } // namespace |
74 | |
/// Initializes the SymbolIndexMgr, Contexts and MinimizeIncludePaths members
/// from the like-named arguments. \p StyleName is accepted but not used by
/// this constructor.
IncludeFixerActionFactory::IncludeFixerActionFactory(
    SymbolIndexManager &SymbolIndexMgr,
    std::vector<IncludeFixerContext> &Contexts, StringRef StyleName,
    bool MinimizeIncludePaths)
    : SymbolIndexMgr(SymbolIndexMgr), Contexts(Contexts),
      MinimizeIncludePaths(MinimizeIncludePaths) {}
81 | |
// Defaulted: cleanup is left entirely to the members' own destructors.
IncludeFixerActionFactory::~IncludeFixerActionFactory() = default;
83 | |
84 | bool IncludeFixerActionFactory::runInvocation( |
85 | std::shared_ptr<clang::CompilerInvocation> Invocation, |
86 | clang::FileManager *Files, |
87 | std::shared_ptr<clang::PCHContainerOperations> PCHContainerOps, |
88 | clang::DiagnosticConsumer *Diagnostics) { |
89 | assert(Invocation->getFrontendOpts().Inputs.size() == 1); |
90 | |
91 | // Set up Clang. |
92 | clang::CompilerInstance Compiler(PCHContainerOps); |
93 | Compiler.setInvocation(std::move(Invocation)); |
94 | Compiler.setFileManager(Files); |
95 | |
96 | // Create the compiler's actual diagnostics engine. We want to drop all |
97 | // diagnostics here. |
98 | Compiler.createDiagnostics(Client: new clang::IgnoringDiagConsumer, |
99 | /*ShouldOwnClient=*/true); |
100 | Compiler.createSourceManager(FileMgr&: *Files); |
101 | |
102 | // We abort on fatal errors so don't let a large number of errors become |
103 | // fatal. A missing #include can cause thousands of errors. |
104 | Compiler.getDiagnostics().setErrorLimit(0); |
105 | |
106 | // Run the parser, gather missing includes. |
107 | auto ScopedToolAction = |
108 | std::make_unique<Action>(args&: SymbolIndexMgr, args&: MinimizeIncludePaths); |
109 | Compiler.ExecuteAction(Act&: *ScopedToolAction); |
110 | |
111 | Contexts.push_back(x: ScopedToolAction->getIncludeFixerContext( |
112 | SourceManager: Compiler.getSourceManager(), |
113 | HeaderSearch&: Compiler.getPreprocessor().getHeaderSearchInfo())); |
114 | |
115 | // Technically this should only return true if we're sure that we have a |
116 | // parseable file. We don't know that though. Only inform users of fatal |
117 | // errors. |
118 | return !Compiler.getDiagnostics().hasFatalErrorOccurred(); |
119 | } |
120 | |
121 | static bool addDiagnosticsForContext(TypoCorrection &Correction, |
122 | const IncludeFixerContext &Context, |
123 | StringRef Code, SourceLocation StartOfFile, |
124 | ASTContext &Ctx) { |
125 | auto Reps = createIncludeFixerReplacements( |
126 | Code, Context, Style: format::getLLVMStyle(), /*AddQualifiers=*/false); |
127 | if (!Reps || Reps->size() != 1) |
128 | return false; |
129 | |
130 | unsigned DiagID = Ctx.getDiagnostics().getCustomDiagID( |
131 | L: DiagnosticsEngine::Note, FormatString: "Add '#include %0' to provide the missing " |
132 | "declaration [clang-include-fixer]" ); |
133 | |
134 | // FIXME: Currently we only generate a diagnostic for the first header. Give |
135 | // the user choices. |
136 | const tooling::Replacement &Placed = *Reps->begin(); |
137 | |
138 | auto Begin = StartOfFile.getLocWithOffset(Offset: Placed.getOffset()); |
139 | auto End = Begin.getLocWithOffset(Offset: std::max(a: 0, b: (int)Placed.getLength() - 1)); |
140 | PartialDiagnostic PD(DiagID, Ctx.getDiagAllocator()); |
141 | PD << Context.getHeaderInfos().front().Header |
142 | << FixItHint::CreateReplacement(RemoveRange: CharSourceRange::getCharRange(B: Begin, E: End), |
143 | Code: Placed.getReplacementText()); |
144 | Correction.addExtraDiagnostic(PD: std::move(PD)); |
145 | return true; |
146 | } |
147 | |
148 | /// Callback for incomplete types. If we encounter a forward declaration we |
149 | /// have the fully qualified name ready. Just query that. |
150 | bool IncludeFixerSemaSource::MaybeDiagnoseMissingCompleteType( |
151 | clang::SourceLocation Loc, clang::QualType T) { |
152 | // Ignore spurious callbacks from SFINAE contexts. |
153 | if (CI->getSema().isSFINAEContext()) |
154 | return false; |
155 | |
156 | clang::ASTContext &context = CI->getASTContext(); |
157 | std::string QueryString = QualType(T->getUnqualifiedDesugaredType(), 0) |
158 | .getAsString(Policy: context.getPrintingPolicy()); |
159 | LLVM_DEBUG(llvm::dbgs() << "Query missing complete type '" << QueryString |
160 | << "'" ); |
161 | // Pass an empty range here since we don't add qualifier in this case. |
162 | std::vector<find_all_symbols::SymbolInfo> MatchedSymbols = |
163 | query(Query: QueryString, ScopedQualifiers: "" , Range: tooling::Range()); |
164 | |
165 | if (!MatchedSymbols.empty() && GenerateDiagnostics) { |
166 | TypoCorrection Correction; |
167 | FileID FID = CI->getSourceManager().getFileID(SpellingLoc: Loc); |
168 | StringRef Code = CI->getSourceManager().getBufferData(FID); |
169 | SourceLocation StartOfFile = |
170 | CI->getSourceManager().getLocForStartOfFile(FID); |
171 | addDiagnosticsForContext( |
172 | Correction, |
173 | Context: getIncludeFixerContext(SourceManager: CI->getSourceManager(), |
174 | HeaderSearch&: CI->getPreprocessor().getHeaderSearchInfo(), |
175 | MatchedSymbols), |
176 | Code, StartOfFile, Ctx&: CI->getASTContext()); |
177 | for (const PartialDiagnostic &PD : Correction.getExtraDiagnostics()) |
178 | CI->getSema().Diag(Loc, PD); |
179 | } |
180 | return true; |
181 | } |
182 | |
183 | /// Callback for unknown identifiers. Try to piece together as much |
184 | /// qualification as we can get and do a query. |
185 | clang::TypoCorrection IncludeFixerSemaSource::CorrectTypo( |
186 | const DeclarationNameInfo &Typo, int LookupKind, Scope *S, CXXScopeSpec *SS, |
187 | CorrectionCandidateCallback &CCC, DeclContext *MemberContext, |
188 | bool EnteringContext, const ObjCObjectPointerType *OPT) { |
189 | // Ignore spurious callbacks from SFINAE contexts. |
190 | if (CI->getSema().isSFINAEContext()) |
191 | return clang::TypoCorrection(); |
192 | |
193 | // We currently ignore the unidentified symbol which is not from the |
194 | // main file. |
195 | // |
196 | // However, this is not always true due to templates in a non-self contained |
197 | // header, consider the case: |
198 | // |
199 | // // header.h |
200 | // template <typename T> |
201 | // class Foo { |
202 | // T t; |
203 | // }; |
204 | // |
205 | // // test.cc |
206 | // // We need to add <bar.h> in test.cc instead of header.h. |
207 | // class Bar; |
208 | // Foo<Bar> foo; |
209 | // |
210 | // FIXME: Add the missing header to the header file where the symbol comes |
211 | // from. |
212 | if (!CI->getSourceManager().isWrittenInMainFile(Loc: Typo.getLoc())) |
213 | return clang::TypoCorrection(); |
214 | |
215 | std::string TypoScopeString; |
216 | if (S) { |
217 | // FIXME: Currently we only use namespace contexts. Use other context |
218 | // types for query. |
219 | for (const auto *Context = S->getEntity(); Context; |
220 | Context = Context->getParent()) { |
221 | if (const auto *ND = dyn_cast<NamespaceDecl>(Val: Context)) { |
222 | if (!ND->getName().empty()) |
223 | TypoScopeString = ND->getNameAsString() + "::" + TypoScopeString; |
224 | } |
225 | } |
226 | } |
227 | |
228 | auto ExtendNestedNameSpecifier = [this](CharSourceRange Range) { |
229 | StringRef Source = |
230 | Lexer::getSourceText(Range, SM: CI->getSourceManager(), LangOpts: CI->getLangOpts()); |
231 | |
232 | // Skip forward until we find a character that's neither identifier nor |
233 | // colon. This is a bit of a hack around the fact that we will only get a |
234 | // single callback for a long nested name if a part of the beginning is |
235 | // unknown. For example: |
236 | // |
237 | // llvm::sys::path::parent_path(...) |
238 | // ^~~~ ^~~ |
239 | // known |
240 | // ^~~~ |
241 | // unknown, last callback |
242 | // ^~~~~~~~~~~ |
243 | // no callback |
244 | // |
245 | // With the extension we get the full nested name specifier including |
246 | // parent_path. |
247 | // FIXME: Don't rely on source text. |
248 | const char *End = Source.end(); |
249 | while (isAsciiIdentifierContinue(c: *End) || *End == ':') |
250 | ++End; |
251 | |
252 | return std::string(Source.begin(), End); |
253 | }; |
254 | |
255 | /// If we have a scope specification, use that to get more precise results. |
256 | std::string QueryString; |
257 | tooling::Range SymbolRange; |
258 | const auto &SM = CI->getSourceManager(); |
259 | auto CreateToolingRange = [&QueryString, &SM](SourceLocation BeginLoc) { |
260 | return tooling::Range(SM.getDecomposedLoc(Loc: BeginLoc).second, |
261 | QueryString.size()); |
262 | }; |
263 | if (SS && SS->getRange().isValid()) { |
264 | auto Range = CharSourceRange::getTokenRange(B: SS->getRange().getBegin(), |
265 | E: Typo.getLoc()); |
266 | |
267 | QueryString = ExtendNestedNameSpecifier(Range); |
268 | SymbolRange = CreateToolingRange(Range.getBegin()); |
269 | } else if (Typo.getName().isIdentifier() && !Typo.getLoc().isMacroID()) { |
270 | auto Range = |
271 | CharSourceRange::getTokenRange(B: Typo.getBeginLoc(), E: Typo.getEndLoc()); |
272 | |
273 | QueryString = ExtendNestedNameSpecifier(Range); |
274 | SymbolRange = CreateToolingRange(Range.getBegin()); |
275 | } else { |
276 | QueryString = Typo.getAsString(); |
277 | SymbolRange = CreateToolingRange(Typo.getLoc()); |
278 | } |
279 | |
280 | LLVM_DEBUG(llvm::dbgs() << "TypoScopeQualifiers: " << TypoScopeString |
281 | << "\n" ); |
282 | std::vector<find_all_symbols::SymbolInfo> MatchedSymbols = |
283 | query(Query: QueryString, ScopedQualifiers: TypoScopeString, Range: SymbolRange); |
284 | |
285 | if (!MatchedSymbols.empty() && GenerateDiagnostics) { |
286 | TypoCorrection Correction(Typo.getName()); |
287 | Correction.setCorrectionRange(SS, TypoName: Typo); |
288 | FileID FID = SM.getFileID(SpellingLoc: Typo.getLoc()); |
289 | StringRef Code = SM.getBufferData(FID); |
290 | SourceLocation StartOfFile = SM.getLocForStartOfFile(FID); |
291 | if (addDiagnosticsForContext( |
292 | Correction, Context: getIncludeFixerContext( |
293 | SourceManager: SM, HeaderSearch&: CI->getPreprocessor().getHeaderSearchInfo(), |
294 | MatchedSymbols), |
295 | Code, StartOfFile, Ctx&: CI->getASTContext())) |
296 | return Correction; |
297 | } |
298 | return TypoCorrection(); |
299 | } |
300 | |
301 | /// Get the minimal include for a given path. |
302 | std::string IncludeFixerSemaSource::( |
303 | StringRef Include, const clang::SourceManager &SourceManager, |
304 | clang::HeaderSearch &) const { |
305 | if (!MinimizeIncludePaths) |
306 | return std::string(Include); |
307 | |
308 | // Get the FileEntry for the include. |
309 | StringRef StrippedInclude = Include.trim(Chars: "\"<>" ); |
310 | auto Entry = |
311 | SourceManager.getFileManager().getOptionalFileRef(Filename: StrippedInclude); |
312 | |
313 | // If the file doesn't exist return the path from the database. |
314 | // FIXME: This should never happen. |
315 | if (!Entry) |
316 | return std::string(Include); |
317 | |
318 | bool IsAngled = false; |
319 | std::string Suggestion = |
320 | HeaderSearch.suggestPathToFileForDiagnostics(File: *Entry, MainFile: "" , IsAngled: &IsAngled); |
321 | |
322 | return IsAngled ? '<' + Suggestion + '>' : '"' + Suggestion + '"'; |
323 | } |
324 | |
325 | /// Get the include fixer context for the queried symbol. |
326 | IncludeFixerContext IncludeFixerSemaSource::( |
327 | const clang::SourceManager &SourceManager, |
328 | clang::HeaderSearch &, |
329 | ArrayRef<find_all_symbols::SymbolInfo> MatchedSymbols) const { |
330 | std::vector<find_all_symbols::SymbolInfo> SymbolCandidates; |
331 | for (const auto &Symbol : MatchedSymbols) { |
332 | std::string FilePath = Symbol.getFilePath().str(); |
333 | std::string MinimizedFilePath = minimizeInclude( |
334 | Include: ((FilePath[0] == '"' || FilePath[0] == '<') ? FilePath |
335 | : "\"" + FilePath + "\"" ), |
336 | SourceManager, HeaderSearch); |
337 | SymbolCandidates.emplace_back(args: Symbol.getName(), args: Symbol.getSymbolKind(), |
338 | args&: MinimizedFilePath, args: Symbol.getContexts()); |
339 | } |
340 | return IncludeFixerContext(FilePath, QuerySymbolInfos, SymbolCandidates); |
341 | } |
342 | |
343 | std::vector<find_all_symbols::SymbolInfo> |
344 | IncludeFixerSemaSource::query(StringRef Query, StringRef ScopedQualifiers, |
345 | tooling::Range Range) { |
346 | assert(!Query.empty() && "Empty query!" ); |
347 | |
348 | // Save all instances of an unidentified symbol. |
349 | // |
350 | // We use conservative behavior for detecting the same unidentified symbol |
351 | // here. The symbols which have the same ScopedQualifier and RawIdentifier |
352 | // are considered equal. So that clang-include-fixer avoids false positives, |
353 | // and always adds missing qualifiers to correct symbols. |
354 | if (!GenerateDiagnostics && !QuerySymbolInfos.empty()) { |
355 | if (ScopedQualifiers == QuerySymbolInfos.front().ScopedQualifiers && |
356 | Query == QuerySymbolInfos.front().RawIdentifier) { |
357 | QuerySymbolInfos.push_back( |
358 | x: {.RawIdentifier: Query.str(), .ScopedQualifiers: std::string(ScopedQualifiers), .Range: Range}); |
359 | } |
360 | return {}; |
361 | } |
362 | |
363 | LLVM_DEBUG(llvm::dbgs() << "Looking up '" << Query << "' at " ); |
364 | LLVM_DEBUG(CI->getSourceManager() |
365 | .getLocForStartOfFile(CI->getSourceManager().getMainFileID()) |
366 | .getLocWithOffset(Range.getOffset()) |
367 | .print(llvm::dbgs(), CI->getSourceManager())); |
368 | LLVM_DEBUG(llvm::dbgs() << " ..." ); |
369 | llvm::StringRef FileName = CI->getSourceManager().getFilename( |
370 | SpellingLoc: CI->getSourceManager().getLocForStartOfFile( |
371 | FID: CI->getSourceManager().getMainFileID())); |
372 | |
373 | QuerySymbolInfos.push_back( |
374 | x: {.RawIdentifier: Query.str(), .ScopedQualifiers: std::string(ScopedQualifiers), .Range: Range}); |
375 | |
376 | // Query the symbol based on C++ name Lookup rules. |
377 | // Firstly, lookup the identifier with scoped namespace contexts; |
378 | // If that fails, falls back to look up the identifier directly. |
379 | // |
380 | // For example: |
381 | // |
382 | // namespace a { |
383 | // b::foo f; |
384 | // } |
385 | // |
386 | // 1. lookup a::b::foo. |
387 | // 2. lookup b::foo. |
388 | std::string QueryString = ScopedQualifiers.str() + Query.str(); |
389 | // It's unsafe to do nested search for the identifier with scoped namespace |
390 | // context, it might treat the identifier as a nested class of the scoped |
391 | // namespace. |
392 | std::vector<find_all_symbols::SymbolInfo> MatchedSymbols = |
393 | SymbolIndexMgr.search(Identifier: QueryString, /*IsNestedSearch=*/false, FileName); |
394 | if (MatchedSymbols.empty()) |
395 | MatchedSymbols = |
396 | SymbolIndexMgr.search(Identifier: Query, /*IsNestedSearch=*/true, FileName); |
397 | LLVM_DEBUG(llvm::dbgs() << "Having found " << MatchedSymbols.size() |
398 | << " symbols\n" ); |
399 | // We store a copy of MatchedSymbols in a place where it's globally reachable. |
400 | // This is used by the standalone version of the tool. |
401 | this->MatchedSymbols = MatchedSymbols; |
402 | return MatchedSymbols; |
403 | } |
404 | |
405 | llvm::Expected<tooling::Replacements> createIncludeFixerReplacements( |
406 | StringRef Code, const IncludeFixerContext &Context, |
407 | const clang::format::FormatStyle &Style, bool AddQualifiers) { |
408 | if (Context.getHeaderInfos().empty()) |
409 | return tooling::Replacements(); |
410 | StringRef FilePath = Context.getFilePath(); |
411 | std::string IncludeName = |
412 | "#include " + Context.getHeaderInfos().front().Header + "\n" ; |
413 | // Create replacements for the new header. |
414 | clang::tooling::Replacements Insertions; |
415 | auto Err = |
416 | Insertions.add(R: tooling::Replacement(FilePath, UINT_MAX, 0, IncludeName)); |
417 | if (Err) |
418 | return std::move(Err); |
419 | |
420 | auto CleanReplaces = cleanupAroundReplacements(Code, Replaces: Insertions, Style); |
421 | if (!CleanReplaces) |
422 | return CleanReplaces; |
423 | |
424 | auto Replaces = std::move(*CleanReplaces); |
425 | if (AddQualifiers) { |
426 | for (const auto &Info : Context.getQuerySymbolInfos()) { |
427 | // Ignore the empty range. |
428 | if (Info.Range.getLength() > 0) { |
429 | auto R = tooling::Replacement( |
430 | {FilePath, Info.Range.getOffset(), Info.Range.getLength(), |
431 | Context.getHeaderInfos().front().QualifiedName}); |
432 | auto Err = Replaces.add(R); |
433 | if (Err) { |
434 | llvm::consumeError(Err: std::move(Err)); |
435 | R = tooling::Replacement( |
436 | R.getFilePath(), Replaces.getShiftedCodePosition(Position: R.getOffset()), |
437 | R.getLength(), R.getReplacementText()); |
438 | Replaces = Replaces.merge(Replaces: tooling::Replacements(R)); |
439 | } |
440 | } |
441 | } |
442 | } |
443 | return formatReplacements(Code, Replaces, Style); |
444 | } |
445 | |
446 | } // namespace include_fixer |
447 | } // namespace clang |
448 | |