1 | //===-- StdLib.cpp ----------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | #include "StdLib.h" |
9 | #include <fstream> |
10 | #include <memory> |
11 | #include <optional> |
12 | #include <string> |
13 | #include <vector> |
14 | |
15 | #include "Compiler.h" |
16 | #include "Config.h" |
17 | #include "SymbolCollector.h" |
18 | #include "index/IndexAction.h" |
19 | #include "support/Logger.h" |
20 | #include "support/ThreadsafeFS.h" |
21 | #include "support/Trace.h" |
22 | #include "clang/Basic/LangOptions.h" |
23 | #include "clang/Frontend/CompilerInvocation.h" |
24 | #include "clang/Lex/PreprocessorOptions.h" |
25 | #include "clang/Tooling/Inclusions/StandardLibrary.h" |
26 | #include "llvm/ADT/IntrusiveRefCntPtr.h" |
27 | #include "llvm/ADT/StringRef.h" |
28 | #include "llvm/Support/MemoryBuffer.h" |
29 | #include "llvm/Support/Path.h" |
30 | |
31 | namespace clang { |
32 | namespace clangd { |
33 | namespace { |
34 | |
35 | enum Lang { C, CXX }; |
36 | |
37 | Lang langFromOpts(const LangOptions &LO) { return LO.CPlusPlus ? CXX : C; } |
38 | llvm::StringLiteral mandatoryHeader(Lang L) { |
39 | switch (L) { |
40 | case C: |
41 | return "stdio.h" ; |
42 | case CXX: |
43 | return "vector" ; |
44 | } |
45 | llvm_unreachable("unhandled Lang" ); |
46 | } |
47 | |
48 | LangStandard::Kind standardFromOpts(const LangOptions &LO) { |
49 | if (LO.CPlusPlus) { |
50 | if (LO.CPlusPlus23) |
51 | return LangStandard::lang_cxx23; |
52 | if (LO.CPlusPlus20) |
53 | return LangStandard::lang_cxx20; |
54 | if (LO.CPlusPlus17) |
55 | return LangStandard::lang_cxx17; |
56 | if (LO.CPlusPlus14) |
57 | return LangStandard::lang_cxx14; |
58 | if (LO.CPlusPlus11) |
59 | return LangStandard::lang_cxx11; |
60 | return LangStandard::lang_cxx98; |
61 | } |
62 | if (LO.C23) |
63 | return LangStandard::lang_c23; |
64 | // C17 has no new features, so treat {C11,C17} as C17. |
65 | if (LO.C11) |
66 | return LangStandard::lang_c17; |
67 | return LangStandard::lang_c99; |
68 | } |
69 | |
70 | std::string (llvm::StringLiteral Mandatory, |
71 | llvm::ArrayRef<tooling::stdlib::Header> ) { |
72 | std::string Result; |
73 | llvm::raw_string_ostream OS(Result); |
74 | |
75 | // We __has_include guard all our #includes to avoid errors when using older |
76 | // stdlib version that don't have headers for the newest language standards. |
77 | // But make sure we get *some* error if things are totally broken. |
78 | OS << llvm::formatv( |
79 | Fmt: "#if !__has_include(<{0}>)\n" |
80 | "#error Mandatory header <{0}> not found in standard library!\n" |
81 | "#endif\n" , |
82 | Vals&: Mandatory); |
83 | |
84 | for (auto : Headers) { |
85 | OS << llvm::formatv(Fmt: "#if __has_include({0})\n" |
86 | "#include {0}\n" |
87 | "#endif\n" , |
88 | Vals&: Header); |
89 | } |
90 | OS.flush(); |
91 | return Result; |
92 | } |
93 | |
94 | } // namespace |
95 | |
96 | llvm::StringRef (const LangOptions &LO) { |
97 | // The umbrella header is the same for all versions of each language. |
98 | // Headers that are unsupported in old lang versions are usually guarded by |
99 | // #if. Some headers may be not present in old stdlib versions, the umbrella |
100 | // header guards with __has_include for this purpose. |
101 | Lang L = langFromOpts(LO); |
102 | switch (L) { |
103 | case CXX: |
104 | static std::string *UmbrellaCXX = new std::string(buildUmbrella( |
105 | Mandatory: mandatoryHeader(L), |
106 | Headers: tooling::stdlib::Header::all(L: tooling::stdlib::Lang::CXX))); |
107 | return *UmbrellaCXX; |
108 | case C: |
109 | static std::string *UmbrellaC = new std::string( |
110 | buildUmbrella(Mandatory: mandatoryHeader(L), |
111 | Headers: tooling::stdlib::Header::all(L: tooling::stdlib::Lang::C))); |
112 | return *UmbrellaC; |
113 | } |
114 | llvm_unreachable("invalid Lang in langFromOpts" ); |
115 | } |
116 | |
117 | namespace { |
118 | |
119 | // Including the standard library leaks unwanted transitively included symbols. |
120 | // |
121 | // We want to drop these, they're a bit tricky to identify: |
122 | // - we don't want to limit to symbols on our list, as our list has only |
123 | // top-level symbols (and there may be legitimate stdlib extensions). |
124 | // - we can't limit to only symbols defined in known stdlib headers, as stdlib |
125 | // internal structure is murky |
126 | // - we can't strictly require symbols to come from a particular path, e.g. |
127 | // libstdc++ is mostly under /usr/include/c++/10/... |
128 | // but std::ctype_base is under /usr/include/<platform>/c++/10/... |
129 | // We require the symbol to come from a header that is *either* from |
130 | // the standard library path (as identified by the location of <vector>), or |
131 | // another header that defines a symbol from our stdlib list. |
132 | SymbolSlab filter(SymbolSlab Slab, const StdLibLocation &Loc) { |
133 | SymbolSlab::Builder Result; |
134 | |
135 | static auto &StandardHeaders = *[] { |
136 | auto *Set = new llvm::DenseSet<llvm::StringRef>(); |
137 | for (auto : tooling::stdlib::Header::all(L: tooling::stdlib::Lang::CXX)) |
138 | Set->insert(V: Header.name()); |
139 | for (auto : tooling::stdlib::Header::all(L: tooling::stdlib::Lang::C)) |
140 | Set->insert(V: Header.name()); |
141 | return Set; |
142 | }(); |
143 | |
144 | // Form prefixes like file:///usr/include/c++/10/ |
145 | // These can be trivially prefix-compared with URIs in the indexed symbols. |
146 | llvm::SmallVector<std::string> StdLibURIPrefixes; |
147 | for (const auto &Path : Loc.Paths) { |
148 | StdLibURIPrefixes.push_back(Elt: URI::create(AbsolutePath: Path).toString()); |
149 | if (StdLibURIPrefixes.back().back() != '/') |
150 | StdLibURIPrefixes.back().push_back(c: '/'); |
151 | } |
152 | // For each header URI, is it *either* prefixed by StdLibURIPrefixes *or* |
153 | // owner of a symbol whose insertable header is in StandardHeaders? |
154 | // Pointer key because strings in a SymbolSlab are interned. |
155 | llvm::DenseMap<const char *, bool> ; |
156 | for (const Symbol &S : Slab) { |
157 | if (!S.IncludeHeaders.empty() && |
158 | StandardHeaders.contains(V: S.IncludeHeaders.front().IncludeHeader)) { |
159 | GoodHeader[S.CanonicalDeclaration.FileURI] = true; |
160 | GoodHeader[S.Definition.FileURI] = true; |
161 | continue; |
162 | } |
163 | for (const char *URI : |
164 | {S.CanonicalDeclaration.FileURI, S.Definition.FileURI}) { |
165 | auto R = GoodHeader.try_emplace(Key: URI, Args: false); |
166 | if (R.second) { |
167 | R.first->second = llvm::any_of( |
168 | Range&: StdLibURIPrefixes, |
169 | P: [&, URIStr(llvm::StringRef(URI))](const std::string &Prefix) { |
170 | return URIStr.starts_with(Prefix); |
171 | }); |
172 | } |
173 | } |
174 | } |
175 | #ifndef NDEBUG |
176 | for (const auto &Good : GoodHeader) |
177 | if (Good.second && *Good.first) |
178 | dlog("Stdlib header: {0}" , Good.first); |
179 | #endif |
180 | // Empty URIs aren't considered good. (Definition can be blank). |
181 | auto = [&](const char *C) { return *C && GoodHeader.lookup(Val: C); }; |
182 | |
183 | for (const Symbol &S : Slab) { |
184 | if (!(IsGoodHeader(S.CanonicalDeclaration.FileURI) || |
185 | IsGoodHeader(S.Definition.FileURI))) { |
186 | dlog("Ignoring wrong-header symbol {0}{1} in {2}" , S.Scope, S.Name, |
187 | S.CanonicalDeclaration.FileURI); |
188 | continue; |
189 | } |
190 | Result.insert(S); |
191 | } |
192 | |
193 | return std::move(Result).build(); |
194 | } |
195 | |
196 | } // namespace |
197 | |
198 | SymbolSlab indexStandardLibrary(llvm::StringRef , |
199 | std::unique_ptr<CompilerInvocation> CI, |
200 | const StdLibLocation &Loc, |
201 | const ThreadsafeFS &TFS) { |
202 | if (CI->getFrontendOpts().Inputs.size() != 1 || |
203 | !CI->getPreprocessorOpts().ImplicitPCHInclude.empty()) { |
204 | elog(Fmt: "Indexing standard library failed: bad CompilerInvocation" ); |
205 | assert(false && "indexing stdlib with a dubious CompilerInvocation!" ); |
206 | return SymbolSlab(); |
207 | } |
208 | const FrontendInputFile &Input = CI->getFrontendOpts().Inputs.front(); |
209 | trace::Span Tracer("StandardLibraryIndex" ); |
210 | LangStandard::Kind LangStd = standardFromOpts(LO: CI->getLangOpts()); |
211 | log(Fmt: "Indexing {0} standard library in the context of {1}" , |
212 | Vals: LangStandard::getLangStandardForKind(K: LangStd).getName(), Vals: Input.getFile()); |
213 | |
214 | SymbolSlab Symbols; |
215 | IgnoreDiagnostics IgnoreDiags; |
216 | // CompilerInvocation is taken from elsewhere, and may map a dirty buffer. |
217 | CI->getPreprocessorOpts().clearRemappedFiles(); |
218 | auto Clang = prepareCompilerInstance( |
219 | std::move(CI), /*Preamble=*/nullptr, |
220 | MainFile: llvm::MemoryBuffer::getMemBuffer(InputData: HeaderSources, BufferName: Input.getFile()), |
221 | TFS.view(/*CWD=*/std::nullopt), IgnoreDiags); |
222 | if (!Clang) { |
223 | elog(Fmt: "Standard Library Index: Couldn't build compiler instance" ); |
224 | return Symbols; |
225 | } |
226 | |
227 | SymbolCollector::Options IndexOpts; |
228 | IndexOpts.Origin = SymbolOrigin::StdLib; |
229 | IndexOpts.CollectMainFileSymbols = false; |
230 | IndexOpts.CollectMainFileRefs = false; |
231 | IndexOpts.CollectMacro = true; |
232 | IndexOpts.StoreAllDocumentation = true; |
233 | // Sadly we can't use IndexOpts.FileFilter to restrict indexing scope. |
234 | // Files from outside the StdLibLocation may define true std symbols anyway. |
235 | // We end up "blessing" such headers, and can only do that by indexing |
236 | // everything first. |
237 | |
238 | // Refs, relations, include graph in the stdlib mostly aren't useful. |
239 | auto Action = createStaticIndexingAction( |
240 | Opts: IndexOpts, SymbolsCallback: [&](SymbolSlab S) { Symbols = std::move(S); }, RefsCallback: nullptr, |
241 | RelationsCallback: nullptr, IncludeGraphCallback: nullptr); |
242 | |
243 | if (!Action->BeginSourceFile(CI&: *Clang, Input)) { |
244 | elog(Fmt: "Standard Library Index: BeginSourceFile() failed" ); |
245 | return Symbols; |
246 | } |
247 | |
248 | if (llvm::Error Err = Action->Execute()) { |
249 | elog(Fmt: "Standard Library Index: Execute failed: {0}" , Vals: std::move(Err)); |
250 | return Symbols; |
251 | } |
252 | |
253 | Action->EndSourceFile(); |
254 | |
255 | unsigned SymbolsBeforeFilter = Symbols.size(); |
256 | Symbols = filter(Slab: std::move(Symbols), Loc); |
257 | bool Errors = Clang->hasDiagnostics() && |
258 | Clang->getDiagnostics().hasUncompilableErrorOccurred(); |
259 | log(Fmt: "Indexed {0} standard library{3}: {1} symbols, {2} filtered" , |
260 | Vals: LangStandard::getLangStandardForKind(K: LangStd).getName(), Vals: Symbols.size(), |
261 | Vals: SymbolsBeforeFilter - Symbols.size(), |
262 | Vals: Errors ? " (incomplete due to errors)" : "" ); |
263 | SPAN_ATTACH(Tracer, "symbols" , int(Symbols.size())); |
264 | return Symbols; |
265 | } |
266 | |
267 | SymbolSlab indexStandardLibrary(std::unique_ptr<CompilerInvocation> Invocation, |
268 | const StdLibLocation &Loc, |
269 | const ThreadsafeFS &TFS) { |
270 | llvm::StringRef = getStdlibUmbrellaHeader(LO: Invocation->getLangOpts()); |
271 | return indexStandardLibrary(HeaderSources: Header, CI: std::move(Invocation), Loc, TFS); |
272 | } |
273 | |
274 | bool StdLibSet::isBest(const LangOptions &LO) const { |
275 | return standardFromOpts(LO) >= |
276 | Best[langFromOpts(LO)].load(m: std::memory_order_acquire); |
277 | } |
278 | |
279 | std::optional<StdLibLocation> StdLibSet::(const LangOptions &LO, |
280 | const HeaderSearch &HS) { |
281 | Lang L = langFromOpts(LO); |
282 | int OldVersion = Best[L].load(m: std::memory_order_acquire); |
283 | int NewVersion = standardFromOpts(LO); |
284 | dlog("Index stdlib? {0}" , |
285 | LangStandard::getLangStandardForKind(standardFromOpts(LO)).getName()); |
286 | |
287 | if (!Config::current().Index.StandardLibrary) { |
288 | dlog("No: disabled in config" ); |
289 | return std::nullopt; |
290 | } |
291 | |
292 | if (NewVersion <= OldVersion) { |
293 | dlog("No: have {0}, {1}>={2}" , |
294 | LangStandard::getLangStandardForKind( |
295 | static_cast<LangStandard::Kind>(NewVersion)) |
296 | .getName(), |
297 | OldVersion, NewVersion); |
298 | return std::nullopt; |
299 | } |
300 | |
301 | // We'd like to index a standard library here if there is one. |
302 | // Check for the existence of <vector> on the search path. |
303 | // We could cache this, but we only get here repeatedly when there's no |
304 | // stdlib, and even then only once per preamble build. |
305 | llvm::StringLiteral = mandatoryHeader(L); |
306 | llvm::SmallString<256> Path; // Scratch space. |
307 | llvm::SmallVector<std::string> SearchPaths; |
308 | auto = [&](llvm::StringRef ) { |
309 | llvm::StringRef DirPath = llvm::sys::path::parent_path(path: HeaderPath); |
310 | if (!HS.getFileMgr().getVirtualFileSystem().getRealPath(Path: DirPath, Output&: Path)) |
311 | SearchPaths.emplace_back(Args&: Path); |
312 | }; |
313 | for (const auto &DL : |
314 | llvm::make_range(x: HS.search_dir_begin(), y: HS.search_dir_end())) { |
315 | switch (DL.getLookupType()) { |
316 | case DirectoryLookup::LT_NormalDir: { |
317 | Path = DL.getDirRef()->getName(); |
318 | llvm::sys::path::append(path&: Path, a: ProbeHeader); |
319 | llvm::vfs::Status Stat; |
320 | if (!HS.getFileMgr().getNoncachedStatValue(Path, Result&: Stat) && |
321 | Stat.isRegularFile()) |
322 | RecordHeaderPath(Path); |
323 | break; |
324 | } |
325 | case DirectoryLookup::LT_Framework: |
326 | // stdlib can't be a framework (framework includes must have a slash) |
327 | continue; |
328 | case DirectoryLookup::LT_HeaderMap: |
329 | llvm::StringRef Target = |
330 | DL.getHeaderMap()->lookupFilename(Filename: ProbeHeader, DestPath&: Path); |
331 | if (!Target.empty()) |
332 | RecordHeaderPath(Target); |
333 | break; |
334 | } |
335 | } |
336 | if (SearchPaths.empty()) |
337 | return std::nullopt; |
338 | |
339 | dlog("Found standard library in {0}" , llvm::join(SearchPaths, ", " )); |
340 | |
341 | while (!Best[L].compare_exchange_weak(i1&: OldVersion, i2: NewVersion, |
342 | m: std::memory_order_acq_rel)) |
343 | if (OldVersion >= NewVersion) { |
344 | dlog("No: lost the race" ); |
345 | return std::nullopt; // Another thread won the race while we were |
346 | // checking. |
347 | } |
348 | |
349 | dlog("Yes, index stdlib!" ); |
350 | return StdLibLocation{.Paths: std::move(SearchPaths)}; |
351 | } |
352 | |
353 | } // namespace clangd |
354 | } // namespace clang |
355 | |