1//===-- StdLib.cpp ----------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "StdLib.h"
9#include <fstream>
10#include <memory>
11#include <optional>
12#include <string>
13#include <vector>
14
15#include "Compiler.h"
16#include "Config.h"
17#include "SymbolCollector.h"
18#include "clang-include-cleaner/Record.h"
19#include "index/FileIndex.h"
20#include "index/IndexAction.h"
21#include "support/Logger.h"
22#include "support/ThreadsafeFS.h"
23#include "support/Trace.h"
24#include "clang/Basic/LangOptions.h"
25#include "clang/Frontend/CompilerInvocation.h"
26#include "clang/Frontend/FrontendActions.h"
27#include "clang/Lex/PreprocessorOptions.h"
28#include "clang/Tooling/Inclusions/StandardLibrary.h"
29#include "llvm/ADT/IntrusiveRefCntPtr.h"
30#include "llvm/ADT/StringRef.h"
31#include "llvm/Support/MemoryBuffer.h"
32#include "llvm/Support/Path.h"
33
34namespace clang {
35namespace clangd {
36namespace {
37
// Language families whose standard library can be indexed.
// Deliberately an unscoped enum: the enumerators are also used as array
// indices (see StdLibSet::Best).
enum Lang {
  C,
  CXX,
};
39
40Lang langFromOpts(const LangOptions &LO) { return LO.CPlusPlus ? CXX : C; }
41llvm::StringLiteral mandatoryHeader(Lang L) {
42 switch (L) {
43 case C:
44 return "stdio.h";
45 case CXX:
46 return "vector";
47 }
48 llvm_unreachable("unhandled Lang");
49}
50
51LangStandard::Kind standardFromOpts(const LangOptions &LO) {
52 if (LO.CPlusPlus) {
53 if (LO.CPlusPlus23)
54 return LangStandard::lang_cxx23;
55 if (LO.CPlusPlus20)
56 return LangStandard::lang_cxx20;
57 if (LO.CPlusPlus17)
58 return LangStandard::lang_cxx17;
59 if (LO.CPlusPlus14)
60 return LangStandard::lang_cxx14;
61 if (LO.CPlusPlus11)
62 return LangStandard::lang_cxx11;
63 return LangStandard::lang_cxx98;
64 }
65 if (LO.C23)
66 return LangStandard::lang_c23;
67 // C17 has no new features, so treat {C11,C17} as C17.
68 if (LO.C11)
69 return LangStandard::lang_c17;
70 return LangStandard::lang_c99;
71}
72
73std::string buildUmbrella(llvm::StringLiteral Mandatory,
74 llvm::ArrayRef<tooling::stdlib::Header> Headers) {
75 std::string Result;
76 llvm::raw_string_ostream OS(Result);
77
78 // We __has_include guard all our #includes to avoid errors when using older
79 // stdlib version that don't have headers for the newest language standards.
80 // But make sure we get *some* error if things are totally broken.
81 OS << llvm::formatv(
82 Fmt: "#if !__has_include(<{0}>)\n"
83 "#error Mandatory header <{0}> not found in standard library!\n"
84 "#endif\n",
85 Vals&: Mandatory);
86
87 for (auto Header : Headers) {
88 OS << llvm::formatv(Fmt: "#if __has_include({0})\n"
89 "#include {0}\n"
90 "#endif\n",
91 Vals&: Header);
92 }
93 return Result;
94}
95
96} // namespace
97
98llvm::StringRef getStdlibUmbrellaHeader(const LangOptions &LO) {
99 // The umbrella header is the same for all versions of each language.
100 // Headers that are unsupported in old lang versions are usually guarded by
101 // #if. Some headers may be not present in old stdlib versions, the umbrella
102 // header guards with __has_include for this purpose.
103 Lang L = langFromOpts(LO);
104 switch (L) {
105 case CXX:
106 static std::string *UmbrellaCXX = new std::string(buildUmbrella(
107 Mandatory: mandatoryHeader(L),
108 Headers: tooling::stdlib::Header::all(L: tooling::stdlib::Lang::CXX)));
109 return *UmbrellaCXX;
110 case C:
111 static std::string *UmbrellaC = new std::string(
112 buildUmbrella(Mandatory: mandatoryHeader(L),
113 Headers: tooling::stdlib::Header::all(L: tooling::stdlib::Lang::C)));
114 return *UmbrellaC;
115 }
116 llvm_unreachable("invalid Lang in langFromOpts");
117}
118
119namespace {
120
121// Including the standard library leaks unwanted transitively included symbols.
122//
123// We want to drop these, they're a bit tricky to identify:
124// - we don't want to limit to symbols on our list, as our list has only
125// top-level symbols (and there may be legitimate stdlib extensions).
126// - we can't limit to only symbols defined in known stdlib headers, as stdlib
127// internal structure is murky
128// - we can't strictly require symbols to come from a particular path, e.g.
129// libstdc++ is mostly under /usr/include/c++/10/...
130// but std::ctype_base is under /usr/include/<platform>/c++/10/...
131// We require the symbol to come from a header that is *either* from
132// the standard library path (as identified by the location of <vector>), or
133// another header that defines a symbol from our stdlib list.
134SymbolSlab filter(SymbolSlab Slab, const StdLibLocation &Loc) {
135 SymbolSlab::Builder Result;
136
137 static auto &StandardHeaders = *[] {
138 auto *Set = new llvm::DenseSet<llvm::StringRef>();
139 for (auto Header : tooling::stdlib::Header::all(L: tooling::stdlib::Lang::CXX))
140 Set->insert(V: Header.name());
141 for (auto Header : tooling::stdlib::Header::all(L: tooling::stdlib::Lang::C))
142 Set->insert(V: Header.name());
143 return Set;
144 }();
145
146 // Form prefixes like file:///usr/include/c++/10/
147 // These can be trivially prefix-compared with URIs in the indexed symbols.
148 llvm::SmallVector<std::string> StdLibURIPrefixes;
149 for (const auto &Path : Loc.Paths) {
150 StdLibURIPrefixes.push_back(Elt: URI::create(AbsolutePath: Path).toString());
151 if (StdLibURIPrefixes.back().back() != '/')
152 StdLibURIPrefixes.back().push_back(c: '/');
153 }
154 // For each header URI, is it *either* prefixed by StdLibURIPrefixes *or*
155 // owner of a symbol whose insertable header is in StandardHeaders?
156 // Pointer key because strings in a SymbolSlab are interned.
157 llvm::DenseMap<const char *, bool> GoodHeader;
158 for (const Symbol &S : Slab) {
159 if (!S.IncludeHeaders.empty() &&
160 StandardHeaders.contains(V: S.IncludeHeaders.front().IncludeHeader)) {
161 GoodHeader[S.CanonicalDeclaration.FileURI] = true;
162 GoodHeader[S.Definition.FileURI] = true;
163 continue;
164 }
165 for (const char *URI :
166 {S.CanonicalDeclaration.FileURI, S.Definition.FileURI}) {
167 auto R = GoodHeader.try_emplace(Key: URI, Args: false);
168 if (R.second) {
169 R.first->second = llvm::any_of(
170 Range&: StdLibURIPrefixes,
171 P: [&, URIStr(llvm::StringRef(URI))](const std::string &Prefix) {
172 return URIStr.starts_with(Prefix);
173 });
174 }
175 }
176 }
177#ifndef NDEBUG
178 for (const auto &Good : GoodHeader)
179 if (Good.second && *Good.first)
180 dlog("Stdlib header: {0}", Good.first);
181#endif
182 // Empty URIs aren't considered good. (Definition can be blank).
183 auto IsGoodHeader = [&](const char *C) { return *C && GoodHeader.lookup(Val: C); };
184
185 for (const Symbol &S : Slab) {
186 if (!(IsGoodHeader(S.CanonicalDeclaration.FileURI) ||
187 IsGoodHeader(S.Definition.FileURI))) {
188 dlog("Ignoring wrong-header symbol {0}{1} in {2}", S.Scope, S.Name,
189 S.CanonicalDeclaration.FileURI);
190 continue;
191 }
192 Result.insert(S);
193 }
194
195 return std::move(Result).build();
196}
197
198} // namespace
199
200SymbolSlab indexStandardLibrary(llvm::StringRef HeaderSources,
201 std::unique_ptr<CompilerInvocation> CI,
202 const StdLibLocation &Loc,
203 const ThreadsafeFS &TFS) {
204 if (CI->getFrontendOpts().Inputs.size() != 1 ||
205 !CI->getPreprocessorOpts().ImplicitPCHInclude.empty()) {
206 elog(Fmt: "Indexing standard library failed: bad CompilerInvocation");
207 assert(false && "indexing stdlib with a dubious CompilerInvocation!");
208 return SymbolSlab();
209 }
210 const FrontendInputFile &Input = CI->getFrontendOpts().Inputs.front();
211 trace::Span Tracer("StandardLibraryIndex");
212 LangStandard::Kind LangStd = standardFromOpts(LO: CI->getLangOpts());
213 log(Fmt: "Indexing {0} standard library in the context of {1}",
214 Vals: LangStandard::getLangStandardForKind(K: LangStd).getName(), Vals: Input.getFile());
215
216 SymbolSlab Symbols;
217 IgnoreDiagnostics IgnoreDiags;
218 // CompilerInvocation is taken from elsewhere, and may map a dirty buffer.
219 CI->getPreprocessorOpts().clearRemappedFiles();
220 auto Clang = prepareCompilerInstance(
221 std::move(CI), /*Preamble=*/nullptr,
222 MainFile: llvm::MemoryBuffer::getMemBuffer(InputData: HeaderSources, BufferName: Input.getFile()),
223 TFS.view(/*CWD=*/std::nullopt), IgnoreDiags);
224 if (!Clang) {
225 elog(Fmt: "Standard Library Index: Couldn't build compiler instance");
226 return Symbols;
227 }
228
229 SyntaxOnlyAction Action;
230
231 if (!Action.BeginSourceFile(CI&: *Clang, Input)) {
232 elog(Fmt: "Standard Library Index: BeginSourceFile() failed");
233 return Symbols;
234 }
235
236 if (llvm::Error Err = Action.Execute()) {
237 elog(Fmt: "Standard Library Index: Execute failed: {0}", Vals: std::move(Err));
238 return Symbols;
239 }
240
241 // We don't care about include graph for stdlib headers, so provide a no-op
242 // PI.
243 include_cleaner::PragmaIncludes PI;
244 auto Slabs =
245 indexHeaderSymbols(Version: "", AST&: Clang->getASTContext(), PP&: Clang->getPreprocessor(),
246 PI, Origin: SymbolOrigin::StdLib);
247 Symbols = std::move(std::get<0>(t&: Slabs));
248
249 // Run EndSourceFile() after indexing completes, so ensure the AST and
250 // preprocessor state is alive during indexing.
251 Action.EndSourceFile();
252
253 unsigned SymbolsBeforeFilter = Symbols.size();
254 Symbols = filter(Slab: std::move(Symbols), Loc);
255 bool Errors = Clang->hasDiagnostics() &&
256 Clang->getDiagnostics().hasUncompilableErrorOccurred();
257 log(Fmt: "Indexed {0} standard library{3}: {1} symbols, {2} filtered",
258 Vals: LangStandard::getLangStandardForKind(K: LangStd).getName(), Vals: Symbols.size(),
259 Vals: SymbolsBeforeFilter - Symbols.size(),
260 Vals: Errors ? " (incomplete due to errors)" : "");
261 SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
262 return Symbols;
263}
264
265SymbolSlab indexStandardLibrary(std::unique_ptr<CompilerInvocation> Invocation,
266 const StdLibLocation &Loc,
267 const ThreadsafeFS &TFS) {
268 llvm::StringRef Header = getStdlibUmbrellaHeader(LO: Invocation->getLangOpts());
269 return indexStandardLibrary(HeaderSources: Header, CI: std::move(Invocation), Loc, TFS);
270}
271
272bool StdLibSet::isBest(const LangOptions &LO) const {
273 return standardFromOpts(LO) >=
274 Best[langFromOpts(LO)].load(m: std::memory_order_acquire);
275}
276
277std::optional<StdLibLocation> StdLibSet::add(const LangOptions &LO,
278 const HeaderSearch &HS) {
279 Lang L = langFromOpts(LO);
280 int OldVersion = Best[L].load(m: std::memory_order_acquire);
281 int NewVersion = standardFromOpts(LO);
282 dlog("Index stdlib? {0}",
283 LangStandard::getLangStandardForKind(standardFromOpts(LO)).getName());
284
285 if (!Config::current().Index.StandardLibrary) {
286 dlog("No: disabled in config");
287 return std::nullopt;
288 }
289
290 if (NewVersion <= OldVersion) {
291 dlog("No: have {0}, {1}>={2}",
292 LangStandard::getLangStandardForKind(
293 static_cast<LangStandard::Kind>(NewVersion))
294 .getName(),
295 OldVersion, NewVersion);
296 return std::nullopt;
297 }
298
299 // We'd like to index a standard library here if there is one.
300 // Check for the existence of <vector> on the search path.
301 // We could cache this, but we only get here repeatedly when there's no
302 // stdlib, and even then only once per preamble build.
303 llvm::StringLiteral ProbeHeader = mandatoryHeader(L);
304 llvm::SmallString<256> Path; // Scratch space.
305 llvm::SmallVector<std::string> SearchPaths;
306 auto RecordHeaderPath = [&](llvm::StringRef HeaderPath) {
307 llvm::StringRef DirPath = llvm::sys::path::parent_path(path: HeaderPath);
308 if (!HS.getFileMgr().getVirtualFileSystem().getRealPath(Path: DirPath, Output&: Path))
309 SearchPaths.emplace_back(Args&: Path);
310 };
311 for (const auto &DL :
312 llvm::make_range(x: HS.search_dir_begin(), y: HS.search_dir_end())) {
313 switch (DL.getLookupType()) {
314 case DirectoryLookup::LT_NormalDir: {
315 Path = DL.getDirRef()->getName();
316 llvm::sys::path::append(path&: Path, a: ProbeHeader);
317 llvm::vfs::Status Stat;
318 if (!HS.getFileMgr().getNoncachedStatValue(Path, Result&: Stat) &&
319 Stat.isRegularFile())
320 RecordHeaderPath(Path);
321 break;
322 }
323 case DirectoryLookup::LT_Framework:
324 // stdlib can't be a framework (framework includes must have a slash)
325 continue;
326 case DirectoryLookup::LT_HeaderMap:
327 llvm::StringRef Target =
328 DL.getHeaderMap()->lookupFilename(Filename: ProbeHeader, DestPath&: Path);
329 if (!Target.empty())
330 RecordHeaderPath(Target);
331 break;
332 }
333 }
334 if (SearchPaths.empty())
335 return std::nullopt;
336
337 dlog("Found standard library in {0}", llvm::join(SearchPaths, ", "));
338
339 while (!Best[L].compare_exchange_weak(i1&: OldVersion, i2: NewVersion,
340 m: std::memory_order_acq_rel))
341 if (OldVersion >= NewVersion) {
342 dlog("No: lost the race");
343 return std::nullopt; // Another thread won the race while we were
344 // checking.
345 }
346
347 dlog("Yes, index stdlib!");
348 return StdLibLocation{.Paths: std::move(SearchPaths)};
349}
350
351} // namespace clangd
352} // namespace clang
353

// Source: clang-tools-extra/clangd/index/StdLib.cpp