1 | //===- JSONCompilationDatabase.cpp ----------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the implementation of the JSONCompilationDatabase. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "clang/Tooling/JSONCompilationDatabase.h" |
14 | #include "clang/Basic/LLVM.h" |
15 | #include "clang/Tooling/CompilationDatabase.h" |
16 | #include "clang/Tooling/CompilationDatabasePluginRegistry.h" |
17 | #include "clang/Tooling/Tooling.h" |
18 | #include "llvm/ADT/STLExtras.h" |
19 | #include "llvm/ADT/SmallString.h" |
20 | #include "llvm/ADT/SmallVector.h" |
21 | #include "llvm/ADT/StringRef.h" |
22 | #include "llvm/Support/Allocator.h" |
23 | #include "llvm/Support/Casting.h" |
24 | #include "llvm/Support/CommandLine.h" |
25 | #include "llvm/Support/ErrorOr.h" |
26 | #include "llvm/Support/MemoryBuffer.h" |
27 | #include "llvm/Support/Path.h" |
28 | #include "llvm/Support/StringSaver.h" |
29 | #include "llvm/Support/VirtualFileSystem.h" |
30 | #include "llvm/Support/YAMLParser.h" |
31 | #include "llvm/Support/raw_ostream.h" |
32 | #include "llvm/TargetParser/Host.h" |
33 | #include "llvm/TargetParser/Triple.h" |
34 | #include <cassert> |
35 | #include <memory> |
36 | #include <optional> |
37 | #include <string> |
38 | #include <system_error> |
39 | #include <tuple> |
40 | #include <utility> |
41 | #include <vector> |
42 | |
43 | using namespace clang; |
44 | using namespace tooling; |
45 | |
46 | namespace { |
47 | |
48 | /// A parser for escaped strings of command line arguments. |
49 | /// |
50 | /// Assumes \-escaping for quoted arguments (see the documentation of |
51 | /// unescapeCommandLine(...)). |
52 | class CommandLineArgumentParser { |
53 | public: |
54 | CommandLineArgumentParser(StringRef CommandLine) |
55 | : Input(CommandLine), Position(Input.begin()-1) {} |
56 | |
57 | std::vector<std::string> parse() { |
58 | bool HasMoreInput = true; |
59 | while (HasMoreInput && nextNonWhitespace()) { |
60 | std::string Argument; |
61 | HasMoreInput = parseStringInto(String&: Argument); |
62 | CommandLine.push_back(x: Argument); |
63 | } |
64 | return CommandLine; |
65 | } |
66 | |
67 | private: |
68 | // All private methods return true if there is more input available. |
69 | |
70 | bool parseStringInto(std::string &String) { |
71 | do { |
72 | if (*Position == '"') { |
73 | if (!parseDoubleQuotedStringInto(String)) return false; |
74 | } else if (*Position == '\'') { |
75 | if (!parseSingleQuotedStringInto(String)) return false; |
76 | } else { |
77 | if (!parseFreeStringInto(String)) return false; |
78 | } |
79 | } while (*Position != ' '); |
80 | return true; |
81 | } |
82 | |
83 | bool parseDoubleQuotedStringInto(std::string &String) { |
84 | if (!next()) return false; |
85 | while (*Position != '"') { |
86 | if (!skipEscapeCharacter()) return false; |
87 | String.push_back(c: *Position); |
88 | if (!next()) return false; |
89 | } |
90 | return next(); |
91 | } |
92 | |
93 | bool parseSingleQuotedStringInto(std::string &String) { |
94 | if (!next()) return false; |
95 | while (*Position != '\'') { |
96 | String.push_back(c: *Position); |
97 | if (!next()) return false; |
98 | } |
99 | return next(); |
100 | } |
101 | |
102 | bool parseFreeStringInto(std::string &String) { |
103 | do { |
104 | if (!skipEscapeCharacter()) return false; |
105 | String.push_back(c: *Position); |
106 | if (!next()) return false; |
107 | } while (*Position != ' ' && *Position != '"' && *Position != '\''); |
108 | return true; |
109 | } |
110 | |
111 | bool skipEscapeCharacter() { |
112 | if (*Position == '\\') { |
113 | return next(); |
114 | } |
115 | return true; |
116 | } |
117 | |
118 | bool nextNonWhitespace() { |
119 | do { |
120 | if (!next()) return false; |
121 | } while (*Position == ' '); |
122 | return true; |
123 | } |
124 | |
125 | bool next() { |
126 | ++Position; |
127 | return Position != Input.end(); |
128 | } |
129 | |
130 | const StringRef Input; |
131 | StringRef::iterator Position; |
132 | std::vector<std::string> CommandLine; |
133 | }; |
134 | |
135 | std::vector<std::string> unescapeCommandLine(JSONCommandLineSyntax Syntax, |
136 | StringRef EscapedCommandLine) { |
137 | if (Syntax == JSONCommandLineSyntax::AutoDetect) { |
138 | #ifdef _WIN32 |
139 | // Assume Windows command line parsing on Win32 |
140 | Syntax = JSONCommandLineSyntax::Windows; |
141 | #else |
142 | Syntax = JSONCommandLineSyntax::Gnu; |
143 | #endif |
144 | } |
145 | |
146 | if (Syntax == JSONCommandLineSyntax::Windows) { |
147 | llvm::BumpPtrAllocator Alloc; |
148 | llvm::StringSaver Saver(Alloc); |
149 | llvm::SmallVector<const char *, 64> T; |
150 | llvm::cl::TokenizeWindowsCommandLine(Source: EscapedCommandLine, Saver, NewArgv&: T); |
151 | std::vector<std::string> Result(T.begin(), T.end()); |
152 | return Result; |
153 | } |
154 | assert(Syntax == JSONCommandLineSyntax::Gnu); |
155 | CommandLineArgumentParser parser(EscapedCommandLine); |
156 | return parser.parse(); |
157 | } |
158 | |
159 | // This plugin locates a nearby compile_command.json file, and also infers |
160 | // compile commands for files not present in the database. |
161 | class JSONCompilationDatabasePlugin : public CompilationDatabasePlugin { |
162 | std::unique_ptr<CompilationDatabase> |
163 | loadFromDirectory(StringRef Directory, std::string &ErrorMessage) override { |
164 | SmallString<1024> JSONDatabasePath(Directory); |
165 | llvm::sys::path::append(path&: JSONDatabasePath, a: "compile_commands.json" ); |
166 | auto Base = JSONCompilationDatabase::loadFromFile( |
167 | FilePath: JSONDatabasePath, ErrorMessage, Syntax: JSONCommandLineSyntax::AutoDetect); |
168 | return Base ? inferTargetAndDriverMode( |
169 | Base: inferMissingCompileCommands(expandResponseFiles( |
170 | Base: std::move(Base), FS: llvm::vfs::getRealFileSystem()))) |
171 | : nullptr; |
172 | } |
173 | }; |
174 | |
175 | } // namespace |
176 | |
177 | // Register the JSONCompilationDatabasePlugin with the |
178 | // CompilationDatabasePluginRegistry using this statically initialized variable. |
179 | static CompilationDatabasePluginRegistry::Add<JSONCompilationDatabasePlugin> |
180 | X("json-compilation-database" , "Reads JSON formatted compilation databases" ); |
181 | |
namespace clang::tooling {

// Anchor symbol: it exists so that the linker is forced to link in the object
// file generated from this source, which in turn registers the
// JSONCompilationDatabasePlugin.
volatile int JSONAnchorSource = 0;

} // namespace clang::tooling
191 | |
192 | std::unique_ptr<JSONCompilationDatabase> |
193 | JSONCompilationDatabase::loadFromFile(StringRef FilePath, |
194 | std::string &ErrorMessage, |
195 | JSONCommandLineSyntax Syntax) { |
196 | // Don't mmap: if we're a long-lived process, the build system may overwrite. |
197 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> DatabaseBuffer = |
198 | llvm::MemoryBuffer::getFile(Filename: FilePath, /*IsText=*/false, |
199 | /*RequiresNullTerminator=*/true, |
200 | /*IsVolatile=*/true); |
201 | if (std::error_code Result = DatabaseBuffer.getError()) { |
202 | ErrorMessage = "Error while opening JSON database: " + Result.message(); |
203 | return nullptr; |
204 | } |
205 | std::unique_ptr<JSONCompilationDatabase> Database( |
206 | new JSONCompilationDatabase(std::move(*DatabaseBuffer), Syntax)); |
207 | if (!Database->parse(ErrorMessage)) |
208 | return nullptr; |
209 | return Database; |
210 | } |
211 | |
212 | std::unique_ptr<JSONCompilationDatabase> |
213 | JSONCompilationDatabase::loadFromBuffer(StringRef DatabaseString, |
214 | std::string &ErrorMessage, |
215 | JSONCommandLineSyntax Syntax) { |
216 | std::unique_ptr<llvm::MemoryBuffer> DatabaseBuffer( |
217 | llvm::MemoryBuffer::getMemBufferCopy(InputData: DatabaseString)); |
218 | std::unique_ptr<JSONCompilationDatabase> Database( |
219 | new JSONCompilationDatabase(std::move(DatabaseBuffer), Syntax)); |
220 | if (!Database->parse(ErrorMessage)) |
221 | return nullptr; |
222 | return Database; |
223 | } |
224 | |
225 | std::vector<CompileCommand> |
226 | JSONCompilationDatabase::getCompileCommands(StringRef FilePath) const { |
227 | SmallString<128> NativeFilePath; |
228 | llvm::sys::path::native(path: FilePath, result&: NativeFilePath); |
229 | |
230 | std::string Error; |
231 | llvm::raw_string_ostream ES(Error); |
232 | StringRef Match = MatchTrie.findEquivalent(FileName: NativeFilePath, Error&: ES); |
233 | if (Match.empty()) |
234 | return {}; |
235 | const auto CommandsRefI = IndexByFile.find(Key: Match); |
236 | if (CommandsRefI == IndexByFile.end()) |
237 | return {}; |
238 | std::vector<CompileCommand> Commands; |
239 | getCommands(CommandsRef: CommandsRefI->getValue(), Commands); |
240 | return Commands; |
241 | } |
242 | |
243 | std::vector<std::string> |
244 | JSONCompilationDatabase::getAllFiles() const { |
245 | std::vector<std::string> Result; |
246 | for (const auto &CommandRef : IndexByFile) |
247 | Result.push_back(x: CommandRef.first().str()); |
248 | return Result; |
249 | } |
250 | |
251 | std::vector<CompileCommand> |
252 | JSONCompilationDatabase::getAllCompileCommands() const { |
253 | std::vector<CompileCommand> Commands; |
254 | getCommands(CommandsRef: AllCommands, Commands); |
255 | return Commands; |
256 | } |
257 | |
258 | static llvm::StringRef stripExecutableExtension(llvm::StringRef Name) { |
259 | Name.consume_back(Suffix: ".exe" ); |
260 | return Name; |
261 | } |
262 | |
263 | // There are compiler-wrappers (ccache, distcc, gomacc) that take the "real" |
264 | // compiler as an argument, e.g. distcc gcc -O3 foo.c. |
265 | // These end up in compile_commands.json when people set CC="distcc gcc". |
266 | // Clang's driver doesn't understand this, so we need to unwrap. |
267 | static bool unwrapCommand(std::vector<std::string> &Args) { |
268 | if (Args.size() < 2) |
269 | return false; |
270 | StringRef Wrapper = |
271 | stripExecutableExtension(Name: llvm::sys::path::filename(path: Args.front())); |
272 | if (Wrapper == "distcc" || Wrapper == "gomacc" || Wrapper == "ccache" || |
273 | Wrapper == "sccache" ) { |
274 | // Most of these wrappers support being invoked 3 ways: |
275 | // `distcc g++ file.c` This is the mode we're trying to match. |
276 | // We need to drop `distcc`. |
277 | // `distcc file.c` This acts like compiler is cc or similar. |
278 | // Clang's driver can handle this, no change needed. |
279 | // `g++ file.c` g++ is a symlink to distcc. |
280 | // We don't even notice this case, and all is well. |
281 | // |
282 | // We need to distinguish between the first and second case. |
283 | // The wrappers themselves don't take flags, so Args[1] is a compiler flag, |
284 | // an input file, or a compiler. Inputs have extensions, compilers don't. |
285 | bool HasCompiler = |
286 | (Args[1][0] != '-') && |
287 | !llvm::sys::path::has_extension(path: stripExecutableExtension(Name: Args[1])); |
288 | if (HasCompiler) { |
289 | Args.erase(position: Args.begin()); |
290 | return true; |
291 | } |
292 | // If !HasCompiler, wrappers act like GCC. Fine: so do we. |
293 | } |
294 | return false; |
295 | } |
296 | |
297 | static std::vector<std::string> |
298 | nodeToCommandLine(JSONCommandLineSyntax Syntax, |
299 | const std::vector<llvm::yaml::ScalarNode *> &Nodes) { |
300 | SmallString<1024> Storage; |
301 | std::vector<std::string> Arguments; |
302 | if (Nodes.size() == 1) |
303 | Arguments = unescapeCommandLine(Syntax, EscapedCommandLine: Nodes[0]->getValue(Storage)); |
304 | else |
305 | for (const auto *Node : Nodes) |
306 | Arguments.push_back(x: std::string(Node->getValue(Storage))); |
307 | // There may be multiple wrappers: using distcc and ccache together is common. |
308 | while (unwrapCommand(Args&: Arguments)) |
309 | ; |
310 | return Arguments; |
311 | } |
312 | |
313 | void JSONCompilationDatabase::getCommands( |
314 | ArrayRef<CompileCommandRef> CommandsRef, |
315 | std::vector<CompileCommand> &Commands) const { |
316 | for (const auto &CommandRef : CommandsRef) { |
317 | SmallString<8> DirectoryStorage; |
318 | SmallString<32> FilenameStorage; |
319 | SmallString<32> OutputStorage; |
320 | auto Output = std::get<3>(t: CommandRef); |
321 | Commands.emplace_back( |
322 | args: std::get<0>(t: CommandRef)->getValue(Storage&: DirectoryStorage), |
323 | args: std::get<1>(t: CommandRef)->getValue(Storage&: FilenameStorage), |
324 | args: nodeToCommandLine(Syntax, Nodes: std::get<2>(t: CommandRef)), |
325 | args: Output ? Output->getValue(Storage&: OutputStorage) : "" ); |
326 | } |
327 | } |
328 | |
329 | bool JSONCompilationDatabase::parse(std::string &ErrorMessage) { |
330 | llvm::yaml::document_iterator I = YAMLStream.begin(); |
331 | if (I == YAMLStream.end()) { |
332 | ErrorMessage = "Error while parsing YAML." ; |
333 | return false; |
334 | } |
335 | llvm::yaml::Node *Root = I->getRoot(); |
336 | if (!Root) { |
337 | ErrorMessage = "Error while parsing YAML." ; |
338 | return false; |
339 | } |
340 | auto *Array = dyn_cast<llvm::yaml::SequenceNode>(Val: Root); |
341 | if (!Array) { |
342 | ErrorMessage = "Expected array." ; |
343 | return false; |
344 | } |
345 | for (auto &NextObject : *Array) { |
346 | auto *Object = dyn_cast<llvm::yaml::MappingNode>(Val: &NextObject); |
347 | if (!Object) { |
348 | ErrorMessage = "Expected object." ; |
349 | return false; |
350 | } |
351 | llvm::yaml::ScalarNode *Directory = nullptr; |
352 | std::optional<std::vector<llvm::yaml::ScalarNode *>> Command; |
353 | llvm::yaml::ScalarNode *File = nullptr; |
354 | llvm::yaml::ScalarNode *Output = nullptr; |
355 | for (auto& NextKeyValue : *Object) { |
356 | auto *KeyString = dyn_cast<llvm::yaml::ScalarNode>(Val: NextKeyValue.getKey()); |
357 | if (!KeyString) { |
358 | ErrorMessage = "Expected strings as key." ; |
359 | return false; |
360 | } |
361 | SmallString<10> KeyStorage; |
362 | StringRef KeyValue = KeyString->getValue(Storage&: KeyStorage); |
363 | llvm::yaml::Node *Value = NextKeyValue.getValue(); |
364 | if (!Value) { |
365 | ErrorMessage = "Expected value." ; |
366 | return false; |
367 | } |
368 | auto *ValueString = dyn_cast<llvm::yaml::ScalarNode>(Val: Value); |
369 | auto *SequenceString = dyn_cast<llvm::yaml::SequenceNode>(Val: Value); |
370 | if (KeyValue == "arguments" ) { |
371 | if (!SequenceString) { |
372 | ErrorMessage = "Expected sequence as value." ; |
373 | return false; |
374 | } |
375 | Command = std::vector<llvm::yaml::ScalarNode *>(); |
376 | for (auto &Argument : *SequenceString) { |
377 | auto *Scalar = dyn_cast<llvm::yaml::ScalarNode>(Val: &Argument); |
378 | if (!Scalar) { |
379 | ErrorMessage = "Only strings are allowed in 'arguments'." ; |
380 | return false; |
381 | } |
382 | Command->push_back(x: Scalar); |
383 | } |
384 | } else { |
385 | if (!ValueString) { |
386 | ErrorMessage = "Expected string as value." ; |
387 | return false; |
388 | } |
389 | if (KeyValue == "directory" ) { |
390 | Directory = ValueString; |
391 | } else if (KeyValue == "command" ) { |
392 | if (!Command) |
393 | Command = std::vector<llvm::yaml::ScalarNode *>(1, ValueString); |
394 | } else if (KeyValue == "file" ) { |
395 | File = ValueString; |
396 | } else if (KeyValue == "output" ) { |
397 | Output = ValueString; |
398 | } else { |
399 | ErrorMessage = |
400 | ("Unknown key: \"" + KeyString->getRawValue() + "\"" ).str(); |
401 | return false; |
402 | } |
403 | } |
404 | } |
405 | if (!File) { |
406 | ErrorMessage = "Missing key: \"file\"." ; |
407 | return false; |
408 | } |
409 | if (!Command) { |
410 | ErrorMessage = "Missing key: \"command\" or \"arguments\"." ; |
411 | return false; |
412 | } |
413 | if (!Directory) { |
414 | ErrorMessage = "Missing key: \"directory\"." ; |
415 | return false; |
416 | } |
417 | SmallString<8> FileStorage; |
418 | StringRef FileName = File->getValue(Storage&: FileStorage); |
419 | SmallString<128> NativeFilePath; |
420 | if (llvm::sys::path::is_relative(path: FileName)) { |
421 | SmallString<8> DirectoryStorage; |
422 | SmallString<128> AbsolutePath(Directory->getValue(Storage&: DirectoryStorage)); |
423 | llvm::sys::path::append(path&: AbsolutePath, a: FileName); |
424 | llvm::sys::path::native(path: AbsolutePath, result&: NativeFilePath); |
425 | } else { |
426 | llvm::sys::path::native(path: FileName, result&: NativeFilePath); |
427 | } |
428 | llvm::sys::path::remove_dots(path&: NativeFilePath, /*remove_dot_dot=*/true); |
429 | auto Cmd = CompileCommandRef(Directory, File, *Command, Output); |
430 | IndexByFile[NativeFilePath].push_back(x: Cmd); |
431 | AllCommands.push_back(x: Cmd); |
432 | MatchTrie.insert(NewPath: NativeFilePath); |
433 | } |
434 | return true; |
435 | } |
436 | |