1 | //===-- SerializationTests.cpp - Binary and YAML serialization unit tests -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "Headers.h" |
10 | #include "RIFF.h" |
11 | #include "index/Serialization.h" |
12 | #include "support/Logger.h" |
13 | #include "clang/Tooling/CompilationDatabase.h" |
14 | #include "llvm/ADT/StringExtras.h" |
15 | #include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX |
16 | #include "llvm/Support/Compression.h" |
17 | #include "llvm/Support/Error.h" |
18 | #include "llvm/Support/ScopedPrinter.h" |
19 | #include "gmock/gmock.h" |
20 | #include "gtest/gtest.h" |
21 | #ifdef LLVM_ON_UNIX |
22 | #include <sys/resource.h> |
23 | #endif |
24 | |
25 | using ::testing::ElementsAre; |
26 | using ::testing::Pair; |
27 | using ::testing::UnorderedElementsAre; |
28 | using ::testing::UnorderedElementsAreArray; |
29 | |
30 | namespace clang { |
31 | namespace clangd { |
32 | namespace { |
33 | |
// Index-file fixture in YAML form, shared by the tests below. It contains two
// symbols (Foo1 and Foo2), the refs of the first symbol, one relation, one
// compile command and one source (include-graph) entry.
// YAML nesting is indentation-sensitive: child keys must stay indented under
// their parent mapping or sequence item, otherwise they become top-level keys.
const char *YAML = R"(
---
!Symbol
ID: 057557CEBF6E6B2D
Name: 'Foo1'
Scope: 'clang::'
SymInfo:
  Kind: Function
  Lang: Cpp
CanonicalDeclaration:
  FileURI: file:///path/foo.h
  Start:
    Line: 1
    Column: 0
  End:
    Line: 1
    Column: 1
Flags: 129
Documentation: 'Foo doc'
ReturnType: 'int'
IncludeHeaders:
  - Header: 'include1'
    References: 7
    Directives: [ Include ]
  - Header: 'include2'
    References: 3
    Directives: [ Import ]
  - Header: 'include3'
    References: 2
    Directives: [ Include, Import ]
  - Header: 'include4'
    References: 1
    Directives: [ ]
...
---
!Symbol
ID: 057557CEBF6E6B2E
Name: 'Foo2'
Scope: 'clang::'
SymInfo:
  Kind: Function
  Lang: Cpp
CanonicalDeclaration:
  FileURI: file:///path/bar.h
  Start:
    Line: 1
    Column: 0
  End:
    Line: 1
    Column: 1
Flags: 2
Signature: '-sig'
CompletionSnippetSuffix: '-snippet'
...
!Refs
ID: 057557CEBF6E6B2D
References:
  - Kind: 4
    Location:
      FileURI: file:///path/foo.cc
      Start:
        Line: 5
        Column: 3
      End:
        Line: 5
        Column: 8
...
--- !Relations
Subject:
  ID: 6481EE7AF2841756
Predicate: 0
Object:
  ID: 6512AEC512EA3A2D
...
--- !Cmd
Directory: 'testdir'
CommandLine:
  - 'cmd1'
  - 'cmd2'
...
--- !Source
URI: 'file:///path/source1.cpp'
Flags: 1
Digest: EED8F5EAF25C453C
DirectIncludes:
  - 'file:///path/inc1.h'
  - 'file:///path/inc2.h'
...
)";
123 | |
124 | MATCHER_P(id, I, "" ) { return arg.ID == cantFail(SymbolID::fromStr(I)); } |
125 | MATCHER_P(qName, Name, "" ) { return (arg.Scope + arg.Name).str() == Name; } |
126 | MATCHER_P3(IncludeHeaderWithRefAndDirectives, IncludeHeader, References, |
127 | SupportedDirectives, "" ) { |
128 | return (arg.IncludeHeader == IncludeHeader) && |
129 | (arg.References == References) && |
130 | (arg.SupportedDirectives == SupportedDirectives); |
131 | } |
132 | |
133 | auto readIndexFile(llvm::StringRef Text) { |
134 | return readIndexFile(Text, SymbolOrigin::Static); |
135 | } |
136 | |
137 | TEST(SerializationTest, NoCrashOnEmptyYAML) { |
138 | EXPECT_TRUE(bool(readIndexFile("" ))); |
139 | } |
140 | |
141 | TEST(SerializationTest, YAMLConversions) { |
142 | auto ParsedYAML = readIndexFile(Text: YAML); |
143 | ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError(); |
144 | ASSERT_TRUE(bool(ParsedYAML->Symbols)); |
145 | EXPECT_THAT( |
146 | *ParsedYAML->Symbols, |
147 | UnorderedElementsAre(id("057557CEBF6E6B2D" ), id("057557CEBF6E6B2E" ))); |
148 | |
149 | auto Sym1 = *ParsedYAML->Symbols->find( |
150 | SymID: cantFail(ValOrErr: SymbolID::fromStr("057557CEBF6E6B2D" ))); |
151 | auto Sym2 = *ParsedYAML->Symbols->find( |
152 | SymID: cantFail(ValOrErr: SymbolID::fromStr("057557CEBF6E6B2E" ))); |
153 | |
154 | EXPECT_THAT(Sym1, qName("clang::Foo1" )); |
155 | EXPECT_EQ(Sym1.Signature, "" ); |
156 | EXPECT_EQ(Sym1.Documentation, "Foo doc" ); |
157 | EXPECT_EQ(Sym1.ReturnType, "int" ); |
158 | EXPECT_EQ(StringRef(Sym1.CanonicalDeclaration.FileURI), "file:///path/foo.h" ); |
159 | EXPECT_EQ(Sym1.Origin, SymbolOrigin::Static); |
160 | EXPECT_EQ(static_cast<uint8_t>(Sym1.Flags), 129); |
161 | EXPECT_TRUE(Sym1.Flags & Symbol::IndexedForCodeCompletion); |
162 | EXPECT_FALSE(Sym1.Flags & Symbol::Deprecated); |
163 | EXPECT_THAT( |
164 | Sym1.IncludeHeaders, |
165 | UnorderedElementsAre( |
166 | IncludeHeaderWithRefAndDirectives("include1" , 7u, Symbol::Include), |
167 | IncludeHeaderWithRefAndDirectives("include2" , 3u, Symbol::Import), |
168 | IncludeHeaderWithRefAndDirectives("include3" , 2u, |
169 | Symbol::Include | Symbol::Import), |
170 | IncludeHeaderWithRefAndDirectives("include4" , 1u, Symbol::Invalid))); |
171 | |
172 | EXPECT_THAT(Sym2, qName("clang::Foo2" )); |
173 | EXPECT_EQ(Sym2.Signature, "-sig" ); |
174 | EXPECT_EQ(Sym2.ReturnType, "" ); |
175 | EXPECT_EQ(llvm::StringRef(Sym2.CanonicalDeclaration.FileURI), |
176 | "file:///path/bar.h" ); |
177 | EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion); |
178 | EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated); |
179 | |
180 | ASSERT_TRUE(bool(ParsedYAML->Refs)); |
181 | EXPECT_THAT( |
182 | *ParsedYAML->Refs, |
183 | UnorderedElementsAre(Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2D" )), |
184 | ::testing::SizeIs(1)))); |
185 | auto Ref1 = ParsedYAML->Refs->begin()->second.front(); |
186 | EXPECT_EQ(Ref1.Kind, RefKind::Reference); |
187 | EXPECT_EQ(StringRef(Ref1.Location.FileURI), "file:///path/foo.cc" ); |
188 | |
189 | SymbolID Base = cantFail(ValOrErr: SymbolID::fromStr("6481EE7AF2841756" )); |
190 | SymbolID Derived = cantFail(ValOrErr: SymbolID::fromStr("6512AEC512EA3A2D" )); |
191 | ASSERT_TRUE(bool(ParsedYAML->Relations)); |
192 | EXPECT_THAT( |
193 | *ParsedYAML->Relations, |
194 | UnorderedElementsAre(Relation{Base, RelationKind::BaseOf, Derived})); |
195 | |
196 | ASSERT_TRUE(bool(ParsedYAML->Cmd)); |
197 | auto &Cmd = *ParsedYAML->Cmd; |
198 | ASSERT_EQ(Cmd.Directory, "testdir" ); |
199 | EXPECT_THAT(Cmd.CommandLine, ElementsAre("cmd1" , "cmd2" )); |
200 | |
201 | ASSERT_TRUE(bool(ParsedYAML->Sources)); |
202 | const auto *URI = "file:///path/source1.cpp" ; |
203 | ASSERT_TRUE(ParsedYAML->Sources->count(URI)); |
204 | auto IGNDeserialized = ParsedYAML->Sources->lookup(Key: URI); |
205 | EXPECT_EQ(llvm::toHex(IGNDeserialized.Digest), "EED8F5EAF25C453C" ); |
206 | EXPECT_THAT(IGNDeserialized.DirectIncludes, |
207 | ElementsAre("file:///path/inc1.h" , "file:///path/inc2.h" )); |
208 | EXPECT_EQ(IGNDeserialized.URI, URI); |
209 | EXPECT_EQ(IGNDeserialized.Flags, IncludeGraphNode::SourceFlag(1)); |
210 | } |
211 | |
212 | std::vector<std::string> yamlFromSymbols(const SymbolSlab &Slab) { |
213 | std::vector<std::string> Result; |
214 | for (const auto &Sym : Slab) |
215 | Result.push_back(x: toYAML(Sym)); |
216 | return Result; |
217 | } |
218 | std::vector<std::string> yamlFromRefs(const RefSlab &Slab) { |
219 | std::vector<std::string> Result; |
220 | for (const auto &Refs : Slab) |
221 | Result.push_back(x: toYAML(Refs)); |
222 | return Result; |
223 | } |
224 | |
225 | std::vector<std::string> yamlFromRelations(const RelationSlab &Slab) { |
226 | std::vector<std::string> Result; |
227 | for (const auto &Rel : Slab) |
228 | Result.push_back(x: toYAML(Rel)); |
229 | return Result; |
230 | } |
231 | |
232 | TEST(SerializationTest, BinaryConversions) { |
233 | auto In = readIndexFile(Text: YAML); |
234 | EXPECT_TRUE(bool(In)) << In.takeError(); |
235 | |
236 | // Write to binary format, and parse again. |
237 | IndexFileOut Out(*In); |
238 | Out.Format = IndexFileFormat::RIFF; |
239 | std::string Serialized = llvm::to_string(Value: Out); |
240 | |
241 | auto In2 = readIndexFile(Text: Serialized); |
242 | ASSERT_TRUE(bool(In2)) << In2.takeError(); |
243 | ASSERT_TRUE(In2->Symbols); |
244 | ASSERT_TRUE(In2->Refs); |
245 | ASSERT_TRUE(In2->Relations); |
246 | |
247 | // Assert the YAML serializations match, for nice comparisons and diffs. |
248 | EXPECT_THAT(yamlFromSymbols(*In2->Symbols), |
249 | UnorderedElementsAreArray(yamlFromSymbols(*In->Symbols))); |
250 | EXPECT_THAT(yamlFromRefs(*In2->Refs), |
251 | UnorderedElementsAreArray(yamlFromRefs(*In->Refs))); |
252 | EXPECT_THAT(yamlFromRelations(*In2->Relations), |
253 | UnorderedElementsAreArray(yamlFromRelations(*In->Relations))); |
254 | } |
255 | |
256 | TEST(SerializationTest, SrcsTest) { |
257 | auto In = readIndexFile(Text: YAML); |
258 | EXPECT_TRUE(bool(In)) << In.takeError(); |
259 | |
260 | std::string TestContent("TestContent" ); |
261 | IncludeGraphNode IGN; |
262 | IGN.Digest = digest(Content: TestContent); |
263 | IGN.DirectIncludes = {"inc1" , "inc2" }; |
264 | IGN.URI = "URI" ; |
265 | IGN.Flags |= IncludeGraphNode::SourceFlag::IsTU; |
266 | IGN.Flags |= IncludeGraphNode::SourceFlag::HadErrors; |
267 | IncludeGraph Sources; |
268 | Sources[IGN.URI] = IGN; |
269 | // Write to binary format, and parse again. |
270 | IndexFileOut Out(*In); |
271 | Out.Format = IndexFileFormat::RIFF; |
272 | Out.Sources = &Sources; |
273 | { |
274 | std::string Serialized = llvm::to_string(Value: Out); |
275 | |
276 | auto In = readIndexFile(Text: Serialized); |
277 | ASSERT_TRUE(bool(In)) << In.takeError(); |
278 | ASSERT_TRUE(In->Symbols); |
279 | ASSERT_TRUE(In->Refs); |
280 | ASSERT_TRUE(In->Sources); |
281 | ASSERT_TRUE(In->Sources->count(IGN.URI)); |
282 | // Assert the YAML serializations match, for nice comparisons and diffs. |
283 | EXPECT_THAT(yamlFromSymbols(*In->Symbols), |
284 | UnorderedElementsAreArray(yamlFromSymbols(*In->Symbols))); |
285 | EXPECT_THAT(yamlFromRefs(*In->Refs), |
286 | UnorderedElementsAreArray(yamlFromRefs(*In->Refs))); |
287 | auto IGNDeserialized = In->Sources->lookup(Key: IGN.URI); |
288 | EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest); |
289 | EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes); |
290 | EXPECT_EQ(IGNDeserialized.URI, IGN.URI); |
291 | EXPECT_EQ(IGNDeserialized.Flags, IGN.Flags); |
292 | } |
293 | } |
294 | |
295 | TEST(SerializationTest, CmdlTest) { |
296 | auto In = readIndexFile(Text: YAML); |
297 | EXPECT_TRUE(bool(In)) << In.takeError(); |
298 | |
299 | tooling::CompileCommand Cmd; |
300 | Cmd.Directory = "testdir" ; |
301 | Cmd.CommandLine.push_back(x: "cmd1" ); |
302 | Cmd.CommandLine.push_back(x: "cmd2" ); |
303 | Cmd.Filename = "ignored" ; |
304 | Cmd.Heuristic = "ignored" ; |
305 | Cmd.Output = "ignored" ; |
306 | |
307 | IndexFileOut Out(*In); |
308 | Out.Format = IndexFileFormat::RIFF; |
309 | Out.Cmd = &Cmd; |
310 | { |
311 | std::string Serialized = llvm::to_string(Value: Out); |
312 | |
313 | auto In = readIndexFile(Text: Serialized); |
314 | ASSERT_TRUE(bool(In)) << In.takeError(); |
315 | ASSERT_TRUE(In->Cmd); |
316 | |
317 | const tooling::CompileCommand &SerializedCmd = *In->Cmd; |
318 | EXPECT_EQ(SerializedCmd.CommandLine, Cmd.CommandLine); |
319 | EXPECT_EQ(SerializedCmd.Directory, Cmd.Directory); |
320 | EXPECT_NE(SerializedCmd.Filename, Cmd.Filename); |
321 | EXPECT_NE(SerializedCmd.Heuristic, Cmd.Heuristic); |
322 | EXPECT_NE(SerializedCmd.Output, Cmd.Output); |
323 | } |
324 | } |
325 | |
326 | // rlimit is part of POSIX. RLIMIT_AS does not exist in OpenBSD. |
327 | // Sanitizers use a lot of address space, so we can't apply strict limits. |
328 | #if LLVM_ON_UNIX && defined(RLIMIT_AS) && !LLVM_ADDRESS_SANITIZER_BUILD && \ |
329 | !LLVM_MEMORY_SANITIZER_BUILD && !LLVM_THREAD_SANITIZER_BUILD |
330 | class ScopedMemoryLimit { |
331 | struct rlimit OriginalLimit; |
332 | bool Succeeded = false; |
333 | |
334 | public: |
335 | ScopedMemoryLimit(rlim_t Bytes) { |
336 | if (!getrlimit(RLIMIT_AS, rlimits: &OriginalLimit)) { |
337 | struct rlimit NewLimit = OriginalLimit; |
338 | NewLimit.rlim_cur = Bytes; |
339 | Succeeded = !setrlimit(RLIMIT_AS, rlimits: &NewLimit); |
340 | } |
341 | if (!Succeeded) |
342 | log(Fmt: "Failed to set rlimit" ); |
343 | } |
344 | |
345 | ~ScopedMemoryLimit() { |
346 | if (Succeeded) |
347 | setrlimit(RLIMIT_AS, rlimits: &OriginalLimit); |
348 | } |
349 | }; |
350 | #else |
351 | class ScopedMemoryLimit { |
352 | public: |
353 | ScopedMemoryLimit(unsigned Bytes) { log("rlimit unsupported" ); } |
354 | }; |
355 | #endif |
356 | |
357 | // Test that our deserialization detects invalid array sizes without allocating. |
358 | // If this detection fails, the test should allocate a huge array and crash. |
359 | TEST(SerializationTest, NoCrashOnBadArraySize) { |
360 | // This test is tricky because we need to construct a subtly invalid file. |
361 | // First, create a valid serialized file. |
362 | auto In = readIndexFile(Text: YAML); |
363 | ASSERT_FALSE(!In) << In.takeError(); |
364 | IndexFileOut Out(*In); |
365 | Out.Format = IndexFileFormat::RIFF; |
366 | std::string Serialized = llvm::to_string(Value: Out); |
367 | |
368 | // Low-level parse it again and find the `srcs` chunk we're going to corrupt. |
369 | auto Parsed = riff::readFile(Stream: Serialized); |
370 | ASSERT_FALSE(!Parsed) << Parsed.takeError(); |
371 | auto Srcs = llvm::find_if(Range&: Parsed->Chunks, P: [](riff::Chunk C) { |
372 | return C.ID == riff::fourCC(Literal: "srcs" ); |
373 | }); |
374 | ASSERT_NE(Srcs, Parsed->Chunks.end()); |
375 | |
376 | // Srcs consists of a sequence of IncludeGraphNodes. In our case, just one. |
377 | // The node has: |
378 | // - 1 byte: flags (1) |
379 | // - varint(stringID): URI |
380 | // - 8 byte: file digest |
381 | // - varint: DirectIncludes.length |
382 | // - repeated varint(stringID): DirectIncludes |
383 | // We want to set DirectIncludes.length to a huge number. |
384 | // The offset isn't trivial to find, so we use the file digest. |
385 | std::string FileDigest = llvm::fromHex(Input: "EED8F5EAF25C453C" ); |
386 | unsigned Pos = Srcs->Data.find_first_of(Chars: FileDigest); |
387 | ASSERT_NE(Pos, StringRef::npos) << "Couldn't locate file digest" ; |
388 | Pos += FileDigest.size(); |
389 | |
390 | // Varints are little-endian base-128 numbers, where the top-bit of each byte |
391 | // indicates whether there are more. ffffffff0f -> 0xffffffff. |
392 | std::string CorruptSrcs = |
393 | (Srcs->Data.take_front(N: Pos) + llvm::fromHex(Input: "ffffffff0f" ) + |
394 | "some_random_garbage" ) |
395 | .str(); |
396 | Srcs->Data = CorruptSrcs; |
397 | |
398 | // Try to crash rather than hang on large allocation. |
399 | ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB |
400 | |
401 | std::string CorruptFile = llvm::to_string(Value: *Parsed); |
402 | auto CorruptParsed = readIndexFile(Text: CorruptFile); |
403 | ASSERT_TRUE(!CorruptParsed); |
404 | EXPECT_EQ(llvm::toString(CorruptParsed.takeError()), |
405 | "malformed or truncated include uri" ); |
406 | } |
407 | |
408 | // Check we detect invalid string table size size without allocating it first. |
409 | // If this detection fails, the test should allocate a huge array and crash. |
410 | TEST(SerializationTest, NoCrashOnBadStringTableSize) { |
411 | if (!llvm::compression::zlib::isAvailable()) { |
412 | log(Fmt: "skipping test, no zlib" ); |
413 | return; |
414 | } |
415 | |
416 | // First, create a valid serialized file. |
417 | auto In = readIndexFile(Text: YAML); |
418 | ASSERT_FALSE(!In) << In.takeError(); |
419 | IndexFileOut Out(*In); |
420 | Out.Format = IndexFileFormat::RIFF; |
421 | std::string Serialized = llvm::to_string(Value: Out); |
422 | |
423 | // Low-level parse it again, we're going to replace the `stri` chunk. |
424 | auto Parsed = riff::readFile(Stream: Serialized); |
425 | ASSERT_FALSE(!Parsed) << Parsed.takeError(); |
426 | auto Stri = llvm::find_if(Range&: Parsed->Chunks, P: [](riff::Chunk C) { |
427 | return C.ID == riff::fourCC(Literal: "stri" ); |
428 | }); |
429 | ASSERT_NE(Stri, Parsed->Chunks.end()); |
430 | |
431 | // stri consists of an 8 byte uncompressed-size, and then compressed data. |
432 | // We'll claim our small amount of data expands to 4GB |
433 | std::string CorruptStri = |
434 | (llvm::fromHex(Input: "ffffffff" ) + Stri->Data.drop_front(N: 4)).str(); |
435 | Stri->Data = CorruptStri; |
436 | |
437 | // Try to crash rather than hang on large allocation. |
438 | ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB |
439 | |
440 | std::string CorruptFile = llvm::to_string(Value: *Parsed); |
441 | auto CorruptParsed = readIndexFile(Text: CorruptFile); |
442 | ASSERT_TRUE(!CorruptParsed); |
443 | EXPECT_THAT(llvm::toString(CorruptParsed.takeError()), |
444 | testing::HasSubstr("bytes is implausible" )); |
445 | } |
446 | |
447 | } // namespace |
448 | } // namespace clangd |
449 | } // namespace clang |
450 | |