1 | //===-- SerializationTests.cpp - Binary and YAML serialization unit tests -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "Headers.h" |
10 | #include "RIFF.h" |
11 | #include "index/Serialization.h" |
12 | #include "support/Logger.h" |
13 | #include "clang/Tooling/CompilationDatabase.h" |
14 | #include "llvm/ADT/StringExtras.h" |
15 | #include "llvm/Support/Compression.h" |
16 | #include "llvm/Support/Error.h" |
17 | #include "llvm/Support/ScopedPrinter.h" |
18 | #include "gmock/gmock.h" |
19 | #include "gtest/gtest.h" |
20 | #ifdef LLVM_ON_UNIX |
21 | #include <sys/resource.h> |
22 | #endif |
23 | |
24 | using ::testing::ElementsAre; |
25 | using ::testing::Pair; |
26 | using ::testing::UnorderedElementsAre; |
27 | using ::testing::UnorderedElementsAreArray; |
28 | |
29 | namespace clang { |
30 | namespace clangd { |
31 | namespace { |
32 | |
// Index data in YAML form shared by the tests in this file. It contains two
// symbols (Foo1, with four include headers, and Foo2), one reference to Foo1,
// one relation, one compile command and one include-graph source node.
// The nesting of the YAML documents is carried by indentation, so the leading
// whitespace inside this literal is significant and must be preserved.
const char *YAML = R"(
---
!Symbol
ID: 057557CEBF6E6B2D
Name: 'Foo1'
Scope: 'clang::'
SymInfo:
  Kind: Function
  Lang: Cpp
CanonicalDeclaration:
  FileURI: file:///path/foo.h
  Start:
    Line: 1
    Column: 0
  End:
    Line: 1
    Column: 1
Flags: 129
Documentation: 'Foo doc'
ReturnType: 'int'
IncludeHeaders:
  - Header: 'include1'
    References: 7
    Directives: [ Include ]
  - Header: 'include2'
    References: 3
    Directives: [ Import ]
  - Header: 'include3'
    References: 2
    Directives: [ Include, Import ]
  - Header: 'include4'
    References: 1
    Directives: [ ]
...
---
!Symbol
ID: 057557CEBF6E6B2E
Name: 'Foo2'
Scope: 'clang::'
SymInfo:
  Kind: Function
  Lang: Cpp
CanonicalDeclaration:
  FileURI: file:///path/bar.h
  Start:
    Line: 1
    Column: 0
  End:
    Line: 1
    Column: 1
Flags: 2
Signature: '-sig'
CompletionSnippetSuffix: '-snippet'
...
!Refs
ID: 057557CEBF6E6B2D
References:
  - Kind: 4
    Location:
      FileURI: file:///path/foo.cc
      Start:
        Line: 5
        Column: 3
      End:
        Line: 5
        Column: 8
...
--- !Relations
Subject:
  ID: 6481EE7AF2841756
Predicate: 0
Object:
  ID: 6512AEC512EA3A2D
...
--- !Cmd
Directory: 'testdir'
CommandLine:
  - 'cmd1'
  - 'cmd2'
...
--- !Source
URI: 'file:///path/source1.cpp'
Flags: 1
Digest: EED8F5EAF25C453C
DirectIncludes:
  - 'file:///path/inc1.h'
  - 'file:///path/inc2.h'
...
)";
122 | |
123 | MATCHER_P(id, I, "" ) { return arg.ID == cantFail(SymbolID::fromStr(I)); } |
124 | MATCHER_P(qName, Name, "" ) { return (arg.Scope + arg.Name).str() == Name; } |
125 | MATCHER_P3(IncludeHeaderWithRefAndDirectives, IncludeHeader, References, |
126 | SupportedDirectives, "" ) { |
127 | return (arg.IncludeHeader == IncludeHeader) && |
128 | (arg.References == References) && |
129 | (arg.SupportedDirectives == SupportedDirectives); |
130 | } |
131 | |
132 | auto readIndexFile(llvm::StringRef Text) { |
133 | return readIndexFile(Text, SymbolOrigin::Static); |
134 | } |
135 | |
136 | TEST(SerializationTest, NoCrashOnEmptyYAML) { |
137 | EXPECT_TRUE(bool(readIndexFile("" ))); |
138 | } |
139 | |
140 | TEST(SerializationTest, YAMLConversions) { |
141 | auto ParsedYAML = readIndexFile(Text: YAML); |
142 | ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError(); |
143 | ASSERT_TRUE(bool(ParsedYAML->Symbols)); |
144 | EXPECT_THAT( |
145 | *ParsedYAML->Symbols, |
146 | UnorderedElementsAre(id("057557CEBF6E6B2D" ), id("057557CEBF6E6B2E" ))); |
147 | |
148 | auto Sym1 = *ParsedYAML->Symbols->find( |
149 | SymID: cantFail(ValOrErr: SymbolID::fromStr("057557CEBF6E6B2D" ))); |
150 | auto Sym2 = *ParsedYAML->Symbols->find( |
151 | SymID: cantFail(ValOrErr: SymbolID::fromStr("057557CEBF6E6B2E" ))); |
152 | |
153 | EXPECT_THAT(Sym1, qName("clang::Foo1" )); |
154 | EXPECT_EQ(Sym1.Signature, "" ); |
155 | EXPECT_EQ(Sym1.Documentation, "Foo doc" ); |
156 | EXPECT_EQ(Sym1.ReturnType, "int" ); |
157 | EXPECT_EQ(StringRef(Sym1.CanonicalDeclaration.FileURI), "file:///path/foo.h" ); |
158 | EXPECT_EQ(Sym1.Origin, SymbolOrigin::Static); |
159 | EXPECT_EQ(static_cast<uint8_t>(Sym1.Flags), 129); |
160 | EXPECT_TRUE(Sym1.Flags & Symbol::IndexedForCodeCompletion); |
161 | EXPECT_FALSE(Sym1.Flags & Symbol::Deprecated); |
162 | EXPECT_THAT( |
163 | Sym1.IncludeHeaders, |
164 | UnorderedElementsAre( |
165 | IncludeHeaderWithRefAndDirectives("include1" , 7u, Symbol::Include), |
166 | IncludeHeaderWithRefAndDirectives("include2" , 3u, Symbol::Import), |
167 | IncludeHeaderWithRefAndDirectives("include3" , 2u, |
168 | Symbol::Include | Symbol::Import), |
169 | IncludeHeaderWithRefAndDirectives("include4" , 1u, Symbol::Invalid))); |
170 | |
171 | EXPECT_THAT(Sym2, qName("clang::Foo2" )); |
172 | EXPECT_EQ(Sym2.Signature, "-sig" ); |
173 | EXPECT_EQ(Sym2.ReturnType, "" ); |
174 | EXPECT_EQ(llvm::StringRef(Sym2.CanonicalDeclaration.FileURI), |
175 | "file:///path/bar.h" ); |
176 | EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion); |
177 | EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated); |
178 | |
179 | ASSERT_TRUE(bool(ParsedYAML->Refs)); |
180 | EXPECT_THAT( |
181 | *ParsedYAML->Refs, |
182 | UnorderedElementsAre(Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2D" )), |
183 | ::testing::SizeIs(1)))); |
184 | auto Ref1 = ParsedYAML->Refs->begin()->second.front(); |
185 | EXPECT_EQ(Ref1.Kind, RefKind::Reference); |
186 | EXPECT_EQ(StringRef(Ref1.Location.FileURI), "file:///path/foo.cc" ); |
187 | |
188 | SymbolID Base = cantFail(ValOrErr: SymbolID::fromStr("6481EE7AF2841756" )); |
189 | SymbolID Derived = cantFail(ValOrErr: SymbolID::fromStr("6512AEC512EA3A2D" )); |
190 | ASSERT_TRUE(bool(ParsedYAML->Relations)); |
191 | EXPECT_THAT( |
192 | *ParsedYAML->Relations, |
193 | UnorderedElementsAre(Relation{Base, RelationKind::BaseOf, Derived})); |
194 | |
195 | ASSERT_TRUE(bool(ParsedYAML->Cmd)); |
196 | auto &Cmd = *ParsedYAML->Cmd; |
197 | ASSERT_EQ(Cmd.Directory, "testdir" ); |
198 | EXPECT_THAT(Cmd.CommandLine, ElementsAre("cmd1" , "cmd2" )); |
199 | |
200 | ASSERT_TRUE(bool(ParsedYAML->Sources)); |
201 | const auto *URI = "file:///path/source1.cpp" ; |
202 | ASSERT_TRUE(ParsedYAML->Sources->count(URI)); |
203 | auto IGNDeserialized = ParsedYAML->Sources->lookup(Key: URI); |
204 | EXPECT_EQ(llvm::toHex(IGNDeserialized.Digest), "EED8F5EAF25C453C" ); |
205 | EXPECT_THAT(IGNDeserialized.DirectIncludes, |
206 | ElementsAre("file:///path/inc1.h" , "file:///path/inc2.h" )); |
207 | EXPECT_EQ(IGNDeserialized.URI, URI); |
208 | EXPECT_EQ(IGNDeserialized.Flags, IncludeGraphNode::SourceFlag(1)); |
209 | } |
210 | |
211 | std::vector<std::string> yamlFromSymbols(const SymbolSlab &Slab) { |
212 | std::vector<std::string> Result; |
213 | for (const auto &Sym : Slab) |
214 | Result.push_back(x: toYAML(Sym)); |
215 | return Result; |
216 | } |
217 | std::vector<std::string> yamlFromRefs(const RefSlab &Slab) { |
218 | std::vector<std::string> Result; |
219 | for (const auto &Refs : Slab) |
220 | Result.push_back(x: toYAML(Refs)); |
221 | return Result; |
222 | } |
223 | |
224 | std::vector<std::string> yamlFromRelations(const RelationSlab &Slab) { |
225 | std::vector<std::string> Result; |
226 | for (const auto &Rel : Slab) |
227 | Result.push_back(x: toYAML(Rel)); |
228 | return Result; |
229 | } |
230 | |
231 | TEST(SerializationTest, BinaryConversions) { |
232 | auto In = readIndexFile(Text: YAML); |
233 | EXPECT_TRUE(bool(In)) << In.takeError(); |
234 | |
235 | // Write to binary format, and parse again. |
236 | IndexFileOut Out(*In); |
237 | Out.Format = IndexFileFormat::RIFF; |
238 | std::string Serialized = llvm::to_string(Value: Out); |
239 | |
240 | auto In2 = readIndexFile(Text: Serialized); |
241 | ASSERT_TRUE(bool(In2)) << In2.takeError(); |
242 | ASSERT_TRUE(In2->Symbols); |
243 | ASSERT_TRUE(In2->Refs); |
244 | ASSERT_TRUE(In2->Relations); |
245 | |
246 | // Assert the YAML serializations match, for nice comparisons and diffs. |
247 | EXPECT_THAT(yamlFromSymbols(*In2->Symbols), |
248 | UnorderedElementsAreArray(yamlFromSymbols(*In->Symbols))); |
249 | EXPECT_THAT(yamlFromRefs(*In2->Refs), |
250 | UnorderedElementsAreArray(yamlFromRefs(*In->Refs))); |
251 | EXPECT_THAT(yamlFromRelations(*In2->Relations), |
252 | UnorderedElementsAreArray(yamlFromRelations(*In->Relations))); |
253 | } |
254 | |
255 | TEST(SerializationTest, SrcsTest) { |
256 | auto In = readIndexFile(Text: YAML); |
257 | EXPECT_TRUE(bool(In)) << In.takeError(); |
258 | |
259 | std::string TestContent("TestContent" ); |
260 | IncludeGraphNode IGN; |
261 | IGN.Digest = digest(Content: TestContent); |
262 | IGN.DirectIncludes = {"inc1" , "inc2" }; |
263 | IGN.URI = "URI" ; |
264 | IGN.Flags |= IncludeGraphNode::SourceFlag::IsTU; |
265 | IGN.Flags |= IncludeGraphNode::SourceFlag::HadErrors; |
266 | IncludeGraph Sources; |
267 | Sources[IGN.URI] = IGN; |
268 | // Write to binary format, and parse again. |
269 | IndexFileOut Out(*In); |
270 | Out.Format = IndexFileFormat::RIFF; |
271 | Out.Sources = &Sources; |
272 | { |
273 | std::string Serialized = llvm::to_string(Value: Out); |
274 | |
275 | auto In = readIndexFile(Text: Serialized); |
276 | ASSERT_TRUE(bool(In)) << In.takeError(); |
277 | ASSERT_TRUE(In->Symbols); |
278 | ASSERT_TRUE(In->Refs); |
279 | ASSERT_TRUE(In->Sources); |
280 | ASSERT_TRUE(In->Sources->count(IGN.URI)); |
281 | // Assert the YAML serializations match, for nice comparisons and diffs. |
282 | EXPECT_THAT(yamlFromSymbols(*In->Symbols), |
283 | UnorderedElementsAreArray(yamlFromSymbols(*In->Symbols))); |
284 | EXPECT_THAT(yamlFromRefs(*In->Refs), |
285 | UnorderedElementsAreArray(yamlFromRefs(*In->Refs))); |
286 | auto IGNDeserialized = In->Sources->lookup(Key: IGN.URI); |
287 | EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest); |
288 | EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes); |
289 | EXPECT_EQ(IGNDeserialized.URI, IGN.URI); |
290 | EXPECT_EQ(IGNDeserialized.Flags, IGN.Flags); |
291 | } |
292 | } |
293 | |
294 | TEST(SerializationTest, CmdlTest) { |
295 | auto In = readIndexFile(Text: YAML); |
296 | EXPECT_TRUE(bool(In)) << In.takeError(); |
297 | |
298 | tooling::CompileCommand Cmd; |
299 | Cmd.Directory = "testdir" ; |
300 | Cmd.CommandLine.push_back(x: "cmd1" ); |
301 | Cmd.CommandLine.push_back(x: "cmd2" ); |
302 | Cmd.Filename = "ignored" ; |
303 | Cmd.Heuristic = "ignored" ; |
304 | Cmd.Output = "ignored" ; |
305 | |
306 | IndexFileOut Out(*In); |
307 | Out.Format = IndexFileFormat::RIFF; |
308 | Out.Cmd = &Cmd; |
309 | { |
310 | std::string Serialized = llvm::to_string(Value: Out); |
311 | |
312 | auto In = readIndexFile(Text: Serialized); |
313 | ASSERT_TRUE(bool(In)) << In.takeError(); |
314 | ASSERT_TRUE(In->Cmd); |
315 | |
316 | const tooling::CompileCommand &SerializedCmd = *In->Cmd; |
317 | EXPECT_EQ(SerializedCmd.CommandLine, Cmd.CommandLine); |
318 | EXPECT_EQ(SerializedCmd.Directory, Cmd.Directory); |
319 | EXPECT_NE(SerializedCmd.Filename, Cmd.Filename); |
320 | EXPECT_NE(SerializedCmd.Heuristic, Cmd.Heuristic); |
321 | EXPECT_NE(SerializedCmd.Output, Cmd.Output); |
322 | } |
323 | } |
324 | |
325 | // rlimit is part of POSIX. RLIMIT_AS does not exist in OpenBSD. |
326 | // Sanitizers use a lot of address space, so we can't apply strict limits. |
327 | #if LLVM_ON_UNIX && defined(RLIMIT_AS) && !LLVM_ADDRESS_SANITIZER_BUILD && \ |
328 | !LLVM_MEMORY_SANITIZER_BUILD && !LLVM_THREAD_SANITIZER_BUILD |
329 | class ScopedMemoryLimit { |
330 | struct rlimit OriginalLimit; |
331 | bool Succeeded = false; |
332 | |
333 | public: |
334 | ScopedMemoryLimit(rlim_t Bytes) { |
335 | if (!getrlimit(RLIMIT_AS, rlimits: &OriginalLimit)) { |
336 | struct rlimit NewLimit = OriginalLimit; |
337 | NewLimit.rlim_cur = Bytes; |
338 | Succeeded = !setrlimit(RLIMIT_AS, rlimits: &NewLimit); |
339 | } |
340 | if (!Succeeded) |
341 | log(Fmt: "Failed to set rlimit" ); |
342 | } |
343 | |
344 | ~ScopedMemoryLimit() { |
345 | if (Succeeded) |
346 | setrlimit(RLIMIT_AS, rlimits: &OriginalLimit); |
347 | } |
348 | }; |
349 | #else |
350 | class ScopedMemoryLimit { |
351 | public: |
352 | ScopedMemoryLimit(unsigned Bytes) { log("rlimit unsupported" ); } |
353 | }; |
354 | #endif |
355 | |
356 | // Test that our deserialization detects invalid array sizes without allocating. |
357 | // If this detection fails, the test should allocate a huge array and crash. |
358 | TEST(SerializationTest, NoCrashOnBadArraySize) { |
359 | // This test is tricky because we need to construct a subtly invalid file. |
360 | // First, create a valid serialized file. |
361 | auto In = readIndexFile(Text: YAML); |
362 | ASSERT_FALSE(!In) << In.takeError(); |
363 | IndexFileOut Out(*In); |
364 | Out.Format = IndexFileFormat::RIFF; |
365 | std::string Serialized = llvm::to_string(Value: Out); |
366 | |
367 | // Low-level parse it again and find the `srcs` chunk we're going to corrupt. |
368 | auto Parsed = riff::readFile(Stream: Serialized); |
369 | ASSERT_FALSE(!Parsed) << Parsed.takeError(); |
370 | auto Srcs = llvm::find_if(Range&: Parsed->Chunks, P: [](riff::Chunk C) { |
371 | return C.ID == riff::fourCC(Literal: "srcs" ); |
372 | }); |
373 | ASSERT_NE(Srcs, Parsed->Chunks.end()); |
374 | |
375 | // Srcs consists of a sequence of IncludeGraphNodes. In our case, just one. |
376 | // The node has: |
377 | // - 1 byte: flags (1) |
378 | // - varint(stringID): URI |
379 | // - 8 byte: file digest |
380 | // - varint: DirectIncludes.length |
381 | // - repeated varint(stringID): DirectIncludes |
382 | // We want to set DirectIncludes.length to a huge number. |
383 | // The offset isn't trivial to find, so we use the file digest. |
384 | std::string FileDigest = llvm::fromHex(Input: "EED8F5EAF25C453C" ); |
385 | unsigned Pos = Srcs->Data.find_first_of(Chars: FileDigest); |
386 | ASSERT_NE(Pos, StringRef::npos) << "Couldn't locate file digest" ; |
387 | Pos += FileDigest.size(); |
388 | |
389 | // Varints are little-endian base-128 numbers, where the top-bit of each byte |
390 | // indicates whether there are more. ffffffff0f -> 0xffffffff. |
391 | std::string CorruptSrcs = |
392 | (Srcs->Data.take_front(N: Pos) + llvm::fromHex(Input: "ffffffff0f" ) + |
393 | "some_random_garbage" ) |
394 | .str(); |
395 | Srcs->Data = CorruptSrcs; |
396 | |
397 | // Try to crash rather than hang on large allocation. |
398 | ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB |
399 | |
400 | std::string CorruptFile = llvm::to_string(Value: *Parsed); |
401 | auto CorruptParsed = readIndexFile(Text: CorruptFile); |
402 | ASSERT_TRUE(!CorruptParsed); |
403 | EXPECT_EQ(llvm::toString(CorruptParsed.takeError()), |
404 | "malformed or truncated include uri" ); |
405 | } |
406 | |
407 | // Check we detect invalid string table size size without allocating it first. |
408 | // If this detection fails, the test should allocate a huge array and crash. |
409 | TEST(SerializationTest, NoCrashOnBadStringTableSize) { |
410 | if (!llvm::compression::zlib::isAvailable()) { |
411 | log(Fmt: "skipping test, no zlib" ); |
412 | return; |
413 | } |
414 | |
415 | // First, create a valid serialized file. |
416 | auto In = readIndexFile(Text: YAML); |
417 | ASSERT_FALSE(!In) << In.takeError(); |
418 | IndexFileOut Out(*In); |
419 | Out.Format = IndexFileFormat::RIFF; |
420 | std::string Serialized = llvm::to_string(Value: Out); |
421 | |
422 | // Low-level parse it again, we're going to replace the `stri` chunk. |
423 | auto Parsed = riff::readFile(Stream: Serialized); |
424 | ASSERT_FALSE(!Parsed) << Parsed.takeError(); |
425 | auto Stri = llvm::find_if(Range&: Parsed->Chunks, P: [](riff::Chunk C) { |
426 | return C.ID == riff::fourCC(Literal: "stri" ); |
427 | }); |
428 | ASSERT_NE(Stri, Parsed->Chunks.end()); |
429 | |
430 | // stri consists of an 8 byte uncompressed-size, and then compressed data. |
431 | // We'll claim our small amount of data expands to 4GB |
432 | std::string CorruptStri = |
433 | (llvm::fromHex(Input: "ffffffff" ) + Stri->Data.drop_front(N: 4)).str(); |
434 | Stri->Data = CorruptStri; |
435 | std::string FileDigest = llvm::fromHex(Input: "EED8F5EAF25C453C" ); |
436 | |
437 | // Try to crash rather than hang on large allocation. |
438 | ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB |
439 | |
440 | std::string CorruptFile = llvm::to_string(Value: *Parsed); |
441 | auto CorruptParsed = readIndexFile(Text: CorruptFile); |
442 | ASSERT_TRUE(!CorruptParsed); |
443 | EXPECT_THAT(llvm::toString(CorruptParsed.takeError()), |
444 | testing::HasSubstr("bytes is implausible" )); |
445 | } |
446 | |
447 | } // namespace |
448 | } // namespace clangd |
449 | } // namespace clang |
450 | |