//===-- SerializationTests.cpp - Binary and YAML serialization unit tests -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#include "Headers.h"
10#include "RIFF.h"
11#include "index/Serialization.h"
12#include "support/Logger.h"
13#include "clang/Tooling/CompilationDatabase.h"
14#include "llvm/ADT/StringExtras.h"
15#include "llvm/Support/Compression.h"
16#include "llvm/Support/Error.h"
17#include "llvm/Support/ScopedPrinter.h"
18#include "gmock/gmock.h"
19#include "gtest/gtest.h"
20#ifdef LLVM_ON_UNIX
21#include <sys/resource.h>
22#endif
23
24using ::testing::ElementsAre;
25using ::testing::Pair;
26using ::testing::UnorderedElementsAre;
27using ::testing::UnorderedElementsAreArray;
28
29namespace clang {
30namespace clangd {
31namespace {
32
// Index fixture shared by the tests below, written in the YAML index format.
// It contains, in order: two !Symbol documents (Foo1 and Foo2), one !Refs
// document for Foo1, one !Relations document, one !Cmd document and one
// !Source document. Nested keys must stay indented under their parent key.
const char *YAML = R"(
---
!Symbol
ID: 057557CEBF6E6B2D
Name: 'Foo1'
Scope: 'clang::'
SymInfo:
  Kind: Function
  Lang: Cpp
CanonicalDeclaration:
  FileURI: file:///path/foo.h
  Start:
    Line: 1
    Column: 0
  End:
    Line: 1
    Column: 1
Flags: 129
Documentation: 'Foo doc'
ReturnType: 'int'
IncludeHeaders:
  - Header: 'include1'
    References: 7
    Directives: [ Include ]
  - Header: 'include2'
    References: 3
    Directives: [ Import ]
  - Header: 'include3'
    References: 2
    Directives: [ Include, Import ]
  - Header: 'include4'
    References: 1
    Directives: [ ]
...
---
!Symbol
ID: 057557CEBF6E6B2E
Name: 'Foo2'
Scope: 'clang::'
SymInfo:
  Kind: Function
  Lang: Cpp
CanonicalDeclaration:
  FileURI: file:///path/bar.h
  Start:
    Line: 1
    Column: 0
  End:
    Line: 1
    Column: 1
Flags: 2
Signature: '-sig'
CompletionSnippetSuffix: '-snippet'
...
!Refs
ID: 057557CEBF6E6B2D
References:
  - Kind: 4
    Location:
      FileURI: file:///path/foo.cc
      Start:
        Line: 5
        Column: 3
      End:
        Line: 5
        Column: 8
...
--- !Relations
Subject:
  ID: 6481EE7AF2841756
Predicate: 0
Object:
  ID: 6512AEC512EA3A2D
...
--- !Cmd
Directory: 'testdir'
CommandLine:
  - 'cmd1'
  - 'cmd2'
...
--- !Source
URI: 'file:///path/source1.cpp'
Flags: 1
Digest: EED8F5EAF25C453C
DirectIncludes:
  - 'file:///path/inc1.h'
  - 'file:///path/inc2.h'
...
)";
122
123MATCHER_P(id, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); }
124MATCHER_P(qName, Name, "") { return (arg.Scope + arg.Name).str() == Name; }
125MATCHER_P3(IncludeHeaderWithRefAndDirectives, IncludeHeader, References,
126 SupportedDirectives, "") {
127 return (arg.IncludeHeader == IncludeHeader) &&
128 (arg.References == References) &&
129 (arg.SupportedDirectives == SupportedDirectives);
130}
131
132auto readIndexFile(llvm::StringRef Text) {
133 return readIndexFile(Text, SymbolOrigin::Static);
134}
135
136TEST(SerializationTest, NoCrashOnEmptyYAML) {
137 EXPECT_TRUE(bool(readIndexFile("")));
138}
139
140TEST(SerializationTest, YAMLConversions) {
141 auto ParsedYAML = readIndexFile(Text: YAML);
142 ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError();
143 ASSERT_TRUE(bool(ParsedYAML->Symbols));
144 EXPECT_THAT(
145 *ParsedYAML->Symbols,
146 UnorderedElementsAre(id("057557CEBF6E6B2D"), id("057557CEBF6E6B2E")));
147
148 auto Sym1 = *ParsedYAML->Symbols->find(
149 SymID: cantFail(ValOrErr: SymbolID::fromStr("057557CEBF6E6B2D")));
150 auto Sym2 = *ParsedYAML->Symbols->find(
151 SymID: cantFail(ValOrErr: SymbolID::fromStr("057557CEBF6E6B2E")));
152
153 EXPECT_THAT(Sym1, qName("clang::Foo1"));
154 EXPECT_EQ(Sym1.Signature, "");
155 EXPECT_EQ(Sym1.Documentation, "Foo doc");
156 EXPECT_EQ(Sym1.ReturnType, "int");
157 EXPECT_EQ(StringRef(Sym1.CanonicalDeclaration.FileURI), "file:///path/foo.h");
158 EXPECT_EQ(Sym1.Origin, SymbolOrigin::Static);
159 EXPECT_EQ(static_cast<uint8_t>(Sym1.Flags), 129);
160 EXPECT_TRUE(Sym1.Flags & Symbol::IndexedForCodeCompletion);
161 EXPECT_FALSE(Sym1.Flags & Symbol::Deprecated);
162 EXPECT_THAT(
163 Sym1.IncludeHeaders,
164 UnorderedElementsAre(
165 IncludeHeaderWithRefAndDirectives("include1", 7u, Symbol::Include),
166 IncludeHeaderWithRefAndDirectives("include2", 3u, Symbol::Import),
167 IncludeHeaderWithRefAndDirectives("include3", 2u,
168 Symbol::Include | Symbol::Import),
169 IncludeHeaderWithRefAndDirectives("include4", 1u, Symbol::Invalid)));
170
171 EXPECT_THAT(Sym2, qName("clang::Foo2"));
172 EXPECT_EQ(Sym2.Signature, "-sig");
173 EXPECT_EQ(Sym2.ReturnType, "");
174 EXPECT_EQ(llvm::StringRef(Sym2.CanonicalDeclaration.FileURI),
175 "file:///path/bar.h");
176 EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion);
177 EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated);
178
179 ASSERT_TRUE(bool(ParsedYAML->Refs));
180 EXPECT_THAT(
181 *ParsedYAML->Refs,
182 UnorderedElementsAre(Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2D")),
183 ::testing::SizeIs(1))));
184 auto Ref1 = ParsedYAML->Refs->begin()->second.front();
185 EXPECT_EQ(Ref1.Kind, RefKind::Reference);
186 EXPECT_EQ(StringRef(Ref1.Location.FileURI), "file:///path/foo.cc");
187
188 SymbolID Base = cantFail(ValOrErr: SymbolID::fromStr("6481EE7AF2841756"));
189 SymbolID Derived = cantFail(ValOrErr: SymbolID::fromStr("6512AEC512EA3A2D"));
190 ASSERT_TRUE(bool(ParsedYAML->Relations));
191 EXPECT_THAT(
192 *ParsedYAML->Relations,
193 UnorderedElementsAre(Relation{Base, RelationKind::BaseOf, Derived}));
194
195 ASSERT_TRUE(bool(ParsedYAML->Cmd));
196 auto &Cmd = *ParsedYAML->Cmd;
197 ASSERT_EQ(Cmd.Directory, "testdir");
198 EXPECT_THAT(Cmd.CommandLine, ElementsAre("cmd1", "cmd2"));
199
200 ASSERT_TRUE(bool(ParsedYAML->Sources));
201 const auto *URI = "file:///path/source1.cpp";
202 ASSERT_TRUE(ParsedYAML->Sources->count(URI));
203 auto IGNDeserialized = ParsedYAML->Sources->lookup(Key: URI);
204 EXPECT_EQ(llvm::toHex(IGNDeserialized.Digest), "EED8F5EAF25C453C");
205 EXPECT_THAT(IGNDeserialized.DirectIncludes,
206 ElementsAre("file:///path/inc1.h", "file:///path/inc2.h"));
207 EXPECT_EQ(IGNDeserialized.URI, URI);
208 EXPECT_EQ(IGNDeserialized.Flags, IncludeGraphNode::SourceFlag(1));
209}
210
211std::vector<std::string> yamlFromSymbols(const SymbolSlab &Slab) {
212 std::vector<std::string> Result;
213 for (const auto &Sym : Slab)
214 Result.push_back(x: toYAML(Sym));
215 return Result;
216}
217std::vector<std::string> yamlFromRefs(const RefSlab &Slab) {
218 std::vector<std::string> Result;
219 for (const auto &Refs : Slab)
220 Result.push_back(x: toYAML(Refs));
221 return Result;
222}
223
224std::vector<std::string> yamlFromRelations(const RelationSlab &Slab) {
225 std::vector<std::string> Result;
226 for (const auto &Rel : Slab)
227 Result.push_back(x: toYAML(Rel));
228 return Result;
229}
230
231TEST(SerializationTest, BinaryConversions) {
232 auto In = readIndexFile(Text: YAML);
233 EXPECT_TRUE(bool(In)) << In.takeError();
234
235 // Write to binary format, and parse again.
236 IndexFileOut Out(*In);
237 Out.Format = IndexFileFormat::RIFF;
238 std::string Serialized = llvm::to_string(Value: Out);
239
240 auto In2 = readIndexFile(Text: Serialized);
241 ASSERT_TRUE(bool(In2)) << In2.takeError();
242 ASSERT_TRUE(In2->Symbols);
243 ASSERT_TRUE(In2->Refs);
244 ASSERT_TRUE(In2->Relations);
245
246 // Assert the YAML serializations match, for nice comparisons and diffs.
247 EXPECT_THAT(yamlFromSymbols(*In2->Symbols),
248 UnorderedElementsAreArray(yamlFromSymbols(*In->Symbols)));
249 EXPECT_THAT(yamlFromRefs(*In2->Refs),
250 UnorderedElementsAreArray(yamlFromRefs(*In->Refs)));
251 EXPECT_THAT(yamlFromRelations(*In2->Relations),
252 UnorderedElementsAreArray(yamlFromRelations(*In->Relations)));
253}
254
255TEST(SerializationTest, SrcsTest) {
256 auto In = readIndexFile(Text: YAML);
257 EXPECT_TRUE(bool(In)) << In.takeError();
258
259 std::string TestContent("TestContent");
260 IncludeGraphNode IGN;
261 IGN.Digest = digest(Content: TestContent);
262 IGN.DirectIncludes = {"inc1", "inc2"};
263 IGN.URI = "URI";
264 IGN.Flags |= IncludeGraphNode::SourceFlag::IsTU;
265 IGN.Flags |= IncludeGraphNode::SourceFlag::HadErrors;
266 IncludeGraph Sources;
267 Sources[IGN.URI] = IGN;
268 // Write to binary format, and parse again.
269 IndexFileOut Out(*In);
270 Out.Format = IndexFileFormat::RIFF;
271 Out.Sources = &Sources;
272 {
273 std::string Serialized = llvm::to_string(Value: Out);
274
275 auto In = readIndexFile(Text: Serialized);
276 ASSERT_TRUE(bool(In)) << In.takeError();
277 ASSERT_TRUE(In->Symbols);
278 ASSERT_TRUE(In->Refs);
279 ASSERT_TRUE(In->Sources);
280 ASSERT_TRUE(In->Sources->count(IGN.URI));
281 // Assert the YAML serializations match, for nice comparisons and diffs.
282 EXPECT_THAT(yamlFromSymbols(*In->Symbols),
283 UnorderedElementsAreArray(yamlFromSymbols(*In->Symbols)));
284 EXPECT_THAT(yamlFromRefs(*In->Refs),
285 UnorderedElementsAreArray(yamlFromRefs(*In->Refs)));
286 auto IGNDeserialized = In->Sources->lookup(Key: IGN.URI);
287 EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest);
288 EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes);
289 EXPECT_EQ(IGNDeserialized.URI, IGN.URI);
290 EXPECT_EQ(IGNDeserialized.Flags, IGN.Flags);
291 }
292}
293
294TEST(SerializationTest, CmdlTest) {
295 auto In = readIndexFile(Text: YAML);
296 EXPECT_TRUE(bool(In)) << In.takeError();
297
298 tooling::CompileCommand Cmd;
299 Cmd.Directory = "testdir";
300 Cmd.CommandLine.push_back(x: "cmd1");
301 Cmd.CommandLine.push_back(x: "cmd2");
302 Cmd.Filename = "ignored";
303 Cmd.Heuristic = "ignored";
304 Cmd.Output = "ignored";
305
306 IndexFileOut Out(*In);
307 Out.Format = IndexFileFormat::RIFF;
308 Out.Cmd = &Cmd;
309 {
310 std::string Serialized = llvm::to_string(Value: Out);
311
312 auto In = readIndexFile(Text: Serialized);
313 ASSERT_TRUE(bool(In)) << In.takeError();
314 ASSERT_TRUE(In->Cmd);
315
316 const tooling::CompileCommand &SerializedCmd = *In->Cmd;
317 EXPECT_EQ(SerializedCmd.CommandLine, Cmd.CommandLine);
318 EXPECT_EQ(SerializedCmd.Directory, Cmd.Directory);
319 EXPECT_NE(SerializedCmd.Filename, Cmd.Filename);
320 EXPECT_NE(SerializedCmd.Heuristic, Cmd.Heuristic);
321 EXPECT_NE(SerializedCmd.Output, Cmd.Output);
322 }
323}
324
325// rlimit is part of POSIX. RLIMIT_AS does not exist in OpenBSD.
326// Sanitizers use a lot of address space, so we can't apply strict limits.
327#if LLVM_ON_UNIX && defined(RLIMIT_AS) && !LLVM_ADDRESS_SANITIZER_BUILD && \
328 !LLVM_MEMORY_SANITIZER_BUILD && !LLVM_THREAD_SANITIZER_BUILD
329class ScopedMemoryLimit {
330 struct rlimit OriginalLimit;
331 bool Succeeded = false;
332
333public:
334 ScopedMemoryLimit(rlim_t Bytes) {
335 if (!getrlimit(RLIMIT_AS, rlimits: &OriginalLimit)) {
336 struct rlimit NewLimit = OriginalLimit;
337 NewLimit.rlim_cur = Bytes;
338 Succeeded = !setrlimit(RLIMIT_AS, rlimits: &NewLimit);
339 }
340 if (!Succeeded)
341 log(Fmt: "Failed to set rlimit");
342 }
343
344 ~ScopedMemoryLimit() {
345 if (Succeeded)
346 setrlimit(RLIMIT_AS, rlimits: &OriginalLimit);
347 }
348};
349#else
350class ScopedMemoryLimit {
351public:
352 ScopedMemoryLimit(unsigned Bytes) { log("rlimit unsupported"); }
353};
354#endif
355
356// Test that our deserialization detects invalid array sizes without allocating.
357// If this detection fails, the test should allocate a huge array and crash.
358TEST(SerializationTest, NoCrashOnBadArraySize) {
359 // This test is tricky because we need to construct a subtly invalid file.
360 // First, create a valid serialized file.
361 auto In = readIndexFile(Text: YAML);
362 ASSERT_FALSE(!In) << In.takeError();
363 IndexFileOut Out(*In);
364 Out.Format = IndexFileFormat::RIFF;
365 std::string Serialized = llvm::to_string(Value: Out);
366
367 // Low-level parse it again and find the `srcs` chunk we're going to corrupt.
368 auto Parsed = riff::readFile(Stream: Serialized);
369 ASSERT_FALSE(!Parsed) << Parsed.takeError();
370 auto Srcs = llvm::find_if(Range&: Parsed->Chunks, P: [](riff::Chunk C) {
371 return C.ID == riff::fourCC(Literal: "srcs");
372 });
373 ASSERT_NE(Srcs, Parsed->Chunks.end());
374
375 // Srcs consists of a sequence of IncludeGraphNodes. In our case, just one.
376 // The node has:
377 // - 1 byte: flags (1)
378 // - varint(stringID): URI
379 // - 8 byte: file digest
380 // - varint: DirectIncludes.length
381 // - repeated varint(stringID): DirectIncludes
382 // We want to set DirectIncludes.length to a huge number.
383 // The offset isn't trivial to find, so we use the file digest.
384 std::string FileDigest = llvm::fromHex(Input: "EED8F5EAF25C453C");
385 unsigned Pos = Srcs->Data.find_first_of(Chars: FileDigest);
386 ASSERT_NE(Pos, StringRef::npos) << "Couldn't locate file digest";
387 Pos += FileDigest.size();
388
389 // Varints are little-endian base-128 numbers, where the top-bit of each byte
390 // indicates whether there are more. ffffffff0f -> 0xffffffff.
391 std::string CorruptSrcs =
392 (Srcs->Data.take_front(N: Pos) + llvm::fromHex(Input: "ffffffff0f") +
393 "some_random_garbage")
394 .str();
395 Srcs->Data = CorruptSrcs;
396
397 // Try to crash rather than hang on large allocation.
398 ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB
399
400 std::string CorruptFile = llvm::to_string(Value: *Parsed);
401 auto CorruptParsed = readIndexFile(Text: CorruptFile);
402 ASSERT_TRUE(!CorruptParsed);
403 EXPECT_EQ(llvm::toString(CorruptParsed.takeError()),
404 "malformed or truncated include uri");
405}
406
407// Check we detect invalid string table size size without allocating it first.
408// If this detection fails, the test should allocate a huge array and crash.
409TEST(SerializationTest, NoCrashOnBadStringTableSize) {
410 if (!llvm::compression::zlib::isAvailable()) {
411 log(Fmt: "skipping test, no zlib");
412 return;
413 }
414
415 // First, create a valid serialized file.
416 auto In = readIndexFile(Text: YAML);
417 ASSERT_FALSE(!In) << In.takeError();
418 IndexFileOut Out(*In);
419 Out.Format = IndexFileFormat::RIFF;
420 std::string Serialized = llvm::to_string(Value: Out);
421
422 // Low-level parse it again, we're going to replace the `stri` chunk.
423 auto Parsed = riff::readFile(Stream: Serialized);
424 ASSERT_FALSE(!Parsed) << Parsed.takeError();
425 auto Stri = llvm::find_if(Range&: Parsed->Chunks, P: [](riff::Chunk C) {
426 return C.ID == riff::fourCC(Literal: "stri");
427 });
428 ASSERT_NE(Stri, Parsed->Chunks.end());
429
430 // stri consists of an 8 byte uncompressed-size, and then compressed data.
431 // We'll claim our small amount of data expands to 4GB
432 std::string CorruptStri =
433 (llvm::fromHex(Input: "ffffffff") + Stri->Data.drop_front(N: 4)).str();
434 Stri->Data = CorruptStri;
435 std::string FileDigest = llvm::fromHex(Input: "EED8F5EAF25C453C");
436
437 // Try to crash rather than hang on large allocation.
438 ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB
439
440 std::string CorruptFile = llvm::to_string(Value: *Parsed);
441 auto CorruptParsed = readIndexFile(Text: CorruptFile);
442 ASSERT_TRUE(!CorruptParsed);
443 EXPECT_THAT(llvm::toString(CorruptParsed.takeError()),
444 testing::HasSubstr("bytes is implausible"));
445}
446
447} // namespace
448} // namespace clangd
449} // namespace clang
450

source code of clang-tools-extra/clangd/unittests/SerializationTests.cpp