1 | //===-- DexTests.cpp ---------------------------------*- C++ -*-----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "TestFS.h" |
10 | #include "TestIndex.h" |
11 | #include "index/Index.h" |
12 | #include "index/SymbolID.h" |
13 | #include "index/dex/Dex.h" |
14 | #include "index/dex/Iterator.h" |
15 | #include "index/dex/Token.h" |
16 | #include "index/dex/Trigram.h" |
17 | #include "llvm/Support/ScopedPrinter.h" |
18 | #include "gmock/gmock.h" |
19 | #include "gtest/gtest.h" |
20 | #include <string> |
21 | #include <vector> |
22 | |
23 | using ::testing::AnyOf; |
24 | using ::testing::ElementsAre; |
25 | using ::testing::IsEmpty; |
26 | using ::testing::UnorderedElementsAre; |
27 | |
28 | namespace clang { |
29 | namespace clangd { |
30 | namespace dex { |
31 | namespace { |
32 | |
33 | //===----------------------------------------------------------------------===// |
34 | // Query iterator tests. |
35 | //===----------------------------------------------------------------------===// |
36 | |
37 | std::vector<DocID> consumeIDs(Iterator &It) { |
38 | auto IDAndScore = consume(It); |
39 | std::vector<DocID> IDs(IDAndScore.size()); |
40 | for (size_t I = 0; I < IDAndScore.size(); ++I) |
41 | IDs[I] = IDAndScore[I].first; |
42 | return IDs; |
43 | } |
44 | |
45 | TEST(DexIterators, DocumentIterator) { |
46 | const PostingList L({4, 7, 8, 20, 42, 100}); |
47 | auto DocIterator = L.iterator(); |
48 | |
49 | EXPECT_EQ(DocIterator->peek(), 4U); |
50 | EXPECT_FALSE(DocIterator->reachedEnd()); |
51 | |
52 | DocIterator->advance(); |
53 | EXPECT_EQ(DocIterator->peek(), 7U); |
54 | EXPECT_FALSE(DocIterator->reachedEnd()); |
55 | |
56 | DocIterator->advanceTo(ID: 20); |
57 | EXPECT_EQ(DocIterator->peek(), 20U); |
58 | EXPECT_FALSE(DocIterator->reachedEnd()); |
59 | |
60 | DocIterator->advanceTo(ID: 65); |
61 | EXPECT_EQ(DocIterator->peek(), 100U); |
62 | EXPECT_FALSE(DocIterator->reachedEnd()); |
63 | |
64 | DocIterator->advanceTo(ID: 420); |
65 | EXPECT_TRUE(DocIterator->reachedEnd()); |
66 | } |
67 | |
68 | TEST(DexIterators, AndTwoLists) { |
69 | Corpus C{10000}; |
70 | const PostingList L0({0, 5, 7, 10, 42, 320, 9000}); |
71 | const PostingList L1({0, 4, 7, 10, 30, 60, 320, 9000}); |
72 | |
73 | auto And = C.intersect(args: L1.iterator(), args: L0.iterator()); |
74 | |
75 | EXPECT_FALSE(And->reachedEnd()); |
76 | EXPECT_THAT(consumeIDs(*And), ElementsAre(0U, 7U, 10U, 320U, 9000U)); |
77 | |
78 | And = C.intersect(args: L0.iterator(), args: L1.iterator()); |
79 | |
80 | And->advanceTo(ID: 0); |
81 | EXPECT_EQ(And->peek(), 0U); |
82 | And->advanceTo(ID: 5); |
83 | EXPECT_EQ(And->peek(), 7U); |
84 | And->advanceTo(ID: 10); |
85 | EXPECT_EQ(And->peek(), 10U); |
86 | And->advanceTo(ID: 42); |
87 | EXPECT_EQ(And->peek(), 320U); |
88 | And->advanceTo(ID: 8999); |
89 | EXPECT_EQ(And->peek(), 9000U); |
90 | And->advanceTo(ID: 9001); |
91 | } |
92 | |
93 | TEST(DexIterators, AndThreeLists) { |
94 | Corpus C{10000}; |
95 | const PostingList L0({0, 5, 7, 10, 42, 320, 9000}); |
96 | const PostingList L1({0, 4, 7, 10, 30, 60, 320, 9000}); |
97 | const PostingList L2({1, 4, 7, 11, 30, 60, 320, 9000}); |
98 | |
99 | auto And = C.intersect(args: L0.iterator(), args: L1.iterator(), args: L2.iterator()); |
100 | EXPECT_EQ(And->peek(), 7U); |
101 | And->advanceTo(ID: 300); |
102 | EXPECT_EQ(And->peek(), 320U); |
103 | And->advanceTo(ID: 100000); |
104 | |
105 | EXPECT_TRUE(And->reachedEnd()); |
106 | } |
107 | |
108 | TEST(DexIterators, AndEmpty) { |
109 | Corpus C{10000}; |
110 | const PostingList L1{1}; |
111 | const PostingList L2{2}; |
112 | // These iterators are empty, but the optimizer can't tell. |
113 | auto Empty1 = C.intersect(args: L1.iterator(), args: L2.iterator()); |
114 | auto Empty2 = C.intersect(args: L1.iterator(), args: L2.iterator()); |
115 | // And syncs iterators on construction, and used to fail on empty children. |
116 | auto And = C.intersect(args: std::move(Empty1), args: std::move(Empty2)); |
117 | EXPECT_TRUE(And->reachedEnd()); |
118 | } |
119 | |
120 | TEST(DexIterators, OrTwoLists) { |
121 | Corpus C{10000}; |
122 | const PostingList L0({0, 5, 7, 10, 42, 320, 9000}); |
123 | const PostingList L1({0, 4, 7, 10, 30, 60, 320, 9000}); |
124 | |
125 | auto Or = C.unionOf(args: L0.iterator(), args: L1.iterator()); |
126 | |
127 | EXPECT_FALSE(Or->reachedEnd()); |
128 | EXPECT_EQ(Or->peek(), 0U); |
129 | Or->advance(); |
130 | EXPECT_EQ(Or->peek(), 4U); |
131 | Or->advance(); |
132 | EXPECT_EQ(Or->peek(), 5U); |
133 | Or->advance(); |
134 | EXPECT_EQ(Or->peek(), 7U); |
135 | Or->advance(); |
136 | EXPECT_EQ(Or->peek(), 10U); |
137 | Or->advance(); |
138 | EXPECT_EQ(Or->peek(), 30U); |
139 | Or->advanceTo(ID: 42); |
140 | EXPECT_EQ(Or->peek(), 42U); |
141 | Or->advanceTo(ID: 300); |
142 | EXPECT_EQ(Or->peek(), 320U); |
143 | Or->advanceTo(ID: 9000); |
144 | EXPECT_EQ(Or->peek(), 9000U); |
145 | Or->advanceTo(ID: 9001); |
146 | EXPECT_TRUE(Or->reachedEnd()); |
147 | |
148 | Or = C.unionOf(args: L0.iterator(), args: L1.iterator()); |
149 | |
150 | EXPECT_THAT(consumeIDs(*Or), |
151 | ElementsAre(0U, 4U, 5U, 7U, 10U, 30U, 42U, 60U, 320U, 9000U)); |
152 | } |
153 | |
154 | TEST(DexIterators, OrThreeLists) { |
155 | Corpus C{10000}; |
156 | const PostingList L0({0, 5, 7, 10, 42, 320, 9000}); |
157 | const PostingList L1({0, 4, 7, 10, 30, 60, 320, 9000}); |
158 | const PostingList L2({1, 4, 7, 11, 30, 60, 320, 9000}); |
159 | |
160 | auto Or = C.unionOf(args: L0.iterator(), args: L1.iterator(), args: L2.iterator()); |
161 | |
162 | EXPECT_FALSE(Or->reachedEnd()); |
163 | EXPECT_EQ(Or->peek(), 0U); |
164 | |
165 | Or->advance(); |
166 | EXPECT_EQ(Or->peek(), 1U); |
167 | |
168 | Or->advance(); |
169 | EXPECT_EQ(Or->peek(), 4U); |
170 | |
171 | Or->advanceTo(ID: 7); |
172 | |
173 | Or->advanceTo(ID: 59); |
174 | EXPECT_EQ(Or->peek(), 60U); |
175 | |
176 | Or->advanceTo(ID: 9001); |
177 | EXPECT_TRUE(Or->reachedEnd()); |
178 | } |
179 | |
180 | // FIXME(kbobyrev): The testcase below is similar to what is expected in real |
181 | // queries. It should be updated once new iterators (such as boosting, limiting, |
182 | // etc iterators) appear. However, it is not exhaustive and it would be |
183 | // beneficial to implement automatic generation (e.g. fuzzing) of query trees |
184 | // for more comprehensive testing. |
185 | TEST(DexIterators, QueryTree) { |
186 | // |
187 | // +-----------------+ |
188 | // |And Iterator:1, 5| |
189 | // +--------+--------+ |
190 | // | |
191 | // | |
192 | // +-------------+----------------------+ |
193 | // | | |
194 | // | | |
195 | // +----------v----------+ +----------v------------+ |
196 | // |And Iterator: 1, 5, 9| |Or Iterator: 0, 1, 3, 5| |
197 | // +----------+----------+ +----------+------------+ |
198 | // | | |
199 | // +------+-----+ ------------+ |
200 | // | | | | |
201 | // +-------v-----+ +----+---+ +---v----+ +----v---+ |
202 | // |1, 3, 5, 8, 9| |Boost: 2| |Boost: 3| |Boost: 4| |
203 | // +-------------+ +----+---+ +---+----+ +----+---+ |
204 | // | | | |
205 | // +----v-----+ +-v--+ +---v---+ |
206 | // |1, 5, 7, 9| |1, 5| |0, 3, 5| |
207 | // +----------+ +----+ +-------+ |
208 | // |
209 | Corpus C{10}; |
210 | const PostingList L0({1, 3, 5, 8, 9}); |
211 | const PostingList L1({1, 5, 7, 9}); |
212 | const PostingList L2({1, 5}); |
213 | const PostingList L3({0, 3, 5}); |
214 | |
215 | // Root of the query tree: [1, 5] |
216 | auto Root = C.intersect( |
217 | // Lower And Iterator: [1, 5, 9] |
218 | args: C.intersect(args: L0.iterator(), args: C.boost(Child: L1.iterator(), Factor: 2U)), |
219 | // Lower Or Iterator: [0, 1, 5] |
220 | args: C.unionOf(args: C.boost(Child: L2.iterator(), Factor: 3U), args: C.boost(Child: L3.iterator(), Factor: 4U))); |
221 | |
222 | EXPECT_FALSE(Root->reachedEnd()); |
223 | EXPECT_EQ(Root->peek(), 1U); |
224 | Root->advanceTo(ID: 0); |
225 | // Advance multiple times. Shouldn't do anything. |
226 | Root->advanceTo(ID: 1); |
227 | Root->advanceTo(ID: 0); |
228 | EXPECT_EQ(Root->peek(), 1U); |
229 | auto ElementBoost = Root->consume(); |
230 | EXPECT_THAT(ElementBoost, 6); |
231 | Root->advance(); |
232 | EXPECT_EQ(Root->peek(), 5U); |
233 | Root->advanceTo(ID: 5); |
234 | EXPECT_EQ(Root->peek(), 5U); |
235 | ElementBoost = Root->consume(); |
236 | EXPECT_THAT(ElementBoost, 8); |
237 | Root->advanceTo(ID: 9000); |
238 | EXPECT_TRUE(Root->reachedEnd()); |
239 | } |
240 | |
241 | TEST(DexIterators, StringRepresentation) { |
242 | Corpus C{10}; |
243 | const PostingList L1({1, 3, 5}); |
244 | const PostingList L2({1, 7, 9}); |
245 | |
246 | // No token given, prints full posting list. |
247 | auto I1 = L1.iterator(); |
248 | EXPECT_EQ(llvm::to_string(*I1), "[1 3 5]" ); |
249 | |
250 | // Token given, uses token's string representation. |
251 | Token Tok(Token::Kind::Trigram, "L2" ); |
252 | auto I2 = L1.iterator(Tok: &Tok); |
253 | EXPECT_EQ(llvm::to_string(*I2), "T=L2" ); |
254 | |
255 | auto Tree = C.limit(Child: C.intersect(args: std::move(I1), args: std::move(I2)), Limit: 10); |
256 | // AND reorders its children, we don't care which order it prints. |
257 | EXPECT_THAT(llvm::to_string(*Tree), AnyOf("(LIMIT 10 (& [1 3 5] T=L2))" , |
258 | "(LIMIT 10 (& T=L2 [1 3 5]))" )); |
259 | } |
260 | |
261 | TEST(DexIterators, Limit) { |
262 | Corpus C{10000}; |
263 | const PostingList L0({3, 6, 7, 20, 42, 100}); |
264 | const PostingList L1({1, 3, 5, 6, 7, 30, 100}); |
265 | const PostingList L2({0, 3, 5, 7, 8, 100}); |
266 | |
267 | auto DocIterator = C.limit(Child: L0.iterator(), Limit: 42); |
268 | EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7, 20, 42, 100)); |
269 | |
270 | DocIterator = C.limit(Child: L0.iterator(), Limit: 3); |
271 | EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7)); |
272 | |
273 | DocIterator = C.limit(Child: L0.iterator(), Limit: 0); |
274 | EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre()); |
275 | |
276 | auto AndIterator = |
277 | C.intersect(args: C.limit(Child: C.all(), Limit: 343), args: C.limit(Child: L0.iterator(), Limit: 2), |
278 | args: C.limit(Child: L1.iterator(), Limit: 3), args: C.limit(Child: L2.iterator(), Limit: 42)); |
279 | EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(3, 7)); |
280 | } |
281 | |
282 | TEST(DexIterators, True) { |
283 | EXPECT_TRUE(Corpus{0}.all()->reachedEnd()); |
284 | EXPECT_THAT(consumeIDs(*Corpus{4}.all()), ElementsAre(0, 1, 2, 3)); |
285 | } |
286 | |
287 | TEST(DexIterators, Boost) { |
288 | Corpus C{5}; |
289 | auto BoostIterator = C.boost(Child: C.all(), Factor: 42U); |
290 | EXPECT_FALSE(BoostIterator->reachedEnd()); |
291 | auto ElementBoost = BoostIterator->consume(); |
292 | EXPECT_THAT(ElementBoost, 42U); |
293 | |
294 | const PostingList L0({2, 4}); |
295 | const PostingList L1({1, 4}); |
296 | auto Root = C.unionOf(args: C.all(), args: C.boost(Child: L0.iterator(), Factor: 2U), |
297 | args: C.boost(Child: L1.iterator(), Factor: 3U)); |
298 | |
299 | ElementBoost = Root->consume(); |
300 | EXPECT_THAT(ElementBoost, 1); |
301 | Root->advance(); |
302 | EXPECT_THAT(Root->peek(), 1U); |
303 | ElementBoost = Root->consume(); |
304 | EXPECT_THAT(ElementBoost, 3); |
305 | |
306 | Root->advance(); |
307 | EXPECT_THAT(Root->peek(), 2U); |
308 | ElementBoost = Root->consume(); |
309 | EXPECT_THAT(ElementBoost, 2); |
310 | |
311 | Root->advanceTo(ID: 4); |
312 | ElementBoost = Root->consume(); |
313 | EXPECT_THAT(ElementBoost, 3); |
314 | } |
315 | |
316 | TEST(DexIterators, Optimizations) { |
317 | Corpus C{5}; |
318 | const PostingList L1{1}; |
319 | const PostingList L2{2}; |
320 | const PostingList L3{3}; |
321 | |
322 | // empty and/or yield true/false |
323 | EXPECT_EQ(llvm::to_string(*C.intersect()), "true" ); |
324 | EXPECT_EQ(llvm::to_string(*C.unionOf()), "false" ); |
325 | |
326 | // true/false inside and/or short-circuit |
327 | EXPECT_EQ(llvm::to_string(*C.intersect(L1.iterator(), C.all())), "[1]" ); |
328 | EXPECT_EQ(llvm::to_string(*C.intersect(L1.iterator(), C.none())), "false" ); |
329 | // Not optimized to avoid breaking boosts. |
330 | EXPECT_EQ(llvm::to_string(*C.unionOf(L1.iterator(), C.all())), |
331 | "(| [1] true)" ); |
332 | EXPECT_EQ(llvm::to_string(*C.unionOf(L1.iterator(), C.none())), "[1]" ); |
333 | |
334 | // and/or nested inside and/or are flattened |
335 | EXPECT_EQ(llvm::to_string(*C.intersect( |
336 | L1.iterator(), C.intersect(L1.iterator(), L1.iterator()))), |
337 | "(& [1] [1] [1])" ); |
338 | EXPECT_EQ(llvm::to_string(*C.unionOf( |
339 | L1.iterator(), C.unionOf(L2.iterator(), L3.iterator()))), |
340 | "(| [1] [2] [3])" ); |
341 | |
342 | // optimizations combine over multiple levels |
343 | EXPECT_EQ(llvm::to_string(*C.intersect( |
344 | C.intersect(L1.iterator(), C.intersect()), C.unionOf(C.all()))), |
345 | "[1]" ); |
346 | } |
347 | |
348 | //===----------------------------------------------------------------------===// |
349 | // Search token tests. |
350 | //===----------------------------------------------------------------------===// |
351 | |
352 | ::testing::Matcher<std::vector<Token>> |
353 | tokensAre(std::initializer_list<std::string> Strings, Token::Kind Kind) { |
354 | std::vector<Token> Tokens; |
355 | for (const auto &TokenData : Strings) { |
356 | Tokens.push_back(x: Token(Kind, TokenData)); |
357 | } |
358 | return ::testing::UnorderedElementsAreArray(container: Tokens); |
359 | } |
360 | |
361 | ::testing::Matcher<std::vector<Token>> |
362 | trigramsAre(std::initializer_list<std::string> Trigrams) { |
363 | return tokensAre(Strings: Trigrams, Kind: Token::Kind::Trigram); |
364 | } |
365 | |
366 | std::vector<Token> identifierTrigramTokens(llvm::StringRef S) { |
367 | std::vector<Trigram> Trigrams; |
368 | generateIdentifierTrigrams(Identifier: S, Out&: Trigrams); |
369 | std::vector<Token> Tokens; |
370 | for (Trigram T : Trigrams) |
371 | Tokens.emplace_back(args: Token::Kind::Trigram, args: T.str()); |
372 | return Tokens; |
373 | } |
374 | |
375 | TEST(DexTrigrams, IdentifierTrigrams) { |
376 | EXPECT_THAT(identifierTrigramTokens("X86" ), trigramsAre({"x86" , "x" , "x8" })); |
377 | |
378 | EXPECT_THAT(identifierTrigramTokens("nl" ), trigramsAre({"nl" , "n" })); |
379 | |
380 | EXPECT_THAT(identifierTrigramTokens("n" ), trigramsAre({"n" })); |
381 | |
382 | EXPECT_THAT(identifierTrigramTokens("clangd" ), |
383 | trigramsAre({"c" , "cl" , "cla" , "lan" , "ang" , "ngd" })); |
384 | |
385 | EXPECT_THAT(identifierTrigramTokens("abc_def" ), |
386 | trigramsAre({"a" , "d" , "ab" , "ad" , "de" , "abc" , "abd" , "ade" , |
387 | "bcd" , "bde" , "cde" , "def" })); |
388 | |
389 | EXPECT_THAT(identifierTrigramTokens("a_b_c_d_e_" ), |
390 | trigramsAre({"a" , "b" , "ab" , "bc" , "abc" , "bcd" , "cde" })); |
391 | |
392 | EXPECT_THAT(identifierTrigramTokens("unique_ptr" ), |
393 | trigramsAre({"u" , "p" , "un" , "up" , "pt" , "uni" , "unp" , |
394 | "upt" , "niq" , "nip" , "npt" , "iqu" , "iqp" , "ipt" , |
395 | "que" , "qup" , "qpt" , "uep" , "ept" , "ptr" })); |
396 | |
397 | EXPECT_THAT(identifierTrigramTokens("TUDecl" ), |
398 | trigramsAre({"t" , "d" , "tu" , "td" , "de" , "tud" , "tde" , "ude" , |
399 | "dec" , "ecl" })); |
400 | |
401 | EXPECT_THAT(identifierTrigramTokens("IsOK" ), |
402 | trigramsAre({"i" , "o" , "is" , "ok" , "io" , "iso" , "iok" , "sok" })); |
403 | |
404 | EXPECT_THAT(identifierTrigramTokens("_pb" ), |
405 | trigramsAre({"_" , "_p" , "p" , "pb" })); |
406 | EXPECT_THAT(identifierTrigramTokens("__pb" ), |
407 | trigramsAre({"_" , "_p" , "p" , "pb" })); |
408 | |
409 | EXPECT_THAT(identifierTrigramTokens("abc_defGhij__klm" ), |
410 | trigramsAre({"a" , "d" , "ab" , "ad" , "dg" , "de" , "abc" , |
411 | "abd" , "ade" , "adg" , "bcd" , "bde" , "bdg" , "cde" , |
412 | "cdg" , "def" , "deg" , "dgh" , "dgk" , "efg" , "egh" , |
413 | "egk" , "fgh" , "fgk" , "ghi" , "ghk" , "gkl" , "hij" , |
414 | "hik" , "hkl" , "ijk" , "ikl" , "jkl" , "klm" })); |
415 | EXPECT_THAT(identifierTrigramTokens("" ), IsEmpty()); |
416 | } |
417 | |
418 | TEST(DexTrigrams, QueryTrigrams) { |
419 | EXPECT_THAT(generateQueryTrigrams("c" ), trigramsAre({"c" })); |
420 | EXPECT_THAT(generateQueryTrigrams("cl" ), trigramsAre({"cl" })); |
421 | EXPECT_THAT(generateQueryTrigrams("cla" ), trigramsAre({"cla" })); |
422 | |
423 | EXPECT_THAT(generateQueryTrigrams("" ), trigramsAre({})); |
424 | EXPECT_THAT(generateQueryTrigrams("_" ), trigramsAre({"_" })); |
425 | EXPECT_THAT(generateQueryTrigrams("__" ), trigramsAre({"_" })); |
426 | EXPECT_THAT(generateQueryTrigrams("___" ), trigramsAre({"_" })); |
427 | |
428 | EXPECT_THAT(generateQueryTrigrams("m_" ), trigramsAre({"m" })); |
429 | |
430 | EXPECT_THAT(generateQueryTrigrams("p_b" ), trigramsAre({"pb" })); |
431 | EXPECT_THAT(generateQueryTrigrams("pb_" ), trigramsAre({"pb" })); |
432 | EXPECT_THAT(generateQueryTrigrams("_p" ), trigramsAre({"_p" })); |
433 | EXPECT_THAT(generateQueryTrigrams("_pb_" ), trigramsAre({"pb" })); |
434 | EXPECT_THAT(generateQueryTrigrams("__pb" ), trigramsAre({"pb" })); |
435 | |
436 | EXPECT_THAT(generateQueryTrigrams("X86" ), trigramsAre({"x86" })); |
437 | |
438 | EXPECT_THAT(generateQueryTrigrams("clangd" ), |
439 | trigramsAre({"cla" , "lan" , "ang" , "ngd" })); |
440 | |
441 | EXPECT_THAT(generateQueryTrigrams("abc_def" ), |
442 | trigramsAre({"abc" , "bcd" , "cde" , "def" })); |
443 | |
444 | EXPECT_THAT(generateQueryTrigrams("a_b_c_d_e_" ), |
445 | trigramsAre({"abc" , "bcd" , "cde" })); |
446 | |
447 | EXPECT_THAT(generateQueryTrigrams("unique_ptr" ), |
448 | trigramsAre({"uni" , "niq" , "iqu" , "que" , "uep" , "ept" , "ptr" })); |
449 | |
450 | EXPECT_THAT(generateQueryTrigrams("TUDecl" ), |
451 | trigramsAre({"tud" , "ude" , "dec" , "ecl" })); |
452 | |
453 | EXPECT_THAT(generateQueryTrigrams("IsOK" ), trigramsAre({"iso" , "sok" })); |
454 | |
455 | EXPECT_THAT(generateQueryTrigrams("abc_defGhij__klm" ), |
456 | trigramsAre({"abc" , "bcd" , "cde" , "def" , "efg" , "fgh" , "ghi" , |
457 | "hij" , "ijk" , "jkl" , "klm" })); |
458 | } |
459 | |
460 | TEST(DexSearchTokens, SymbolPath) { |
461 | EXPECT_THAT(generateProximityURIs( |
462 | "unittest:///clang-tools-extra/clangd/index/Token.h" ), |
463 | ElementsAre("unittest:///clang-tools-extra/clangd/index/Token.h" , |
464 | "unittest:///clang-tools-extra/clangd/index" , |
465 | "unittest:///clang-tools-extra/clangd" , |
466 | "unittest:///clang-tools-extra" , "unittest:///" )); |
467 | |
468 | EXPECT_THAT(generateProximityURIs("unittest:///a/b/c.h" ), |
469 | ElementsAre("unittest:///a/b/c.h" , "unittest:///a/b" , |
470 | "unittest:///a" , "unittest:///" )); |
471 | } |
472 | |
473 | //===----------------------------------------------------------------------===// |
474 | // Index tests. |
475 | //===----------------------------------------------------------------------===// |
476 | |
477 | TEST(Dex, Lookup) { |
478 | auto I = Dex::build(generateSymbols(QualifiedNames: {"ns::abc" , "ns::xyz" }), RefSlab(), |
479 | RelationSlab()); |
480 | EXPECT_THAT(lookup(*I, SymbolID("ns::abc" )), UnorderedElementsAre("ns::abc" )); |
481 | EXPECT_THAT(lookup(*I, {SymbolID("ns::abc" ), SymbolID("ns::xyz" )}), |
482 | UnorderedElementsAre("ns::abc" , "ns::xyz" )); |
483 | EXPECT_THAT(lookup(*I, {SymbolID("ns::nonono" ), SymbolID("ns::xyz" )}), |
484 | UnorderedElementsAre("ns::xyz" )); |
485 | EXPECT_THAT(lookup(*I, SymbolID("ns::nonono" )), UnorderedElementsAre()); |
486 | } |
487 | |
488 | TEST(Dex, FuzzyFind) { |
489 | auto Index = |
490 | Dex::build(generateSymbols(QualifiedNames: {"ns::ABC" , "ns::BCD" , "::ABC" , |
491 | "ns::nested::ABC" , "other::ABC" , "other::A" }), |
492 | RefSlab(), RelationSlab()); |
493 | FuzzyFindRequest Req; |
494 | Req.Query = "ABC" ; |
495 | Req.Scopes = {"ns::" }; |
496 | EXPECT_THAT(match(*Index, Req), UnorderedElementsAre("ns::ABC" )); |
497 | Req.Scopes = {"ns::" , "ns::nested::" }; |
498 | EXPECT_THAT(match(*Index, Req), |
499 | UnorderedElementsAre("ns::ABC" , "ns::nested::ABC" )); |
500 | Req.Query = "A" ; |
501 | Req.Scopes = {"other::" }; |
502 | EXPECT_THAT(match(*Index, Req), |
503 | UnorderedElementsAre("other::A" , "other::ABC" )); |
504 | Req.Query = "" ; |
505 | Req.Scopes = {}; |
506 | Req.AnyScope = true; |
507 | EXPECT_THAT(match(*Index, Req), |
508 | UnorderedElementsAre("ns::ABC" , "ns::BCD" , "::ABC" , |
509 | "ns::nested::ABC" , "other::ABC" , |
510 | "other::A" )); |
511 | } |
512 | |
513 | TEST(DexTest, DexLimitedNumMatches) { |
514 | auto I = Dex::build(generateNumSymbols(Begin: 0, End: 100), RefSlab(), RelationSlab()); |
515 | FuzzyFindRequest Req; |
516 | Req.Query = "5" ; |
517 | Req.AnyScope = true; |
518 | Req.Limit = 3; |
519 | bool Incomplete; |
520 | auto Matches = match(I: *I, Req, Incomplete: &Incomplete); |
521 | EXPECT_TRUE(Req.Limit); |
522 | EXPECT_EQ(Matches.size(), *Req.Limit); |
523 | EXPECT_TRUE(Incomplete); |
524 | } |
525 | |
526 | TEST(DexTest, FuzzyMatch) { |
527 | auto I = Dex::build( |
528 | generateSymbols(QualifiedNames: {"LaughingOutLoud" , "LionPopulation" , "LittleOldLady" }), |
529 | RefSlab(), RelationSlab()); |
530 | FuzzyFindRequest Req; |
531 | Req.Query = "lol" ; |
532 | Req.AnyScope = true; |
533 | Req.Limit = 2; |
534 | EXPECT_THAT(match(*I, Req), |
535 | UnorderedElementsAre("LaughingOutLoud" , "LittleOldLady" )); |
536 | } |
537 | |
538 | TEST(DexTest, ShortQuery) { |
539 | auto I = Dex::build(generateSymbols(QualifiedNames: {"_OneTwoFourSix" }), RefSlab(), |
540 | RelationSlab()); |
541 | FuzzyFindRequest Req; |
542 | Req.AnyScope = true; |
543 | bool Incomplete; |
544 | |
545 | EXPECT_THAT(match(*I, Req, &Incomplete), ElementsAre("_OneTwoFourSix" )); |
546 | EXPECT_FALSE(Incomplete) << "Empty string is not a short query" ; |
547 | |
548 | Req.Query = "o" ; |
549 | EXPECT_THAT(match(*I, Req, &Incomplete), ElementsAre("_OneTwoFourSix" )); |
550 | EXPECT_TRUE(Incomplete) << "Using first head as unigram" ; |
551 | |
552 | Req.Query = "_o" ; |
553 | EXPECT_THAT(match(*I, Req, &Incomplete), ElementsAre("_OneTwoFourSix" )); |
554 | EXPECT_TRUE(Incomplete) << "Using delimiter and first head as bigram" ; |
555 | |
556 | Req.Query = "on" ; |
557 | EXPECT_THAT(match(*I, Req, &Incomplete), ElementsAre("_OneTwoFourSix" )); |
558 | EXPECT_TRUE(Incomplete) << "Using first head and tail as bigram" ; |
559 | |
560 | Req.Query = "ot" ; |
561 | EXPECT_THAT(match(*I, Req, &Incomplete), ElementsAre("_OneTwoFourSix" )); |
562 | EXPECT_TRUE(Incomplete) << "Using first two heads as bigram" ; |
563 | |
564 | Req.Query = "tw" ; |
565 | EXPECT_THAT(match(*I, Req, &Incomplete), ElementsAre("_OneTwoFourSix" )); |
566 | EXPECT_TRUE(Incomplete) << "Using second head and tail as bigram" ; |
567 | |
568 | Req.Query = "tf" ; |
569 | EXPECT_THAT(match(*I, Req, &Incomplete), ElementsAre("_OneTwoFourSix" )); |
570 | EXPECT_TRUE(Incomplete) << "Using second and third heads as bigram" ; |
571 | |
572 | Req.Query = "fo" ; |
573 | EXPECT_THAT(match(*I, Req, &Incomplete), ElementsAre()); |
574 | EXPECT_TRUE(Incomplete) << "Short queries have different semantics" ; |
575 | |
576 | Req.Query = "tfs" ; |
577 | EXPECT_THAT(match(*I, Req, &Incomplete), ElementsAre("_OneTwoFourSix" )); |
578 | EXPECT_FALSE(Incomplete) << "3-char string is not a short query" ; |
579 | } |
580 | |
581 | TEST(DexTest, MatchQualifiedNamesWithoutSpecificScope) { |
582 | auto I = Dex::build(generateSymbols(QualifiedNames: {"a::y1" , "b::y2" , "y3" }), RefSlab(), |
583 | RelationSlab()); |
584 | FuzzyFindRequest Req; |
585 | Req.AnyScope = true; |
586 | Req.Query = "y" ; |
587 | EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1" , "b::y2" , "y3" )); |
588 | } |
589 | |
590 | TEST(DexTest, MatchQualifiedNamesWithGlobalScope) { |
591 | auto I = Dex::build(generateSymbols(QualifiedNames: {"a::y1" , "b::y2" , "y3" }), RefSlab(), |
592 | RelationSlab()); |
593 | FuzzyFindRequest Req; |
594 | Req.Query = "y" ; |
595 | Req.Scopes = {"" }; |
596 | EXPECT_THAT(match(*I, Req), UnorderedElementsAre("y3" )); |
597 | } |
598 | |
599 | TEST(DexTest, MatchQualifiedNamesWithOneScope) { |
600 | auto I = |
601 | Dex::build(generateSymbols(QualifiedNames: {"a::y1" , "a::y2" , "a::x" , "b::y2" , "y3" }), |
602 | RefSlab(), RelationSlab()); |
603 | FuzzyFindRequest Req; |
604 | Req.Query = "y" ; |
605 | Req.Scopes = {"a::" }; |
606 | EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1" , "a::y2" )); |
607 | } |
608 | |
609 | TEST(DexTest, MatchQualifiedNamesWithMultipleScopes) { |
610 | auto I = |
611 | Dex::build(generateSymbols(QualifiedNames: {"a::y1" , "a::y2" , "a::x" , "b::y3" , "y3" }), |
612 | RefSlab(), RelationSlab()); |
613 | FuzzyFindRequest Req; |
614 | Req.Query = "y" ; |
615 | Req.Scopes = {"a::" , "b::" }; |
616 | EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1" , "a::y2" , "b::y3" )); |
617 | } |
618 | |
619 | TEST(DexTest, NoMatchNestedScopes) { |
620 | auto I = Dex::build(generateSymbols(QualifiedNames: {"a::y1" , "a::b::y2" }), RefSlab(), |
621 | RelationSlab()); |
622 | FuzzyFindRequest Req; |
623 | Req.Query = "y" ; |
624 | Req.Scopes = {"a::" }; |
625 | EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1" )); |
626 | } |
627 | |
628 | TEST(DexTest, WildcardScope) { |
629 | auto I = Dex::build(generateSymbols(QualifiedNames: {"a::y1" , "a::b::y2" , "c::y3" }), |
630 | RefSlab(), RelationSlab()); |
631 | FuzzyFindRequest Req; |
632 | Req.AnyScope = true; |
633 | Req.Query = "y" ; |
634 | Req.Scopes = {"a::" }; |
635 | EXPECT_THAT(match(*I, Req), |
636 | UnorderedElementsAre("a::y1" , "a::b::y2" , "c::y3" )); |
637 | } |
638 | |
639 | TEST(DexTest, IgnoreCases) { |
640 | auto I = Dex::build(generateSymbols(QualifiedNames: {"ns::ABC" , "ns::abc" }), RefSlab(), |
641 | RelationSlab()); |
642 | FuzzyFindRequest Req; |
643 | Req.Query = "AB" ; |
644 | Req.Scopes = {"ns::" }; |
645 | EXPECT_THAT(match(*I, Req), UnorderedElementsAre("ns::ABC" , "ns::abc" )); |
646 | } |
647 | |
648 | TEST(DexTest, UnknownPostingList) { |
649 | // Regression test: we used to ignore unknown scopes and accept any symbol. |
650 | auto I = Dex::build(generateSymbols(QualifiedNames: {"ns::ABC" , "ns::abc" }), RefSlab(), |
651 | RelationSlab()); |
652 | FuzzyFindRequest Req; |
653 | Req.Scopes = {"ns2::" }; |
654 | EXPECT_THAT(match(*I, Req), UnorderedElementsAre()); |
655 | } |
656 | |
657 | TEST(DexTest, Lookup) { |
658 | auto I = Dex::build(generateSymbols(QualifiedNames: {"ns::abc" , "ns::xyz" }), RefSlab(), |
659 | RelationSlab()); |
660 | EXPECT_THAT(lookup(*I, SymbolID("ns::abc" )), UnorderedElementsAre("ns::abc" )); |
661 | EXPECT_THAT(lookup(*I, {SymbolID("ns::abc" ), SymbolID("ns::xyz" )}), |
662 | UnorderedElementsAre("ns::abc" , "ns::xyz" )); |
663 | EXPECT_THAT(lookup(*I, {SymbolID("ns::nonono" ), SymbolID("ns::xyz" )}), |
664 | UnorderedElementsAre("ns::xyz" )); |
665 | EXPECT_THAT(lookup(*I, SymbolID("ns::nonono" )), UnorderedElementsAre()); |
666 | } |
667 | |
668 | TEST(DexTest, SymbolIndexOptionsFilter) { |
669 | auto CodeCompletionSymbol = symbol(QName: "Completion" ); |
670 | auto NonCodeCompletionSymbol = symbol(QName: "NoCompletion" ); |
671 | CodeCompletionSymbol.Flags = Symbol::SymbolFlag::IndexedForCodeCompletion; |
672 | NonCodeCompletionSymbol.Flags = Symbol::SymbolFlag::None; |
673 | std::vector<Symbol> Symbols{CodeCompletionSymbol, NonCodeCompletionSymbol}; |
674 | Dex I(Symbols, RefSlab(), RelationSlab()); |
675 | FuzzyFindRequest Req; |
676 | Req.AnyScope = true; |
677 | Req.RestrictForCodeCompletion = false; |
678 | EXPECT_THAT(match(I, Req), ElementsAre("Completion" , "NoCompletion" )); |
679 | Req.RestrictForCodeCompletion = true; |
680 | EXPECT_THAT(match(I, Req), ElementsAre("Completion" )); |
681 | } |
682 | |
683 | TEST(DexTest, ProximityPathsBoosting) { |
684 | auto RootSymbol = symbol(QName: "root::abc" ); |
685 | RootSymbol.CanonicalDeclaration.FileURI = "unittest:///file.h" ; |
686 | auto CloseSymbol = symbol(QName: "close::abc" ); |
687 | CloseSymbol.CanonicalDeclaration.FileURI = "unittest:///a/b/c/d/e/f/file.h" ; |
688 | |
689 | std::vector<Symbol> Symbols{CloseSymbol, RootSymbol}; |
690 | Dex I(Symbols, RefSlab(), RelationSlab()); |
691 | |
692 | FuzzyFindRequest Req; |
693 | Req.AnyScope = true; |
694 | Req.Query = "abc" ; |
695 | // The best candidate can change depending on the proximity paths. |
696 | Req.Limit = 1; |
697 | |
698 | // FuzzyFind request comes from the file which is far from the root: expect |
699 | // CloseSymbol to come out. |
700 | Req.ProximityPaths = {testPath(File: "a/b/c/d/e/f/file.h" )}; |
701 | EXPECT_THAT(match(I, Req), ElementsAre("close::abc" )); |
702 | |
703 | // FuzzyFind request comes from the file which is close to the root: expect |
704 | // RootSymbol to come out. |
705 | Req.ProximityPaths = {testPath(File: "file.h" )}; |
706 | EXPECT_THAT(match(I, Req), ElementsAre("root::abc" )); |
707 | } |
708 | |
709 | TEST(DexTests, Refs) { |
710 | llvm::DenseMap<SymbolID, std::vector<Ref>> Refs; |
711 | auto AddRef = [&](const Symbol &Sym, const char *Filename, RefKind Kind) { |
712 | auto &SymbolRefs = Refs[Sym.ID]; |
713 | SymbolRefs.emplace_back(); |
714 | SymbolRefs.back().Kind = Kind; |
715 | SymbolRefs.back().Location.FileURI = Filename; |
716 | }; |
717 | auto Foo = symbol(QName: "foo" ); |
718 | auto Bar = symbol(QName: "bar" ); |
719 | AddRef(Foo, "foo.h" , RefKind::Declaration); |
720 | AddRef(Foo, "foo.cc" , RefKind::Definition); |
721 | AddRef(Foo, "reffoo.h" , RefKind::Reference); |
722 | AddRef(Bar, "bar.h" , RefKind::Declaration); |
723 | |
724 | RefsRequest Req; |
725 | Req.IDs.insert(V: Foo.ID); |
726 | Req.Filter = RefKind::Declaration | RefKind::Definition; |
727 | |
728 | std::vector<std::string> Files; |
729 | EXPECT_FALSE(Dex(std::vector<Symbol>{Foo, Bar}, Refs, RelationSlab()) |
730 | .refs(Req, [&](const Ref &R) { |
731 | Files.push_back(R.Location.FileURI); |
732 | })); |
733 | EXPECT_THAT(Files, UnorderedElementsAre("foo.h" , "foo.cc" )); |
734 | |
735 | Req.Limit = 1; |
736 | Files.clear(); |
737 | EXPECT_TRUE(Dex(std::vector<Symbol>{Foo, Bar}, Refs, RelationSlab()) |
738 | .refs(Req, [&](const Ref &R) { |
739 | Files.push_back(R.Location.FileURI); |
740 | })); |
741 | EXPECT_THAT(Files, ElementsAre(AnyOf("foo.h" , "foo.cc" ))); |
742 | } |
743 | |
744 | TEST(DexTests, Relations) { |
745 | auto Parent = symbol(QName: "Parent" ); |
746 | auto Child1 = symbol(QName: "Child1" ); |
747 | auto Child2 = symbol(QName: "Child2" ); |
748 | |
749 | std::vector<Symbol> Symbols{Parent, Child1, Child2}; |
750 | |
751 | std::vector<Relation> Relations{{.Subject: Parent.ID, .Predicate: RelationKind::BaseOf, .Object: Child1.ID}, |
752 | {.Subject: Parent.ID, .Predicate: RelationKind::BaseOf, .Object: Child2.ID}}; |
753 | |
754 | Dex I{Symbols, RefSlab(), Relations}; |
755 | |
756 | std::vector<SymbolID> Results; |
757 | RelationsRequest Req; |
758 | Req.Subjects.insert(V: Parent.ID); |
759 | Req.Predicate = RelationKind::BaseOf; |
760 | I.relations(Req, Callback: [&](const SymbolID &Subject, const Symbol &Object) { |
761 | Results.push_back(x: Object.ID); |
762 | }); |
763 | EXPECT_THAT(Results, UnorderedElementsAre(Child1.ID, Child2.ID)); |
764 | } |
765 | |
766 | TEST(DexIndex, IndexedFiles) { |
767 | SymbolSlab Symbols; |
768 | RefSlab Refs; |
769 | auto Size = Symbols.bytes() + Refs.bytes(); |
770 | auto Data = std::make_pair(x: std::move(Symbols), y: std::move(Refs)); |
771 | llvm::StringSet<> Files = {"unittest:///foo.cc" , "unittest:///bar.cc" }; |
772 | Dex I(std::move(Data.first), std::move(Data.second), RelationSlab(), |
773 | std::move(Files), IndexContents::All, std::move(Data), Size); |
774 | auto ContainsFile = I.indexedFiles(); |
775 | EXPECT_EQ(ContainsFile("unittest:///foo.cc" ), IndexContents::All); |
776 | EXPECT_EQ(ContainsFile("unittest:///bar.cc" ), IndexContents::All); |
777 | EXPECT_EQ(ContainsFile("unittest:///foobar.cc" ), IndexContents::None); |
778 | } |
779 | |
780 | TEST(DexTest, PreferredTypesBoosting) { |
781 | auto Sym1 = symbol(QName: "t1" ); |
782 | Sym1.Type = "T1" ; |
783 | auto Sym2 = symbol(QName: "t2" ); |
784 | Sym2.Type = "T2" ; |
785 | |
786 | std::vector<Symbol> Symbols{Sym1, Sym2}; |
787 | Dex I(Symbols, RefSlab(), RelationSlab()); |
788 | |
789 | FuzzyFindRequest Req; |
790 | Req.AnyScope = true; |
791 | Req.Query = "t" ; |
792 | // The best candidate can change depending on the preferred type. |
793 | Req.Limit = 1; |
794 | |
795 | Req.PreferredTypes = {std::string(Sym1.Type)}; |
796 | EXPECT_THAT(match(I, Req), ElementsAre("t1" )); |
797 | |
798 | Req.PreferredTypes = {std::string(Sym2.Type)}; |
799 | EXPECT_THAT(match(I, Req), ElementsAre("t2" )); |
800 | } |
801 | |
802 | TEST(DexTest, TemplateSpecialization) { |
803 | SymbolSlab::Builder B; |
804 | |
805 | Symbol S = symbol(QName: "TempSpec" ); |
806 | S.ID = SymbolID("0" ); |
807 | B.insert(S); |
808 | |
809 | S = symbol(QName: "TempSpec" ); |
810 | S.ID = SymbolID("1" ); |
811 | S.TemplateSpecializationArgs = "<int, bool>" ; |
812 | S.SymInfo.Properties = static_cast<index::SymbolPropertySet>( |
813 | index::SymbolProperty::TemplateSpecialization); |
814 | B.insert(S); |
815 | |
816 | S = symbol(QName: "TempSpec" ); |
817 | S.ID = SymbolID("2" ); |
818 | S.TemplateSpecializationArgs = "<int, U>" ; |
819 | S.SymInfo.Properties = static_cast<index::SymbolPropertySet>( |
820 | index::SymbolProperty::TemplatePartialSpecialization); |
821 | B.insert(S); |
822 | |
823 | auto I = dex::Dex::build(std::move(B).build(), RefSlab(), RelationSlab()); |
824 | FuzzyFindRequest Req; |
825 | Req.AnyScope = true; |
826 | |
827 | Req.Query = "TempSpec" ; |
828 | EXPECT_THAT(match(*I, Req), |
829 | UnorderedElementsAre("TempSpec" , "TempSpec<int, bool>" , |
830 | "TempSpec<int, U>" )); |
831 | |
832 | // FIXME: Add filtering for template argument list. |
833 | Req.Query = "TempSpec<int" ; |
834 | EXPECT_THAT(match(*I, Req), IsEmpty()); |
835 | } |
836 | |
837 | } // namespace |
838 | } // namespace dex |
839 | } // namespace clangd |
840 | } // namespace clang |
841 | |