1 | //===- unittests/AST/CommentLexer.cpp ------ Comment lexer tests ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang/AST/CommentLexer.h" |
10 | #include "clang/AST/CommentCommandTraits.h" |
11 | #include "clang/Basic/CommentOptions.h" |
12 | #include "clang/Basic/Diagnostic.h" |
13 | #include "clang/Basic/DiagnosticOptions.h" |
14 | #include "clang/Basic/FileManager.h" |
15 | #include "clang/Basic/SourceManager.h" |
16 | #include "llvm/ADT/STLExtras.h" |
17 | #include "gtest/gtest.h" |
18 | #include <vector> |
19 | |
20 | using namespace llvm; |
21 | using namespace clang; |
22 | |
23 | namespace clang { |
24 | namespace comments { |
25 | |
26 | namespace { |
27 | class : public ::testing::Test { |
28 | protected: |
29 | () |
30 | : FileMgr(FileMgrOpts), |
31 | DiagID(new DiagnosticIDs()), |
32 | Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()), |
33 | SourceMgr(Diags, FileMgr), |
34 | Traits(Allocator, CommentOptions()) { |
35 | } |
36 | |
37 | FileSystemOptions ; |
38 | FileManager ; |
39 | IntrusiveRefCntPtr<DiagnosticIDs> ; |
40 | DiagnosticsEngine ; |
41 | SourceManager ; |
42 | llvm::BumpPtrAllocator ; |
43 | CommandTraits ; |
44 | |
45 | void lexString(const char *Source, std::vector<Token> &Toks); |
46 | |
47 | StringRef getCommandName(const Token &Tok) { |
48 | return Traits.getCommandInfo(Tok.getCommandID())->Name; |
49 | } |
50 | |
51 | StringRef (const Token &Tok) { |
52 | return Traits.getCommandInfo(Tok.getVerbatimBlockID())->Name; |
53 | } |
54 | |
55 | StringRef (const Token &Tok) { |
56 | return Traits.getCommandInfo(Tok.getVerbatimLineID())->Name; |
57 | } |
58 | }; |
59 | |
60 | void CommentLexerTest::(const char *Source, |
61 | std::vector<Token> &Toks) { |
62 | std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(InputData: Source); |
63 | FileID File = SourceMgr.createFileID(Buffer: std::move(Buf)); |
64 | SourceLocation Begin = SourceMgr.getLocForStartOfFile(FID: File); |
65 | |
66 | Lexer L(Allocator, Diags, Traits, Begin, Source, Source + strlen(Source)); |
67 | |
68 | while (1) { |
69 | Token Tok; |
70 | L.lex(T&: Tok); |
71 | if (Tok.is(K: tok::eof)) |
72 | break; |
73 | Toks.push_back(x: Tok); |
74 | } |
75 | } |
76 | |
77 | } // unnamed namespace |
78 | |
79 | // Empty source range should be handled. |
80 | TEST_F(CommentLexerTest, Basic1) { |
81 | const char *Source = "" ; |
82 | std::vector<Token> Toks; |
83 | |
84 | lexString(Source, Toks); |
85 | |
86 | ASSERT_EQ(0U, Toks.size()); |
87 | } |
88 | |
89 | // Empty comments should be handled. |
90 | TEST_F(CommentLexerTest, Basic2) { |
91 | const char *Sources[] = { |
92 | "//" , "///" , "//!" , "///<" , "//!<" |
93 | }; |
94 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
95 | std::vector<Token> Toks; |
96 | |
97 | lexString(Sources[i], Toks); |
98 | |
99 | ASSERT_EQ(1U, Toks.size()); |
100 | |
101 | ASSERT_EQ(tok::newline, Toks[0].getKind()); |
102 | } |
103 | } |
104 | |
105 | // Empty comments should be handled. |
106 | TEST_F(CommentLexerTest, Basic3) { |
107 | const char *Sources[] = { |
108 | "/**/" , "/***/" , "/*!*/" , "/**<*/" , "/*!<*/" |
109 | }; |
110 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
111 | std::vector<Token> Toks; |
112 | |
113 | lexString(Sources[i], Toks); |
114 | |
115 | ASSERT_EQ(2U, Toks.size()); |
116 | |
117 | ASSERT_EQ(tok::newline, Toks[0].getKind()); |
118 | ASSERT_EQ(tok::newline, Toks[1].getKind()); |
119 | } |
120 | } |
121 | |
122 | // Single comment with plain text. |
123 | TEST_F(CommentLexerTest, Basic4) { |
124 | const char *Sources[] = { |
125 | "// Meow" , "/// Meow" , "//! Meow" , |
126 | "// Meow\n" , "// Meow\r\n" , "//! Meow\r" , |
127 | }; |
128 | |
129 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
130 | std::vector<Token> Toks; |
131 | |
132 | lexString(Sources[i], Toks); |
133 | |
134 | ASSERT_EQ(2U, Toks.size()); |
135 | |
136 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
137 | ASSERT_EQ(StringRef(" Meow" ), Toks[0].getText()); |
138 | |
139 | ASSERT_EQ(tok::newline, Toks[1].getKind()); |
140 | } |
141 | } |
142 | |
143 | // Single comment with plain text. |
144 | TEST_F(CommentLexerTest, Basic5) { |
145 | const char *Sources[] = { |
146 | "/* Meow*/" , "/** Meow*/" , "/*! Meow*/" |
147 | }; |
148 | |
149 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
150 | std::vector<Token> Toks; |
151 | |
152 | lexString(Sources[i], Toks); |
153 | |
154 | ASSERT_EQ(3U, Toks.size()); |
155 | |
156 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
157 | ASSERT_EQ(StringRef(" Meow" ), Toks[0].getText()); |
158 | |
159 | ASSERT_EQ(tok::newline, Toks[1].getKind()); |
160 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
161 | } |
162 | } |
163 | |
164 | // Test newline escaping. |
165 | TEST_F(CommentLexerTest, Basic6) { |
166 | const char *Sources[] = { |
167 | "// Aaa\\\n" " Bbb\\ \n" " Ccc?" "?/\n" , |
168 | "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n" , |
169 | "// Aaa\\\r" " Bbb\\ \r" " Ccc?" "?/\r" |
170 | }; |
171 | |
172 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
173 | std::vector<Token> Toks; |
174 | |
175 | lexString(Sources[i], Toks); |
176 | |
177 | ASSERT_EQ(10U, Toks.size()); |
178 | |
179 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
180 | ASSERT_EQ(StringRef(" Aaa" ), Toks[0].getText()); |
181 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
182 | ASSERT_EQ(StringRef("\\" ), Toks[1].getText()); |
183 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
184 | |
185 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
186 | ASSERT_EQ(StringRef(" Bbb" ), Toks[3].getText()); |
187 | ASSERT_EQ(tok::text, Toks[4].getKind()); |
188 | ASSERT_EQ(StringRef("\\" ), Toks[4].getText()); |
189 | ASSERT_EQ(tok::text, Toks[5].getKind()); |
190 | ASSERT_EQ(StringRef(" " ), Toks[5].getText()); |
191 | ASSERT_EQ(tok::newline, Toks[6].getKind()); |
192 | |
193 | ASSERT_EQ(tok::text, Toks[7].getKind()); |
194 | ASSERT_EQ(StringRef(" Ccc?" "?/" ), Toks[7].getText()); |
195 | ASSERT_EQ(tok::newline, Toks[8].getKind()); |
196 | |
197 | ASSERT_EQ(tok::newline, Toks[9].getKind()); |
198 | } |
199 | } |
200 | |
201 | // Check that we skip C-style aligned stars correctly. |
202 | TEST_F(CommentLexerTest, Basic7) { |
203 | const char *Source = |
204 | "/* Aaa\n" |
205 | " * Bbb\r\n" |
206 | "\t* Ccc\n" |
207 | " ! Ddd\n" |
208 | " * Eee\n" |
209 | " ** Fff\n" |
210 | " */" ; |
211 | std::vector<Token> Toks; |
212 | |
213 | lexString(Source, Toks); |
214 | |
215 | ASSERT_EQ(15U, Toks.size()); |
216 | |
217 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
218 | ASSERT_EQ(StringRef(" Aaa" ), Toks[0].getText()); |
219 | ASSERT_EQ(tok::newline, Toks[1].getKind()); |
220 | |
221 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
222 | ASSERT_EQ(StringRef(" Bbb" ), Toks[2].getText()); |
223 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
224 | |
225 | ASSERT_EQ(tok::text, Toks[4].getKind()); |
226 | ASSERT_EQ(StringRef(" Ccc" ), Toks[4].getText()); |
227 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
228 | |
229 | ASSERT_EQ(tok::text, Toks[6].getKind()); |
230 | ASSERT_EQ(StringRef(" ! Ddd" ), Toks[6].getText()); |
231 | ASSERT_EQ(tok::newline, Toks[7].getKind()); |
232 | |
233 | ASSERT_EQ(tok::text, Toks[8].getKind()); |
234 | ASSERT_EQ(StringRef(" Eee" ), Toks[8].getText()); |
235 | ASSERT_EQ(tok::newline, Toks[9].getKind()); |
236 | |
237 | ASSERT_EQ(tok::text, Toks[10].getKind()); |
238 | ASSERT_EQ(StringRef("* Fff" ), Toks[10].getText()); |
239 | ASSERT_EQ(tok::newline, Toks[11].getKind()); |
240 | |
241 | ASSERT_EQ(tok::text, Toks[12].getKind()); |
242 | ASSERT_EQ(StringRef(" " ), Toks[12].getText()); |
243 | |
244 | ASSERT_EQ(tok::newline, Toks[13].getKind()); |
245 | ASSERT_EQ(tok::newline, Toks[14].getKind()); |
246 | } |
247 | |
248 | // A command marker followed by comment end. |
249 | TEST_F(CommentLexerTest, DoxygenCommand1) { |
250 | const char *Sources[] = { "//@" , "///@" , "//!@" }; |
251 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
252 | std::vector<Token> Toks; |
253 | |
254 | lexString(Sources[i], Toks); |
255 | |
256 | ASSERT_EQ(2U, Toks.size()); |
257 | |
258 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
259 | ASSERT_EQ(StringRef("@" ), Toks[0].getText()); |
260 | |
261 | ASSERT_EQ(tok::newline, Toks[1].getKind()); |
262 | } |
263 | } |
264 | |
265 | // A command marker followed by comment end. |
266 | TEST_F(CommentLexerTest, DoxygenCommand2) { |
267 | const char *Sources[] = { "/*@*/" , "/**@*/" , "/*!@*/" }; |
268 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
269 | std::vector<Token> Toks; |
270 | |
271 | lexString(Sources[i], Toks); |
272 | |
273 | ASSERT_EQ(3U, Toks.size()); |
274 | |
275 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
276 | ASSERT_EQ(StringRef("@" ), Toks[0].getText()); |
277 | |
278 | ASSERT_EQ(tok::newline, Toks[1].getKind()); |
279 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
280 | } |
281 | } |
282 | |
283 | // A command marker followed by comment end. |
284 | TEST_F(CommentLexerTest, DoxygenCommand3) { |
285 | const char *Sources[] = { "/*\\*/" , "/**\\*/" }; |
286 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
287 | std::vector<Token> Toks; |
288 | |
289 | lexString(Sources[i], Toks); |
290 | |
291 | ASSERT_EQ(3U, Toks.size()); |
292 | |
293 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
294 | ASSERT_EQ(StringRef("\\" ), Toks[0].getText()); |
295 | |
296 | ASSERT_EQ(tok::newline, Toks[1].getKind()); |
297 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
298 | } |
299 | } |
300 | |
301 | // Doxygen escape sequences. |
302 | TEST_F(CommentLexerTest, DoxygenCommand4) { |
303 | const char *Sources[] = { |
304 | "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::" , |
305 | "/// @\\ @@ @& @$ @# @< @> @% @\" @. @::" |
306 | }; |
307 | const char *Text[] = { |
308 | " " , |
309 | "\\" , " " , "@" , " " , "&" , " " , "$" , " " , "#" , " " , |
310 | "<" , " " , ">" , " " , "%" , " " , "\"" , " " , "." , " " , |
311 | "::" , "" |
312 | }; |
313 | |
314 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
315 | std::vector<Token> Toks; |
316 | |
317 | lexString(Sources[i], Toks); |
318 | |
319 | ASSERT_EQ(std::size(Text), Toks.size()); |
320 | |
321 | for (size_t j = 0, e = Toks.size(); j != e; j++) { |
322 | if(Toks[j].is(K: tok::text)) { |
323 | ASSERT_EQ(StringRef(Text[j]), Toks[j].getText()) |
324 | << "index " << i; |
325 | } |
326 | } |
327 | } |
328 | } |
329 | |
330 | // A command marker followed by a non-letter that is not a part of an escape |
331 | // sequence. |
332 | TEST_F(CommentLexerTest, DoxygenCommand5) { |
333 | const char *Source = "/// \\^ \\0" ; |
334 | std::vector<Token> Toks; |
335 | |
336 | lexString(Source, Toks); |
337 | |
338 | ASSERT_EQ(6U, Toks.size()); |
339 | |
340 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
341 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
342 | |
343 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
344 | ASSERT_EQ(StringRef("\\" ), Toks[1].getText()); |
345 | |
346 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
347 | ASSERT_EQ(StringRef("^ " ), Toks[2].getText()); |
348 | |
349 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
350 | ASSERT_EQ(StringRef("\\" ), Toks[3].getText()); |
351 | |
352 | ASSERT_EQ(tok::text, Toks[4].getKind()); |
353 | ASSERT_EQ(StringRef("0" ), Toks[4].getText()); |
354 | |
355 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
356 | } |
357 | |
358 | TEST_F(CommentLexerTest, DoxygenCommand6) { |
359 | const char *Source = "/// \\brief Aaa." ; |
360 | std::vector<Token> Toks; |
361 | |
362 | lexString(Source, Toks); |
363 | |
364 | ASSERT_EQ(4U, Toks.size()); |
365 | |
366 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
367 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
368 | |
369 | ASSERT_EQ(tok::backslash_command, Toks[1].getKind()); |
370 | ASSERT_EQ(StringRef("brief" ), getCommandName(Toks[1])); |
371 | |
372 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
373 | ASSERT_EQ(StringRef(" Aaa." ), Toks[2].getText()); |
374 | |
375 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
376 | } |
377 | |
378 | TEST_F(CommentLexerTest, DoxygenCommand7) { |
379 | const char *Source = "/// \\em\\em \\em\t\\em\n" ; |
380 | std::vector<Token> Toks; |
381 | |
382 | lexString(Source, Toks); |
383 | |
384 | ASSERT_EQ(8U, Toks.size()); |
385 | |
386 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
387 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
388 | |
389 | ASSERT_EQ(tok::backslash_command, Toks[1].getKind()); |
390 | ASSERT_EQ(StringRef("em" ), getCommandName(Toks[1])); |
391 | |
392 | ASSERT_EQ(tok::backslash_command, Toks[2].getKind()); |
393 | ASSERT_EQ(StringRef("em" ), getCommandName(Toks[2])); |
394 | |
395 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
396 | ASSERT_EQ(StringRef(" " ), Toks[3].getText()); |
397 | |
398 | ASSERT_EQ(tok::backslash_command, Toks[4].getKind()); |
399 | ASSERT_EQ(StringRef("em" ), getCommandName(Toks[4])); |
400 | |
401 | ASSERT_EQ(tok::text, Toks[5].getKind()); |
402 | ASSERT_EQ(StringRef("\t" ), Toks[5].getText()); |
403 | |
404 | ASSERT_EQ(tok::backslash_command, Toks[6].getKind()); |
405 | ASSERT_EQ(StringRef("em" ), getCommandName(Toks[6])); |
406 | |
407 | ASSERT_EQ(tok::newline, Toks[7].getKind()); |
408 | } |
409 | |
410 | TEST_F(CommentLexerTest, DoxygenCommand8) { |
411 | const char *Source = "/// @em@em @em\t@em\n" ; |
412 | std::vector<Token> Toks; |
413 | |
414 | lexString(Source, Toks); |
415 | |
416 | ASSERT_EQ(8U, Toks.size()); |
417 | |
418 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
419 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
420 | |
421 | ASSERT_EQ(tok::at_command, Toks[1].getKind()); |
422 | ASSERT_EQ(StringRef("em" ), getCommandName(Toks[1])); |
423 | |
424 | ASSERT_EQ(tok::at_command, Toks[2].getKind()); |
425 | ASSERT_EQ(StringRef("em" ), getCommandName(Toks[2])); |
426 | |
427 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
428 | ASSERT_EQ(StringRef(" " ), Toks[3].getText()); |
429 | |
430 | ASSERT_EQ(tok::at_command, Toks[4].getKind()); |
431 | ASSERT_EQ(StringRef("em" ), getCommandName(Toks[4])); |
432 | |
433 | ASSERT_EQ(tok::text, Toks[5].getKind()); |
434 | ASSERT_EQ(StringRef("\t" ), Toks[5].getText()); |
435 | |
436 | ASSERT_EQ(tok::at_command, Toks[6].getKind()); |
437 | ASSERT_EQ(StringRef("em" ), getCommandName(Toks[6])); |
438 | |
439 | ASSERT_EQ(tok::newline, Toks[7].getKind()); |
440 | } |
441 | |
442 | TEST_F(CommentLexerTest, DoxygenCommand9) { |
443 | const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n" ; |
444 | std::vector<Token> Toks; |
445 | |
446 | lexString(Source, Toks); |
447 | |
448 | ASSERT_EQ(8U, Toks.size()); |
449 | |
450 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
451 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
452 | |
453 | ASSERT_EQ(tok::unknown_command, Toks[1].getKind()); |
454 | ASSERT_EQ(StringRef("aaa" ), Toks[1].getUnknownCommandName()); |
455 | |
456 | ASSERT_EQ(tok::unknown_command, Toks[2].getKind()); |
457 | ASSERT_EQ(StringRef("bbb" ), Toks[2].getUnknownCommandName()); |
458 | |
459 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
460 | ASSERT_EQ(StringRef(" " ), Toks[3].getText()); |
461 | |
462 | ASSERT_EQ(tok::unknown_command, Toks[4].getKind()); |
463 | ASSERT_EQ(StringRef("ccc" ), Toks[4].getUnknownCommandName()); |
464 | |
465 | ASSERT_EQ(tok::text, Toks[5].getKind()); |
466 | ASSERT_EQ(StringRef("\t" ), Toks[5].getText()); |
467 | |
468 | ASSERT_EQ(tok::unknown_command, Toks[6].getKind()); |
469 | ASSERT_EQ(StringRef("ddd" ), Toks[6].getUnknownCommandName()); |
470 | |
471 | ASSERT_EQ(tok::newline, Toks[7].getKind()); |
472 | } |
473 | |
474 | TEST_F(CommentLexerTest, DoxygenCommand10) { |
475 | const char *Source = "// \\c\n" ; |
476 | std::vector<Token> Toks; |
477 | |
478 | lexString(Source, Toks); |
479 | |
480 | ASSERT_EQ(3U, Toks.size()); |
481 | |
482 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
483 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
484 | |
485 | ASSERT_EQ(tok::backslash_command, Toks[1].getKind()); |
486 | ASSERT_EQ(StringRef("c" ), getCommandName(Toks[1])); |
487 | |
488 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
489 | } |
490 | |
491 | TEST_F(CommentLexerTest, RegisterCustomBlockCommand) { |
492 | const char *Source = |
493 | "/// \\NewBlockCommand Aaa.\n" |
494 | "/// @NewBlockCommand Aaa.\n" ; |
495 | |
496 | Traits.registerBlockCommand(StringRef("NewBlockCommand" )); |
497 | |
498 | std::vector<Token> Toks; |
499 | |
500 | lexString(Source, Toks); |
501 | |
502 | ASSERT_EQ(8U, Toks.size()); |
503 | |
504 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
505 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
506 | |
507 | ASSERT_EQ(tok::backslash_command, Toks[1].getKind()); |
508 | ASSERT_EQ(StringRef("NewBlockCommand" ), getCommandName(Toks[1])); |
509 | |
510 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
511 | ASSERT_EQ(StringRef(" Aaa." ), Toks[2].getText()); |
512 | |
513 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
514 | |
515 | ASSERT_EQ(tok::text, Toks[4].getKind()); |
516 | ASSERT_EQ(StringRef(" " ), Toks[4].getText()); |
517 | |
518 | ASSERT_EQ(tok::at_command, Toks[5].getKind()); |
519 | ASSERT_EQ(StringRef("NewBlockCommand" ), getCommandName(Toks[5])); |
520 | |
521 | ASSERT_EQ(tok::text, Toks[6].getKind()); |
522 | ASSERT_EQ(StringRef(" Aaa." ), Toks[6].getText()); |
523 | |
524 | ASSERT_EQ(tok::newline, Toks[7].getKind()); |
525 | } |
526 | |
527 | TEST_F(CommentLexerTest, RegisterMultipleBlockCommands) { |
528 | const char *Source = |
529 | "/// \\Foo\n" |
530 | "/// \\Bar Baz\n" |
531 | "/// \\Blech quux=corge\n" ; |
532 | |
533 | Traits.registerBlockCommand(StringRef("Foo" )); |
534 | Traits.registerBlockCommand(StringRef("Bar" )); |
535 | Traits.registerBlockCommand(StringRef("Blech" )); |
536 | |
537 | std::vector<Token> Toks; |
538 | |
539 | lexString(Source, Toks); |
540 | |
541 | ASSERT_EQ(11U, Toks.size()); |
542 | |
543 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
544 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
545 | |
546 | ASSERT_EQ(tok::backslash_command, Toks[1].getKind()); |
547 | ASSERT_EQ(StringRef("Foo" ), getCommandName(Toks[1])); |
548 | |
549 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
550 | |
551 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
552 | ASSERT_EQ(StringRef(" " ), Toks[3].getText()); |
553 | |
554 | ASSERT_EQ(tok::backslash_command, Toks[4].getKind()); |
555 | ASSERT_EQ(StringRef("Bar" ), getCommandName(Toks[4])); |
556 | |
557 | ASSERT_EQ(tok::text, Toks[5].getKind()); |
558 | ASSERT_EQ(StringRef(" Baz" ), Toks[5].getText()); |
559 | |
560 | ASSERT_EQ(tok::newline, Toks[6].getKind()); |
561 | |
562 | ASSERT_EQ(tok::text, Toks[7].getKind()); |
563 | ASSERT_EQ(StringRef(" " ), Toks[7].getText()); |
564 | |
565 | ASSERT_EQ(tok::backslash_command, Toks[8].getKind()); |
566 | ASSERT_EQ(StringRef("Blech" ), getCommandName(Toks[8])); |
567 | |
568 | ASSERT_EQ(tok::text, Toks[9].getKind()); |
569 | ASSERT_EQ(StringRef(" quux=corge" ), Toks[9].getText()); |
570 | |
571 | ASSERT_EQ(tok::newline, Toks[10].getKind()); |
572 | } |
573 | |
574 | // Empty verbatim block. |
575 | TEST_F(CommentLexerTest, VerbatimBlock1) { |
576 | const char *Sources[] = { |
577 | "/// \\verbatim\\endverbatim\n//" , |
578 | "/** \\verbatim\\endverbatim*/" |
579 | }; |
580 | |
581 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
582 | std::vector<Token> Toks; |
583 | |
584 | lexString(Sources[i], Toks); |
585 | |
586 | ASSERT_EQ(5U, Toks.size()); |
587 | |
588 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
589 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
590 | |
591 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
592 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[1])); |
593 | |
594 | ASSERT_EQ(tok::verbatim_block_end, Toks[2].getKind()); |
595 | ASSERT_EQ(StringRef("endverbatim" ), getVerbatimBlockName(Toks[2])); |
596 | |
597 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
598 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
599 | } |
600 | } |
601 | |
602 | // Empty verbatim block without an end command. |
603 | TEST_F(CommentLexerTest, VerbatimBlock2) { |
604 | const char *Source = "/// \\verbatim" ; |
605 | |
606 | std::vector<Token> Toks; |
607 | |
608 | lexString(Source, Toks); |
609 | |
610 | ASSERT_EQ(3U, Toks.size()); |
611 | |
612 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
613 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
614 | |
615 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
616 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[1])); |
617 | |
618 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
619 | } |
620 | |
621 | // Empty verbatim block without an end command. |
622 | TEST_F(CommentLexerTest, VerbatimBlock3) { |
623 | const char *Source = "/** \\verbatim*/" ; |
624 | |
625 | std::vector<Token> Toks; |
626 | |
627 | lexString(Source, Toks); |
628 | |
629 | ASSERT_EQ(4U, Toks.size()); |
630 | |
631 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
632 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
633 | |
634 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
635 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[1])); |
636 | |
637 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
638 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
639 | } |
640 | |
641 | // Single-line verbatim block. |
642 | TEST_F(CommentLexerTest, VerbatimBlock4) { |
643 | const char *Sources[] = { |
644 | "/// Meow \\verbatim aaa \\endverbatim\n//" , |
645 | "/** Meow \\verbatim aaa \\endverbatim*/" |
646 | }; |
647 | |
648 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
649 | std::vector<Token> Toks; |
650 | |
651 | lexString(Sources[i], Toks); |
652 | |
653 | ASSERT_EQ(6U, Toks.size()); |
654 | |
655 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
656 | ASSERT_EQ(StringRef(" Meow " ), Toks[0].getText()); |
657 | |
658 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
659 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[1])); |
660 | |
661 | ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); |
662 | ASSERT_EQ(StringRef(" aaa " ), Toks[2].getVerbatimBlockText()); |
663 | |
664 | ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind()); |
665 | ASSERT_EQ(StringRef("endverbatim" ), getVerbatimBlockName(Toks[3])); |
666 | |
667 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
668 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
669 | } |
670 | } |
671 | |
672 | // Single-line verbatim block without an end command. |
673 | TEST_F(CommentLexerTest, VerbatimBlock5) { |
674 | const char *Sources[] = { |
675 | "/// Meow \\verbatim aaa \n//" , |
676 | "/** Meow \\verbatim aaa */" |
677 | }; |
678 | |
679 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
680 | std::vector<Token> Toks; |
681 | |
682 | lexString(Sources[i], Toks); |
683 | |
684 | ASSERT_EQ(5U, Toks.size()); |
685 | |
686 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
687 | ASSERT_EQ(StringRef(" Meow " ), Toks[0].getText()); |
688 | |
689 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
690 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[1])); |
691 | |
692 | ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); |
693 | ASSERT_EQ(StringRef(" aaa " ), Toks[2].getVerbatimBlockText()); |
694 | |
695 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
696 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
697 | } |
698 | } |
699 | |
700 | TEST_F(CommentLexerTest, VerbatimBlock6) { |
701 | const char *Source = |
702 | "// \\verbatim\n" |
703 | "// Aaa\n" |
704 | "//\n" |
705 | "// Bbb\n" |
706 | "// \\endverbatim\n" ; |
707 | |
708 | std::vector<Token> Toks; |
709 | |
710 | lexString(Source, Toks); |
711 | |
712 | ASSERT_EQ(10U, Toks.size()); |
713 | |
714 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
715 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
716 | |
717 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
718 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[1])); |
719 | |
720 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
721 | |
722 | ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); |
723 | ASSERT_EQ(StringRef(" Aaa" ), Toks[3].getVerbatimBlockText()); |
724 | |
725 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
726 | |
727 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
728 | |
729 | ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind()); |
730 | ASSERT_EQ(StringRef(" Bbb" ), Toks[6].getVerbatimBlockText()); |
731 | |
732 | ASSERT_EQ(tok::newline, Toks[7].getKind()); |
733 | |
734 | ASSERT_EQ(tok::verbatim_block_end, Toks[8].getKind()); |
735 | ASSERT_EQ(StringRef("endverbatim" ), getVerbatimBlockName(Toks[8])); |
736 | |
737 | ASSERT_EQ(tok::newline, Toks[9].getKind()); |
738 | } |
739 | |
740 | TEST_F(CommentLexerTest, VerbatimBlock7) { |
741 | const char *Source = |
742 | "/* \\verbatim\n" |
743 | " * Aaa\n" |
744 | " *\n" |
745 | " * Bbb\n" |
746 | " * \\endverbatim\n" |
747 | " */" ; |
748 | |
749 | std::vector<Token> Toks; |
750 | |
751 | lexString(Source, Toks); |
752 | |
753 | ASSERT_EQ(10U, Toks.size()); |
754 | |
755 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
756 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
757 | |
758 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
759 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[1])); |
760 | |
761 | ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); |
762 | ASSERT_EQ(StringRef(" Aaa" ), Toks[2].getVerbatimBlockText()); |
763 | |
764 | ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); |
765 | ASSERT_EQ(StringRef("" ), Toks[3].getVerbatimBlockText()); |
766 | |
767 | ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind()); |
768 | ASSERT_EQ(StringRef(" Bbb" ), Toks[4].getVerbatimBlockText()); |
769 | |
770 | ASSERT_EQ(tok::verbatim_block_end, Toks[5].getKind()); |
771 | ASSERT_EQ(StringRef("endverbatim" ), getVerbatimBlockName(Toks[5])); |
772 | |
773 | ASSERT_EQ(tok::newline, Toks[6].getKind()); |
774 | |
775 | ASSERT_EQ(tok::text, Toks[7].getKind()); |
776 | ASSERT_EQ(StringRef(" " ), Toks[7].getText()); |
777 | |
778 | ASSERT_EQ(tok::newline, Toks[8].getKind()); |
779 | ASSERT_EQ(tok::newline, Toks[9].getKind()); |
780 | } |
781 | |
782 | // Complex test for verbatim blocks. |
783 | TEST_F(CommentLexerTest, VerbatimBlock8) { |
784 | const char *Source = |
785 | "/* Meow \\verbatim aaa\\$\\@\n" |
786 | "bbb \\endverbati\r" |
787 | "ccc\r\n" |
788 | "ddd \\endverbatim Blah \\verbatim eee\n" |
789 | "\\endverbatim BlahBlah*/" ; |
790 | std::vector<Token> Toks; |
791 | |
792 | lexString(Source, Toks); |
793 | |
794 | ASSERT_EQ(14U, Toks.size()); |
795 | |
796 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
797 | ASSERT_EQ(StringRef(" Meow " ), Toks[0].getText()); |
798 | |
799 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
800 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[1])); |
801 | |
802 | ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); |
803 | ASSERT_EQ(StringRef(" aaa\\$\\@" ), Toks[2].getVerbatimBlockText()); |
804 | |
805 | ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); |
806 | ASSERT_EQ(StringRef("bbb \\endverbati" ), Toks[3].getVerbatimBlockText()); |
807 | |
808 | ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind()); |
809 | ASSERT_EQ(StringRef("ccc" ), Toks[4].getVerbatimBlockText()); |
810 | |
811 | ASSERT_EQ(tok::verbatim_block_line, Toks[5].getKind()); |
812 | ASSERT_EQ(StringRef("ddd " ), Toks[5].getVerbatimBlockText()); |
813 | |
814 | ASSERT_EQ(tok::verbatim_block_end, Toks[6].getKind()); |
815 | ASSERT_EQ(StringRef("endverbatim" ), getVerbatimBlockName(Toks[6])); |
816 | |
817 | ASSERT_EQ(tok::text, Toks[7].getKind()); |
818 | ASSERT_EQ(StringRef(" Blah " ), Toks[7].getText()); |
819 | |
820 | ASSERT_EQ(tok::verbatim_block_begin, Toks[8].getKind()); |
821 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[8])); |
822 | |
823 | ASSERT_EQ(tok::verbatim_block_line, Toks[9].getKind()); |
824 | ASSERT_EQ(StringRef(" eee" ), Toks[9].getVerbatimBlockText()); |
825 | |
826 | ASSERT_EQ(tok::verbatim_block_end, Toks[10].getKind()); |
827 | ASSERT_EQ(StringRef("endverbatim" ), getVerbatimBlockName(Toks[10])); |
828 | |
829 | ASSERT_EQ(tok::text, Toks[11].getKind()); |
830 | ASSERT_EQ(StringRef(" BlahBlah" ), Toks[11].getText()); |
831 | |
832 | ASSERT_EQ(tok::newline, Toks[12].getKind()); |
833 | ASSERT_EQ(tok::newline, Toks[13].getKind()); |
834 | } |
835 | |
836 | // LaTeX verbatim blocks. |
837 | TEST_F(CommentLexerTest, VerbatimBlock9) { |
838 | const char *Source = |
839 | "/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f} \\f( Ddd \\f)" ; |
840 | std::vector<Token> Toks; |
841 | |
842 | lexString(Source, Toks); |
843 | |
844 | ASSERT_EQ(17U, Toks.size()); |
845 | |
846 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
847 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
848 | |
849 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
850 | ASSERT_EQ(StringRef("f$" ), getVerbatimBlockName(Toks[1])); |
851 | |
852 | ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); |
853 | ASSERT_EQ(StringRef(" Aaa " ), Toks[2].getVerbatimBlockText()); |
854 | |
855 | ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind()); |
856 | ASSERT_EQ(StringRef("f$" ), getVerbatimBlockName(Toks[3])); |
857 | |
858 | ASSERT_EQ(tok::text, Toks[4].getKind()); |
859 | ASSERT_EQ(StringRef(" " ), Toks[4].getText()); |
860 | |
861 | ASSERT_EQ(tok::verbatim_block_begin, Toks[5].getKind()); |
862 | ASSERT_EQ(StringRef("f[" ), getVerbatimBlockName(Toks[5])); |
863 | |
864 | ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind()); |
865 | ASSERT_EQ(StringRef(" Bbb " ), Toks[6].getVerbatimBlockText()); |
866 | |
867 | ASSERT_EQ(tok::verbatim_block_end, Toks[7].getKind()); |
868 | ASSERT_EQ(StringRef("f]" ), getVerbatimBlockName(Toks[7])); |
869 | |
870 | ASSERT_EQ(tok::text, Toks[8].getKind()); |
871 | ASSERT_EQ(StringRef(" " ), Toks[8].getText()); |
872 | |
873 | ASSERT_EQ(tok::verbatim_block_begin, Toks[9].getKind()); |
874 | ASSERT_EQ(StringRef("f{" ), getVerbatimBlockName(Toks[9])); |
875 | |
876 | ASSERT_EQ(tok::verbatim_block_line, Toks[10].getKind()); |
877 | ASSERT_EQ(StringRef(" Ccc " ), Toks[10].getVerbatimBlockText()); |
878 | |
879 | ASSERT_EQ(tok::verbatim_block_end, Toks[11].getKind()); |
880 | ASSERT_EQ(StringRef("f}" ), getVerbatimBlockName(Toks[11])); |
881 | |
882 | ASSERT_EQ(tok::text, Toks[12].getKind()); |
883 | ASSERT_EQ(StringRef(" " ), Toks[12].getText()); |
884 | |
885 | ASSERT_EQ(tok::verbatim_block_begin, Toks[13].getKind()); |
886 | ASSERT_EQ(StringRef("f(" ), getVerbatimBlockName(Toks[13])); |
887 | |
888 | ASSERT_EQ(tok::verbatim_block_line, Toks[14].getKind()); |
889 | ASSERT_EQ(StringRef(" Ddd " ), Toks[14].getVerbatimBlockText()); |
890 | |
891 | ASSERT_EQ(tok::verbatim_block_end, Toks[15].getKind()); |
892 | ASSERT_EQ(StringRef("f)" ), getVerbatimBlockName(Toks[15])); |
893 | |
894 | ASSERT_EQ(tok::newline, Toks[16].getKind()); |
895 | } |
896 | |
897 | // Empty verbatim line. |
898 | TEST_F(CommentLexerTest, VerbatimLine1) { |
899 | const char *Sources[] = { |
900 | "/// \\fn\n//" , |
901 | "/** \\fn*/" |
902 | }; |
903 | |
904 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
905 | std::vector<Token> Toks; |
906 | |
907 | lexString(Sources[i], Toks); |
908 | |
909 | ASSERT_EQ(4U, Toks.size()); |
910 | |
911 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
912 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
913 | |
914 | ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind()); |
915 | ASSERT_EQ(StringRef("fn" ), getVerbatimLineName(Toks[1])); |
916 | |
917 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
918 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
919 | } |
920 | } |
921 | |
922 | // Verbatim line with Doxygen escape sequences, which should not be expanded. |
923 | TEST_F(CommentLexerTest, VerbatimLine2) { |
924 | const char *Sources[] = { |
925 | "/// \\fn void *foo(const char *zzz = \"\\$\");\n//" , |
926 | "/** \\fn void *foo(const char *zzz = \"\\$\");*/" |
927 | }; |
928 | |
929 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
930 | std::vector<Token> Toks; |
931 | |
932 | lexString(Sources[i], Toks); |
933 | |
934 | ASSERT_EQ(5U, Toks.size()); |
935 | |
936 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
937 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
938 | |
939 | ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind()); |
940 | ASSERT_EQ(StringRef("fn" ), getVerbatimLineName(Toks[1])); |
941 | |
942 | ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind()); |
943 | ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");" ), |
944 | Toks[2].getVerbatimLineText()); |
945 | |
946 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
947 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
948 | } |
949 | } |
950 | |
951 | // Verbatim line should not eat anything from next source line. |
952 | TEST_F(CommentLexerTest, VerbatimLine3) { |
953 | const char *Source = |
954 | "/** \\fn void *foo(const char *zzz = \"\\$\");\n" |
955 | " * Meow\n" |
956 | " */" ; |
957 | |
958 | std::vector<Token> Toks; |
959 | |
960 | lexString(Source, Toks); |
961 | |
962 | ASSERT_EQ(9U, Toks.size()); |
963 | |
964 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
965 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
966 | |
967 | ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind()); |
968 | ASSERT_EQ(StringRef("fn" ), getVerbatimLineName(Toks[1])); |
969 | |
970 | ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind()); |
971 | ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");" ), |
972 | Toks[2].getVerbatimLineText()); |
973 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
974 | |
975 | ASSERT_EQ(tok::text, Toks[4].getKind()); |
976 | ASSERT_EQ(StringRef(" Meow" ), Toks[4].getText()); |
977 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
978 | |
979 | ASSERT_EQ(tok::text, Toks[6].getKind()); |
980 | ASSERT_EQ(StringRef(" " ), Toks[6].getText()); |
981 | |
982 | ASSERT_EQ(tok::newline, Toks[7].getKind()); |
983 | ASSERT_EQ(tok::newline, Toks[8].getKind()); |
984 | } |
985 | |
986 | TEST_F(CommentLexerTest, HTML1) { |
987 | const char *Source = |
988 | "// <" ; |
989 | |
990 | std::vector<Token> Toks; |
991 | |
992 | lexString(Source, Toks); |
993 | |
994 | ASSERT_EQ(3U, Toks.size()); |
995 | |
996 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
997 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
998 | |
999 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1000 | ASSERT_EQ(StringRef("<" ), Toks[1].getText()); |
1001 | |
1002 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1003 | } |
1004 | |
1005 | TEST_F(CommentLexerTest, HTML2) { |
1006 | const char *Source = |
1007 | "// a<2" ; |
1008 | |
1009 | std::vector<Token> Toks; |
1010 | |
1011 | lexString(Source, Toks); |
1012 | |
1013 | ASSERT_EQ(4U, Toks.size()); |
1014 | |
1015 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1016 | ASSERT_EQ(StringRef(" a" ), Toks[0].getText()); |
1017 | |
1018 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1019 | ASSERT_EQ(StringRef("<" ), Toks[1].getText()); |
1020 | |
1021 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1022 | ASSERT_EQ(StringRef("2" ), Toks[2].getText()); |
1023 | |
1024 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1025 | } |
1026 | |
1027 | TEST_F(CommentLexerTest, HTML3) { |
1028 | const char *Source = |
1029 | "// < img" ; |
1030 | |
1031 | std::vector<Token> Toks; |
1032 | |
1033 | lexString(Source, Toks); |
1034 | |
1035 | ASSERT_EQ(4U, Toks.size()); |
1036 | |
1037 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1038 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1039 | |
1040 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1041 | ASSERT_EQ(StringRef("<" ), Toks[1].getText()); |
1042 | |
1043 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1044 | ASSERT_EQ(StringRef(" img" ), Toks[2].getText()); |
1045 | |
1046 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1047 | } |
1048 | |
1049 | TEST_F(CommentLexerTest, HTML4) { |
1050 | const char *Sources[] = { |
1051 | "// <img" , |
1052 | "// <img " |
1053 | }; |
1054 | |
1055 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1056 | std::vector<Token> Toks; |
1057 | |
1058 | lexString(Sources[i], Toks); |
1059 | |
1060 | ASSERT_EQ(3U, Toks.size()); |
1061 | |
1062 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1063 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1064 | |
1065 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1066 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1067 | |
1068 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1069 | } |
1070 | } |
1071 | |
1072 | TEST_F(CommentLexerTest, HTML5) { |
1073 | const char *Source = |
1074 | "// <img 42" ; |
1075 | |
1076 | std::vector<Token> Toks; |
1077 | |
1078 | lexString(Source, Toks); |
1079 | |
1080 | ASSERT_EQ(4U, Toks.size()); |
1081 | |
1082 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1083 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1084 | |
1085 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1086 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1087 | |
1088 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1089 | ASSERT_EQ(StringRef("42" ), Toks[2].getText()); |
1090 | |
1091 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1092 | } |
1093 | |
1094 | TEST_F(CommentLexerTest, HTML6) { |
1095 | const char *Source = "// <img> Meow" ; |
1096 | |
1097 | std::vector<Token> Toks; |
1098 | |
1099 | lexString(Source, Toks); |
1100 | |
1101 | ASSERT_EQ(5U, Toks.size()); |
1102 | |
1103 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1104 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1105 | |
1106 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1107 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1108 | |
1109 | ASSERT_EQ(tok::html_greater, Toks[2].getKind()); |
1110 | |
1111 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
1112 | ASSERT_EQ(StringRef(" Meow" ), Toks[3].getText()); |
1113 | |
1114 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
1115 | } |
1116 | |
1117 | TEST_F(CommentLexerTest, HTML7) { |
1118 | const char *Source = "// <img=" ; |
1119 | |
1120 | std::vector<Token> Toks; |
1121 | |
1122 | lexString(Source, Toks); |
1123 | |
1124 | ASSERT_EQ(4U, Toks.size()); |
1125 | |
1126 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1127 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1128 | |
1129 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1130 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1131 | |
1132 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1133 | ASSERT_EQ(StringRef("=" ), Toks[2].getText()); |
1134 | |
1135 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1136 | } |
1137 | |
1138 | TEST_F(CommentLexerTest, HTML8) { |
1139 | const char *Source = "// <img src=> Meow" ; |
1140 | |
1141 | std::vector<Token> Toks; |
1142 | |
1143 | lexString(Source, Toks); |
1144 | |
1145 | ASSERT_EQ(7U, Toks.size()); |
1146 | |
1147 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1148 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1149 | |
1150 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1151 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1152 | |
1153 | ASSERT_EQ(tok::html_ident, Toks[2].getKind()); |
1154 | ASSERT_EQ(StringRef("src" ), Toks[2].getHTMLIdent()); |
1155 | |
1156 | ASSERT_EQ(tok::html_equals, Toks[3].getKind()); |
1157 | |
1158 | ASSERT_EQ(tok::html_greater, Toks[4].getKind()); |
1159 | |
1160 | ASSERT_EQ(tok::text, Toks[5].getKind()); |
1161 | ASSERT_EQ(StringRef(" Meow" ), Toks[5].getText()); |
1162 | |
1163 | ASSERT_EQ(tok::newline, Toks[6].getKind()); |
1164 | } |
1165 | |
1166 | TEST_F(CommentLexerTest, HTML9) { |
1167 | const char *Sources[] = { |
1168 | "// <img src" , |
1169 | "// <img src " |
1170 | }; |
1171 | |
1172 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1173 | std::vector<Token> Toks; |
1174 | |
1175 | lexString(Sources[i], Toks); |
1176 | |
1177 | ASSERT_EQ(4U, Toks.size()); |
1178 | |
1179 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1180 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1181 | |
1182 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1183 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1184 | |
1185 | ASSERT_EQ(tok::html_ident, Toks[2].getKind()); |
1186 | ASSERT_EQ(StringRef("src" ), Toks[2].getHTMLIdent()); |
1187 | |
1188 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1189 | } |
1190 | } |
1191 | |
1192 | TEST_F(CommentLexerTest, HTML10) { |
1193 | const char *Sources[] = { |
1194 | "// <img src=" , |
1195 | "// <img src =" |
1196 | }; |
1197 | |
1198 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1199 | std::vector<Token> Toks; |
1200 | |
1201 | lexString(Sources[i], Toks); |
1202 | |
1203 | ASSERT_EQ(5U, Toks.size()); |
1204 | |
1205 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1206 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1207 | |
1208 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1209 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1210 | |
1211 | ASSERT_EQ(tok::html_ident, Toks[2].getKind()); |
1212 | ASSERT_EQ(StringRef("src" ), Toks[2].getHTMLIdent()); |
1213 | |
1214 | ASSERT_EQ(tok::html_equals, Toks[3].getKind()); |
1215 | |
1216 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
1217 | } |
1218 | } |
1219 | |
1220 | TEST_F(CommentLexerTest, HTML11) { |
1221 | const char *Sources[] = { |
1222 | "// <img src=\"" , |
1223 | "// <img src = \"" , |
1224 | "// <img src=\'" , |
1225 | "// <img src = \'" |
1226 | }; |
1227 | |
1228 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1229 | std::vector<Token> Toks; |
1230 | |
1231 | lexString(Sources[i], Toks); |
1232 | |
1233 | ASSERT_EQ(6U, Toks.size()); |
1234 | |
1235 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1236 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1237 | |
1238 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1239 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1240 | |
1241 | ASSERT_EQ(tok::html_ident, Toks[2].getKind()); |
1242 | ASSERT_EQ(StringRef("src" ), Toks[2].getHTMLIdent()); |
1243 | |
1244 | ASSERT_EQ(tok::html_equals, Toks[3].getKind()); |
1245 | |
1246 | ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind()); |
1247 | ASSERT_EQ(StringRef("" ), Toks[4].getHTMLQuotedString()); |
1248 | |
1249 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
1250 | } |
1251 | } |
1252 | |
1253 | TEST_F(CommentLexerTest, HTML12) { |
1254 | const char *Source = "// <img src=@" ; |
1255 | |
1256 | std::vector<Token> Toks; |
1257 | |
1258 | lexString(Source, Toks); |
1259 | |
1260 | ASSERT_EQ(6U, Toks.size()); |
1261 | |
1262 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1263 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1264 | |
1265 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1266 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1267 | |
1268 | ASSERT_EQ(tok::html_ident, Toks[2].getKind()); |
1269 | ASSERT_EQ(StringRef("src" ), Toks[2].getHTMLIdent()); |
1270 | |
1271 | ASSERT_EQ(tok::html_equals, Toks[3].getKind()); |
1272 | |
1273 | ASSERT_EQ(tok::text, Toks[4].getKind()); |
1274 | ASSERT_EQ(StringRef("@" ), Toks[4].getText()); |
1275 | |
1276 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
1277 | } |
1278 | |
1279 | TEST_F(CommentLexerTest, HTML13) { |
1280 | const char *Sources[] = { |
1281 | "// <img src=\"val\\\"\\'val" , |
1282 | "// <img src=\"val\\\"\\'val\"" , |
1283 | "// <img src=\'val\\\"\\'val" , |
1284 | "// <img src=\'val\\\"\\'val\'" |
1285 | }; |
1286 | |
1287 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1288 | std::vector<Token> Toks; |
1289 | |
1290 | lexString(Sources[i], Toks); |
1291 | |
1292 | ASSERT_EQ(6U, Toks.size()); |
1293 | |
1294 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1295 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1296 | |
1297 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1298 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1299 | |
1300 | ASSERT_EQ(tok::html_ident, Toks[2].getKind()); |
1301 | ASSERT_EQ(StringRef("src" ), Toks[2].getHTMLIdent()); |
1302 | |
1303 | ASSERT_EQ(tok::html_equals, Toks[3].getKind()); |
1304 | |
1305 | ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind()); |
1306 | ASSERT_EQ(StringRef("val\\\"\\'val" ), Toks[4].getHTMLQuotedString()); |
1307 | |
1308 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
1309 | } |
1310 | } |
1311 | |
1312 | TEST_F(CommentLexerTest, HTML14) { |
1313 | const char *Sources[] = { |
1314 | "// <img src=\"val\\\"\\'val\">" , |
1315 | "// <img src=\'val\\\"\\'val\'>" |
1316 | }; |
1317 | |
1318 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1319 | std::vector<Token> Toks; |
1320 | |
1321 | lexString(Sources[i], Toks); |
1322 | |
1323 | ASSERT_EQ(7U, Toks.size()); |
1324 | |
1325 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1326 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1327 | |
1328 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1329 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1330 | |
1331 | ASSERT_EQ(tok::html_ident, Toks[2].getKind()); |
1332 | ASSERT_EQ(StringRef("src" ), Toks[2].getHTMLIdent()); |
1333 | |
1334 | ASSERT_EQ(tok::html_equals, Toks[3].getKind()); |
1335 | |
1336 | ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind()); |
1337 | ASSERT_EQ(StringRef("val\\\"\\'val" ), Toks[4].getHTMLQuotedString()); |
1338 | |
1339 | ASSERT_EQ(tok::html_greater, Toks[5].getKind()); |
1340 | |
1341 | ASSERT_EQ(tok::newline, Toks[6].getKind()); |
1342 | } |
1343 | } |
1344 | |
1345 | TEST_F(CommentLexerTest, HTML15) { |
1346 | const char *Sources[] = { |
1347 | "// <img/>" , |
1348 | "// <img />" |
1349 | }; |
1350 | |
1351 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1352 | std::vector<Token> Toks; |
1353 | |
1354 | lexString(Sources[i], Toks); |
1355 | |
1356 | ASSERT_EQ(4U, Toks.size()); |
1357 | |
1358 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1359 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1360 | |
1361 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1362 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1363 | |
1364 | ASSERT_EQ(tok::html_slash_greater, Toks[2].getKind()); |
1365 | |
1366 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1367 | } |
1368 | } |
1369 | |
1370 | TEST_F(CommentLexerTest, HTML16) { |
1371 | const char *Sources[] = { |
1372 | "// <img/ Aaa" , |
1373 | "// <img / Aaa" |
1374 | }; |
1375 | |
1376 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1377 | std::vector<Token> Toks; |
1378 | |
1379 | lexString(Sources[i], Toks); |
1380 | |
1381 | ASSERT_EQ(5U, Toks.size()); |
1382 | |
1383 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1384 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1385 | |
1386 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1387 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1388 | |
1389 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1390 | ASSERT_EQ(StringRef("/" ), Toks[2].getText()); |
1391 | |
1392 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
1393 | ASSERT_EQ(StringRef(" Aaa" ), Toks[3].getText()); |
1394 | |
1395 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
1396 | } |
1397 | } |
1398 | |
1399 | TEST_F(CommentLexerTest, HTML17) { |
1400 | const char *Source = "// </" ; |
1401 | |
1402 | std::vector<Token> Toks; |
1403 | |
1404 | lexString(Source, Toks); |
1405 | |
1406 | ASSERT_EQ(3U, Toks.size()); |
1407 | |
1408 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1409 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1410 | |
1411 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1412 | ASSERT_EQ(StringRef("</" ), Toks[1].getText()); |
1413 | |
1414 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1415 | } |
1416 | |
1417 | TEST_F(CommentLexerTest, HTML18) { |
1418 | const char *Source = "// </@" ; |
1419 | |
1420 | std::vector<Token> Toks; |
1421 | |
1422 | lexString(Source, Toks); |
1423 | |
1424 | ASSERT_EQ(4U, Toks.size()); |
1425 | |
1426 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1427 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1428 | |
1429 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1430 | ASSERT_EQ(StringRef("</" ), Toks[1].getText()); |
1431 | |
1432 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1433 | ASSERT_EQ(StringRef("@" ), Toks[2].getText()); |
1434 | |
1435 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1436 | } |
1437 | |
1438 | TEST_F(CommentLexerTest, HTML19) { |
1439 | const char *Source = "// </img" ; |
1440 | |
1441 | std::vector<Token> Toks; |
1442 | |
1443 | lexString(Source, Toks); |
1444 | |
1445 | ASSERT_EQ(3U, Toks.size()); |
1446 | |
1447 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1448 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1449 | |
1450 | ASSERT_EQ(tok::html_end_tag, Toks[1].getKind()); |
1451 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagEndName()); |
1452 | |
1453 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1454 | } |
1455 | |
1456 | TEST_F(CommentLexerTest, NotAKnownHTMLTag1) { |
1457 | const char *Source = "// <tag>" ; |
1458 | |
1459 | std::vector<Token> Toks; |
1460 | |
1461 | lexString(Source, Toks); |
1462 | |
1463 | ASSERT_EQ(4U, Toks.size()); |
1464 | |
1465 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1466 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1467 | |
1468 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1469 | ASSERT_EQ(StringRef("<tag" ), Toks[1].getText()); |
1470 | |
1471 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1472 | ASSERT_EQ(StringRef(">" ), Toks[2].getText()); |
1473 | |
1474 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1475 | } |
1476 | |
1477 | TEST_F(CommentLexerTest, NotAKnownHTMLTag2) { |
1478 | const char *Source = "// </tag>" ; |
1479 | |
1480 | std::vector<Token> Toks; |
1481 | |
1482 | lexString(Source, Toks); |
1483 | |
1484 | ASSERT_EQ(4U, Toks.size()); |
1485 | |
1486 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1487 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1488 | |
1489 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1490 | ASSERT_EQ(StringRef("</tag" ), Toks[1].getText()); |
1491 | |
1492 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1493 | ASSERT_EQ(StringRef(">" ), Toks[2].getText()); |
1494 | |
1495 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1496 | } |
1497 | |
1498 | TEST_F(CommentLexerTest, HTMLCharacterReferences1) { |
1499 | const char *Source = "// &" ; |
1500 | |
1501 | std::vector<Token> Toks; |
1502 | |
1503 | lexString(Source, Toks); |
1504 | |
1505 | ASSERT_EQ(3U, Toks.size()); |
1506 | |
1507 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1508 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1509 | |
1510 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1511 | ASSERT_EQ(StringRef("&" ), Toks[1].getText()); |
1512 | |
1513 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1514 | } |
1515 | |
1516 | TEST_F(CommentLexerTest, HTMLCharacterReferences2) { |
1517 | const char *Source = "// &!" ; |
1518 | |
1519 | std::vector<Token> Toks; |
1520 | |
1521 | lexString(Source, Toks); |
1522 | |
1523 | ASSERT_EQ(4U, Toks.size()); |
1524 | |
1525 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1526 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1527 | |
1528 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1529 | ASSERT_EQ(StringRef("&" ), Toks[1].getText()); |
1530 | |
1531 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1532 | ASSERT_EQ(StringRef("!" ), Toks[2].getText()); |
1533 | |
1534 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1535 | } |
1536 | |
1537 | TEST_F(CommentLexerTest, HTMLCharacterReferences3) { |
1538 | const char *Source = "// &" ; |
1539 | |
1540 | std::vector<Token> Toks; |
1541 | |
1542 | lexString(Source, Toks); |
1543 | |
1544 | ASSERT_EQ(3U, Toks.size()); |
1545 | |
1546 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1547 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1548 | |
1549 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1550 | ASSERT_EQ(StringRef("&" ), Toks[1].getText()); |
1551 | |
1552 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1553 | } |
1554 | |
1555 | TEST_F(CommentLexerTest, HTMLCharacterReferences4) { |
1556 | const char *Source = "// &!" ; |
1557 | |
1558 | std::vector<Token> Toks; |
1559 | |
1560 | lexString(Source, Toks); |
1561 | |
1562 | ASSERT_EQ(4U, Toks.size()); |
1563 | |
1564 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1565 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1566 | |
1567 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1568 | ASSERT_EQ(StringRef("&" ), Toks[1].getText()); |
1569 | |
1570 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1571 | ASSERT_EQ(StringRef("!" ), Toks[2].getText()); |
1572 | |
1573 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1574 | } |
1575 | |
1576 | TEST_F(CommentLexerTest, HTMLCharacterReferences5) { |
1577 | const char *Source = "// &#" ; |
1578 | |
1579 | std::vector<Token> Toks; |
1580 | |
1581 | lexString(Source, Toks); |
1582 | |
1583 | ASSERT_EQ(3U, Toks.size()); |
1584 | |
1585 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1586 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1587 | |
1588 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1589 | ASSERT_EQ(StringRef("&#" ), Toks[1].getText()); |
1590 | |
1591 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1592 | } |
1593 | |
1594 | TEST_F(CommentLexerTest, HTMLCharacterReferences6) { |
1595 | const char *Source = "// &#a" ; |
1596 | |
1597 | std::vector<Token> Toks; |
1598 | |
1599 | lexString(Source, Toks); |
1600 | |
1601 | ASSERT_EQ(4U, Toks.size()); |
1602 | |
1603 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1604 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1605 | |
1606 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1607 | ASSERT_EQ(StringRef("&#" ), Toks[1].getText()); |
1608 | |
1609 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1610 | ASSERT_EQ(StringRef("a" ), Toks[2].getText()); |
1611 | |
1612 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1613 | } |
1614 | |
1615 | TEST_F(CommentLexerTest, HTMLCharacterReferences7) { |
1616 | const char *Source = "// *" ; |
1617 | |
1618 | std::vector<Token> Toks; |
1619 | |
1620 | lexString(Source, Toks); |
1621 | |
1622 | ASSERT_EQ(3U, Toks.size()); |
1623 | |
1624 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1625 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1626 | |
1627 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1628 | ASSERT_EQ(StringRef("*" ), Toks[1].getText()); |
1629 | |
1630 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1631 | } |
1632 | |
1633 | TEST_F(CommentLexerTest, HTMLCharacterReferences8) { |
1634 | const char *Source = "// *a" ; |
1635 | |
1636 | std::vector<Token> Toks; |
1637 | |
1638 | lexString(Source, Toks); |
1639 | |
1640 | ASSERT_EQ(4U, Toks.size()); |
1641 | |
1642 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1643 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1644 | |
1645 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1646 | ASSERT_EQ(StringRef("*" ), Toks[1].getText()); |
1647 | |
1648 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1649 | ASSERT_EQ(StringRef("a" ), Toks[2].getText()); |
1650 | |
1651 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1652 | } |
1653 | |
1654 | TEST_F(CommentLexerTest, HTMLCharacterReferences9) { |
1655 | const char *Source = "// &#x" ; |
1656 | |
1657 | std::vector<Token> Toks; |
1658 | |
1659 | lexString(Source, Toks); |
1660 | |
1661 | ASSERT_EQ(3U, Toks.size()); |
1662 | |
1663 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1664 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1665 | |
1666 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1667 | ASSERT_EQ(StringRef("&#x" ), Toks[1].getText()); |
1668 | |
1669 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1670 | } |
1671 | |
1672 | TEST_F(CommentLexerTest, HTMLCharacterReferences10) { |
1673 | const char *Source = "// &#xz" ; |
1674 | |
1675 | std::vector<Token> Toks; |
1676 | |
1677 | lexString(Source, Toks); |
1678 | |
1679 | ASSERT_EQ(4U, Toks.size()); |
1680 | |
1681 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1682 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1683 | |
1684 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1685 | ASSERT_EQ(StringRef("&#x" ), Toks[1].getText()); |
1686 | |
1687 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1688 | ASSERT_EQ(StringRef("z" ), Toks[2].getText()); |
1689 | |
1690 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1691 | } |
1692 | |
1693 | TEST_F(CommentLexerTest, HTMLCharacterReferences11) { |
1694 | const char *Source = "// «" ; |
1695 | |
1696 | std::vector<Token> Toks; |
1697 | |
1698 | lexString(Source, Toks); |
1699 | |
1700 | ASSERT_EQ(3U, Toks.size()); |
1701 | |
1702 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1703 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1704 | |
1705 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1706 | ASSERT_EQ(StringRef("«" ), Toks[1].getText()); |
1707 | |
1708 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1709 | } |
1710 | |
1711 | TEST_F(CommentLexerTest, HTMLCharacterReferences12) { |
1712 | const char *Source = "// «z" ; |
1713 | |
1714 | std::vector<Token> Toks; |
1715 | |
1716 | lexString(Source, Toks); |
1717 | |
1718 | ASSERT_EQ(4U, Toks.size()); |
1719 | |
1720 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1721 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1722 | |
1723 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1724 | ASSERT_EQ(StringRef("«" ), Toks[1].getText()); |
1725 | |
1726 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1727 | ASSERT_EQ(StringRef("z" ), Toks[2].getText()); |
1728 | |
1729 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1730 | } |
1731 | |
1732 | TEST_F(CommentLexerTest, HTMLCharacterReferences13) { |
1733 | const char *Source = "// &" ; |
1734 | |
1735 | std::vector<Token> Toks; |
1736 | |
1737 | lexString(Source, Toks); |
1738 | |
1739 | ASSERT_EQ(3U, Toks.size()); |
1740 | |
1741 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1742 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1743 | |
1744 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1745 | ASSERT_EQ(StringRef("&" ), Toks[1].getText()); |
1746 | |
1747 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1748 | } |
1749 | |
1750 | TEST_F(CommentLexerTest, HTMLCharacterReferences14) { |
1751 | const char *Source = "// &<" ; |
1752 | |
1753 | std::vector<Token> Toks; |
1754 | |
1755 | lexString(Source, Toks); |
1756 | |
1757 | ASSERT_EQ(4U, Toks.size()); |
1758 | |
1759 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1760 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1761 | |
1762 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1763 | ASSERT_EQ(StringRef("&" ), Toks[1].getText()); |
1764 | |
1765 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1766 | ASSERT_EQ(StringRef("<" ), Toks[2].getText()); |
1767 | |
1768 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1769 | } |
1770 | |
1771 | TEST_F(CommentLexerTest, HTMLCharacterReferences15) { |
1772 | const char *Source = "// & meow" ; |
1773 | |
1774 | std::vector<Token> Toks; |
1775 | |
1776 | lexString(Source, Toks); |
1777 | |
1778 | ASSERT_EQ(4U, Toks.size()); |
1779 | |
1780 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1781 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1782 | |
1783 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1784 | ASSERT_EQ(StringRef("&" ), Toks[1].getText()); |
1785 | |
1786 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1787 | ASSERT_EQ(StringRef(" meow" ), Toks[2].getText()); |
1788 | |
1789 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1790 | } |
1791 | |
1792 | TEST_F(CommentLexerTest, HTMLCharacterReferences16) { |
1793 | const char *Sources[] = { |
1794 | "// =" , |
1795 | "// =" , |
1796 | "// =" , |
1797 | "// =" |
1798 | }; |
1799 | |
1800 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1801 | std::vector<Token> Toks; |
1802 | |
1803 | lexString(Sources[i], Toks); |
1804 | |
1805 | ASSERT_EQ(3U, Toks.size()); |
1806 | |
1807 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1808 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1809 | |
1810 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1811 | ASSERT_EQ(StringRef("=" ), Toks[1].getText()); |
1812 | |
1813 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1814 | } |
1815 | } |
1816 | |
1817 | TEST_F(CommentLexerTest, MultipleComments) { |
1818 | const char *Source = |
1819 | "// Aaa\n" |
1820 | "/// Bbb\n" |
1821 | "/* Ccc\n" |
1822 | " * Ddd*/\n" |
1823 | "/** Eee*/" ; |
1824 | |
1825 | std::vector<Token> Toks; |
1826 | |
1827 | lexString(Source, Toks); |
1828 | |
1829 | ASSERT_EQ(12U, Toks.size()); |
1830 | |
1831 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1832 | ASSERT_EQ(StringRef(" Aaa" ), Toks[0].getText()); |
1833 | ASSERT_EQ(tok::newline, Toks[1].getKind()); |
1834 | |
1835 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1836 | ASSERT_EQ(StringRef(" Bbb" ), Toks[2].getText()); |
1837 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1838 | |
1839 | ASSERT_EQ(tok::text, Toks[4].getKind()); |
1840 | ASSERT_EQ(StringRef(" Ccc" ), Toks[4].getText()); |
1841 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
1842 | |
1843 | ASSERT_EQ(tok::text, Toks[6].getKind()); |
1844 | ASSERT_EQ(StringRef(" Ddd" ), Toks[6].getText()); |
1845 | ASSERT_EQ(tok::newline, Toks[7].getKind()); |
1846 | ASSERT_EQ(tok::newline, Toks[8].getKind()); |
1847 | |
1848 | ASSERT_EQ(tok::text, Toks[9].getKind()); |
1849 | ASSERT_EQ(StringRef(" Eee" ), Toks[9].getText()); |
1850 | |
1851 | ASSERT_EQ(tok::newline, Toks[10].getKind()); |
1852 | ASSERT_EQ(tok::newline, Toks[11].getKind()); |
1853 | } |
1854 | |
1855 | } // end namespace comments |
1856 | } // end namespace clang |
1857 | |
1858 | |