1 | //===- unittests/AST/CommentLexer.cpp ------ Comment lexer tests ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang/AST/CommentLexer.h" |
10 | #include "clang/AST/CommentCommandTraits.h" |
11 | #include "clang/Basic/CommentOptions.h" |
12 | #include "clang/Basic/Diagnostic.h" |
13 | #include "clang/Basic/DiagnosticOptions.h" |
14 | #include "clang/Basic/FileManager.h" |
15 | #include "clang/Basic/SourceManager.h" |
16 | #include "llvm/ADT/STLExtras.h" |
17 | #include "gtest/gtest.h" |
18 | #include <vector> |
19 | |
20 | using namespace llvm; |
21 | using namespace clang; |
22 | |
23 | namespace clang { |
24 | namespace comments { |
25 | |
26 | namespace { |
27 | class : public ::testing::Test { |
28 | protected: |
29 | () |
30 | : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()), |
31 | Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()), |
32 | SourceMgr(Diags, FileMgr), Traits(Allocator, CommentOptions()) {} |
33 | |
34 | FileSystemOptions ; |
35 | FileManager ; |
36 | IntrusiveRefCntPtr<DiagnosticIDs> ; |
37 | DiagnosticOptions ; |
38 | DiagnosticsEngine ; |
39 | SourceManager ; |
40 | llvm::BumpPtrAllocator ; |
41 | CommandTraits ; |
42 | |
43 | void lexString(const char *Source, std::vector<Token> &Toks); |
44 | |
45 | StringRef getCommandName(const Token &Tok) { |
46 | return Traits.getCommandInfo(Tok.getCommandID())->Name; |
47 | } |
48 | |
49 | StringRef (const Token &Tok) { |
50 | return Traits.getCommandInfo(Tok.getVerbatimBlockID())->Name; |
51 | } |
52 | |
53 | StringRef (const Token &Tok) { |
54 | return Traits.getCommandInfo(Tok.getVerbatimLineID())->Name; |
55 | } |
56 | }; |
57 | |
58 | void CommentLexerTest::(const char *Source, |
59 | std::vector<Token> &Toks) { |
60 | std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(InputData: Source); |
61 | FileID File = SourceMgr.createFileID(Buffer: std::move(Buf)); |
62 | SourceLocation Begin = SourceMgr.getLocForStartOfFile(FID: File); |
63 | |
64 | Lexer L(Allocator, Diags, Traits, Begin, Source, Source + strlen(Source)); |
65 | |
66 | while (1) { |
67 | Token Tok; |
68 | L.lex(T&: Tok); |
69 | if (Tok.is(K: tok::eof)) |
70 | break; |
71 | Toks.push_back(x: Tok); |
72 | } |
73 | } |
74 | |
75 | } // unnamed namespace |
76 | |
77 | // Empty source range should be handled. |
78 | TEST_F(CommentLexerTest, Basic1) { |
79 | const char *Source = "" ; |
80 | std::vector<Token> Toks; |
81 | |
82 | lexString(Source, Toks); |
83 | |
84 | ASSERT_EQ(0U, Toks.size()); |
85 | } |
86 | |
87 | // Empty comments should be handled. |
88 | TEST_F(CommentLexerTest, Basic2) { |
89 | const char *Sources[] = { |
90 | "//" , "///" , "//!" , "///<" , "//!<" |
91 | }; |
92 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
93 | std::vector<Token> Toks; |
94 | |
95 | lexString(Sources[i], Toks); |
96 | |
97 | ASSERT_EQ(1U, Toks.size()); |
98 | |
99 | ASSERT_EQ(tok::newline, Toks[0].getKind()); |
100 | } |
101 | } |
102 | |
103 | // Empty comments should be handled. |
104 | TEST_F(CommentLexerTest, Basic3) { |
105 | const char *Sources[] = { |
106 | "/**/" , "/***/" , "/*!*/" , "/**<*/" , "/*!<*/" |
107 | }; |
108 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
109 | std::vector<Token> Toks; |
110 | |
111 | lexString(Sources[i], Toks); |
112 | |
113 | ASSERT_EQ(2U, Toks.size()); |
114 | |
115 | ASSERT_EQ(tok::newline, Toks[0].getKind()); |
116 | ASSERT_EQ(tok::newline, Toks[1].getKind()); |
117 | } |
118 | } |
119 | |
120 | // Single comment with plain text. |
121 | TEST_F(CommentLexerTest, Basic4) { |
122 | const char *Sources[] = { |
123 | "// Meow" , "/// Meow" , "//! Meow" , |
124 | "// Meow\n" , "// Meow\r\n" , "//! Meow\r" , |
125 | }; |
126 | |
127 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
128 | std::vector<Token> Toks; |
129 | |
130 | lexString(Sources[i], Toks); |
131 | |
132 | ASSERT_EQ(2U, Toks.size()); |
133 | |
134 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
135 | ASSERT_EQ(StringRef(" Meow" ), Toks[0].getText()); |
136 | |
137 | ASSERT_EQ(tok::newline, Toks[1].getKind()); |
138 | } |
139 | } |
140 | |
141 | // Single comment with plain text. |
142 | TEST_F(CommentLexerTest, Basic5) { |
143 | const char *Sources[] = { |
144 | "/* Meow*/" , "/** Meow*/" , "/*! Meow*/" |
145 | }; |
146 | |
147 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
148 | std::vector<Token> Toks; |
149 | |
150 | lexString(Sources[i], Toks); |
151 | |
152 | ASSERT_EQ(3U, Toks.size()); |
153 | |
154 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
155 | ASSERT_EQ(StringRef(" Meow" ), Toks[0].getText()); |
156 | |
157 | ASSERT_EQ(tok::newline, Toks[1].getKind()); |
158 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
159 | } |
160 | } |
161 | |
162 | // Test newline escaping. |
163 | TEST_F(CommentLexerTest, Basic6) { |
164 | const char *Sources[] = { |
165 | "// Aaa\\\n" " Bbb\\ \n" " Ccc?" "?/\n" , |
166 | "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n" , |
167 | "// Aaa\\\r" " Bbb\\ \r" " Ccc?" "?/\r" |
168 | }; |
169 | |
170 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
171 | std::vector<Token> Toks; |
172 | |
173 | lexString(Sources[i], Toks); |
174 | |
175 | ASSERT_EQ(10U, Toks.size()); |
176 | |
177 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
178 | ASSERT_EQ(StringRef(" Aaa" ), Toks[0].getText()); |
179 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
180 | ASSERT_EQ(StringRef("\\" ), Toks[1].getText()); |
181 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
182 | |
183 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
184 | ASSERT_EQ(StringRef(" Bbb" ), Toks[3].getText()); |
185 | ASSERT_EQ(tok::text, Toks[4].getKind()); |
186 | ASSERT_EQ(StringRef("\\" ), Toks[4].getText()); |
187 | ASSERT_EQ(tok::text, Toks[5].getKind()); |
188 | ASSERT_EQ(StringRef(" " ), Toks[5].getText()); |
189 | ASSERT_EQ(tok::newline, Toks[6].getKind()); |
190 | |
191 | ASSERT_EQ(tok::text, Toks[7].getKind()); |
192 | ASSERT_EQ(StringRef(" Ccc?" "?/" ), Toks[7].getText()); |
193 | ASSERT_EQ(tok::newline, Toks[8].getKind()); |
194 | |
195 | ASSERT_EQ(tok::newline, Toks[9].getKind()); |
196 | } |
197 | } |
198 | |
199 | // Check that we skip C-style aligned stars correctly. |
200 | TEST_F(CommentLexerTest, Basic7) { |
201 | const char *Source = |
202 | "/* Aaa\n" |
203 | " * Bbb\r\n" |
204 | "\t* Ccc\n" |
205 | " ! Ddd\n" |
206 | " * Eee\n" |
207 | " ** Fff\n" |
208 | " */" ; |
209 | std::vector<Token> Toks; |
210 | |
211 | lexString(Source, Toks); |
212 | |
213 | ASSERT_EQ(15U, Toks.size()); |
214 | |
215 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
216 | ASSERT_EQ(StringRef(" Aaa" ), Toks[0].getText()); |
217 | ASSERT_EQ(tok::newline, Toks[1].getKind()); |
218 | |
219 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
220 | ASSERT_EQ(StringRef(" Bbb" ), Toks[2].getText()); |
221 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
222 | |
223 | ASSERT_EQ(tok::text, Toks[4].getKind()); |
224 | ASSERT_EQ(StringRef(" Ccc" ), Toks[4].getText()); |
225 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
226 | |
227 | ASSERT_EQ(tok::text, Toks[6].getKind()); |
228 | ASSERT_EQ(StringRef(" ! Ddd" ), Toks[6].getText()); |
229 | ASSERT_EQ(tok::newline, Toks[7].getKind()); |
230 | |
231 | ASSERT_EQ(tok::text, Toks[8].getKind()); |
232 | ASSERT_EQ(StringRef(" Eee" ), Toks[8].getText()); |
233 | ASSERT_EQ(tok::newline, Toks[9].getKind()); |
234 | |
235 | ASSERT_EQ(tok::text, Toks[10].getKind()); |
236 | ASSERT_EQ(StringRef("* Fff" ), Toks[10].getText()); |
237 | ASSERT_EQ(tok::newline, Toks[11].getKind()); |
238 | |
239 | ASSERT_EQ(tok::text, Toks[12].getKind()); |
240 | ASSERT_EQ(StringRef(" " ), Toks[12].getText()); |
241 | |
242 | ASSERT_EQ(tok::newline, Toks[13].getKind()); |
243 | ASSERT_EQ(tok::newline, Toks[14].getKind()); |
244 | } |
245 | |
246 | // A command marker followed by comment end. |
247 | TEST_F(CommentLexerTest, DoxygenCommand1) { |
248 | const char *Sources[] = { "//@" , "///@" , "//!@" }; |
249 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
250 | std::vector<Token> Toks; |
251 | |
252 | lexString(Sources[i], Toks); |
253 | |
254 | ASSERT_EQ(2U, Toks.size()); |
255 | |
256 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
257 | ASSERT_EQ(StringRef("@" ), Toks[0].getText()); |
258 | |
259 | ASSERT_EQ(tok::newline, Toks[1].getKind()); |
260 | } |
261 | } |
262 | |
263 | // A command marker followed by comment end. |
264 | TEST_F(CommentLexerTest, DoxygenCommand2) { |
265 | const char *Sources[] = { "/*@*/" , "/**@*/" , "/*!@*/" }; |
266 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
267 | std::vector<Token> Toks; |
268 | |
269 | lexString(Sources[i], Toks); |
270 | |
271 | ASSERT_EQ(3U, Toks.size()); |
272 | |
273 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
274 | ASSERT_EQ(StringRef("@" ), Toks[0].getText()); |
275 | |
276 | ASSERT_EQ(tok::newline, Toks[1].getKind()); |
277 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
278 | } |
279 | } |
280 | |
281 | // A command marker followed by comment end. |
282 | TEST_F(CommentLexerTest, DoxygenCommand3) { |
283 | const char *Sources[] = { "/*\\*/" , "/**\\*/" }; |
284 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
285 | std::vector<Token> Toks; |
286 | |
287 | lexString(Sources[i], Toks); |
288 | |
289 | ASSERT_EQ(3U, Toks.size()); |
290 | |
291 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
292 | ASSERT_EQ(StringRef("\\" ), Toks[0].getText()); |
293 | |
294 | ASSERT_EQ(tok::newline, Toks[1].getKind()); |
295 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
296 | } |
297 | } |
298 | |
299 | // Doxygen escape sequences. |
300 | TEST_F(CommentLexerTest, DoxygenCommand4) { |
301 | const char *Sources[] = { |
302 | "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::" , |
303 | "/// @\\ @@ @& @$ @# @< @> @% @\" @. @::" |
304 | }; |
305 | const char *Text[] = { |
306 | " " , |
307 | "\\" , " " , "@" , " " , "&" , " " , "$" , " " , "#" , " " , |
308 | "<" , " " , ">" , " " , "%" , " " , "\"" , " " , "." , " " , |
309 | "::" , "" |
310 | }; |
311 | |
312 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
313 | std::vector<Token> Toks; |
314 | |
315 | lexString(Sources[i], Toks); |
316 | |
317 | ASSERT_EQ(std::size(Text), Toks.size()); |
318 | |
319 | for (size_t j = 0, e = Toks.size(); j != e; j++) { |
320 | if(Toks[j].is(K: tok::text)) { |
321 | ASSERT_EQ(StringRef(Text[j]), Toks[j].getText()) |
322 | << "index " << i; |
323 | } |
324 | } |
325 | } |
326 | } |
327 | |
328 | // A command marker followed by a non-letter that is not a part of an escape |
329 | // sequence. |
330 | TEST_F(CommentLexerTest, DoxygenCommand5) { |
331 | const char *Source = "/// \\^ \\0" ; |
332 | std::vector<Token> Toks; |
333 | |
334 | lexString(Source, Toks); |
335 | |
336 | ASSERT_EQ(6U, Toks.size()); |
337 | |
338 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
339 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
340 | |
341 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
342 | ASSERT_EQ(StringRef("\\" ), Toks[1].getText()); |
343 | |
344 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
345 | ASSERT_EQ(StringRef("^ " ), Toks[2].getText()); |
346 | |
347 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
348 | ASSERT_EQ(StringRef("\\" ), Toks[3].getText()); |
349 | |
350 | ASSERT_EQ(tok::text, Toks[4].getKind()); |
351 | ASSERT_EQ(StringRef("0" ), Toks[4].getText()); |
352 | |
353 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
354 | } |
355 | |
356 | TEST_F(CommentLexerTest, DoxygenCommand6) { |
357 | const char *Source = "/// \\brief Aaa." ; |
358 | std::vector<Token> Toks; |
359 | |
360 | lexString(Source, Toks); |
361 | |
362 | ASSERT_EQ(4U, Toks.size()); |
363 | |
364 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
365 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
366 | |
367 | ASSERT_EQ(tok::backslash_command, Toks[1].getKind()); |
368 | ASSERT_EQ(StringRef("brief" ), getCommandName(Toks[1])); |
369 | |
370 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
371 | ASSERT_EQ(StringRef(" Aaa." ), Toks[2].getText()); |
372 | |
373 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
374 | } |
375 | |
376 | TEST_F(CommentLexerTest, DoxygenCommand7) { |
377 | const char *Source = "/// \\em\\em \\em\t\\em\n" ; |
378 | std::vector<Token> Toks; |
379 | |
380 | lexString(Source, Toks); |
381 | |
382 | ASSERT_EQ(8U, Toks.size()); |
383 | |
384 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
385 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
386 | |
387 | ASSERT_EQ(tok::backslash_command, Toks[1].getKind()); |
388 | ASSERT_EQ(StringRef("em" ), getCommandName(Toks[1])); |
389 | |
390 | ASSERT_EQ(tok::backslash_command, Toks[2].getKind()); |
391 | ASSERT_EQ(StringRef("em" ), getCommandName(Toks[2])); |
392 | |
393 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
394 | ASSERT_EQ(StringRef(" " ), Toks[3].getText()); |
395 | |
396 | ASSERT_EQ(tok::backslash_command, Toks[4].getKind()); |
397 | ASSERT_EQ(StringRef("em" ), getCommandName(Toks[4])); |
398 | |
399 | ASSERT_EQ(tok::text, Toks[5].getKind()); |
400 | ASSERT_EQ(StringRef("\t" ), Toks[5].getText()); |
401 | |
402 | ASSERT_EQ(tok::backslash_command, Toks[6].getKind()); |
403 | ASSERT_EQ(StringRef("em" ), getCommandName(Toks[6])); |
404 | |
405 | ASSERT_EQ(tok::newline, Toks[7].getKind()); |
406 | } |
407 | |
408 | TEST_F(CommentLexerTest, DoxygenCommand8) { |
409 | const char *Source = "/// @em@em @em\t@em\n" ; |
410 | std::vector<Token> Toks; |
411 | |
412 | lexString(Source, Toks); |
413 | |
414 | ASSERT_EQ(8U, Toks.size()); |
415 | |
416 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
417 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
418 | |
419 | ASSERT_EQ(tok::at_command, Toks[1].getKind()); |
420 | ASSERT_EQ(StringRef("em" ), getCommandName(Toks[1])); |
421 | |
422 | ASSERT_EQ(tok::at_command, Toks[2].getKind()); |
423 | ASSERT_EQ(StringRef("em" ), getCommandName(Toks[2])); |
424 | |
425 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
426 | ASSERT_EQ(StringRef(" " ), Toks[3].getText()); |
427 | |
428 | ASSERT_EQ(tok::at_command, Toks[4].getKind()); |
429 | ASSERT_EQ(StringRef("em" ), getCommandName(Toks[4])); |
430 | |
431 | ASSERT_EQ(tok::text, Toks[5].getKind()); |
432 | ASSERT_EQ(StringRef("\t" ), Toks[5].getText()); |
433 | |
434 | ASSERT_EQ(tok::at_command, Toks[6].getKind()); |
435 | ASSERT_EQ(StringRef("em" ), getCommandName(Toks[6])); |
436 | |
437 | ASSERT_EQ(tok::newline, Toks[7].getKind()); |
438 | } |
439 | |
440 | TEST_F(CommentLexerTest, DoxygenCommand9) { |
441 | const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n" ; |
442 | std::vector<Token> Toks; |
443 | |
444 | lexString(Source, Toks); |
445 | |
446 | ASSERT_EQ(8U, Toks.size()); |
447 | |
448 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
449 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
450 | |
451 | ASSERT_EQ(tok::unknown_command, Toks[1].getKind()); |
452 | ASSERT_EQ(StringRef("aaa" ), Toks[1].getUnknownCommandName()); |
453 | |
454 | ASSERT_EQ(tok::unknown_command, Toks[2].getKind()); |
455 | ASSERT_EQ(StringRef("bbb" ), Toks[2].getUnknownCommandName()); |
456 | |
457 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
458 | ASSERT_EQ(StringRef(" " ), Toks[3].getText()); |
459 | |
460 | ASSERT_EQ(tok::unknown_command, Toks[4].getKind()); |
461 | ASSERT_EQ(StringRef("ccc" ), Toks[4].getUnknownCommandName()); |
462 | |
463 | ASSERT_EQ(tok::text, Toks[5].getKind()); |
464 | ASSERT_EQ(StringRef("\t" ), Toks[5].getText()); |
465 | |
466 | ASSERT_EQ(tok::unknown_command, Toks[6].getKind()); |
467 | ASSERT_EQ(StringRef("ddd" ), Toks[6].getUnknownCommandName()); |
468 | |
469 | ASSERT_EQ(tok::newline, Toks[7].getKind()); |
470 | } |
471 | |
472 | TEST_F(CommentLexerTest, DoxygenCommand10) { |
473 | const char *Source = "// \\c\n" ; |
474 | std::vector<Token> Toks; |
475 | |
476 | lexString(Source, Toks); |
477 | |
478 | ASSERT_EQ(3U, Toks.size()); |
479 | |
480 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
481 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
482 | |
483 | ASSERT_EQ(tok::backslash_command, Toks[1].getKind()); |
484 | ASSERT_EQ(StringRef("c" ), getCommandName(Toks[1])); |
485 | |
486 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
487 | } |
488 | |
489 | TEST_F(CommentLexerTest, RegisterCustomBlockCommand) { |
490 | const char *Source = |
491 | "/// \\NewBlockCommand Aaa.\n" |
492 | "/// @NewBlockCommand Aaa.\n" ; |
493 | |
494 | Traits.registerBlockCommand(StringRef("NewBlockCommand" )); |
495 | |
496 | std::vector<Token> Toks; |
497 | |
498 | lexString(Source, Toks); |
499 | |
500 | ASSERT_EQ(8U, Toks.size()); |
501 | |
502 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
503 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
504 | |
505 | ASSERT_EQ(tok::backslash_command, Toks[1].getKind()); |
506 | ASSERT_EQ(StringRef("NewBlockCommand" ), getCommandName(Toks[1])); |
507 | |
508 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
509 | ASSERT_EQ(StringRef(" Aaa." ), Toks[2].getText()); |
510 | |
511 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
512 | |
513 | ASSERT_EQ(tok::text, Toks[4].getKind()); |
514 | ASSERT_EQ(StringRef(" " ), Toks[4].getText()); |
515 | |
516 | ASSERT_EQ(tok::at_command, Toks[5].getKind()); |
517 | ASSERT_EQ(StringRef("NewBlockCommand" ), getCommandName(Toks[5])); |
518 | |
519 | ASSERT_EQ(tok::text, Toks[6].getKind()); |
520 | ASSERT_EQ(StringRef(" Aaa." ), Toks[6].getText()); |
521 | |
522 | ASSERT_EQ(tok::newline, Toks[7].getKind()); |
523 | } |
524 | |
525 | TEST_F(CommentLexerTest, RegisterMultipleBlockCommands) { |
526 | const char *Source = |
527 | "/// \\Foo\n" |
528 | "/// \\Bar Baz\n" |
529 | "/// \\Blech quux=corge\n" ; |
530 | |
531 | Traits.registerBlockCommand(StringRef("Foo" )); |
532 | Traits.registerBlockCommand(StringRef("Bar" )); |
533 | Traits.registerBlockCommand(StringRef("Blech" )); |
534 | |
535 | std::vector<Token> Toks; |
536 | |
537 | lexString(Source, Toks); |
538 | |
539 | ASSERT_EQ(11U, Toks.size()); |
540 | |
541 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
542 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
543 | |
544 | ASSERT_EQ(tok::backslash_command, Toks[1].getKind()); |
545 | ASSERT_EQ(StringRef("Foo" ), getCommandName(Toks[1])); |
546 | |
547 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
548 | |
549 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
550 | ASSERT_EQ(StringRef(" " ), Toks[3].getText()); |
551 | |
552 | ASSERT_EQ(tok::backslash_command, Toks[4].getKind()); |
553 | ASSERT_EQ(StringRef("Bar" ), getCommandName(Toks[4])); |
554 | |
555 | ASSERT_EQ(tok::text, Toks[5].getKind()); |
556 | ASSERT_EQ(StringRef(" Baz" ), Toks[5].getText()); |
557 | |
558 | ASSERT_EQ(tok::newline, Toks[6].getKind()); |
559 | |
560 | ASSERT_EQ(tok::text, Toks[7].getKind()); |
561 | ASSERT_EQ(StringRef(" " ), Toks[7].getText()); |
562 | |
563 | ASSERT_EQ(tok::backslash_command, Toks[8].getKind()); |
564 | ASSERT_EQ(StringRef("Blech" ), getCommandName(Toks[8])); |
565 | |
566 | ASSERT_EQ(tok::text, Toks[9].getKind()); |
567 | ASSERT_EQ(StringRef(" quux=corge" ), Toks[9].getText()); |
568 | |
569 | ASSERT_EQ(tok::newline, Toks[10].getKind()); |
570 | } |
571 | |
572 | // Empty verbatim block. |
573 | TEST_F(CommentLexerTest, VerbatimBlock1) { |
574 | const char *Sources[] = { |
575 | "/// \\verbatim\\endverbatim\n//" , |
576 | "/** \\verbatim\\endverbatim*/" |
577 | }; |
578 | |
579 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
580 | std::vector<Token> Toks; |
581 | |
582 | lexString(Sources[i], Toks); |
583 | |
584 | ASSERT_EQ(5U, Toks.size()); |
585 | |
586 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
587 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
588 | |
589 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
590 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[1])); |
591 | |
592 | ASSERT_EQ(tok::verbatim_block_end, Toks[2].getKind()); |
593 | ASSERT_EQ(StringRef("endverbatim" ), getVerbatimBlockName(Toks[2])); |
594 | |
595 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
596 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
597 | } |
598 | } |
599 | |
600 | // Empty verbatim block without an end command. |
601 | TEST_F(CommentLexerTest, VerbatimBlock2) { |
602 | const char *Source = "/// \\verbatim" ; |
603 | |
604 | std::vector<Token> Toks; |
605 | |
606 | lexString(Source, Toks); |
607 | |
608 | ASSERT_EQ(3U, Toks.size()); |
609 | |
610 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
611 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
612 | |
613 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
614 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[1])); |
615 | |
616 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
617 | } |
618 | |
619 | // Empty verbatim block without an end command. |
620 | TEST_F(CommentLexerTest, VerbatimBlock3) { |
621 | const char *Source = "/** \\verbatim*/" ; |
622 | |
623 | std::vector<Token> Toks; |
624 | |
625 | lexString(Source, Toks); |
626 | |
627 | ASSERT_EQ(4U, Toks.size()); |
628 | |
629 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
630 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
631 | |
632 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
633 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[1])); |
634 | |
635 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
636 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
637 | } |
638 | |
639 | // Single-line verbatim block. |
640 | TEST_F(CommentLexerTest, VerbatimBlock4) { |
641 | const char *Sources[] = { |
642 | "/// Meow \\verbatim aaa \\endverbatim\n//" , |
643 | "/** Meow \\verbatim aaa \\endverbatim*/" |
644 | }; |
645 | |
646 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
647 | std::vector<Token> Toks; |
648 | |
649 | lexString(Sources[i], Toks); |
650 | |
651 | ASSERT_EQ(6U, Toks.size()); |
652 | |
653 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
654 | ASSERT_EQ(StringRef(" Meow " ), Toks[0].getText()); |
655 | |
656 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
657 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[1])); |
658 | |
659 | ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); |
660 | ASSERT_EQ(StringRef(" aaa " ), Toks[2].getVerbatimBlockText()); |
661 | |
662 | ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind()); |
663 | ASSERT_EQ(StringRef("endverbatim" ), getVerbatimBlockName(Toks[3])); |
664 | |
665 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
666 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
667 | } |
668 | } |
669 | |
670 | // Single-line verbatim block without an end command. |
671 | TEST_F(CommentLexerTest, VerbatimBlock5) { |
672 | const char *Sources[] = { |
673 | "/// Meow \\verbatim aaa \n//" , |
674 | "/** Meow \\verbatim aaa */" |
675 | }; |
676 | |
677 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
678 | std::vector<Token> Toks; |
679 | |
680 | lexString(Sources[i], Toks); |
681 | |
682 | ASSERT_EQ(5U, Toks.size()); |
683 | |
684 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
685 | ASSERT_EQ(StringRef(" Meow " ), Toks[0].getText()); |
686 | |
687 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
688 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[1])); |
689 | |
690 | ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); |
691 | ASSERT_EQ(StringRef(" aaa " ), Toks[2].getVerbatimBlockText()); |
692 | |
693 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
694 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
695 | } |
696 | } |
697 | |
698 | TEST_F(CommentLexerTest, VerbatimBlock6) { |
699 | const char *Source = |
700 | "// \\verbatim\n" |
701 | "// Aaa\n" |
702 | "//\n" |
703 | "// Bbb\n" |
704 | "// \\endverbatim\n" ; |
705 | |
706 | std::vector<Token> Toks; |
707 | |
708 | lexString(Source, Toks); |
709 | |
710 | ASSERT_EQ(10U, Toks.size()); |
711 | |
712 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
713 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
714 | |
715 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
716 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[1])); |
717 | |
718 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
719 | |
720 | ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); |
721 | ASSERT_EQ(StringRef(" Aaa" ), Toks[3].getVerbatimBlockText()); |
722 | |
723 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
724 | |
725 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
726 | |
727 | ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind()); |
728 | ASSERT_EQ(StringRef(" Bbb" ), Toks[6].getVerbatimBlockText()); |
729 | |
730 | ASSERT_EQ(tok::newline, Toks[7].getKind()); |
731 | |
732 | ASSERT_EQ(tok::verbatim_block_end, Toks[8].getKind()); |
733 | ASSERT_EQ(StringRef("endverbatim" ), getVerbatimBlockName(Toks[8])); |
734 | |
735 | ASSERT_EQ(tok::newline, Toks[9].getKind()); |
736 | } |
737 | |
738 | TEST_F(CommentLexerTest, VerbatimBlock7) { |
739 | const char *Source = |
740 | "/* \\verbatim\n" |
741 | " * Aaa\n" |
742 | " *\n" |
743 | " * Bbb\n" |
744 | " * \\endverbatim\n" |
745 | " */" ; |
746 | |
747 | std::vector<Token> Toks; |
748 | |
749 | lexString(Source, Toks); |
750 | |
751 | ASSERT_EQ(10U, Toks.size()); |
752 | |
753 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
754 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
755 | |
756 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
757 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[1])); |
758 | |
759 | ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); |
760 | ASSERT_EQ(StringRef(" Aaa" ), Toks[2].getVerbatimBlockText()); |
761 | |
762 | ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); |
763 | ASSERT_EQ(StringRef("" ), Toks[3].getVerbatimBlockText()); |
764 | |
765 | ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind()); |
766 | ASSERT_EQ(StringRef(" Bbb" ), Toks[4].getVerbatimBlockText()); |
767 | |
768 | ASSERT_EQ(tok::verbatim_block_end, Toks[5].getKind()); |
769 | ASSERT_EQ(StringRef("endverbatim" ), getVerbatimBlockName(Toks[5])); |
770 | |
771 | ASSERT_EQ(tok::newline, Toks[6].getKind()); |
772 | |
773 | ASSERT_EQ(tok::text, Toks[7].getKind()); |
774 | ASSERT_EQ(StringRef(" " ), Toks[7].getText()); |
775 | |
776 | ASSERT_EQ(tok::newline, Toks[8].getKind()); |
777 | ASSERT_EQ(tok::newline, Toks[9].getKind()); |
778 | } |
779 | |
780 | // Complex test for verbatim blocks. |
781 | TEST_F(CommentLexerTest, VerbatimBlock8) { |
782 | const char *Source = |
783 | "/* Meow \\verbatim aaa\\$\\@\n" |
784 | "bbb \\endverbati\r" |
785 | "ccc\r\n" |
786 | "ddd \\endverbatim Blah \\verbatim eee\n" |
787 | "\\endverbatim BlahBlah*/" ; |
788 | std::vector<Token> Toks; |
789 | |
790 | lexString(Source, Toks); |
791 | |
792 | ASSERT_EQ(14U, Toks.size()); |
793 | |
794 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
795 | ASSERT_EQ(StringRef(" Meow " ), Toks[0].getText()); |
796 | |
797 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
798 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[1])); |
799 | |
800 | ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); |
801 | ASSERT_EQ(StringRef(" aaa\\$\\@" ), Toks[2].getVerbatimBlockText()); |
802 | |
803 | ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); |
804 | ASSERT_EQ(StringRef("bbb \\endverbati" ), Toks[3].getVerbatimBlockText()); |
805 | |
806 | ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind()); |
807 | ASSERT_EQ(StringRef("ccc" ), Toks[4].getVerbatimBlockText()); |
808 | |
809 | ASSERT_EQ(tok::verbatim_block_line, Toks[5].getKind()); |
810 | ASSERT_EQ(StringRef("ddd " ), Toks[5].getVerbatimBlockText()); |
811 | |
812 | ASSERT_EQ(tok::verbatim_block_end, Toks[6].getKind()); |
813 | ASSERT_EQ(StringRef("endverbatim" ), getVerbatimBlockName(Toks[6])); |
814 | |
815 | ASSERT_EQ(tok::text, Toks[7].getKind()); |
816 | ASSERT_EQ(StringRef(" Blah " ), Toks[7].getText()); |
817 | |
818 | ASSERT_EQ(tok::verbatim_block_begin, Toks[8].getKind()); |
819 | ASSERT_EQ(StringRef("verbatim" ), getVerbatimBlockName(Toks[8])); |
820 | |
821 | ASSERT_EQ(tok::verbatim_block_line, Toks[9].getKind()); |
822 | ASSERT_EQ(StringRef(" eee" ), Toks[9].getVerbatimBlockText()); |
823 | |
824 | ASSERT_EQ(tok::verbatim_block_end, Toks[10].getKind()); |
825 | ASSERT_EQ(StringRef("endverbatim" ), getVerbatimBlockName(Toks[10])); |
826 | |
827 | ASSERT_EQ(tok::text, Toks[11].getKind()); |
828 | ASSERT_EQ(StringRef(" BlahBlah" ), Toks[11].getText()); |
829 | |
830 | ASSERT_EQ(tok::newline, Toks[12].getKind()); |
831 | ASSERT_EQ(tok::newline, Toks[13].getKind()); |
832 | } |
833 | |
834 | // LaTeX verbatim blocks. |
835 | TEST_F(CommentLexerTest, VerbatimBlock9) { |
836 | const char *Source = |
837 | "/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f} \\f( Ddd \\f)" ; |
838 | std::vector<Token> Toks; |
839 | |
840 | lexString(Source, Toks); |
841 | |
842 | ASSERT_EQ(17U, Toks.size()); |
843 | |
844 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
845 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
846 | |
847 | ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); |
848 | ASSERT_EQ(StringRef("f$" ), getVerbatimBlockName(Toks[1])); |
849 | |
850 | ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); |
851 | ASSERT_EQ(StringRef(" Aaa " ), Toks[2].getVerbatimBlockText()); |
852 | |
853 | ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind()); |
854 | ASSERT_EQ(StringRef("f$" ), getVerbatimBlockName(Toks[3])); |
855 | |
856 | ASSERT_EQ(tok::text, Toks[4].getKind()); |
857 | ASSERT_EQ(StringRef(" " ), Toks[4].getText()); |
858 | |
859 | ASSERT_EQ(tok::verbatim_block_begin, Toks[5].getKind()); |
860 | ASSERT_EQ(StringRef("f[" ), getVerbatimBlockName(Toks[5])); |
861 | |
862 | ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind()); |
863 | ASSERT_EQ(StringRef(" Bbb " ), Toks[6].getVerbatimBlockText()); |
864 | |
865 | ASSERT_EQ(tok::verbatim_block_end, Toks[7].getKind()); |
866 | ASSERT_EQ(StringRef("f]" ), getVerbatimBlockName(Toks[7])); |
867 | |
868 | ASSERT_EQ(tok::text, Toks[8].getKind()); |
869 | ASSERT_EQ(StringRef(" " ), Toks[8].getText()); |
870 | |
871 | ASSERT_EQ(tok::verbatim_block_begin, Toks[9].getKind()); |
872 | ASSERT_EQ(StringRef("f{" ), getVerbatimBlockName(Toks[9])); |
873 | |
874 | ASSERT_EQ(tok::verbatim_block_line, Toks[10].getKind()); |
875 | ASSERT_EQ(StringRef(" Ccc " ), Toks[10].getVerbatimBlockText()); |
876 | |
877 | ASSERT_EQ(tok::verbatim_block_end, Toks[11].getKind()); |
878 | ASSERT_EQ(StringRef("f}" ), getVerbatimBlockName(Toks[11])); |
879 | |
880 | ASSERT_EQ(tok::text, Toks[12].getKind()); |
881 | ASSERT_EQ(StringRef(" " ), Toks[12].getText()); |
882 | |
883 | ASSERT_EQ(tok::verbatim_block_begin, Toks[13].getKind()); |
884 | ASSERT_EQ(StringRef("f(" ), getVerbatimBlockName(Toks[13])); |
885 | |
886 | ASSERT_EQ(tok::verbatim_block_line, Toks[14].getKind()); |
887 | ASSERT_EQ(StringRef(" Ddd " ), Toks[14].getVerbatimBlockText()); |
888 | |
889 | ASSERT_EQ(tok::verbatim_block_end, Toks[15].getKind()); |
890 | ASSERT_EQ(StringRef("f)" ), getVerbatimBlockName(Toks[15])); |
891 | |
892 | ASSERT_EQ(tok::newline, Toks[16].getKind()); |
893 | } |
894 | |
895 | // Empty verbatim line. |
896 | TEST_F(CommentLexerTest, VerbatimLine1) { |
897 | const char *Sources[] = { |
898 | "/// \\fn\n//" , |
899 | "/** \\fn*/" |
900 | }; |
901 | |
902 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
903 | std::vector<Token> Toks; |
904 | |
905 | lexString(Sources[i], Toks); |
906 | |
907 | ASSERT_EQ(4U, Toks.size()); |
908 | |
909 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
910 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
911 | |
912 | ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind()); |
913 | ASSERT_EQ(StringRef("fn" ), getVerbatimLineName(Toks[1])); |
914 | |
915 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
916 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
917 | } |
918 | } |
919 | |
920 | // Verbatim line with Doxygen escape sequences, which should not be expanded. |
921 | TEST_F(CommentLexerTest, VerbatimLine2) { |
922 | const char *Sources[] = { |
923 | "/// \\fn void *foo(const char *zzz = \"\\$\");\n//" , |
924 | "/** \\fn void *foo(const char *zzz = \"\\$\");*/" |
925 | }; |
926 | |
927 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
928 | std::vector<Token> Toks; |
929 | |
930 | lexString(Sources[i], Toks); |
931 | |
932 | ASSERT_EQ(5U, Toks.size()); |
933 | |
934 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
935 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
936 | |
937 | ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind()); |
938 | ASSERT_EQ(StringRef("fn" ), getVerbatimLineName(Toks[1])); |
939 | |
940 | ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind()); |
941 | ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");" ), |
942 | Toks[2].getVerbatimLineText()); |
943 | |
944 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
945 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
946 | } |
947 | } |
948 | |
949 | // Verbatim line should not eat anything from next source line. |
950 | TEST_F(CommentLexerTest, VerbatimLine3) { |
951 | const char *Source = |
952 | "/** \\fn void *foo(const char *zzz = \"\\$\");\n" |
953 | " * Meow\n" |
954 | " */" ; |
955 | |
956 | std::vector<Token> Toks; |
957 | |
958 | lexString(Source, Toks); |
959 | |
960 | ASSERT_EQ(9U, Toks.size()); |
961 | |
962 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
963 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
964 | |
965 | ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind()); |
966 | ASSERT_EQ(StringRef("fn" ), getVerbatimLineName(Toks[1])); |
967 | |
968 | ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind()); |
969 | ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");" ), |
970 | Toks[2].getVerbatimLineText()); |
971 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
972 | |
973 | ASSERT_EQ(tok::text, Toks[4].getKind()); |
974 | ASSERT_EQ(StringRef(" Meow" ), Toks[4].getText()); |
975 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
976 | |
977 | ASSERT_EQ(tok::text, Toks[6].getKind()); |
978 | ASSERT_EQ(StringRef(" " ), Toks[6].getText()); |
979 | |
980 | ASSERT_EQ(tok::newline, Toks[7].getKind()); |
981 | ASSERT_EQ(tok::newline, Toks[8].getKind()); |
982 | } |
983 | |
984 | TEST_F(CommentLexerTest, HTML1) { |
985 | const char *Source = |
986 | "// <" ; |
987 | |
988 | std::vector<Token> Toks; |
989 | |
990 | lexString(Source, Toks); |
991 | |
992 | ASSERT_EQ(3U, Toks.size()); |
993 | |
994 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
995 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
996 | |
997 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
998 | ASSERT_EQ(StringRef("<" ), Toks[1].getText()); |
999 | |
1000 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1001 | } |
1002 | |
1003 | TEST_F(CommentLexerTest, HTML2) { |
1004 | const char *Source = |
1005 | "// a<2" ; |
1006 | |
1007 | std::vector<Token> Toks; |
1008 | |
1009 | lexString(Source, Toks); |
1010 | |
1011 | ASSERT_EQ(4U, Toks.size()); |
1012 | |
1013 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1014 | ASSERT_EQ(StringRef(" a" ), Toks[0].getText()); |
1015 | |
1016 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1017 | ASSERT_EQ(StringRef("<" ), Toks[1].getText()); |
1018 | |
1019 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1020 | ASSERT_EQ(StringRef("2" ), Toks[2].getText()); |
1021 | |
1022 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1023 | } |
1024 | |
1025 | TEST_F(CommentLexerTest, HTML3) { |
1026 | const char *Source = |
1027 | "// < img" ; |
1028 | |
1029 | std::vector<Token> Toks; |
1030 | |
1031 | lexString(Source, Toks); |
1032 | |
1033 | ASSERT_EQ(4U, Toks.size()); |
1034 | |
1035 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1036 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1037 | |
1038 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1039 | ASSERT_EQ(StringRef("<" ), Toks[1].getText()); |
1040 | |
1041 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1042 | ASSERT_EQ(StringRef(" img" ), Toks[2].getText()); |
1043 | |
1044 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1045 | } |
1046 | |
1047 | TEST_F(CommentLexerTest, HTML4) { |
1048 | const char *Sources[] = { |
1049 | "// <img" , |
1050 | "// <img " |
1051 | }; |
1052 | |
1053 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1054 | std::vector<Token> Toks; |
1055 | |
1056 | lexString(Sources[i], Toks); |
1057 | |
1058 | ASSERT_EQ(3U, Toks.size()); |
1059 | |
1060 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1061 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1062 | |
1063 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1064 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1065 | |
1066 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1067 | } |
1068 | } |
1069 | |
1070 | TEST_F(CommentLexerTest, HTML5) { |
1071 | const char *Source = |
1072 | "// <img 42" ; |
1073 | |
1074 | std::vector<Token> Toks; |
1075 | |
1076 | lexString(Source, Toks); |
1077 | |
1078 | ASSERT_EQ(4U, Toks.size()); |
1079 | |
1080 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1081 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1082 | |
1083 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1084 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1085 | |
1086 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1087 | ASSERT_EQ(StringRef("42" ), Toks[2].getText()); |
1088 | |
1089 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1090 | } |
1091 | |
1092 | TEST_F(CommentLexerTest, HTML6) { |
1093 | const char *Source = "// <img> Meow" ; |
1094 | |
1095 | std::vector<Token> Toks; |
1096 | |
1097 | lexString(Source, Toks); |
1098 | |
1099 | ASSERT_EQ(5U, Toks.size()); |
1100 | |
1101 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1102 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1103 | |
1104 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1105 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1106 | |
1107 | ASSERT_EQ(tok::html_greater, Toks[2].getKind()); |
1108 | |
1109 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
1110 | ASSERT_EQ(StringRef(" Meow" ), Toks[3].getText()); |
1111 | |
1112 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
1113 | } |
1114 | |
1115 | TEST_F(CommentLexerTest, HTML7) { |
1116 | const char *Source = "// <img=" ; |
1117 | |
1118 | std::vector<Token> Toks; |
1119 | |
1120 | lexString(Source, Toks); |
1121 | |
1122 | ASSERT_EQ(4U, Toks.size()); |
1123 | |
1124 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1125 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1126 | |
1127 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1128 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1129 | |
1130 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1131 | ASSERT_EQ(StringRef("=" ), Toks[2].getText()); |
1132 | |
1133 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1134 | } |
1135 | |
1136 | TEST_F(CommentLexerTest, HTML8) { |
1137 | const char *Source = "// <img src=> Meow" ; |
1138 | |
1139 | std::vector<Token> Toks; |
1140 | |
1141 | lexString(Source, Toks); |
1142 | |
1143 | ASSERT_EQ(7U, Toks.size()); |
1144 | |
1145 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1146 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1147 | |
1148 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1149 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1150 | |
1151 | ASSERT_EQ(tok::html_ident, Toks[2].getKind()); |
1152 | ASSERT_EQ(StringRef("src" ), Toks[2].getHTMLIdent()); |
1153 | |
1154 | ASSERT_EQ(tok::html_equals, Toks[3].getKind()); |
1155 | |
1156 | ASSERT_EQ(tok::html_greater, Toks[4].getKind()); |
1157 | |
1158 | ASSERT_EQ(tok::text, Toks[5].getKind()); |
1159 | ASSERT_EQ(StringRef(" Meow" ), Toks[5].getText()); |
1160 | |
1161 | ASSERT_EQ(tok::newline, Toks[6].getKind()); |
1162 | } |
1163 | |
1164 | TEST_F(CommentLexerTest, HTML9) { |
1165 | const char *Sources[] = { |
1166 | "// <img src" , |
1167 | "// <img src " |
1168 | }; |
1169 | |
1170 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1171 | std::vector<Token> Toks; |
1172 | |
1173 | lexString(Sources[i], Toks); |
1174 | |
1175 | ASSERT_EQ(4U, Toks.size()); |
1176 | |
1177 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1178 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1179 | |
1180 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1181 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1182 | |
1183 | ASSERT_EQ(tok::html_ident, Toks[2].getKind()); |
1184 | ASSERT_EQ(StringRef("src" ), Toks[2].getHTMLIdent()); |
1185 | |
1186 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1187 | } |
1188 | } |
1189 | |
1190 | TEST_F(CommentLexerTest, HTML10) { |
1191 | const char *Sources[] = { |
1192 | "// <img src=" , |
1193 | "// <img src =" |
1194 | }; |
1195 | |
1196 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1197 | std::vector<Token> Toks; |
1198 | |
1199 | lexString(Sources[i], Toks); |
1200 | |
1201 | ASSERT_EQ(5U, Toks.size()); |
1202 | |
1203 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1204 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1205 | |
1206 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1207 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1208 | |
1209 | ASSERT_EQ(tok::html_ident, Toks[2].getKind()); |
1210 | ASSERT_EQ(StringRef("src" ), Toks[2].getHTMLIdent()); |
1211 | |
1212 | ASSERT_EQ(tok::html_equals, Toks[3].getKind()); |
1213 | |
1214 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
1215 | } |
1216 | } |
1217 | |
1218 | TEST_F(CommentLexerTest, HTML11) { |
1219 | const char *Sources[] = { |
1220 | "// <img src=\"" , |
1221 | "// <img src = \"" , |
1222 | "// <img src=\'" , |
1223 | "// <img src = \'" |
1224 | }; |
1225 | |
1226 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1227 | std::vector<Token> Toks; |
1228 | |
1229 | lexString(Sources[i], Toks); |
1230 | |
1231 | ASSERT_EQ(6U, Toks.size()); |
1232 | |
1233 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1234 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1235 | |
1236 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1237 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1238 | |
1239 | ASSERT_EQ(tok::html_ident, Toks[2].getKind()); |
1240 | ASSERT_EQ(StringRef("src" ), Toks[2].getHTMLIdent()); |
1241 | |
1242 | ASSERT_EQ(tok::html_equals, Toks[3].getKind()); |
1243 | |
1244 | ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind()); |
1245 | ASSERT_EQ(StringRef("" ), Toks[4].getHTMLQuotedString()); |
1246 | |
1247 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
1248 | } |
1249 | } |
1250 | |
1251 | TEST_F(CommentLexerTest, HTML12) { |
1252 | const char *Source = "// <img src=@" ; |
1253 | |
1254 | std::vector<Token> Toks; |
1255 | |
1256 | lexString(Source, Toks); |
1257 | |
1258 | ASSERT_EQ(6U, Toks.size()); |
1259 | |
1260 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1261 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1262 | |
1263 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1264 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1265 | |
1266 | ASSERT_EQ(tok::html_ident, Toks[2].getKind()); |
1267 | ASSERT_EQ(StringRef("src" ), Toks[2].getHTMLIdent()); |
1268 | |
1269 | ASSERT_EQ(tok::html_equals, Toks[3].getKind()); |
1270 | |
1271 | ASSERT_EQ(tok::text, Toks[4].getKind()); |
1272 | ASSERT_EQ(StringRef("@" ), Toks[4].getText()); |
1273 | |
1274 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
1275 | } |
1276 | |
1277 | TEST_F(CommentLexerTest, HTML13) { |
1278 | const char *Sources[] = { |
1279 | "// <img src=\"val\\\"\\'val" , |
1280 | "// <img src=\"val\\\"\\'val\"" , |
1281 | "// <img src=\'val\\\"\\'val" , |
1282 | "// <img src=\'val\\\"\\'val\'" |
1283 | }; |
1284 | |
1285 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1286 | std::vector<Token> Toks; |
1287 | |
1288 | lexString(Sources[i], Toks); |
1289 | |
1290 | ASSERT_EQ(6U, Toks.size()); |
1291 | |
1292 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1293 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1294 | |
1295 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1296 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1297 | |
1298 | ASSERT_EQ(tok::html_ident, Toks[2].getKind()); |
1299 | ASSERT_EQ(StringRef("src" ), Toks[2].getHTMLIdent()); |
1300 | |
1301 | ASSERT_EQ(tok::html_equals, Toks[3].getKind()); |
1302 | |
1303 | ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind()); |
1304 | ASSERT_EQ(StringRef("val\\\"\\'val" ), Toks[4].getHTMLQuotedString()); |
1305 | |
1306 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
1307 | } |
1308 | } |
1309 | |
1310 | TEST_F(CommentLexerTest, HTML14) { |
1311 | const char *Sources[] = { |
1312 | "// <img src=\"val\\\"\\'val\">" , |
1313 | "// <img src=\'val\\\"\\'val\'>" |
1314 | }; |
1315 | |
1316 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1317 | std::vector<Token> Toks; |
1318 | |
1319 | lexString(Sources[i], Toks); |
1320 | |
1321 | ASSERT_EQ(7U, Toks.size()); |
1322 | |
1323 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1324 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1325 | |
1326 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1327 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1328 | |
1329 | ASSERT_EQ(tok::html_ident, Toks[2].getKind()); |
1330 | ASSERT_EQ(StringRef("src" ), Toks[2].getHTMLIdent()); |
1331 | |
1332 | ASSERT_EQ(tok::html_equals, Toks[3].getKind()); |
1333 | |
1334 | ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind()); |
1335 | ASSERT_EQ(StringRef("val\\\"\\'val" ), Toks[4].getHTMLQuotedString()); |
1336 | |
1337 | ASSERT_EQ(tok::html_greater, Toks[5].getKind()); |
1338 | |
1339 | ASSERT_EQ(tok::newline, Toks[6].getKind()); |
1340 | } |
1341 | } |
1342 | |
1343 | TEST_F(CommentLexerTest, HTML15) { |
1344 | const char *Sources[] = { |
1345 | "// <img/>" , |
1346 | "// <img />" |
1347 | }; |
1348 | |
1349 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1350 | std::vector<Token> Toks; |
1351 | |
1352 | lexString(Sources[i], Toks); |
1353 | |
1354 | ASSERT_EQ(4U, Toks.size()); |
1355 | |
1356 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1357 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1358 | |
1359 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1360 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1361 | |
1362 | ASSERT_EQ(tok::html_slash_greater, Toks[2].getKind()); |
1363 | |
1364 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1365 | } |
1366 | } |
1367 | |
1368 | TEST_F(CommentLexerTest, HTML16) { |
1369 | const char *Sources[] = { |
1370 | "// <img/ Aaa" , |
1371 | "// <img / Aaa" |
1372 | }; |
1373 | |
1374 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1375 | std::vector<Token> Toks; |
1376 | |
1377 | lexString(Sources[i], Toks); |
1378 | |
1379 | ASSERT_EQ(5U, Toks.size()); |
1380 | |
1381 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1382 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1383 | |
1384 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1385 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagStartName()); |
1386 | |
1387 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1388 | ASSERT_EQ(StringRef("/" ), Toks[2].getText()); |
1389 | |
1390 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
1391 | ASSERT_EQ(StringRef(" Aaa" ), Toks[3].getText()); |
1392 | |
1393 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
1394 | } |
1395 | } |
1396 | |
1397 | TEST_F(CommentLexerTest, HTML17) { |
1398 | const char *Source = "// </" ; |
1399 | |
1400 | std::vector<Token> Toks; |
1401 | |
1402 | lexString(Source, Toks); |
1403 | |
1404 | ASSERT_EQ(3U, Toks.size()); |
1405 | |
1406 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1407 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1408 | |
1409 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1410 | ASSERT_EQ(StringRef("</" ), Toks[1].getText()); |
1411 | |
1412 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1413 | } |
1414 | |
1415 | TEST_F(CommentLexerTest, HTML18) { |
1416 | const char *Source = "// </@" ; |
1417 | |
1418 | std::vector<Token> Toks; |
1419 | |
1420 | lexString(Source, Toks); |
1421 | |
1422 | ASSERT_EQ(4U, Toks.size()); |
1423 | |
1424 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1425 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1426 | |
1427 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1428 | ASSERT_EQ(StringRef("</" ), Toks[1].getText()); |
1429 | |
1430 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1431 | ASSERT_EQ(StringRef("@" ), Toks[2].getText()); |
1432 | |
1433 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1434 | } |
1435 | |
1436 | TEST_F(CommentLexerTest, HTML19) { |
1437 | const char *Source = "// </img" ; |
1438 | |
1439 | std::vector<Token> Toks; |
1440 | |
1441 | lexString(Source, Toks); |
1442 | |
1443 | ASSERT_EQ(3U, Toks.size()); |
1444 | |
1445 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1446 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1447 | |
1448 | ASSERT_EQ(tok::html_end_tag, Toks[1].getKind()); |
1449 | ASSERT_EQ(StringRef("img" ), Toks[1].getHTMLTagEndName()); |
1450 | |
1451 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1452 | } |
1453 | |
1454 | TEST_F(CommentLexerTest, HTML20) { |
1455 | const char *Source = "// <a\n" |
1456 | "// \n" |
1457 | "// href=\"foo\"\n" |
1458 | "// \n" |
1459 | "// bar>text</a>" ; |
1460 | |
1461 | std::vector<Token> Toks; |
1462 | |
1463 | lexString(Source, Toks); |
1464 | |
1465 | ASSERT_EQ(11U, Toks.size()); |
1466 | |
1467 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1468 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1469 | |
1470 | ASSERT_EQ(tok::html_start_tag, Toks[1].getKind()); |
1471 | ASSERT_EQ(StringRef("a" ), Toks[1].getHTMLTagStartName()); |
1472 | |
1473 | ASSERT_EQ(tok::html_ident, Toks[2].getKind()); |
1474 | ASSERT_EQ(StringRef("href" ), Toks[2].getHTMLIdent()); |
1475 | |
1476 | ASSERT_EQ(tok::html_equals, Toks[3].getKind()); |
1477 | |
1478 | ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind()); |
1479 | ASSERT_EQ(StringRef("foo" ), Toks[4].getHTMLQuotedString()); |
1480 | |
1481 | ASSERT_EQ(tok::html_ident, Toks[5].getKind()); |
1482 | ASSERT_EQ(StringRef("bar" ), Toks[5].getHTMLIdent()); |
1483 | |
1484 | ASSERT_EQ(tok::html_greater, Toks[6].getKind()); |
1485 | |
1486 | ASSERT_EQ(tok::text, Toks[7].getKind()); |
1487 | ASSERT_EQ(StringRef("text" ), Toks[7].getText()); |
1488 | |
1489 | ASSERT_EQ(tok::html_end_tag, Toks[8].getKind()); |
1490 | ASSERT_EQ(StringRef("a" ), Toks[8].getHTMLTagEndName()); |
1491 | |
1492 | ASSERT_EQ(tok::html_greater, Toks[9].getKind()); |
1493 | |
1494 | ASSERT_EQ(tok::newline, Toks[10].getKind()); |
1495 | } |
1496 | |
1497 | TEST_F(CommentLexerTest, HTML21) { |
1498 | const char *Source = "/**\n" |
1499 | " * <a\n" |
1500 | " * \n" |
1501 | " * href=\"foo\"\n" |
1502 | " * \n" |
1503 | " * bar>text</a>\n" |
1504 | " */" ; |
1505 | |
1506 | std::vector<Token> Toks; |
1507 | |
1508 | lexString(Source, Toks); |
1509 | |
1510 | ASSERT_EQ(15U, Toks.size()); |
1511 | |
1512 | ASSERT_EQ(tok::newline, Toks[0].getKind()); |
1513 | |
1514 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1515 | ASSERT_EQ(StringRef(" " ), Toks[1].getText()); |
1516 | |
1517 | ASSERT_EQ(tok::html_start_tag, Toks[2].getKind()); |
1518 | ASSERT_EQ(StringRef("a" ), Toks[2].getHTMLTagStartName()); |
1519 | |
1520 | ASSERT_EQ(tok::html_ident, Toks[3].getKind()); |
1521 | ASSERT_EQ(StringRef("href" ), Toks[3].getHTMLIdent()); |
1522 | |
1523 | ASSERT_EQ(tok::html_equals, Toks[4].getKind()); |
1524 | |
1525 | ASSERT_EQ(tok::html_quoted_string, Toks[5].getKind()); |
1526 | ASSERT_EQ(StringRef("foo" ), Toks[5].getHTMLQuotedString()); |
1527 | |
1528 | ASSERT_EQ(tok::html_ident, Toks[6].getKind()); |
1529 | ASSERT_EQ(StringRef("bar" ), Toks[6].getHTMLIdent()); |
1530 | |
1531 | ASSERT_EQ(tok::html_greater, Toks[7].getKind()); |
1532 | |
1533 | ASSERT_EQ(tok::text, Toks[8].getKind()); |
1534 | ASSERT_EQ(StringRef("text" ), Toks[8].getText()); |
1535 | |
1536 | ASSERT_EQ(tok::html_end_tag, Toks[9].getKind()); |
1537 | ASSERT_EQ(StringRef("a" ), Toks[9].getHTMLTagEndName()); |
1538 | |
1539 | ASSERT_EQ(tok::html_greater, Toks[10].getKind()); |
1540 | |
1541 | ASSERT_EQ(tok::newline, Toks[11].getKind()); |
1542 | |
1543 | ASSERT_EQ(tok::text, Toks[12].getKind()); |
1544 | ASSERT_EQ(StringRef(" " ), Toks[12].getText()); |
1545 | |
1546 | ASSERT_EQ(tok::newline, Toks[13].getKind()); |
1547 | |
1548 | ASSERT_EQ(tok::newline, Toks[14].getKind()); |
1549 | } |
1550 | |
1551 | TEST_F(CommentLexerTest, HTML22) { |
1552 | const char *Source = "/**\n" |
1553 | " * <a\n" |
1554 | " */" ; |
1555 | |
1556 | std::vector<Token> Toks; |
1557 | |
1558 | lexString(Source, Toks); |
1559 | |
1560 | ASSERT_EQ(6U, Toks.size()); |
1561 | |
1562 | ASSERT_EQ(tok::newline, Toks[0].getKind()); |
1563 | |
1564 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1565 | ASSERT_EQ(StringRef(" " ), Toks[1].getText()); |
1566 | |
1567 | ASSERT_EQ(tok::html_start_tag, Toks[2].getKind()); |
1568 | ASSERT_EQ(StringRef("a" ), Toks[2].getHTMLTagStartName()); |
1569 | |
1570 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1571 | |
1572 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
1573 | |
1574 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
1575 | } |
1576 | |
1577 | TEST_F(CommentLexerTest, HTML23) { |
1578 | // NOTE: "//<" is considered a comment start |
1579 | const char *Source = "// <\n" |
1580 | "// a\n" |
1581 | "// >" ; |
1582 | |
1583 | std::vector<Token> Toks; |
1584 | |
1585 | lexString(Source, Toks); |
1586 | |
1587 | ASSERT_EQ(7U, Toks.size()); |
1588 | |
1589 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1590 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1591 | |
1592 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1593 | ASSERT_EQ(StringRef("<" ), Toks[1].getText()); |
1594 | |
1595 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1596 | |
1597 | ASSERT_EQ(tok::text, Toks[3].getKind()); |
1598 | ASSERT_EQ(StringRef(" a" ), Toks[3].getText()); |
1599 | |
1600 | ASSERT_EQ(tok::newline, Toks[4].getKind()); |
1601 | |
1602 | ASSERT_EQ(tok::text, Toks[5].getKind()); |
1603 | ASSERT_EQ(StringRef(" >" ), Toks[5].getText()); |
1604 | |
1605 | ASSERT_EQ(tok::newline, Toks[6].getKind()); |
1606 | } |
1607 | |
1608 | TEST_F(CommentLexerTest, NotAKnownHTMLTag1) { |
1609 | const char *Source = "// <tag>" ; |
1610 | |
1611 | std::vector<Token> Toks; |
1612 | |
1613 | lexString(Source, Toks); |
1614 | |
1615 | ASSERT_EQ(4U, Toks.size()); |
1616 | |
1617 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1618 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1619 | |
1620 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1621 | ASSERT_EQ(StringRef("<tag" ), Toks[1].getText()); |
1622 | |
1623 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1624 | ASSERT_EQ(StringRef(">" ), Toks[2].getText()); |
1625 | |
1626 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1627 | } |
1628 | |
1629 | TEST_F(CommentLexerTest, NotAKnownHTMLTag2) { |
1630 | const char *Source = "// </tag>" ; |
1631 | |
1632 | std::vector<Token> Toks; |
1633 | |
1634 | lexString(Source, Toks); |
1635 | |
1636 | ASSERT_EQ(4U, Toks.size()); |
1637 | |
1638 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1639 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1640 | |
1641 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1642 | ASSERT_EQ(StringRef("</tag" ), Toks[1].getText()); |
1643 | |
1644 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1645 | ASSERT_EQ(StringRef(">" ), Toks[2].getText()); |
1646 | |
1647 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1648 | } |
1649 | |
1650 | TEST_F(CommentLexerTest, HTMLCharacterReferences1) { |
1651 | const char *Source = "// &" ; |
1652 | |
1653 | std::vector<Token> Toks; |
1654 | |
1655 | lexString(Source, Toks); |
1656 | |
1657 | ASSERT_EQ(3U, Toks.size()); |
1658 | |
1659 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1660 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1661 | |
1662 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1663 | ASSERT_EQ(StringRef("&" ), Toks[1].getText()); |
1664 | |
1665 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1666 | } |
1667 | |
1668 | TEST_F(CommentLexerTest, HTMLCharacterReferences2) { |
1669 | const char *Source = "// &!" ; |
1670 | |
1671 | std::vector<Token> Toks; |
1672 | |
1673 | lexString(Source, Toks); |
1674 | |
1675 | ASSERT_EQ(4U, Toks.size()); |
1676 | |
1677 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1678 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1679 | |
1680 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1681 | ASSERT_EQ(StringRef("&" ), Toks[1].getText()); |
1682 | |
1683 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1684 | ASSERT_EQ(StringRef("!" ), Toks[2].getText()); |
1685 | |
1686 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1687 | } |
1688 | |
1689 | TEST_F(CommentLexerTest, HTMLCharacterReferences3) { |
1690 | const char *Source = "// &" ; |
1691 | |
1692 | std::vector<Token> Toks; |
1693 | |
1694 | lexString(Source, Toks); |
1695 | |
1696 | ASSERT_EQ(3U, Toks.size()); |
1697 | |
1698 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1699 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1700 | |
1701 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1702 | ASSERT_EQ(StringRef("&" ), Toks[1].getText()); |
1703 | |
1704 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1705 | } |
1706 | |
1707 | TEST_F(CommentLexerTest, HTMLCharacterReferences4) { |
1708 | const char *Source = "// &!" ; |
1709 | |
1710 | std::vector<Token> Toks; |
1711 | |
1712 | lexString(Source, Toks); |
1713 | |
1714 | ASSERT_EQ(4U, Toks.size()); |
1715 | |
1716 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1717 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1718 | |
1719 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1720 | ASSERT_EQ(StringRef("&" ), Toks[1].getText()); |
1721 | |
1722 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1723 | ASSERT_EQ(StringRef("!" ), Toks[2].getText()); |
1724 | |
1725 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1726 | } |
1727 | |
1728 | TEST_F(CommentLexerTest, HTMLCharacterReferences5) { |
1729 | const char *Source = "// &#" ; |
1730 | |
1731 | std::vector<Token> Toks; |
1732 | |
1733 | lexString(Source, Toks); |
1734 | |
1735 | ASSERT_EQ(3U, Toks.size()); |
1736 | |
1737 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1738 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1739 | |
1740 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1741 | ASSERT_EQ(StringRef("&#" ), Toks[1].getText()); |
1742 | |
1743 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1744 | } |
1745 | |
1746 | TEST_F(CommentLexerTest, HTMLCharacterReferences6) { |
1747 | const char *Source = "// &#a" ; |
1748 | |
1749 | std::vector<Token> Toks; |
1750 | |
1751 | lexString(Source, Toks); |
1752 | |
1753 | ASSERT_EQ(4U, Toks.size()); |
1754 | |
1755 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1756 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1757 | |
1758 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1759 | ASSERT_EQ(StringRef("&#" ), Toks[1].getText()); |
1760 | |
1761 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1762 | ASSERT_EQ(StringRef("a" ), Toks[2].getText()); |
1763 | |
1764 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1765 | } |
1766 | |
1767 | TEST_F(CommentLexerTest, HTMLCharacterReferences7) { |
1768 | const char *Source = "// *" ; |
1769 | |
1770 | std::vector<Token> Toks; |
1771 | |
1772 | lexString(Source, Toks); |
1773 | |
1774 | ASSERT_EQ(3U, Toks.size()); |
1775 | |
1776 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1777 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1778 | |
1779 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1780 | ASSERT_EQ(StringRef("*" ), Toks[1].getText()); |
1781 | |
1782 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1783 | } |
1784 | |
1785 | TEST_F(CommentLexerTest, HTMLCharacterReferences8) { |
1786 | const char *Source = "// *a" ; |
1787 | |
1788 | std::vector<Token> Toks; |
1789 | |
1790 | lexString(Source, Toks); |
1791 | |
1792 | ASSERT_EQ(4U, Toks.size()); |
1793 | |
1794 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1795 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1796 | |
1797 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1798 | ASSERT_EQ(StringRef("*" ), Toks[1].getText()); |
1799 | |
1800 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1801 | ASSERT_EQ(StringRef("a" ), Toks[2].getText()); |
1802 | |
1803 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1804 | } |
1805 | |
1806 | TEST_F(CommentLexerTest, HTMLCharacterReferences9) { |
1807 | const char *Source = "// &#x" ; |
1808 | |
1809 | std::vector<Token> Toks; |
1810 | |
1811 | lexString(Source, Toks); |
1812 | |
1813 | ASSERT_EQ(3U, Toks.size()); |
1814 | |
1815 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1816 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1817 | |
1818 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1819 | ASSERT_EQ(StringRef("&#x" ), Toks[1].getText()); |
1820 | |
1821 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1822 | } |
1823 | |
1824 | TEST_F(CommentLexerTest, HTMLCharacterReferences10) { |
1825 | const char *Source = "// &#xz" ; |
1826 | |
1827 | std::vector<Token> Toks; |
1828 | |
1829 | lexString(Source, Toks); |
1830 | |
1831 | ASSERT_EQ(4U, Toks.size()); |
1832 | |
1833 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1834 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1835 | |
1836 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1837 | ASSERT_EQ(StringRef("&#x" ), Toks[1].getText()); |
1838 | |
1839 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1840 | ASSERT_EQ(StringRef("z" ), Toks[2].getText()); |
1841 | |
1842 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1843 | } |
1844 | |
1845 | TEST_F(CommentLexerTest, HTMLCharacterReferences11) { |
1846 | const char *Source = "// «" ; |
1847 | |
1848 | std::vector<Token> Toks; |
1849 | |
1850 | lexString(Source, Toks); |
1851 | |
1852 | ASSERT_EQ(3U, Toks.size()); |
1853 | |
1854 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1855 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1856 | |
1857 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1858 | ASSERT_EQ(StringRef("«" ), Toks[1].getText()); |
1859 | |
1860 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1861 | } |
1862 | |
1863 | TEST_F(CommentLexerTest, HTMLCharacterReferences12) { |
1864 | const char *Source = "// «z" ; |
1865 | |
1866 | std::vector<Token> Toks; |
1867 | |
1868 | lexString(Source, Toks); |
1869 | |
1870 | ASSERT_EQ(4U, Toks.size()); |
1871 | |
1872 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1873 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1874 | |
1875 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1876 | ASSERT_EQ(StringRef("«" ), Toks[1].getText()); |
1877 | |
1878 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1879 | ASSERT_EQ(StringRef("z" ), Toks[2].getText()); |
1880 | |
1881 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1882 | } |
1883 | |
1884 | TEST_F(CommentLexerTest, HTMLCharacterReferences13) { |
1885 | const char *Source = "// &" ; |
1886 | |
1887 | std::vector<Token> Toks; |
1888 | |
1889 | lexString(Source, Toks); |
1890 | |
1891 | ASSERT_EQ(3U, Toks.size()); |
1892 | |
1893 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1894 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1895 | |
1896 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1897 | ASSERT_EQ(StringRef("&" ), Toks[1].getText()); |
1898 | |
1899 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1900 | } |
1901 | |
1902 | TEST_F(CommentLexerTest, HTMLCharacterReferences14) { |
1903 | const char *Source = "// &<" ; |
1904 | |
1905 | std::vector<Token> Toks; |
1906 | |
1907 | lexString(Source, Toks); |
1908 | |
1909 | ASSERT_EQ(4U, Toks.size()); |
1910 | |
1911 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1912 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1913 | |
1914 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1915 | ASSERT_EQ(StringRef("&" ), Toks[1].getText()); |
1916 | |
1917 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1918 | ASSERT_EQ(StringRef("<" ), Toks[2].getText()); |
1919 | |
1920 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1921 | } |
1922 | |
1923 | TEST_F(CommentLexerTest, HTMLCharacterReferences15) { |
1924 | const char *Source = "// & meow" ; |
1925 | |
1926 | std::vector<Token> Toks; |
1927 | |
1928 | lexString(Source, Toks); |
1929 | |
1930 | ASSERT_EQ(4U, Toks.size()); |
1931 | |
1932 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1933 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1934 | |
1935 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1936 | ASSERT_EQ(StringRef("&" ), Toks[1].getText()); |
1937 | |
1938 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1939 | ASSERT_EQ(StringRef(" meow" ), Toks[2].getText()); |
1940 | |
1941 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1942 | } |
1943 | |
1944 | TEST_F(CommentLexerTest, HTMLCharacterReferences16) { |
1945 | const char *Sources[] = { |
1946 | "// =" , |
1947 | "// =" , |
1948 | "// =" , |
1949 | "// =" |
1950 | }; |
1951 | |
1952 | for (size_t i = 0, e = std::size(Sources); i != e; i++) { |
1953 | std::vector<Token> Toks; |
1954 | |
1955 | lexString(Sources[i], Toks); |
1956 | |
1957 | ASSERT_EQ(3U, Toks.size()); |
1958 | |
1959 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1960 | ASSERT_EQ(StringRef(" " ), Toks[0].getText()); |
1961 | |
1962 | ASSERT_EQ(tok::text, Toks[1].getKind()); |
1963 | ASSERT_EQ(StringRef("=" ), Toks[1].getText()); |
1964 | |
1965 | ASSERT_EQ(tok::newline, Toks[2].getKind()); |
1966 | } |
1967 | } |
1968 | |
1969 | TEST_F(CommentLexerTest, MultipleComments) { |
1970 | const char *Source = |
1971 | "// Aaa\n" |
1972 | "/// Bbb\n" |
1973 | "/* Ccc\n" |
1974 | " * Ddd*/\n" |
1975 | "/** Eee*/" ; |
1976 | |
1977 | std::vector<Token> Toks; |
1978 | |
1979 | lexString(Source, Toks); |
1980 | |
1981 | ASSERT_EQ(12U, Toks.size()); |
1982 | |
1983 | ASSERT_EQ(tok::text, Toks[0].getKind()); |
1984 | ASSERT_EQ(StringRef(" Aaa" ), Toks[0].getText()); |
1985 | ASSERT_EQ(tok::newline, Toks[1].getKind()); |
1986 | |
1987 | ASSERT_EQ(tok::text, Toks[2].getKind()); |
1988 | ASSERT_EQ(StringRef(" Bbb" ), Toks[2].getText()); |
1989 | ASSERT_EQ(tok::newline, Toks[3].getKind()); |
1990 | |
1991 | ASSERT_EQ(tok::text, Toks[4].getKind()); |
1992 | ASSERT_EQ(StringRef(" Ccc" ), Toks[4].getText()); |
1993 | ASSERT_EQ(tok::newline, Toks[5].getKind()); |
1994 | |
1995 | ASSERT_EQ(tok::text, Toks[6].getKind()); |
1996 | ASSERT_EQ(StringRef(" Ddd" ), Toks[6].getText()); |
1997 | ASSERT_EQ(tok::newline, Toks[7].getKind()); |
1998 | ASSERT_EQ(tok::newline, Toks[8].getKind()); |
1999 | |
2000 | ASSERT_EQ(tok::text, Toks[9].getKind()); |
2001 | ASSERT_EQ(StringRef(" Eee" ), Toks[9].getText()); |
2002 | |
2003 | ASSERT_EQ(tok::newline, Toks[10].getKind()); |
2004 | ASSERT_EQ(tok::newline, Toks[11].getKind()); |
2005 | } |
2006 | |
2007 | } // end namespace comments |
2008 | } // end namespace clang |
2009 | |
2010 | |