1//===- unittests/AST/CommentLexer.cpp ------ Comment lexer tests ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/AST/CommentLexer.h"
10#include "clang/AST/CommentCommandTraits.h"
11#include "clang/Basic/CommentOptions.h"
12#include "clang/Basic/Diagnostic.h"
13#include "clang/Basic/DiagnosticOptions.h"
14#include "clang/Basic/FileManager.h"
15#include "clang/Basic/SourceManager.h"
16#include "llvm/ADT/STLExtras.h"
17#include "gtest/gtest.h"
18#include <vector>
19
20using namespace llvm;
21using namespace clang;
22
23namespace clang {
24namespace comments {
25
26namespace {
27class CommentLexerTest : public ::testing::Test {
28protected:
29 CommentLexerTest()
30 : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
31 Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
32 SourceMgr(Diags, FileMgr), Traits(Allocator, CommentOptions()) {}
33
34 FileSystemOptions FileMgrOpts;
35 FileManager FileMgr;
36 IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
37 DiagnosticOptions DiagOpts;
38 DiagnosticsEngine Diags;
39 SourceManager SourceMgr;
40 llvm::BumpPtrAllocator Allocator;
41 CommandTraits Traits;
42
43 void lexString(const char *Source, std::vector<Token> &Toks);
44
45 StringRef getCommandName(const Token &Tok) {
46 return Traits.getCommandInfo(Tok.getCommandID())->Name;
47 }
48
49 StringRef getVerbatimBlockName(const Token &Tok) {
50 return Traits.getCommandInfo(Tok.getVerbatimBlockID())->Name;
51 }
52
53 StringRef getVerbatimLineName(const Token &Tok) {
54 return Traits.getCommandInfo(Tok.getVerbatimLineID())->Name;
55 }
56};
57
58void CommentLexerTest::lexString(const char *Source,
59 std::vector<Token> &Toks) {
60 std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(InputData: Source);
61 FileID File = SourceMgr.createFileID(Buffer: std::move(Buf));
62 SourceLocation Begin = SourceMgr.getLocForStartOfFile(FID: File);
63
64 Lexer L(Allocator, Diags, Traits, Begin, Source, Source + strlen(Source));
65
66 while (1) {
67 Token Tok;
68 L.lex(T&: Tok);
69 if (Tok.is(K: tok::eof))
70 break;
71 Toks.push_back(x: Tok);
72 }
73}
74
75} // unnamed namespace
76
77// Empty source range should be handled.
78TEST_F(CommentLexerTest, Basic1) {
79 const char *Source = "";
80 std::vector<Token> Toks;
81
82 lexString(Source, Toks);
83
84 ASSERT_EQ(0U, Toks.size());
85}
86
87// Empty comments should be handled.
88TEST_F(CommentLexerTest, Basic2) {
89 const char *Sources[] = {
90 "//", "///", "//!", "///<", "//!<"
91 };
92 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
93 std::vector<Token> Toks;
94
95 lexString(Sources[i], Toks);
96
97 ASSERT_EQ(1U, Toks.size());
98
99 ASSERT_EQ(tok::newline, Toks[0].getKind());
100 }
101}
102
103// Empty comments should be handled.
104TEST_F(CommentLexerTest, Basic3) {
105 const char *Sources[] = {
106 "/**/", "/***/", "/*!*/", "/**<*/", "/*!<*/"
107 };
108 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
109 std::vector<Token> Toks;
110
111 lexString(Sources[i], Toks);
112
113 ASSERT_EQ(2U, Toks.size());
114
115 ASSERT_EQ(tok::newline, Toks[0].getKind());
116 ASSERT_EQ(tok::newline, Toks[1].getKind());
117 }
118}
119
120// Single comment with plain text.
121TEST_F(CommentLexerTest, Basic4) {
122 const char *Sources[] = {
123 "// Meow", "/// Meow", "//! Meow",
124 "// Meow\n", "// Meow\r\n", "//! Meow\r",
125 };
126
127 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
128 std::vector<Token> Toks;
129
130 lexString(Sources[i], Toks);
131
132 ASSERT_EQ(2U, Toks.size());
133
134 ASSERT_EQ(tok::text, Toks[0].getKind());
135 ASSERT_EQ(StringRef(" Meow"), Toks[0].getText());
136
137 ASSERT_EQ(tok::newline, Toks[1].getKind());
138 }
139}
140
141// Single comment with plain text.
142TEST_F(CommentLexerTest, Basic5) {
143 const char *Sources[] = {
144 "/* Meow*/", "/** Meow*/", "/*! Meow*/"
145 };
146
147 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
148 std::vector<Token> Toks;
149
150 lexString(Sources[i], Toks);
151
152 ASSERT_EQ(3U, Toks.size());
153
154 ASSERT_EQ(tok::text, Toks[0].getKind());
155 ASSERT_EQ(StringRef(" Meow"), Toks[0].getText());
156
157 ASSERT_EQ(tok::newline, Toks[1].getKind());
158 ASSERT_EQ(tok::newline, Toks[2].getKind());
159 }
160}
161
162// Test newline escaping.
163TEST_F(CommentLexerTest, Basic6) {
164 const char *Sources[] = {
165 "// Aaa\\\n" " Bbb\\ \n" " Ccc?" "?/\n",
166 "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n",
167 "// Aaa\\\r" " Bbb\\ \r" " Ccc?" "?/\r"
168 };
169
170 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
171 std::vector<Token> Toks;
172
173 lexString(Sources[i], Toks);
174
175 ASSERT_EQ(10U, Toks.size());
176
177 ASSERT_EQ(tok::text, Toks[0].getKind());
178 ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
179 ASSERT_EQ(tok::text, Toks[1].getKind());
180 ASSERT_EQ(StringRef("\\"), Toks[1].getText());
181 ASSERT_EQ(tok::newline, Toks[2].getKind());
182
183 ASSERT_EQ(tok::text, Toks[3].getKind());
184 ASSERT_EQ(StringRef(" Bbb"), Toks[3].getText());
185 ASSERT_EQ(tok::text, Toks[4].getKind());
186 ASSERT_EQ(StringRef("\\"), Toks[4].getText());
187 ASSERT_EQ(tok::text, Toks[5].getKind());
188 ASSERT_EQ(StringRef(" "), Toks[5].getText());
189 ASSERT_EQ(tok::newline, Toks[6].getKind());
190
191 ASSERT_EQ(tok::text, Toks[7].getKind());
192 ASSERT_EQ(StringRef(" Ccc?" "?/"), Toks[7].getText());
193 ASSERT_EQ(tok::newline, Toks[8].getKind());
194
195 ASSERT_EQ(tok::newline, Toks[9].getKind());
196 }
197}
198
199// Check that we skip C-style aligned stars correctly.
200TEST_F(CommentLexerTest, Basic7) {
201 const char *Source =
202 "/* Aaa\n"
203 " * Bbb\r\n"
204 "\t* Ccc\n"
205 " ! Ddd\n"
206 " * Eee\n"
207 " ** Fff\n"
208 " */";
209 std::vector<Token> Toks;
210
211 lexString(Source, Toks);
212
213 ASSERT_EQ(15U, Toks.size());
214
215 ASSERT_EQ(tok::text, Toks[0].getKind());
216 ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
217 ASSERT_EQ(tok::newline, Toks[1].getKind());
218
219 ASSERT_EQ(tok::text, Toks[2].getKind());
220 ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText());
221 ASSERT_EQ(tok::newline, Toks[3].getKind());
222
223 ASSERT_EQ(tok::text, Toks[4].getKind());
224 ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText());
225 ASSERT_EQ(tok::newline, Toks[5].getKind());
226
227 ASSERT_EQ(tok::text, Toks[6].getKind());
228 ASSERT_EQ(StringRef(" ! Ddd"), Toks[6].getText());
229 ASSERT_EQ(tok::newline, Toks[7].getKind());
230
231 ASSERT_EQ(tok::text, Toks[8].getKind());
232 ASSERT_EQ(StringRef(" Eee"), Toks[8].getText());
233 ASSERT_EQ(tok::newline, Toks[9].getKind());
234
235 ASSERT_EQ(tok::text, Toks[10].getKind());
236 ASSERT_EQ(StringRef("* Fff"), Toks[10].getText());
237 ASSERT_EQ(tok::newline, Toks[11].getKind());
238
239 ASSERT_EQ(tok::text, Toks[12].getKind());
240 ASSERT_EQ(StringRef(" "), Toks[12].getText());
241
242 ASSERT_EQ(tok::newline, Toks[13].getKind());
243 ASSERT_EQ(tok::newline, Toks[14].getKind());
244}
245
246// A command marker followed by comment end.
247TEST_F(CommentLexerTest, DoxygenCommand1) {
248 const char *Sources[] = { "//@", "///@", "//!@" };
249 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
250 std::vector<Token> Toks;
251
252 lexString(Sources[i], Toks);
253
254 ASSERT_EQ(2U, Toks.size());
255
256 ASSERT_EQ(tok::text, Toks[0].getKind());
257 ASSERT_EQ(StringRef("@"), Toks[0].getText());
258
259 ASSERT_EQ(tok::newline, Toks[1].getKind());
260 }
261}
262
263// A command marker followed by comment end.
264TEST_F(CommentLexerTest, DoxygenCommand2) {
265 const char *Sources[] = { "/*@*/", "/**@*/", "/*!@*/"};
266 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
267 std::vector<Token> Toks;
268
269 lexString(Sources[i], Toks);
270
271 ASSERT_EQ(3U, Toks.size());
272
273 ASSERT_EQ(tok::text, Toks[0].getKind());
274 ASSERT_EQ(StringRef("@"), Toks[0].getText());
275
276 ASSERT_EQ(tok::newline, Toks[1].getKind());
277 ASSERT_EQ(tok::newline, Toks[2].getKind());
278 }
279}
280
281// A command marker followed by comment end.
282TEST_F(CommentLexerTest, DoxygenCommand3) {
283 const char *Sources[] = { "/*\\*/", "/**\\*/" };
284 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
285 std::vector<Token> Toks;
286
287 lexString(Sources[i], Toks);
288
289 ASSERT_EQ(3U, Toks.size());
290
291 ASSERT_EQ(tok::text, Toks[0].getKind());
292 ASSERT_EQ(StringRef("\\"), Toks[0].getText());
293
294 ASSERT_EQ(tok::newline, Toks[1].getKind());
295 ASSERT_EQ(tok::newline, Toks[2].getKind());
296 }
297}
298
299// Doxygen escape sequences.
300TEST_F(CommentLexerTest, DoxygenCommand4) {
301 const char *Sources[] = {
302 "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::",
303 "/// @\\ @@ @& @$ @# @< @> @% @\" @. @::"
304 };
305 const char *Text[] = {
306 " ",
307 "\\", " ", "@", " ", "&", " ", "$", " ", "#", " ",
308 "<", " ", ">", " ", "%", " ", "\"", " ", ".", " ",
309 "::", ""
310 };
311
312 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
313 std::vector<Token> Toks;
314
315 lexString(Sources[i], Toks);
316
317 ASSERT_EQ(std::size(Text), Toks.size());
318
319 for (size_t j = 0, e = Toks.size(); j != e; j++) {
320 if(Toks[j].is(K: tok::text)) {
321 ASSERT_EQ(StringRef(Text[j]), Toks[j].getText())
322 << "index " << i;
323 }
324 }
325 }
326}
327
328// A command marker followed by a non-letter that is not a part of an escape
329// sequence.
330TEST_F(CommentLexerTest, DoxygenCommand5) {
331 const char *Source = "/// \\^ \\0";
332 std::vector<Token> Toks;
333
334 lexString(Source, Toks);
335
336 ASSERT_EQ(6U, Toks.size());
337
338 ASSERT_EQ(tok::text, Toks[0].getKind());
339 ASSERT_EQ(StringRef(" "), Toks[0].getText());
340
341 ASSERT_EQ(tok::text, Toks[1].getKind());
342 ASSERT_EQ(StringRef("\\"), Toks[1].getText());
343
344 ASSERT_EQ(tok::text, Toks[2].getKind());
345 ASSERT_EQ(StringRef("^ "), Toks[2].getText());
346
347 ASSERT_EQ(tok::text, Toks[3].getKind());
348 ASSERT_EQ(StringRef("\\"), Toks[3].getText());
349
350 ASSERT_EQ(tok::text, Toks[4].getKind());
351 ASSERT_EQ(StringRef("0"), Toks[4].getText());
352
353 ASSERT_EQ(tok::newline, Toks[5].getKind());
354}
355
356TEST_F(CommentLexerTest, DoxygenCommand6) {
357 const char *Source = "/// \\brief Aaa.";
358 std::vector<Token> Toks;
359
360 lexString(Source, Toks);
361
362 ASSERT_EQ(4U, Toks.size());
363
364 ASSERT_EQ(tok::text, Toks[0].getKind());
365 ASSERT_EQ(StringRef(" "), Toks[0].getText());
366
367 ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
368 ASSERT_EQ(StringRef("brief"), getCommandName(Toks[1]));
369
370 ASSERT_EQ(tok::text, Toks[2].getKind());
371 ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText());
372
373 ASSERT_EQ(tok::newline, Toks[3].getKind());
374}
375
376TEST_F(CommentLexerTest, DoxygenCommand7) {
377 const char *Source = "/// \\em\\em \\em\t\\em\n";
378 std::vector<Token> Toks;
379
380 lexString(Source, Toks);
381
382 ASSERT_EQ(8U, Toks.size());
383
384 ASSERT_EQ(tok::text, Toks[0].getKind());
385 ASSERT_EQ(StringRef(" "), Toks[0].getText());
386
387 ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
388 ASSERT_EQ(StringRef("em"), getCommandName(Toks[1]));
389
390 ASSERT_EQ(tok::backslash_command, Toks[2].getKind());
391 ASSERT_EQ(StringRef("em"), getCommandName(Toks[2]));
392
393 ASSERT_EQ(tok::text, Toks[3].getKind());
394 ASSERT_EQ(StringRef(" "), Toks[3].getText());
395
396 ASSERT_EQ(tok::backslash_command, Toks[4].getKind());
397 ASSERT_EQ(StringRef("em"), getCommandName(Toks[4]));
398
399 ASSERT_EQ(tok::text, Toks[5].getKind());
400 ASSERT_EQ(StringRef("\t"), Toks[5].getText());
401
402 ASSERT_EQ(tok::backslash_command, Toks[6].getKind());
403 ASSERT_EQ(StringRef("em"), getCommandName(Toks[6]));
404
405 ASSERT_EQ(tok::newline, Toks[7].getKind());
406}
407
408TEST_F(CommentLexerTest, DoxygenCommand8) {
409 const char *Source = "/// @em@em @em\t@em\n";
410 std::vector<Token> Toks;
411
412 lexString(Source, Toks);
413
414 ASSERT_EQ(8U, Toks.size());
415
416 ASSERT_EQ(tok::text, Toks[0].getKind());
417 ASSERT_EQ(StringRef(" "), Toks[0].getText());
418
419 ASSERT_EQ(tok::at_command, Toks[1].getKind());
420 ASSERT_EQ(StringRef("em"), getCommandName(Toks[1]));
421
422 ASSERT_EQ(tok::at_command, Toks[2].getKind());
423 ASSERT_EQ(StringRef("em"), getCommandName(Toks[2]));
424
425 ASSERT_EQ(tok::text, Toks[3].getKind());
426 ASSERT_EQ(StringRef(" "), Toks[3].getText());
427
428 ASSERT_EQ(tok::at_command, Toks[4].getKind());
429 ASSERT_EQ(StringRef("em"), getCommandName(Toks[4]));
430
431 ASSERT_EQ(tok::text, Toks[5].getKind());
432 ASSERT_EQ(StringRef("\t"), Toks[5].getText());
433
434 ASSERT_EQ(tok::at_command, Toks[6].getKind());
435 ASSERT_EQ(StringRef("em"), getCommandName(Toks[6]));
436
437 ASSERT_EQ(tok::newline, Toks[7].getKind());
438}
439
440TEST_F(CommentLexerTest, DoxygenCommand9) {
441 const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n";
442 std::vector<Token> Toks;
443
444 lexString(Source, Toks);
445
446 ASSERT_EQ(8U, Toks.size());
447
448 ASSERT_EQ(tok::text, Toks[0].getKind());
449 ASSERT_EQ(StringRef(" "), Toks[0].getText());
450
451 ASSERT_EQ(tok::unknown_command, Toks[1].getKind());
452 ASSERT_EQ(StringRef("aaa"), Toks[1].getUnknownCommandName());
453
454 ASSERT_EQ(tok::unknown_command, Toks[2].getKind());
455 ASSERT_EQ(StringRef("bbb"), Toks[2].getUnknownCommandName());
456
457 ASSERT_EQ(tok::text, Toks[3].getKind());
458 ASSERT_EQ(StringRef(" "), Toks[3].getText());
459
460 ASSERT_EQ(tok::unknown_command, Toks[4].getKind());
461 ASSERT_EQ(StringRef("ccc"), Toks[4].getUnknownCommandName());
462
463 ASSERT_EQ(tok::text, Toks[5].getKind());
464 ASSERT_EQ(StringRef("\t"), Toks[5].getText());
465
466 ASSERT_EQ(tok::unknown_command, Toks[6].getKind());
467 ASSERT_EQ(StringRef("ddd"), Toks[6].getUnknownCommandName());
468
469 ASSERT_EQ(tok::newline, Toks[7].getKind());
470}
471
472TEST_F(CommentLexerTest, DoxygenCommand10) {
473 const char *Source = "// \\c\n";
474 std::vector<Token> Toks;
475
476 lexString(Source, Toks);
477
478 ASSERT_EQ(3U, Toks.size());
479
480 ASSERT_EQ(tok::text, Toks[0].getKind());
481 ASSERT_EQ(StringRef(" "), Toks[0].getText());
482
483 ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
484 ASSERT_EQ(StringRef("c"), getCommandName(Toks[1]));
485
486 ASSERT_EQ(tok::newline, Toks[2].getKind());
487}
488
489TEST_F(CommentLexerTest, RegisterCustomBlockCommand) {
490 const char *Source =
491 "/// \\NewBlockCommand Aaa.\n"
492 "/// @NewBlockCommand Aaa.\n";
493
494 Traits.registerBlockCommand(StringRef("NewBlockCommand"));
495
496 std::vector<Token> Toks;
497
498 lexString(Source, Toks);
499
500 ASSERT_EQ(8U, Toks.size());
501
502 ASSERT_EQ(tok::text, Toks[0].getKind());
503 ASSERT_EQ(StringRef(" "), Toks[0].getText());
504
505 ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
506 ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[1]));
507
508 ASSERT_EQ(tok::text, Toks[2].getKind());
509 ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText());
510
511 ASSERT_EQ(tok::newline, Toks[3].getKind());
512
513 ASSERT_EQ(tok::text, Toks[4].getKind());
514 ASSERT_EQ(StringRef(" "), Toks[4].getText());
515
516 ASSERT_EQ(tok::at_command, Toks[5].getKind());
517 ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[5]));
518
519 ASSERT_EQ(tok::text, Toks[6].getKind());
520 ASSERT_EQ(StringRef(" Aaa."), Toks[6].getText());
521
522 ASSERT_EQ(tok::newline, Toks[7].getKind());
523}
524
525TEST_F(CommentLexerTest, RegisterMultipleBlockCommands) {
526 const char *Source =
527 "/// \\Foo\n"
528 "/// \\Bar Baz\n"
529 "/// \\Blech quux=corge\n";
530
531 Traits.registerBlockCommand(StringRef("Foo"));
532 Traits.registerBlockCommand(StringRef("Bar"));
533 Traits.registerBlockCommand(StringRef("Blech"));
534
535 std::vector<Token> Toks;
536
537 lexString(Source, Toks);
538
539 ASSERT_EQ(11U, Toks.size());
540
541 ASSERT_EQ(tok::text, Toks[0].getKind());
542 ASSERT_EQ(StringRef(" "), Toks[0].getText());
543
544 ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
545 ASSERT_EQ(StringRef("Foo"), getCommandName(Toks[1]));
546
547 ASSERT_EQ(tok::newline, Toks[2].getKind());
548
549 ASSERT_EQ(tok::text, Toks[3].getKind());
550 ASSERT_EQ(StringRef(" "), Toks[3].getText());
551
552 ASSERT_EQ(tok::backslash_command, Toks[4].getKind());
553 ASSERT_EQ(StringRef("Bar"), getCommandName(Toks[4]));
554
555 ASSERT_EQ(tok::text, Toks[5].getKind());
556 ASSERT_EQ(StringRef(" Baz"), Toks[5].getText());
557
558 ASSERT_EQ(tok::newline, Toks[6].getKind());
559
560 ASSERT_EQ(tok::text, Toks[7].getKind());
561 ASSERT_EQ(StringRef(" "), Toks[7].getText());
562
563 ASSERT_EQ(tok::backslash_command, Toks[8].getKind());
564 ASSERT_EQ(StringRef("Blech"), getCommandName(Toks[8]));
565
566 ASSERT_EQ(tok::text, Toks[9].getKind());
567 ASSERT_EQ(StringRef(" quux=corge"), Toks[9].getText());
568
569 ASSERT_EQ(tok::newline, Toks[10].getKind());
570}
571
572// Empty verbatim block.
573TEST_F(CommentLexerTest, VerbatimBlock1) {
574 const char *Sources[] = {
575 "/// \\verbatim\\endverbatim\n//",
576 "/** \\verbatim\\endverbatim*/"
577 };
578
579 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
580 std::vector<Token> Toks;
581
582 lexString(Sources[i], Toks);
583
584 ASSERT_EQ(5U, Toks.size());
585
586 ASSERT_EQ(tok::text, Toks[0].getKind());
587 ASSERT_EQ(StringRef(" "), Toks[0].getText());
588
589 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
590 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1]));
591
592 ASSERT_EQ(tok::verbatim_block_end, Toks[2].getKind());
593 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[2]));
594
595 ASSERT_EQ(tok::newline, Toks[3].getKind());
596 ASSERT_EQ(tok::newline, Toks[4].getKind());
597 }
598}
599
600// Empty verbatim block without an end command.
601TEST_F(CommentLexerTest, VerbatimBlock2) {
602 const char *Source = "/// \\verbatim";
603
604 std::vector<Token> Toks;
605
606 lexString(Source, Toks);
607
608 ASSERT_EQ(3U, Toks.size());
609
610 ASSERT_EQ(tok::text, Toks[0].getKind());
611 ASSERT_EQ(StringRef(" "), Toks[0].getText());
612
613 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
614 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1]));
615
616 ASSERT_EQ(tok::newline, Toks[2].getKind());
617}
618
619// Empty verbatim block without an end command.
620TEST_F(CommentLexerTest, VerbatimBlock3) {
621 const char *Source = "/** \\verbatim*/";
622
623 std::vector<Token> Toks;
624
625 lexString(Source, Toks);
626
627 ASSERT_EQ(4U, Toks.size());
628
629 ASSERT_EQ(tok::text, Toks[0].getKind());
630 ASSERT_EQ(StringRef(" "), Toks[0].getText());
631
632 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
633 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1]));
634
635 ASSERT_EQ(tok::newline, Toks[2].getKind());
636 ASSERT_EQ(tok::newline, Toks[3].getKind());
637}
638
639// Single-line verbatim block.
640TEST_F(CommentLexerTest, VerbatimBlock4) {
641 const char *Sources[] = {
642 "/// Meow \\verbatim aaa \\endverbatim\n//",
643 "/** Meow \\verbatim aaa \\endverbatim*/"
644 };
645
646 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
647 std::vector<Token> Toks;
648
649 lexString(Sources[i], Toks);
650
651 ASSERT_EQ(6U, Toks.size());
652
653 ASSERT_EQ(tok::text, Toks[0].getKind());
654 ASSERT_EQ(StringRef(" Meow "), Toks[0].getText());
655
656 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
657 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1]));
658
659 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind());
660 ASSERT_EQ(StringRef(" aaa "), Toks[2].getVerbatimBlockText());
661
662 ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind());
663 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[3]));
664
665 ASSERT_EQ(tok::newline, Toks[4].getKind());
666 ASSERT_EQ(tok::newline, Toks[5].getKind());
667 }
668}
669
670// Single-line verbatim block without an end command.
671TEST_F(CommentLexerTest, VerbatimBlock5) {
672 const char *Sources[] = {
673 "/// Meow \\verbatim aaa \n//",
674 "/** Meow \\verbatim aaa */"
675 };
676
677 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
678 std::vector<Token> Toks;
679
680 lexString(Sources[i], Toks);
681
682 ASSERT_EQ(5U, Toks.size());
683
684 ASSERT_EQ(tok::text, Toks[0].getKind());
685 ASSERT_EQ(StringRef(" Meow "), Toks[0].getText());
686
687 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
688 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1]));
689
690 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind());
691 ASSERT_EQ(StringRef(" aaa "), Toks[2].getVerbatimBlockText());
692
693 ASSERT_EQ(tok::newline, Toks[3].getKind());
694 ASSERT_EQ(tok::newline, Toks[4].getKind());
695 }
696}
697
698TEST_F(CommentLexerTest, VerbatimBlock6) {
699 const char *Source =
700 "// \\verbatim\n"
701 "// Aaa\n"
702 "//\n"
703 "// Bbb\n"
704 "// \\endverbatim\n";
705
706 std::vector<Token> Toks;
707
708 lexString(Source, Toks);
709
710 ASSERT_EQ(10U, Toks.size());
711
712 ASSERT_EQ(tok::text, Toks[0].getKind());
713 ASSERT_EQ(StringRef(" "), Toks[0].getText());
714
715 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
716 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1]));
717
718 ASSERT_EQ(tok::newline, Toks[2].getKind());
719
720 ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind());
721 ASSERT_EQ(StringRef(" Aaa"), Toks[3].getVerbatimBlockText());
722
723 ASSERT_EQ(tok::newline, Toks[4].getKind());
724
725 ASSERT_EQ(tok::newline, Toks[5].getKind());
726
727 ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind());
728 ASSERT_EQ(StringRef(" Bbb"), Toks[6].getVerbatimBlockText());
729
730 ASSERT_EQ(tok::newline, Toks[7].getKind());
731
732 ASSERT_EQ(tok::verbatim_block_end, Toks[8].getKind());
733 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[8]));
734
735 ASSERT_EQ(tok::newline, Toks[9].getKind());
736}
737
738TEST_F(CommentLexerTest, VerbatimBlock7) {
739 const char *Source =
740 "/* \\verbatim\n"
741 " * Aaa\n"
742 " *\n"
743 " * Bbb\n"
744 " * \\endverbatim\n"
745 " */";
746
747 std::vector<Token> Toks;
748
749 lexString(Source, Toks);
750
751 ASSERT_EQ(10U, Toks.size());
752
753 ASSERT_EQ(tok::text, Toks[0].getKind());
754 ASSERT_EQ(StringRef(" "), Toks[0].getText());
755
756 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
757 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1]));
758
759 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind());
760 ASSERT_EQ(StringRef(" Aaa"), Toks[2].getVerbatimBlockText());
761
762 ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind());
763 ASSERT_EQ(StringRef(""), Toks[3].getVerbatimBlockText());
764
765 ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind());
766 ASSERT_EQ(StringRef(" Bbb"), Toks[4].getVerbatimBlockText());
767
768 ASSERT_EQ(tok::verbatim_block_end, Toks[5].getKind());
769 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[5]));
770
771 ASSERT_EQ(tok::newline, Toks[6].getKind());
772
773 ASSERT_EQ(tok::text, Toks[7].getKind());
774 ASSERT_EQ(StringRef(" "), Toks[7].getText());
775
776 ASSERT_EQ(tok::newline, Toks[8].getKind());
777 ASSERT_EQ(tok::newline, Toks[9].getKind());
778}
779
780// Complex test for verbatim blocks.
781TEST_F(CommentLexerTest, VerbatimBlock8) {
782 const char *Source =
783 "/* Meow \\verbatim aaa\\$\\@\n"
784 "bbb \\endverbati\r"
785 "ccc\r\n"
786 "ddd \\endverbatim Blah \\verbatim eee\n"
787 "\\endverbatim BlahBlah*/";
788 std::vector<Token> Toks;
789
790 lexString(Source, Toks);
791
792 ASSERT_EQ(14U, Toks.size());
793
794 ASSERT_EQ(tok::text, Toks[0].getKind());
795 ASSERT_EQ(StringRef(" Meow "), Toks[0].getText());
796
797 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
798 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[1]));
799
800 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind());
801 ASSERT_EQ(StringRef(" aaa\\$\\@"), Toks[2].getVerbatimBlockText());
802
803 ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind());
804 ASSERT_EQ(StringRef("bbb \\endverbati"), Toks[3].getVerbatimBlockText());
805
806 ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind());
807 ASSERT_EQ(StringRef("ccc"), Toks[4].getVerbatimBlockText());
808
809 ASSERT_EQ(tok::verbatim_block_line, Toks[5].getKind());
810 ASSERT_EQ(StringRef("ddd "), Toks[5].getVerbatimBlockText());
811
812 ASSERT_EQ(tok::verbatim_block_end, Toks[6].getKind());
813 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[6]));
814
815 ASSERT_EQ(tok::text, Toks[7].getKind());
816 ASSERT_EQ(StringRef(" Blah "), Toks[7].getText());
817
818 ASSERT_EQ(tok::verbatim_block_begin, Toks[8].getKind());
819 ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Toks[8]));
820
821 ASSERT_EQ(tok::verbatim_block_line, Toks[9].getKind());
822 ASSERT_EQ(StringRef(" eee"), Toks[9].getVerbatimBlockText());
823
824 ASSERT_EQ(tok::verbatim_block_end, Toks[10].getKind());
825 ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Toks[10]));
826
827 ASSERT_EQ(tok::text, Toks[11].getKind());
828 ASSERT_EQ(StringRef(" BlahBlah"), Toks[11].getText());
829
830 ASSERT_EQ(tok::newline, Toks[12].getKind());
831 ASSERT_EQ(tok::newline, Toks[13].getKind());
832}
833
834// LaTeX verbatim blocks.
835TEST_F(CommentLexerTest, VerbatimBlock9) {
836 const char *Source =
837 "/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f} \\f( Ddd \\f)";
838 std::vector<Token> Toks;
839
840 lexString(Source, Toks);
841
842 ASSERT_EQ(17U, Toks.size());
843
844 ASSERT_EQ(tok::text, Toks[0].getKind());
845 ASSERT_EQ(StringRef(" "), Toks[0].getText());
846
847 ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
848 ASSERT_EQ(StringRef("f$"), getVerbatimBlockName(Toks[1]));
849
850 ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind());
851 ASSERT_EQ(StringRef(" Aaa "), Toks[2].getVerbatimBlockText());
852
853 ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind());
854 ASSERT_EQ(StringRef("f$"), getVerbatimBlockName(Toks[3]));
855
856 ASSERT_EQ(tok::text, Toks[4].getKind());
857 ASSERT_EQ(StringRef(" "), Toks[4].getText());
858
859 ASSERT_EQ(tok::verbatim_block_begin, Toks[5].getKind());
860 ASSERT_EQ(StringRef("f["), getVerbatimBlockName(Toks[5]));
861
862 ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind());
863 ASSERT_EQ(StringRef(" Bbb "), Toks[6].getVerbatimBlockText());
864
865 ASSERT_EQ(tok::verbatim_block_end, Toks[7].getKind());
866 ASSERT_EQ(StringRef("f]"), getVerbatimBlockName(Toks[7]));
867
868 ASSERT_EQ(tok::text, Toks[8].getKind());
869 ASSERT_EQ(StringRef(" "), Toks[8].getText());
870
871 ASSERT_EQ(tok::verbatim_block_begin, Toks[9].getKind());
872 ASSERT_EQ(StringRef("f{"), getVerbatimBlockName(Toks[9]));
873
874 ASSERT_EQ(tok::verbatim_block_line, Toks[10].getKind());
875 ASSERT_EQ(StringRef(" Ccc "), Toks[10].getVerbatimBlockText());
876
877 ASSERT_EQ(tok::verbatim_block_end, Toks[11].getKind());
878 ASSERT_EQ(StringRef("f}"), getVerbatimBlockName(Toks[11]));
879
880 ASSERT_EQ(tok::text, Toks[12].getKind());
881 ASSERT_EQ(StringRef(" "), Toks[12].getText());
882
883 ASSERT_EQ(tok::verbatim_block_begin, Toks[13].getKind());
884 ASSERT_EQ(StringRef("f("), getVerbatimBlockName(Toks[13]));
885
886 ASSERT_EQ(tok::verbatim_block_line, Toks[14].getKind());
887 ASSERT_EQ(StringRef(" Ddd "), Toks[14].getVerbatimBlockText());
888
889 ASSERT_EQ(tok::verbatim_block_end, Toks[15].getKind());
890 ASSERT_EQ(StringRef("f)"), getVerbatimBlockName(Toks[15]));
891
892 ASSERT_EQ(tok::newline, Toks[16].getKind());
893}
894
895// Empty verbatim line.
896TEST_F(CommentLexerTest, VerbatimLine1) {
897 const char *Sources[] = {
898 "/// \\fn\n//",
899 "/** \\fn*/"
900 };
901
902 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
903 std::vector<Token> Toks;
904
905 lexString(Sources[i], Toks);
906
907 ASSERT_EQ(4U, Toks.size());
908
909 ASSERT_EQ(tok::text, Toks[0].getKind());
910 ASSERT_EQ(StringRef(" "), Toks[0].getText());
911
912 ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
913 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks[1]));
914
915 ASSERT_EQ(tok::newline, Toks[2].getKind());
916 ASSERT_EQ(tok::newline, Toks[3].getKind());
917 }
918}
919
920// Verbatim line with Doxygen escape sequences, which should not be expanded.
921TEST_F(CommentLexerTest, VerbatimLine2) {
922 const char *Sources[] = {
923 "/// \\fn void *foo(const char *zzz = \"\\$\");\n//",
924 "/** \\fn void *foo(const char *zzz = \"\\$\");*/"
925 };
926
927 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
928 std::vector<Token> Toks;
929
930 lexString(Sources[i], Toks);
931
932 ASSERT_EQ(5U, Toks.size());
933
934 ASSERT_EQ(tok::text, Toks[0].getKind());
935 ASSERT_EQ(StringRef(" "), Toks[0].getText());
936
937 ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
938 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks[1]));
939
940 ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind());
941 ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
942 Toks[2].getVerbatimLineText());
943
944 ASSERT_EQ(tok::newline, Toks[3].getKind());
945 ASSERT_EQ(tok::newline, Toks[4].getKind());
946 }
947}
948
949// Verbatim line should not eat anything from next source line.
950TEST_F(CommentLexerTest, VerbatimLine3) {
951 const char *Source =
952 "/** \\fn void *foo(const char *zzz = \"\\$\");\n"
953 " * Meow\n"
954 " */";
955
956 std::vector<Token> Toks;
957
958 lexString(Source, Toks);
959
960 ASSERT_EQ(9U, Toks.size());
961
962 ASSERT_EQ(tok::text, Toks[0].getKind());
963 ASSERT_EQ(StringRef(" "), Toks[0].getText());
964
965 ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
966 ASSERT_EQ(StringRef("fn"), getVerbatimLineName(Toks[1]));
967
968 ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind());
969 ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
970 Toks[2].getVerbatimLineText());
971 ASSERT_EQ(tok::newline, Toks[3].getKind());
972
973 ASSERT_EQ(tok::text, Toks[4].getKind());
974 ASSERT_EQ(StringRef(" Meow"), Toks[4].getText());
975 ASSERT_EQ(tok::newline, Toks[5].getKind());
976
977 ASSERT_EQ(tok::text, Toks[6].getKind());
978 ASSERT_EQ(StringRef(" "), Toks[6].getText());
979
980 ASSERT_EQ(tok::newline, Toks[7].getKind());
981 ASSERT_EQ(tok::newline, Toks[8].getKind());
982}
983
984TEST_F(CommentLexerTest, HTML1) {
985 const char *Source =
986 "// <";
987
988 std::vector<Token> Toks;
989
990 lexString(Source, Toks);
991
992 ASSERT_EQ(3U, Toks.size());
993
994 ASSERT_EQ(tok::text, Toks[0].getKind());
995 ASSERT_EQ(StringRef(" "), Toks[0].getText());
996
997 ASSERT_EQ(tok::text, Toks[1].getKind());
998 ASSERT_EQ(StringRef("<"), Toks[1].getText());
999
1000 ASSERT_EQ(tok::newline, Toks[2].getKind());
1001}
1002
1003TEST_F(CommentLexerTest, HTML2) {
1004 const char *Source =
1005 "// a<2";
1006
1007 std::vector<Token> Toks;
1008
1009 lexString(Source, Toks);
1010
1011 ASSERT_EQ(4U, Toks.size());
1012
1013 ASSERT_EQ(tok::text, Toks[0].getKind());
1014 ASSERT_EQ(StringRef(" a"), Toks[0].getText());
1015
1016 ASSERT_EQ(tok::text, Toks[1].getKind());
1017 ASSERT_EQ(StringRef("<"), Toks[1].getText());
1018
1019 ASSERT_EQ(tok::text, Toks[2].getKind());
1020 ASSERT_EQ(StringRef("2"), Toks[2].getText());
1021
1022 ASSERT_EQ(tok::newline, Toks[3].getKind());
1023}
1024
1025TEST_F(CommentLexerTest, HTML3) {
1026 const char *Source =
1027 "// < img";
1028
1029 std::vector<Token> Toks;
1030
1031 lexString(Source, Toks);
1032
1033 ASSERT_EQ(4U, Toks.size());
1034
1035 ASSERT_EQ(tok::text, Toks[0].getKind());
1036 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1037
1038 ASSERT_EQ(tok::text, Toks[1].getKind());
1039 ASSERT_EQ(StringRef("<"), Toks[1].getText());
1040
1041 ASSERT_EQ(tok::text, Toks[2].getKind());
1042 ASSERT_EQ(StringRef(" img"), Toks[2].getText());
1043
1044 ASSERT_EQ(tok::newline, Toks[3].getKind());
1045}
1046
1047TEST_F(CommentLexerTest, HTML4) {
1048 const char *Sources[] = {
1049 "// <img",
1050 "// <img "
1051 };
1052
1053 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1054 std::vector<Token> Toks;
1055
1056 lexString(Sources[i], Toks);
1057
1058 ASSERT_EQ(3U, Toks.size());
1059
1060 ASSERT_EQ(tok::text, Toks[0].getKind());
1061 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1062
1063 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1064 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1065
1066 ASSERT_EQ(tok::newline, Toks[2].getKind());
1067 }
1068}
1069
1070TEST_F(CommentLexerTest, HTML5) {
1071 const char *Source =
1072 "// <img 42";
1073
1074 std::vector<Token> Toks;
1075
1076 lexString(Source, Toks);
1077
1078 ASSERT_EQ(4U, Toks.size());
1079
1080 ASSERT_EQ(tok::text, Toks[0].getKind());
1081 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1082
1083 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1084 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1085
1086 ASSERT_EQ(tok::text, Toks[2].getKind());
1087 ASSERT_EQ(StringRef("42"), Toks[2].getText());
1088
1089 ASSERT_EQ(tok::newline, Toks[3].getKind());
1090}
1091
1092TEST_F(CommentLexerTest, HTML6) {
1093 const char *Source = "// <img> Meow";
1094
1095 std::vector<Token> Toks;
1096
1097 lexString(Source, Toks);
1098
1099 ASSERT_EQ(5U, Toks.size());
1100
1101 ASSERT_EQ(tok::text, Toks[0].getKind());
1102 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1103
1104 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1105 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1106
1107 ASSERT_EQ(tok::html_greater, Toks[2].getKind());
1108
1109 ASSERT_EQ(tok::text, Toks[3].getKind());
1110 ASSERT_EQ(StringRef(" Meow"), Toks[3].getText());
1111
1112 ASSERT_EQ(tok::newline, Toks[4].getKind());
1113}
1114
1115TEST_F(CommentLexerTest, HTML7) {
1116 const char *Source = "// <img=";
1117
1118 std::vector<Token> Toks;
1119
1120 lexString(Source, Toks);
1121
1122 ASSERT_EQ(4U, Toks.size());
1123
1124 ASSERT_EQ(tok::text, Toks[0].getKind());
1125 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1126
1127 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1128 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1129
1130 ASSERT_EQ(tok::text, Toks[2].getKind());
1131 ASSERT_EQ(StringRef("="), Toks[2].getText());
1132
1133 ASSERT_EQ(tok::newline, Toks[3].getKind());
1134}
1135
1136TEST_F(CommentLexerTest, HTML8) {
1137 const char *Source = "// <img src=> Meow";
1138
1139 std::vector<Token> Toks;
1140
1141 lexString(Source, Toks);
1142
1143 ASSERT_EQ(7U, Toks.size());
1144
1145 ASSERT_EQ(tok::text, Toks[0].getKind());
1146 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1147
1148 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1149 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1150
1151 ASSERT_EQ(tok::html_ident, Toks[2].getKind());
1152 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
1153
1154 ASSERT_EQ(tok::html_equals, Toks[3].getKind());
1155
1156 ASSERT_EQ(tok::html_greater, Toks[4].getKind());
1157
1158 ASSERT_EQ(tok::text, Toks[5].getKind());
1159 ASSERT_EQ(StringRef(" Meow"), Toks[5].getText());
1160
1161 ASSERT_EQ(tok::newline, Toks[6].getKind());
1162}
1163
1164TEST_F(CommentLexerTest, HTML9) {
1165 const char *Sources[] = {
1166 "// <img src",
1167 "// <img src "
1168 };
1169
1170 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1171 std::vector<Token> Toks;
1172
1173 lexString(Sources[i], Toks);
1174
1175 ASSERT_EQ(4U, Toks.size());
1176
1177 ASSERT_EQ(tok::text, Toks[0].getKind());
1178 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1179
1180 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1181 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1182
1183 ASSERT_EQ(tok::html_ident, Toks[2].getKind());
1184 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
1185
1186 ASSERT_EQ(tok::newline, Toks[3].getKind());
1187 }
1188}
1189
1190TEST_F(CommentLexerTest, HTML10) {
1191 const char *Sources[] = {
1192 "// <img src=",
1193 "// <img src ="
1194 };
1195
1196 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1197 std::vector<Token> Toks;
1198
1199 lexString(Sources[i], Toks);
1200
1201 ASSERT_EQ(5U, Toks.size());
1202
1203 ASSERT_EQ(tok::text, Toks[0].getKind());
1204 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1205
1206 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1207 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1208
1209 ASSERT_EQ(tok::html_ident, Toks[2].getKind());
1210 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
1211
1212 ASSERT_EQ(tok::html_equals, Toks[3].getKind());
1213
1214 ASSERT_EQ(tok::newline, Toks[4].getKind());
1215 }
1216}
1217
1218TEST_F(CommentLexerTest, HTML11) {
1219 const char *Sources[] = {
1220 "// <img src=\"",
1221 "// <img src = \"",
1222 "// <img src=\'",
1223 "// <img src = \'"
1224 };
1225
1226 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1227 std::vector<Token> Toks;
1228
1229 lexString(Sources[i], Toks);
1230
1231 ASSERT_EQ(6U, Toks.size());
1232
1233 ASSERT_EQ(tok::text, Toks[0].getKind());
1234 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1235
1236 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1237 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1238
1239 ASSERT_EQ(tok::html_ident, Toks[2].getKind());
1240 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
1241
1242 ASSERT_EQ(tok::html_equals, Toks[3].getKind());
1243
1244 ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
1245 ASSERT_EQ(StringRef(""), Toks[4].getHTMLQuotedString());
1246
1247 ASSERT_EQ(tok::newline, Toks[5].getKind());
1248 }
1249}
1250
1251TEST_F(CommentLexerTest, HTML12) {
1252 const char *Source = "// <img src=@";
1253
1254 std::vector<Token> Toks;
1255
1256 lexString(Source, Toks);
1257
1258 ASSERT_EQ(6U, Toks.size());
1259
1260 ASSERT_EQ(tok::text, Toks[0].getKind());
1261 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1262
1263 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1264 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1265
1266 ASSERT_EQ(tok::html_ident, Toks[2].getKind());
1267 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
1268
1269 ASSERT_EQ(tok::html_equals, Toks[3].getKind());
1270
1271 ASSERT_EQ(tok::text, Toks[4].getKind());
1272 ASSERT_EQ(StringRef("@"), Toks[4].getText());
1273
1274 ASSERT_EQ(tok::newline, Toks[5].getKind());
1275}
1276
1277TEST_F(CommentLexerTest, HTML13) {
1278 const char *Sources[] = {
1279 "// <img src=\"val\\\"\\'val",
1280 "// <img src=\"val\\\"\\'val\"",
1281 "// <img src=\'val\\\"\\'val",
1282 "// <img src=\'val\\\"\\'val\'"
1283 };
1284
1285 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1286 std::vector<Token> Toks;
1287
1288 lexString(Sources[i], Toks);
1289
1290 ASSERT_EQ(6U, Toks.size());
1291
1292 ASSERT_EQ(tok::text, Toks[0].getKind());
1293 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1294
1295 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1296 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1297
1298 ASSERT_EQ(tok::html_ident, Toks[2].getKind());
1299 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
1300
1301 ASSERT_EQ(tok::html_equals, Toks[3].getKind());
1302
1303 ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
1304 ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());
1305
1306 ASSERT_EQ(tok::newline, Toks[5].getKind());
1307 }
1308}
1309
1310TEST_F(CommentLexerTest, HTML14) {
1311 const char *Sources[] = {
1312 "// <img src=\"val\\\"\\'val\">",
1313 "// <img src=\'val\\\"\\'val\'>"
1314 };
1315
1316 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1317 std::vector<Token> Toks;
1318
1319 lexString(Sources[i], Toks);
1320
1321 ASSERT_EQ(7U, Toks.size());
1322
1323 ASSERT_EQ(tok::text, Toks[0].getKind());
1324 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1325
1326 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1327 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1328
1329 ASSERT_EQ(tok::html_ident, Toks[2].getKind());
1330 ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
1331
1332 ASSERT_EQ(tok::html_equals, Toks[3].getKind());
1333
1334 ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
1335 ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());
1336
1337 ASSERT_EQ(tok::html_greater, Toks[5].getKind());
1338
1339 ASSERT_EQ(tok::newline, Toks[6].getKind());
1340 }
1341}
1342
1343TEST_F(CommentLexerTest, HTML15) {
1344 const char *Sources[] = {
1345 "// <img/>",
1346 "// <img />"
1347 };
1348
1349 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1350 std::vector<Token> Toks;
1351
1352 lexString(Sources[i], Toks);
1353
1354 ASSERT_EQ(4U, Toks.size());
1355
1356 ASSERT_EQ(tok::text, Toks[0].getKind());
1357 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1358
1359 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1360 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1361
1362 ASSERT_EQ(tok::html_slash_greater, Toks[2].getKind());
1363
1364 ASSERT_EQ(tok::newline, Toks[3].getKind());
1365 }
1366}
1367
1368TEST_F(CommentLexerTest, HTML16) {
1369 const char *Sources[] = {
1370 "// <img/ Aaa",
1371 "// <img / Aaa"
1372 };
1373
1374 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1375 std::vector<Token> Toks;
1376
1377 lexString(Sources[i], Toks);
1378
1379 ASSERT_EQ(5U, Toks.size());
1380
1381 ASSERT_EQ(tok::text, Toks[0].getKind());
1382 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1383
1384 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1385 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
1386
1387 ASSERT_EQ(tok::text, Toks[2].getKind());
1388 ASSERT_EQ(StringRef("/"), Toks[2].getText());
1389
1390 ASSERT_EQ(tok::text, Toks[3].getKind());
1391 ASSERT_EQ(StringRef(" Aaa"), Toks[3].getText());
1392
1393 ASSERT_EQ(tok::newline, Toks[4].getKind());
1394 }
1395}
1396
1397TEST_F(CommentLexerTest, HTML17) {
1398 const char *Source = "// </";
1399
1400 std::vector<Token> Toks;
1401
1402 lexString(Source, Toks);
1403
1404 ASSERT_EQ(3U, Toks.size());
1405
1406 ASSERT_EQ(tok::text, Toks[0].getKind());
1407 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1408
1409 ASSERT_EQ(tok::text, Toks[1].getKind());
1410 ASSERT_EQ(StringRef("</"), Toks[1].getText());
1411
1412 ASSERT_EQ(tok::newline, Toks[2].getKind());
1413}
1414
1415TEST_F(CommentLexerTest, HTML18) {
1416 const char *Source = "// </@";
1417
1418 std::vector<Token> Toks;
1419
1420 lexString(Source, Toks);
1421
1422 ASSERT_EQ(4U, Toks.size());
1423
1424 ASSERT_EQ(tok::text, Toks[0].getKind());
1425 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1426
1427 ASSERT_EQ(tok::text, Toks[1].getKind());
1428 ASSERT_EQ(StringRef("</"), Toks[1].getText());
1429
1430 ASSERT_EQ(tok::text, Toks[2].getKind());
1431 ASSERT_EQ(StringRef("@"), Toks[2].getText());
1432
1433 ASSERT_EQ(tok::newline, Toks[3].getKind());
1434}
1435
1436TEST_F(CommentLexerTest, HTML19) {
1437 const char *Source = "// </img";
1438
1439 std::vector<Token> Toks;
1440
1441 lexString(Source, Toks);
1442
1443 ASSERT_EQ(3U, Toks.size());
1444
1445 ASSERT_EQ(tok::text, Toks[0].getKind());
1446 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1447
1448 ASSERT_EQ(tok::html_end_tag, Toks[1].getKind());
1449 ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagEndName());
1450
1451 ASSERT_EQ(tok::newline, Toks[2].getKind());
1452}
1453
1454TEST_F(CommentLexerTest, HTML20) {
1455 const char *Source = "// <a\n"
1456 "// \n"
1457 "// href=\"foo\"\n"
1458 "// \n"
1459 "// bar>text</a>";
1460
1461 std::vector<Token> Toks;
1462
1463 lexString(Source, Toks);
1464
1465 ASSERT_EQ(11U, Toks.size());
1466
1467 ASSERT_EQ(tok::text, Toks[0].getKind());
1468 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1469
1470 ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
1471 ASSERT_EQ(StringRef("a"), Toks[1].getHTMLTagStartName());
1472
1473 ASSERT_EQ(tok::html_ident, Toks[2].getKind());
1474 ASSERT_EQ(StringRef("href"), Toks[2].getHTMLIdent());
1475
1476 ASSERT_EQ(tok::html_equals, Toks[3].getKind());
1477
1478 ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
1479 ASSERT_EQ(StringRef("foo"), Toks[4].getHTMLQuotedString());
1480
1481 ASSERT_EQ(tok::html_ident, Toks[5].getKind());
1482 ASSERT_EQ(StringRef("bar"), Toks[5].getHTMLIdent());
1483
1484 ASSERT_EQ(tok::html_greater, Toks[6].getKind());
1485
1486 ASSERT_EQ(tok::text, Toks[7].getKind());
1487 ASSERT_EQ(StringRef("text"), Toks[7].getText());
1488
1489 ASSERT_EQ(tok::html_end_tag, Toks[8].getKind());
1490 ASSERT_EQ(StringRef("a"), Toks[8].getHTMLTagEndName());
1491
1492 ASSERT_EQ(tok::html_greater, Toks[9].getKind());
1493
1494 ASSERT_EQ(tok::newline, Toks[10].getKind());
1495}
1496
1497TEST_F(CommentLexerTest, HTML21) {
1498 const char *Source = "/**\n"
1499 " * <a\n"
1500 " * \n"
1501 " * href=\"foo\"\n"
1502 " * \n"
1503 " * bar>text</a>\n"
1504 " */";
1505
1506 std::vector<Token> Toks;
1507
1508 lexString(Source, Toks);
1509
1510 ASSERT_EQ(15U, Toks.size());
1511
1512 ASSERT_EQ(tok::newline, Toks[0].getKind());
1513
1514 ASSERT_EQ(tok::text, Toks[1].getKind());
1515 ASSERT_EQ(StringRef(" "), Toks[1].getText());
1516
1517 ASSERT_EQ(tok::html_start_tag, Toks[2].getKind());
1518 ASSERT_EQ(StringRef("a"), Toks[2].getHTMLTagStartName());
1519
1520 ASSERT_EQ(tok::html_ident, Toks[3].getKind());
1521 ASSERT_EQ(StringRef("href"), Toks[3].getHTMLIdent());
1522
1523 ASSERT_EQ(tok::html_equals, Toks[4].getKind());
1524
1525 ASSERT_EQ(tok::html_quoted_string, Toks[5].getKind());
1526 ASSERT_EQ(StringRef("foo"), Toks[5].getHTMLQuotedString());
1527
1528 ASSERT_EQ(tok::html_ident, Toks[6].getKind());
1529 ASSERT_EQ(StringRef("bar"), Toks[6].getHTMLIdent());
1530
1531 ASSERT_EQ(tok::html_greater, Toks[7].getKind());
1532
1533 ASSERT_EQ(tok::text, Toks[8].getKind());
1534 ASSERT_EQ(StringRef("text"), Toks[8].getText());
1535
1536 ASSERT_EQ(tok::html_end_tag, Toks[9].getKind());
1537 ASSERT_EQ(StringRef("a"), Toks[9].getHTMLTagEndName());
1538
1539 ASSERT_EQ(tok::html_greater, Toks[10].getKind());
1540
1541 ASSERT_EQ(tok::newline, Toks[11].getKind());
1542
1543 ASSERT_EQ(tok::text, Toks[12].getKind());
1544 ASSERT_EQ(StringRef(" "), Toks[12].getText());
1545
1546 ASSERT_EQ(tok::newline, Toks[13].getKind());
1547
1548 ASSERT_EQ(tok::newline, Toks[14].getKind());
1549}
1550
1551TEST_F(CommentLexerTest, HTML22) {
1552 const char *Source = "/**\n"
1553 " * <a\n"
1554 " */";
1555
1556 std::vector<Token> Toks;
1557
1558 lexString(Source, Toks);
1559
1560 ASSERT_EQ(6U, Toks.size());
1561
1562 ASSERT_EQ(tok::newline, Toks[0].getKind());
1563
1564 ASSERT_EQ(tok::text, Toks[1].getKind());
1565 ASSERT_EQ(StringRef(" "), Toks[1].getText());
1566
1567 ASSERT_EQ(tok::html_start_tag, Toks[2].getKind());
1568 ASSERT_EQ(StringRef("a"), Toks[2].getHTMLTagStartName());
1569
1570 ASSERT_EQ(tok::newline, Toks[3].getKind());
1571
1572 ASSERT_EQ(tok::newline, Toks[4].getKind());
1573
1574 ASSERT_EQ(tok::newline, Toks[5].getKind());
1575}
1576
1577TEST_F(CommentLexerTest, HTML23) {
1578 // NOTE: "//<" is considered a comment start
1579 const char *Source = "// <\n"
1580 "// a\n"
1581 "// >";
1582
1583 std::vector<Token> Toks;
1584
1585 lexString(Source, Toks);
1586
1587 ASSERT_EQ(7U, Toks.size());
1588
1589 ASSERT_EQ(tok::text, Toks[0].getKind());
1590 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1591
1592 ASSERT_EQ(tok::text, Toks[1].getKind());
1593 ASSERT_EQ(StringRef("<"), Toks[1].getText());
1594
1595 ASSERT_EQ(tok::newline, Toks[2].getKind());
1596
1597 ASSERT_EQ(tok::text, Toks[3].getKind());
1598 ASSERT_EQ(StringRef(" a"), Toks[3].getText());
1599
1600 ASSERT_EQ(tok::newline, Toks[4].getKind());
1601
1602 ASSERT_EQ(tok::text, Toks[5].getKind());
1603 ASSERT_EQ(StringRef(" >"), Toks[5].getText());
1604
1605 ASSERT_EQ(tok::newline, Toks[6].getKind());
1606}
1607
1608TEST_F(CommentLexerTest, NotAKnownHTMLTag1) {
1609 const char *Source = "// <tag>";
1610
1611 std::vector<Token> Toks;
1612
1613 lexString(Source, Toks);
1614
1615 ASSERT_EQ(4U, Toks.size());
1616
1617 ASSERT_EQ(tok::text, Toks[0].getKind());
1618 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1619
1620 ASSERT_EQ(tok::text, Toks[1].getKind());
1621 ASSERT_EQ(StringRef("<tag"), Toks[1].getText());
1622
1623 ASSERT_EQ(tok::text, Toks[2].getKind());
1624 ASSERT_EQ(StringRef(">"), Toks[2].getText());
1625
1626 ASSERT_EQ(tok::newline, Toks[3].getKind());
1627}
1628
1629TEST_F(CommentLexerTest, NotAKnownHTMLTag2) {
1630 const char *Source = "// </tag>";
1631
1632 std::vector<Token> Toks;
1633
1634 lexString(Source, Toks);
1635
1636 ASSERT_EQ(4U, Toks.size());
1637
1638 ASSERT_EQ(tok::text, Toks[0].getKind());
1639 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1640
1641 ASSERT_EQ(tok::text, Toks[1].getKind());
1642 ASSERT_EQ(StringRef("</tag"), Toks[1].getText());
1643
1644 ASSERT_EQ(tok::text, Toks[2].getKind());
1645 ASSERT_EQ(StringRef(">"), Toks[2].getText());
1646
1647 ASSERT_EQ(tok::newline, Toks[3].getKind());
1648}
1649
1650TEST_F(CommentLexerTest, HTMLCharacterReferences1) {
1651 const char *Source = "// &";
1652
1653 std::vector<Token> Toks;
1654
1655 lexString(Source, Toks);
1656
1657 ASSERT_EQ(3U, Toks.size());
1658
1659 ASSERT_EQ(tok::text, Toks[0].getKind());
1660 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1661
1662 ASSERT_EQ(tok::text, Toks[1].getKind());
1663 ASSERT_EQ(StringRef("&"), Toks[1].getText());
1664
1665 ASSERT_EQ(tok::newline, Toks[2].getKind());
1666}
1667
1668TEST_F(CommentLexerTest, HTMLCharacterReferences2) {
1669 const char *Source = "// &!";
1670
1671 std::vector<Token> Toks;
1672
1673 lexString(Source, Toks);
1674
1675 ASSERT_EQ(4U, Toks.size());
1676
1677 ASSERT_EQ(tok::text, Toks[0].getKind());
1678 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1679
1680 ASSERT_EQ(tok::text, Toks[1].getKind());
1681 ASSERT_EQ(StringRef("&"), Toks[1].getText());
1682
1683 ASSERT_EQ(tok::text, Toks[2].getKind());
1684 ASSERT_EQ(StringRef("!"), Toks[2].getText());
1685
1686 ASSERT_EQ(tok::newline, Toks[3].getKind());
1687}
1688
1689TEST_F(CommentLexerTest, HTMLCharacterReferences3) {
1690 const char *Source = "// &amp";
1691
1692 std::vector<Token> Toks;
1693
1694 lexString(Source, Toks);
1695
1696 ASSERT_EQ(3U, Toks.size());
1697
1698 ASSERT_EQ(tok::text, Toks[0].getKind());
1699 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1700
1701 ASSERT_EQ(tok::text, Toks[1].getKind());
1702 ASSERT_EQ(StringRef("&amp"), Toks[1].getText());
1703
1704 ASSERT_EQ(tok::newline, Toks[2].getKind());
1705}
1706
1707TEST_F(CommentLexerTest, HTMLCharacterReferences4) {
1708 const char *Source = "// &amp!";
1709
1710 std::vector<Token> Toks;
1711
1712 lexString(Source, Toks);
1713
1714 ASSERT_EQ(4U, Toks.size());
1715
1716 ASSERT_EQ(tok::text, Toks[0].getKind());
1717 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1718
1719 ASSERT_EQ(tok::text, Toks[1].getKind());
1720 ASSERT_EQ(StringRef("&amp"), Toks[1].getText());
1721
1722 ASSERT_EQ(tok::text, Toks[2].getKind());
1723 ASSERT_EQ(StringRef("!"), Toks[2].getText());
1724
1725 ASSERT_EQ(tok::newline, Toks[3].getKind());
1726}
1727
1728TEST_F(CommentLexerTest, HTMLCharacterReferences5) {
1729 const char *Source = "// &#";
1730
1731 std::vector<Token> Toks;
1732
1733 lexString(Source, Toks);
1734
1735 ASSERT_EQ(3U, Toks.size());
1736
1737 ASSERT_EQ(tok::text, Toks[0].getKind());
1738 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1739
1740 ASSERT_EQ(tok::text, Toks[1].getKind());
1741 ASSERT_EQ(StringRef("&#"), Toks[1].getText());
1742
1743 ASSERT_EQ(tok::newline, Toks[2].getKind());
1744}
1745
1746TEST_F(CommentLexerTest, HTMLCharacterReferences6) {
1747 const char *Source = "// &#a";
1748
1749 std::vector<Token> Toks;
1750
1751 lexString(Source, Toks);
1752
1753 ASSERT_EQ(4U, Toks.size());
1754
1755 ASSERT_EQ(tok::text, Toks[0].getKind());
1756 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1757
1758 ASSERT_EQ(tok::text, Toks[1].getKind());
1759 ASSERT_EQ(StringRef("&#"), Toks[1].getText());
1760
1761 ASSERT_EQ(tok::text, Toks[2].getKind());
1762 ASSERT_EQ(StringRef("a"), Toks[2].getText());
1763
1764 ASSERT_EQ(tok::newline, Toks[3].getKind());
1765}
1766
1767TEST_F(CommentLexerTest, HTMLCharacterReferences7) {
1768 const char *Source = "// &#42";
1769
1770 std::vector<Token> Toks;
1771
1772 lexString(Source, Toks);
1773
1774 ASSERT_EQ(3U, Toks.size());
1775
1776 ASSERT_EQ(tok::text, Toks[0].getKind());
1777 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1778
1779 ASSERT_EQ(tok::text, Toks[1].getKind());
1780 ASSERT_EQ(StringRef("&#42"), Toks[1].getText());
1781
1782 ASSERT_EQ(tok::newline, Toks[2].getKind());
1783}
1784
1785TEST_F(CommentLexerTest, HTMLCharacterReferences8) {
1786 const char *Source = "// &#42a";
1787
1788 std::vector<Token> Toks;
1789
1790 lexString(Source, Toks);
1791
1792 ASSERT_EQ(4U, Toks.size());
1793
1794 ASSERT_EQ(tok::text, Toks[0].getKind());
1795 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1796
1797 ASSERT_EQ(tok::text, Toks[1].getKind());
1798 ASSERT_EQ(StringRef("&#42"), Toks[1].getText());
1799
1800 ASSERT_EQ(tok::text, Toks[2].getKind());
1801 ASSERT_EQ(StringRef("a"), Toks[2].getText());
1802
1803 ASSERT_EQ(tok::newline, Toks[3].getKind());
1804}
1805
1806TEST_F(CommentLexerTest, HTMLCharacterReferences9) {
1807 const char *Source = "// &#x";
1808
1809 std::vector<Token> Toks;
1810
1811 lexString(Source, Toks);
1812
1813 ASSERT_EQ(3U, Toks.size());
1814
1815 ASSERT_EQ(tok::text, Toks[0].getKind());
1816 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1817
1818 ASSERT_EQ(tok::text, Toks[1].getKind());
1819 ASSERT_EQ(StringRef("&#x"), Toks[1].getText());
1820
1821 ASSERT_EQ(tok::newline, Toks[2].getKind());
1822}
1823
1824TEST_F(CommentLexerTest, HTMLCharacterReferences10) {
1825 const char *Source = "// &#xz";
1826
1827 std::vector<Token> Toks;
1828
1829 lexString(Source, Toks);
1830
1831 ASSERT_EQ(4U, Toks.size());
1832
1833 ASSERT_EQ(tok::text, Toks[0].getKind());
1834 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1835
1836 ASSERT_EQ(tok::text, Toks[1].getKind());
1837 ASSERT_EQ(StringRef("&#x"), Toks[1].getText());
1838
1839 ASSERT_EQ(tok::text, Toks[2].getKind());
1840 ASSERT_EQ(StringRef("z"), Toks[2].getText());
1841
1842 ASSERT_EQ(tok::newline, Toks[3].getKind());
1843}
1844
1845TEST_F(CommentLexerTest, HTMLCharacterReferences11) {
1846 const char *Source = "// &#xab";
1847
1848 std::vector<Token> Toks;
1849
1850 lexString(Source, Toks);
1851
1852 ASSERT_EQ(3U, Toks.size());
1853
1854 ASSERT_EQ(tok::text, Toks[0].getKind());
1855 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1856
1857 ASSERT_EQ(tok::text, Toks[1].getKind());
1858 ASSERT_EQ(StringRef("&#xab"), Toks[1].getText());
1859
1860 ASSERT_EQ(tok::newline, Toks[2].getKind());
1861}
1862
1863TEST_F(CommentLexerTest, HTMLCharacterReferences12) {
1864 const char *Source = "// &#xaBz";
1865
1866 std::vector<Token> Toks;
1867
1868 lexString(Source, Toks);
1869
1870 ASSERT_EQ(4U, Toks.size());
1871
1872 ASSERT_EQ(tok::text, Toks[0].getKind());
1873 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1874
1875 ASSERT_EQ(tok::text, Toks[1].getKind());
1876 ASSERT_EQ(StringRef("&#xaB"), Toks[1].getText());
1877
1878 ASSERT_EQ(tok::text, Toks[2].getKind());
1879 ASSERT_EQ(StringRef("z"), Toks[2].getText());
1880
1881 ASSERT_EQ(tok::newline, Toks[3].getKind());
1882}
1883
1884TEST_F(CommentLexerTest, HTMLCharacterReferences13) {
1885 const char *Source = "// &amp;";
1886
1887 std::vector<Token> Toks;
1888
1889 lexString(Source, Toks);
1890
1891 ASSERT_EQ(3U, Toks.size());
1892
1893 ASSERT_EQ(tok::text, Toks[0].getKind());
1894 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1895
1896 ASSERT_EQ(tok::text, Toks[1].getKind());
1897 ASSERT_EQ(StringRef("&"), Toks[1].getText());
1898
1899 ASSERT_EQ(tok::newline, Toks[2].getKind());
1900}
1901
1902TEST_F(CommentLexerTest, HTMLCharacterReferences14) {
1903 const char *Source = "// &amp;&lt;";
1904
1905 std::vector<Token> Toks;
1906
1907 lexString(Source, Toks);
1908
1909 ASSERT_EQ(4U, Toks.size());
1910
1911 ASSERT_EQ(tok::text, Toks[0].getKind());
1912 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1913
1914 ASSERT_EQ(tok::text, Toks[1].getKind());
1915 ASSERT_EQ(StringRef("&"), Toks[1].getText());
1916
1917 ASSERT_EQ(tok::text, Toks[2].getKind());
1918 ASSERT_EQ(StringRef("<"), Toks[2].getText());
1919
1920 ASSERT_EQ(tok::newline, Toks[3].getKind());
1921}
1922
1923TEST_F(CommentLexerTest, HTMLCharacterReferences15) {
1924 const char *Source = "// &amp; meow";
1925
1926 std::vector<Token> Toks;
1927
1928 lexString(Source, Toks);
1929
1930 ASSERT_EQ(4U, Toks.size());
1931
1932 ASSERT_EQ(tok::text, Toks[0].getKind());
1933 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1934
1935 ASSERT_EQ(tok::text, Toks[1].getKind());
1936 ASSERT_EQ(StringRef("&"), Toks[1].getText());
1937
1938 ASSERT_EQ(tok::text, Toks[2].getKind());
1939 ASSERT_EQ(StringRef(" meow"), Toks[2].getText());
1940
1941 ASSERT_EQ(tok::newline, Toks[3].getKind());
1942}
1943
1944TEST_F(CommentLexerTest, HTMLCharacterReferences16) {
1945 const char *Sources[] = {
1946 "// &#61;",
1947 "// &#x3d;",
1948 "// &#X3d;",
1949 "// &#X3D;"
1950 };
1951
1952 for (size_t i = 0, e = std::size(Sources); i != e; i++) {
1953 std::vector<Token> Toks;
1954
1955 lexString(Sources[i], Toks);
1956
1957 ASSERT_EQ(3U, Toks.size());
1958
1959 ASSERT_EQ(tok::text, Toks[0].getKind());
1960 ASSERT_EQ(StringRef(" "), Toks[0].getText());
1961
1962 ASSERT_EQ(tok::text, Toks[1].getKind());
1963 ASSERT_EQ(StringRef("="), Toks[1].getText());
1964
1965 ASSERT_EQ(tok::newline, Toks[2].getKind());
1966 }
1967}
1968
1969TEST_F(CommentLexerTest, MultipleComments) {
1970 const char *Source =
1971 "// Aaa\n"
1972 "/// Bbb\n"
1973 "/* Ccc\n"
1974 " * Ddd*/\n"
1975 "/** Eee*/";
1976
1977 std::vector<Token> Toks;
1978
1979 lexString(Source, Toks);
1980
1981 ASSERT_EQ(12U, Toks.size());
1982
1983 ASSERT_EQ(tok::text, Toks[0].getKind());
1984 ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
1985 ASSERT_EQ(tok::newline, Toks[1].getKind());
1986
1987 ASSERT_EQ(tok::text, Toks[2].getKind());
1988 ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText());
1989 ASSERT_EQ(tok::newline, Toks[3].getKind());
1990
1991 ASSERT_EQ(tok::text, Toks[4].getKind());
1992 ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText());
1993 ASSERT_EQ(tok::newline, Toks[5].getKind());
1994
1995 ASSERT_EQ(tok::text, Toks[6].getKind());
1996 ASSERT_EQ(StringRef(" Ddd"), Toks[6].getText());
1997 ASSERT_EQ(tok::newline, Toks[7].getKind());
1998 ASSERT_EQ(tok::newline, Toks[8].getKind());
1999
2000 ASSERT_EQ(tok::text, Toks[9].getKind());
2001 ASSERT_EQ(StringRef(" Eee"), Toks[9].getText());
2002
2003 ASSERT_EQ(tok::newline, Toks[10].getKind());
2004 ASSERT_EQ(tok::newline, Toks[11].getKind());
2005}
2006
2007} // end namespace comments
2008} // end namespace clang
2009
2010

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of clang/unittests/AST/CommentLexer.cpp