1//===--- TokenTest.cpp ----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang-pseudo/Token.h"
10#include "clang/Basic/LangOptions.h"
11#include "clang/Basic/TokenKinds.h"
12#include "gmock/gmock.h"
13#include "gtest/gtest.h"
14
15namespace clang {
16namespace pseudo {
17namespace {
18
19using testing::AllOf;
20using testing::ElementsAre;
21using testing::ElementsAreArray;
22using testing::Not;
23
24MATCHER_P2(token, Text, Kind, "") {
25 return arg.Kind == Kind && arg.text() == Text;
26}
27
28MATCHER_P(hasFlag, Flag, "") { return arg.flag(Flag); }
29
30MATCHER_P2(lineIndent, Line, Indent, "") {
31 return arg.Line == (unsigned)Line && arg.Indent == (unsigned)Indent;
32}
33
34MATCHER_P(originalIndex, index, "") {
35 return arg.OriginalIndex == (Token::Index)index;
36}
37
38TEST(TokenTest, Lex) {
39 LangOptions Opts;
40 std::string Code = R"cpp(
41 #include <stdio.h>
42 int main() {
43 return 42; // the answer
44 }
45 )cpp";
46 TokenStream Raw = lex(Code, Opts);
47 ASSERT_TRUE(Raw.isFinalized());
48 EXPECT_THAT(Raw.tokens(),
49 ElementsAreArray({
50 // Lexing of directives is weird, especially <angled> strings.
51 token("#", tok::hash),
52 token("include", tok::raw_identifier),
53 token("<", tok::less),
54 token("stdio", tok::raw_identifier),
55 token(".", tok::period),
56 token("h", tok::raw_identifier),
57 token(">", tok::greater),
58
59 token("int", tok::raw_identifier),
60 token("main", tok::raw_identifier),
61 token("(", tok::l_paren),
62 token(")", tok::r_paren),
63 token("{", tok::l_brace),
64 token("return", tok::raw_identifier),
65 token("42", tok::numeric_constant),
66 token(";", tok::semi),
67 token("// the answer", tok::comment),
68 token("}", tok::r_brace),
69 }));
70
71 TokenStream Cooked = cook(Raw, Opts);
72 ASSERT_TRUE(Cooked.isFinalized());
73 EXPECT_THAT(Cooked.tokens(),
74 ElementsAreArray({
75 // Cooked identifier types in directives are not meaningful.
76 token("#", tok::hash),
77 token("include", tok::identifier),
78 token("<", tok::less),
79 token("stdio", tok::identifier),
80 token(".", tok::period),
81 token("h", tok::identifier),
82 token(">", tok::greater),
83
84 token("int", tok::kw_int),
85 token("main", tok::identifier),
86 token("(", tok::l_paren),
87 token(")", tok::r_paren),
88 token("{", tok::l_brace),
89 token("return", tok::kw_return),
90 token("42", tok::numeric_constant),
91 token(";", tok::semi),
92 token("// the answer", tok::comment),
93 token("}", tok::r_brace),
94 }));
95 // Check raw tokens point back into original source code.
96 EXPECT_EQ(Raw.tokens().front().text().begin(), &Code[Code.find('#')]);
97}
98
99TEST(TokenTest, LineContinuation) {
100 LangOptions Opts;
101 std::string Code = R"cpp(
102one_\
103token
104two \
105tokens
106 )cpp";
107 TokenStream Raw = lex(Code, Opts);
108 EXPECT_THAT(
109 Raw.tokens(),
110 ElementsAre(AllOf(token("one_\\\ntoken", tok::raw_identifier),
111 hasFlag(LexFlags::StartsPPLine),
112 hasFlag(LexFlags::NeedsCleaning), lineIndent(1, 0),
113 originalIndex(0)),
114 AllOf(token("two", tok::raw_identifier),
115 hasFlag(LexFlags::StartsPPLine),
116 Not(hasFlag(LexFlags::NeedsCleaning)),
117 originalIndex(1)),
118 AllOf(token("\\\ntokens", tok::raw_identifier),
119 Not(hasFlag(LexFlags::StartsPPLine)),
120 hasFlag(LexFlags::NeedsCleaning), originalIndex(2))));
121
122 TokenStream Cooked = cook(Raw, Opts);
123 EXPECT_THAT(
124 Cooked.tokens(),
125 ElementsAre(AllOf(token("one_token", tok::identifier), lineIndent(1, 0),
126 originalIndex(0)),
127 AllOf(token("two", tok::identifier), originalIndex(1)),
128 AllOf(token("tokens", tok::identifier), originalIndex(2))));
129}
130
131TEST(TokenTest, EncodedCharacters) {
132 LangOptions Opts;
133 Opts.Trigraphs = true;
134 Opts.Digraphs = true;
135 Opts.C99 = true; // UCNs
136 Opts.CXXOperatorNames = true;
137 std::string Code = R"(and <: ??! '??=' \u00E9)";
138 TokenStream Raw = lex(Code, Opts);
139 EXPECT_THAT(
140 Raw.tokens(),
141 ElementsAre( // and is not recognized as && until cook().
142 AllOf(token("and", tok::raw_identifier),
143 Not(hasFlag(LexFlags::NeedsCleaning))),
144 // Digraphs are just different spellings of tokens.
145 AllOf(token("<:", tok::l_square),
146 Not(hasFlag(LexFlags::NeedsCleaning))),
147 // Trigraps are interpreted, still need text cleaning.
148 AllOf(token(R"(??!)", tok::pipe), hasFlag(LexFlags::NeedsCleaning)),
149 // Trigraphs must be substituted inside constants too.
150 AllOf(token(R"('??=')", tok::char_constant),
151 hasFlag(LexFlags::NeedsCleaning)),
152 // UCNs need substitution.
153 AllOf(token(R"(\u00E9)", tok::raw_identifier),
154 hasFlag(LexFlags::NeedsCleaning))));
155
156 TokenStream Cooked = cook(Raw, Opts);
157 EXPECT_THAT(
158 Cooked.tokens(),
159 ElementsAre(token("and", tok::ampamp), // alternate spelling recognized
160 token("<:", tok::l_square),
161 token("|", tok::pipe), // trigraph substituted
162 token("'#'", tok::char_constant), // trigraph substituted
163 token("é", tok::identifier))); // UCN substituted
164}
165
166TEST(TokenTest, Indentation) {
167 LangOptions Opts;
168 std::string Code = R"cpp( hello world
169no_indent \
170 line_was_continued
171)cpp";
172 TokenStream Raw = lex(Code, Opts);
173 EXPECT_THAT(Raw.tokens(), ElementsAreArray({
174 lineIndent(0, 3), // hello
175 lineIndent(0, 3), // world
176 lineIndent(1, 0), // no_indent
177 lineIndent(2, 2), // line_was_continued
178 }));
179}
180
181TEST(TokenTest, SplitGreaterGreater) {
182 LangOptions Opts;
183 std::string Code = R"cpp(
184>> // split
185// >> with an escaped newline in the middle, split
186>\
187>
188>>= // not split
189)cpp";
190 TokenStream Cook = cook(lex(Code, Opts), Opts);
191 TokenStream Split = stripComments(Cook);
192 EXPECT_THAT(Split.tokens(),
193 ElementsAre(AllOf(token(">", tok::greater), originalIndex(0)),
194 AllOf(token(">", tok::greater), originalIndex(0)),
195 // Token 1 and 2 are comments.
196 AllOf(token(">", tok::greater), originalIndex(3)),
197 AllOf(token(">", tok::greater), originalIndex(3)),
198 AllOf(token(">>=", tok::greatergreaterequal),
199 originalIndex(4))));
200}
201
202TEST(TokenTest, DropComments) {
203 LangOptions Opts;
204 std::string Code = R"cpp(
205 // comment
206 int /*abc*/;
207)cpp";
208 TokenStream Raw = cook(lex(Code, Opts), Opts);
209 TokenStream Stripped = stripComments(Raw);
210 EXPECT_THAT(
211 Raw.tokens(),
212 ElementsAre(AllOf(token("// comment", tok::comment), originalIndex(0)),
213 AllOf(token("int", tok::kw_int), originalIndex(1)),
214 AllOf(token("/*abc*/", tok::comment), originalIndex(2)),
215 AllOf(token(";", tok::semi), originalIndex(3))));
216
217 EXPECT_THAT(Stripped.tokens(),
218 ElementsAre(AllOf(token("int", tok::kw_int), originalIndex(1)),
219 AllOf(token(";", tok::semi), originalIndex(3))));
220}
221
222} // namespace
223} // namespace pseudo
224} // namespace clang
225

source code of clang-tools-extra/pseudo/unittests/TokenTest.cpp