//===--- TokenTest.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "clang-pseudo/Token.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TokenKinds.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

namespace clang {
namespace pseudo {
namespace {

using testing::AllOf;
using testing::ElementsAre;
using testing::ElementsAreArray;
using testing::Not;

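// Matches a token with the given spelled text and token kind.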
MATCHER_P2(token, Text, Kind, "") {
  return arg.Kind == Kind && arg.text() == Text;
}

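// Matches a token that has the given LexFlag set.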
MATCHER_P(hasFlag, Flag, "") { return arg.flag(Flag); }

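// Matches a token's 0-based line number and the indentation of that line.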
MATCHER_P2(lineIndent, Line, Indent, "") {
  return arg.Line == (unsigned)Line && arg.Indent == (unsigned)Indent;
}

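// Matches a token's index in the stream it was derived from.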
MATCHER_P(originalIndex, index, "") {
  return arg.OriginalIndex == (Token::Index)index;
}

TEST(TokenTest, Lex) {
  LangOptions Opts;
  std::string Code = R"cpp(
#include <stdio.h>
int main() {
  return 42; // the answer
}
)cpp";
  TokenStream Raw = lex(Code, Opts);
  ASSERT_TRUE(Raw.isFinalized());
  EXPECT_THAT(Raw.tokens(),
              ElementsAreArray({
                  // Lexing of directives is weird, especially <angled> strings.
                  token("#", tok::hash),
                  token("include", tok::raw_identifier),
                  token("<", tok::less),
                  token("stdio", tok::raw_identifier),
                  token(".", tok::period),
                  token("h", tok::raw_identifier),
                  token(">", tok::greater),

                  token("int", tok::raw_identifier),
                  token("main", tok::raw_identifier),
                  token("(", tok::l_paren),
                  token(")", tok::r_paren),
                  token("{", tok::l_brace),
                  token("return", tok::raw_identifier),
                  token("42", tok::numeric_constant),
                  token(";", tok::semi),
                  token("// the answer", tok::comment),
                  token("}", tok::r_brace),
              }));

  TokenStream Cooked = cook(Raw, Opts);
  ASSERT_TRUE(Cooked.isFinalized());
  EXPECT_THAT(Cooked.tokens(),
              ElementsAreArray({
                  // Cooked identifier types in directives are not meaningful.
                  token("#", tok::hash),
                  token("include", tok::identifier),
                  token("<", tok::less),
                  token("stdio", tok::identifier),
                  token(".", tok::period),
                  token("h", tok::identifier),
                  token(">", tok::greater),

                  token("int", tok::kw_int),
                  token("main", tok::identifier),
                  token("(", tok::l_paren),
                  token(")", tok::r_paren),
                  token("{", tok::l_brace),
                  token("return", tok::kw_return),
                  token("42", tok::numeric_constant),
                  token(";", tok::semi),
                  token("// the answer", tok::comment),
                  token("}", tok::r_brace),
              }));
  // Check raw tokens point back into original source code.
  EXPECT_EQ(Raw.tokens().front().text().begin(), &Code[Code.find('#')]);
}

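// An escaped newline may fall between tokens or inside one; a token whose text
// contains a splice is flagged NeedsCleaning, and cook() removes the splice.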
TEST(TokenTest, LineContinuation) {
  LangOptions Opts;
  std::string Code = R"cpp(
one_\
token
two \
tokens
)cpp";
  TokenStream Raw = lex(Code, Opts);
  EXPECT_THAT(
      Raw.tokens(),
      ElementsAre(AllOf(token("one_\\\ntoken", tok::raw_identifier),
                        hasFlag(LexFlags::StartsPPLine),
                        hasFlag(LexFlags::NeedsCleaning), lineIndent(1, 0),
                        originalIndex(0)),
                  AllOf(token("two", tok::raw_identifier),
                        hasFlag(LexFlags::StartsPPLine),
                        Not(hasFlag(LexFlags::NeedsCleaning)),
                        originalIndex(1)),
                  AllOf(token("\\\ntokens", tok::raw_identifier),
                        Not(hasFlag(LexFlags::StartsPPLine)),
                        hasFlag(LexFlags::NeedsCleaning), originalIndex(2))));

  TokenStream Cooked = cook(Raw, Opts);
  EXPECT_THAT(
      Cooked.tokens(),
      ElementsAre(AllOf(token("one_token", tok::identifier), lineIndent(1, 0),
                        originalIndex(0)),
                  AllOf(token("two", tok::identifier), originalIndex(1)),
                  AllOf(token("tokens", tok::identifier), originalIndex(2))));
}

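// Raw lexing leaves trigraphs, UCNs, and alternative operator spellings in the
// token text; cook() substitutes or reinterprets them.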
TEST(TokenTest, EncodedCharacters) {
  LangOptions Opts;
  Opts.Trigraphs = true;
  Opts.Digraphs = true;
  Opts.C99 = true; // UCNs
  Opts.CXXOperatorNames = true;
  std::string Code = R"(and <: ??! '??=' \u00E9)";
  TokenStream Raw = lex(Code, Opts);
  EXPECT_THAT(
      Raw.tokens(),
      ElementsAre( // and is not recognized as && until cook().
          AllOf(token("and", tok::raw_identifier),
                Not(hasFlag(LexFlags::NeedsCleaning))),
          // Digraphs are just different spellings of tokens.
          AllOf(token("<:", tok::l_square),
                Not(hasFlag(LexFlags::NeedsCleaning))),
          // Trigraphs are interpreted, but still need text cleaning.
          AllOf(token(R"(??!)", tok::pipe), hasFlag(LexFlags::NeedsCleaning)),
          // Trigraphs must be substituted inside constants too.
          AllOf(token(R"('??=')", tok::char_constant),
                hasFlag(LexFlags::NeedsCleaning)),
          // UCNs need substitution.
          AllOf(token(R"(\u00E9)", tok::raw_identifier),
                hasFlag(LexFlags::NeedsCleaning))));

  TokenStream Cooked = cook(Raw, Opts);
  EXPECT_THAT(
      Cooked.tokens(),
      ElementsAre(token("and", tok::ampamp), // alternate spelling recognized
                  token("<:", tok::l_square),
                  token("|", tok::pipe),            // trigraph substituted
                  token("'#'", tok::char_constant), // trigraph substituted
                  token("é", tok::identifier)));    // UCN substituted
}

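// Tokens record the line they start on and that line's indentation; a
// backslash-continued line still counts as a new line.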
TEST(TokenTest, Indentation) {
  LangOptions Opts;
  std::string Code = R"cpp(   hello world
no_indent \
  line_was_continued
)cpp";
  TokenStream Raw = lex(Code, Opts);
  EXPECT_THAT(Raw.tokens(), ElementsAreArray({
                                lineIndent(0, 3), // hello
                                lineIndent(0, 3), // world
                                lineIndent(1, 0), // no_indent
                                lineIndent(2, 2), // line_was_continued
                            }));
}

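// In the cooked stream, ">>" is split into two ">" tokens (">>=" is not);
// both halves report the index of the original ">>" token.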
TEST(TokenTest, SplitGreaterGreater) {
  LangOptions Opts;
  std::string Code = R"cpp(
>> // split
// >> with an escaped newline in the middle, split
>\
>
>>= // not split
)cpp";
  TokenStream Cook = cook(lex(Code, Opts), Opts);
  TokenStream Split = stripComments(Cook);
  EXPECT_THAT(Split.tokens(),
              ElementsAre(AllOf(token(">", tok::greater), originalIndex(0)),
                          AllOf(token(">", tok::greater), originalIndex(0)),
                          // Tokens 1 and 2 are comments.
                          AllOf(token(">", tok::greater), originalIndex(3)),
                          AllOf(token(">", tok::greater), originalIndex(3)),
                          AllOf(token(">>=", tok::greatergreaterequal),
                                originalIndex(4))));
}

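// stripComments() removes comment tokens; the remaining tokens keep the
// indices they had in the input stream.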
TEST(TokenTest, DropComments) {
  LangOptions Opts;
  std::string Code = R"cpp(
// comment
int /*abc*/;
)cpp";
  TokenStream Raw = cook(lex(Code, Opts), Opts);
  TokenStream Stripped = stripComments(Raw);
  EXPECT_THAT(
      Raw.tokens(),
      ElementsAre(AllOf(token("// comment", tok::comment), originalIndex(0)),
                  AllOf(token("int", tok::kw_int), originalIndex(1)),
                  AllOf(token("/*abc*/", tok::comment), originalIndex(2)),
                  AllOf(token(";", tok::semi), originalIndex(3))));

  EXPECT_THAT(Stripped.tokens(),
              ElementsAre(AllOf(token("int", tok::kw_int), originalIndex(1)),
                          AllOf(token(";", tok::semi), originalIndex(3))));
}

} // namespace
} // namespace pseudo
} // namespace clang