1//===--- CXX.cpp - Define public interfaces for C++ grammar ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang-pseudo/cxx/CXX.h"
10#include "clang-pseudo/Forest.h"
11#include "clang-pseudo/Language.h"
12#include "clang-pseudo/grammar/Grammar.h"
13#include "clang-pseudo/grammar/LRTable.h"
14#include "clang/Basic/CharInfo.h"
15#include "clang/Basic/TokenKinds.h"
16#include "llvm/ADT/StringSwitch.h"
17#include "llvm/Support/Debug.h"
18#include <utility>
19#define DEBUG_TYPE "CXX.cpp"
20
21namespace clang {
22namespace pseudo {
23namespace cxx {
24namespace {
// Text of the C++ grammar. The initializer expression comes from the included
// CXXBNF.inc (presumably a generated string literal -- confirm against the
// build rules). getLanguage() hands this to Grammar::parseBNF.
static const char *CXXBNF =
#include "CXXBNF.inc"
    ;
28
29// User-defined string literals look like `""suffix`.
30bool isStringUserDefined(const Token &Tok) {
31 return !Tok.text().ends_with(Suffix: "\"");
32}
33bool isCharUserDefined(const Token &Tok) { return !Tok.text().ends_with(Suffix: "'"); }
34
// Bit flags classifying a numeric literal; combine them with `|`.
// Clang lexes every number as a single tok::numeric_constant, whereas the
// grammar has four literal rules: {plain, user-defined} x {integer, floating}.
enum NumericKind {
  Integer = 0x0,     // No flag set: a plain integer literal.
  Floating = 0x1,    // Has a fractional part or an exponent.
  UserDefined = 0x2, // Carries a user-defined-literal suffix.
};
42// Determine the kind of numeric_constant we have.
43// We can assume it's something valid, as it has been lexed.
44// FIXME: is this expensive enough that we should set flags on the token
45// and reuse them rather than computing it for each guard?
46unsigned numKind(const Token &Tok) {
47 assert(Tok.Kind == tok::numeric_constant);
48 llvm::StringRef Text = Tok.text();
49 if (Text.size() <= 1)
50 return Integer;
51 bool Hex =
52 Text.size() > 2 && Text[0] == '0' && (Text[1] == 'x' || Text[1] == 'X');
53 uint8_t K = Integer;
54
55 for (char C : Text) {
56 switch (C) {
57 case '.':
58 K |= Floating;
59 break;
60 case 'e':
61 case 'E':
62 if (!Hex)
63 K |= Floating;
64 break;
65 case 'p':
66 case 'P':
67 if (Hex)
68 K |= Floating;
69 break;
70 case '_':
71 K |= UserDefined;
72 break;
73 default:
74 break;
75 }
76 }
77
78 // We would be done here, but there are stdlib UDLs that lack _.
79 // We must distinguish these from the builtin suffixes.
80 unsigned LastLetter = Text.size();
81 while (LastLetter > 0 && isLetter(c: Text[LastLetter - 1]))
82 --LastLetter;
83 if (LastLetter == Text.size()) // Common case
84 return NumericKind(K);
85 // Trailing d/e/f are not part of the suffix in hex numbers.
86 while (Hex && LastLetter < Text.size() && isHexDigit(c: Text[LastLetter]))
87 ++LastLetter;
88 return llvm::StringSwitch<int, unsigned>(Text.substr(Start: LastLetter))
89 // std::chrono
90 .Cases("h", "min", "s", "ms", "us", "ns", "d", "y", K | UserDefined)
91 // complex
92 .Cases("il", "i", "if", K | UserDefined)
93 .Default(K);
94}
95
96// RHS is expected to contain a single terminal.
97// Returns the corresponding token.
98const Token &onlyToken(tok::TokenKind Kind,
99 const ArrayRef<const ForestNode *> RHS,
100 const TokenStream &Tokens) {
101 assert(RHS.size() == 1 && RHS.front()->symbol() == tokenSymbol(Kind));
102 return Tokens.tokens()[RHS.front()->startTokenIndex()];
103}
104// RHS is expected to contain a single symbol.
105// Returns the corresponding ForestNode.
106const ForestNode &onlySymbol(SymbolID Kind,
107 const ArrayRef<const ForestNode *> RHS,
108 const TokenStream &Tokens) {
109 assert(RHS.size() == 1 && RHS.front()->symbol() == Kind);
110 return *RHS.front();
111}
112
113bool isFunctionDeclarator(const ForestNode *Declarator) {
114 assert(Declarator->symbol() == cxx::Symbol::declarator);
115 bool IsFunction = false;
116 while (true) {
117 // not well-formed code, return the best guess.
118 if (Declarator->kind() != ForestNode::Sequence)
119 return IsFunction;
120
121 switch (Declarator->rule()) {
122 case rule::noptr_declarator::declarator_id: // reached the bottom
123 return IsFunction;
124 // *X is a nonfunction (unless X is a function).
125 case rule::ptr_declarator::ptr_operator__ptr_declarator:
126 Declarator = Declarator->elements()[1];
127 IsFunction = false;
128 continue;
129 // X() is a function (unless X is a pointer or similar).
130 case rule::declarator::
131 noptr_declarator__parameters_and_qualifiers__trailing_return_type:
132 case rule::noptr_declarator::noptr_declarator__parameters_and_qualifiers:
133 Declarator = Declarator->elements()[0];
134 IsFunction = true;
135 continue;
136 // X[] is an array (unless X is a pointer or function).
137 case rule::noptr_declarator::
138 noptr_declarator__L_SQUARE__constant_expression__R_SQUARE:
139 case rule::noptr_declarator::noptr_declarator__L_SQUARE__R_SQUARE:
140 Declarator = Declarator->elements()[0];
141 IsFunction = false;
142 continue;
143 // (X) is whatever X is.
144 case rule::noptr_declarator::L_PAREN__ptr_declarator__R_PAREN:
145 Declarator = Declarator->elements()[1];
146 continue;
147 case rule::ptr_declarator::noptr_declarator:
148 case rule::declarator::ptr_declarator:
149 Declarator = Declarator->elements()[0];
150 continue;
151
152 default:
153 assert(false && "unhandled declarator for IsFunction");
154 return IsFunction;
155 }
156 }
157 llvm_unreachable("unreachable");
158}
159
160bool guardNextTokenNotElse(const GuardParams &P) {
161 return symbolToToken(SID: P.Lookahead) != tok::kw_else;
162}
163
164bool specifiesStructuredBinding(const GuardParams &P) {
165 const auto DSS = P.RHS[0];
166 assert(DSS->symbol() == Symbol::decl_specifier_seq);
167
168 auto Length = P.RHS[1]->startTokenIndex() - DSS->startTokenIndex();
169 for (const auto &T :
170 P.Tokens.tokens().slice(N: DSS->startTokenIndex(), M: Length)) {
171 switch (T.Kind) {
172 case clang::tok::kw_static:
173 case clang::tok::kw_thread_local:
174 case clang::tok::kw_auto:
175 case clang::tok::kw_const:
176 case clang::tok::kw_volatile:
177 break;
178 default:
179 return false;
180 }
181 }
182 return true;
183}
184
185// Whether this e.g. decl-specifier contains an "exclusive" type such as a class
186// name, and thus can't combine with a second exclusive type.
187//
188// Returns false for
189// - non-types
190// - "unsigned" etc that may suffice as types but may modify others
191// - cases of uncertainty (e.g. due to ambiguity)
192bool hasExclusiveType(const ForestNode *N) {
193 // FIXME: every time we apply this check, we walk the whole subtree.
194 // Add per-node caching instead.
195 while (true) {
196 assert(N->symbol() == Symbol::decl_specifier_seq ||
197 N->symbol() == Symbol::type_specifier_seq ||
198 N->symbol() == Symbol::defining_type_specifier_seq ||
199 N->symbol() == Symbol::decl_specifier ||
200 N->symbol() == Symbol::type_specifier ||
201 N->symbol() == Symbol::defining_type_specifier ||
202 N->symbol() == Symbol::simple_type_specifier);
203 if (N->kind() == ForestNode::Opaque)
204 return false; // conservative
205 if (N->kind() == ForestNode::Ambiguous)
206 return llvm::all_of(N->alternatives(), hasExclusiveType); // conservative
207 // All supported symbols are nonterminals.
208 assert(N->kind() == ForestNode::Sequence);
209 switch (N->rule()) {
210 // seq := element seq: check element then continue into seq
211 case rule::decl_specifier_seq::decl_specifier__decl_specifier_seq:
212 case rule::defining_type_specifier_seq::defining_type_specifier__defining_type_specifier_seq:
213 case rule::type_specifier_seq::type_specifier__type_specifier_seq:
214 if (hasExclusiveType(N: N->children()[0]))
215 return true;
216 N = N->children()[1];
217 continue;
218 // seq := element: continue into element
219 case rule::decl_specifier_seq::decl_specifier:
220 case rule::type_specifier_seq::type_specifier:
221 case rule::defining_type_specifier_seq::defining_type_specifier:
222 N = N->children()[0];
223 continue;
224
225 // defining-type-specifier
226 case rule::defining_type_specifier::type_specifier:
227 N = N->children()[0];
228 continue;
229 case rule::defining_type_specifier::class_specifier:
230 case rule::defining_type_specifier::enum_specifier:
231 return true;
232
233 // decl-specifier
234 case rule::decl_specifier::defining_type_specifier:
235 N = N->children()[0];
236 continue;
237 case rule::decl_specifier::CONSTEVAL:
238 case rule::decl_specifier::CONSTEXPR:
239 case rule::decl_specifier::CONSTINIT:
240 case rule::decl_specifier::INLINE:
241 case rule::decl_specifier::FRIEND:
242 case rule::decl_specifier::storage_class_specifier:
243 case rule::decl_specifier::TYPEDEF:
244 case rule::decl_specifier::function_specifier:
245 return false;
246
247 // type-specifier
248 case rule::type_specifier::elaborated_type_specifier:
249 case rule::type_specifier::typename_specifier:
250 return true;
251 case rule::type_specifier::simple_type_specifier:
252 N = N->children()[0];
253 continue;
254 case rule::type_specifier::cv_qualifier:
255 return false;
256
257 // simple-type-specifier
258 case rule::simple_type_specifier::type_name:
259 case rule::simple_type_specifier::template_name:
260 case rule::simple_type_specifier::builtin_type:
261 case rule::simple_type_specifier::nested_name_specifier__TEMPLATE__simple_template_id:
262 case rule::simple_type_specifier::nested_name_specifier__template_name:
263 case rule::simple_type_specifier::nested_name_specifier__type_name:
264 case rule::simple_type_specifier::decltype_specifier:
265 case rule::simple_type_specifier::placeholder_type_specifier:
266 return true;
267 case rule::simple_type_specifier::LONG:
268 case rule::simple_type_specifier::SHORT:
269 case rule::simple_type_specifier::SIGNED:
270 case rule::simple_type_specifier::UNSIGNED:
271 return false;
272
273 default:
274 LLVM_DEBUG(llvm::errs() << "Unhandled rule " << N->rule() << "\n");
275 llvm_unreachable("hasExclusiveType be exhaustive!");
276 }
277 }
278}
279
280llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
281#define GUARD(cond) \
282 { \
283 [](const GuardParams &P) { return cond; } \
284 }
285#define TOKEN_GUARD(kind, cond) \
286 [](const GuardParams& P) { \
287 const Token &Tok = onlyToken(tok::kind, P.RHS, P.Tokens); \
288 return cond; \
289 }
290#define SYMBOL_GUARD(kind, cond) \
291 [](const GuardParams& P) { \
292 const ForestNode &N = onlySymbol(Symbol::kind, P.RHS, P.Tokens); \
293 return cond; \
294 }
295 return {
296 {rule::function_declarator::declarator,
297 SYMBOL_GUARD(declarator, isFunctionDeclarator(&N))},
298 {rule::non_function_declarator::declarator,
299 SYMBOL_GUARD(declarator, !isFunctionDeclarator(&N))},
300
301 // A {decl,type,defining-type}-specifier-sequence cannot have multiple
302 // "exclusive" types (like class names): a value has only one type.
303 {rule::defining_type_specifier_seq::
304 defining_type_specifier__defining_type_specifier_seq,
305 GUARD(!hasExclusiveType(P.RHS[0]) || !hasExclusiveType(P.RHS[1]))},
306 {rule::type_specifier_seq::type_specifier__type_specifier_seq,
307 GUARD(!hasExclusiveType(P.RHS[0]) || !hasExclusiveType(P.RHS[1]))},
308 {rule::decl_specifier_seq::decl_specifier__decl_specifier_seq,
309 GUARD(!hasExclusiveType(P.RHS[0]) || !hasExclusiveType(P.RHS[1]))},
310
311 {rule::contextual_override::IDENTIFIER,
312 TOKEN_GUARD(identifier, Tok.text() == "override")},
313 {rule::contextual_final::IDENTIFIER,
314 TOKEN_GUARD(identifier, Tok.text() == "final")},
315 {rule::import_keyword::IDENTIFIER,
316 TOKEN_GUARD(identifier, Tok.text() == "import")},
317 {rule::export_keyword::IDENTIFIER,
318 TOKEN_GUARD(identifier, Tok.text() == "export")},
319 {rule::module_keyword::IDENTIFIER,
320 TOKEN_GUARD(identifier, Tok.text() == "module")},
321 {rule::contextual_zero::NUMERIC_CONSTANT,
322 TOKEN_GUARD(numeric_constant, Tok.text() == "0")},
323
324 {rule::selection_statement::IF__L_PAREN__condition__R_PAREN__statement,
325 guardNextTokenNotElse},
326 {rule::selection_statement::
327 IF__L_PAREN__init_statement__condition__R_PAREN__statement,
328 guardNextTokenNotElse},
329 {rule::selection_statement::
330 IF__CONSTEXPR__L_PAREN__condition__R_PAREN__statement,
331 guardNextTokenNotElse},
332 {rule::selection_statement::
333 IF__CONSTEXPR__L_PAREN__init_statement__condition__R_PAREN__statement,
334 guardNextTokenNotElse},
335
336 // Implement C++ [basic.lookup.qual.general]:
337 // If a name, template-id, or decltype-specifier is followed by a
338 // ​::​, it shall designate a namespace, class, enumeration, or
339 // dependent type, and the ​::​ is never interpreted as a complete
340 // nested-name-specifier.
341 {rule::nested_name_specifier::COLONCOLON,
342 TOKEN_GUARD(coloncolon, Tok.prev().Kind != tok::identifier)},
343
344 // Implement C++ [dcl.pre#6]:
345 // A simple-declaration with an identifier-list is called a structured
346 // binding declaration ([dcl.struct.bind]). If the decl-specifier-seq
347 // contains any decl-specifier other than static, thread_­local, auto,
348 // or cv-qualifiers, the program is ill-formed.
349 {rule::simple_declaration::
350 decl_specifier_seq__ref_qualifier__L_SQUARE__identifier_list__R_SQUARE__initializer__SEMI,
351 specifiesStructuredBinding},
352 {rule::simple_declaration::
353 decl_specifier_seq__L_SQUARE__identifier_list__R_SQUARE__initializer__SEMI,
354 specifiesStructuredBinding},
355
356 // The grammar distinguishes (only) user-defined vs plain string literals,
357 // where the clang lexer distinguishes (only) encoding types.
358 {rule::user_defined_string_literal_chunk::STRING_LITERAL,
359 TOKEN_GUARD(string_literal, isStringUserDefined(Tok))},
360 {rule::user_defined_string_literal_chunk::UTF8_STRING_LITERAL,
361 TOKEN_GUARD(utf8_string_literal, isStringUserDefined(Tok))},
362 {rule::user_defined_string_literal_chunk::UTF16_STRING_LITERAL,
363 TOKEN_GUARD(utf16_string_literal, isStringUserDefined(Tok))},
364 {rule::user_defined_string_literal_chunk::UTF32_STRING_LITERAL,
365 TOKEN_GUARD(utf32_string_literal, isStringUserDefined(Tok))},
366 {rule::user_defined_string_literal_chunk::WIDE_STRING_LITERAL,
367 TOKEN_GUARD(wide_string_literal, isStringUserDefined(Tok))},
368 {rule::string_literal_chunk::STRING_LITERAL,
369 TOKEN_GUARD(string_literal, !isStringUserDefined(Tok))},
370 {rule::string_literal_chunk::UTF8_STRING_LITERAL,
371 TOKEN_GUARD(utf8_string_literal, !isStringUserDefined(Tok))},
372 {rule::string_literal_chunk::UTF16_STRING_LITERAL,
373 TOKEN_GUARD(utf16_string_literal, !isStringUserDefined(Tok))},
374 {rule::string_literal_chunk::UTF32_STRING_LITERAL,
375 TOKEN_GUARD(utf32_string_literal, !isStringUserDefined(Tok))},
376 {rule::string_literal_chunk::WIDE_STRING_LITERAL,
377 TOKEN_GUARD(wide_string_literal, !isStringUserDefined(Tok))},
378 // And the same for chars.
379 {rule::user_defined_character_literal::CHAR_CONSTANT,
380 TOKEN_GUARD(char_constant, isCharUserDefined(Tok))},
381 {rule::user_defined_character_literal::UTF8_CHAR_CONSTANT,
382 TOKEN_GUARD(utf8_char_constant, isCharUserDefined(Tok))},
383 {rule::user_defined_character_literal::UTF16_CHAR_CONSTANT,
384 TOKEN_GUARD(utf16_char_constant, isCharUserDefined(Tok))},
385 {rule::user_defined_character_literal::UTF32_CHAR_CONSTANT,
386 TOKEN_GUARD(utf32_char_constant, isCharUserDefined(Tok))},
387 {rule::user_defined_character_literal::WIDE_CHAR_CONSTANT,
388 TOKEN_GUARD(wide_char_constant, isCharUserDefined(Tok))},
389 {rule::character_literal::CHAR_CONSTANT,
390 TOKEN_GUARD(char_constant, !isCharUserDefined(Tok))},
391 {rule::character_literal::UTF8_CHAR_CONSTANT,
392 TOKEN_GUARD(utf8_char_constant, !isCharUserDefined(Tok))},
393 {rule::character_literal::UTF16_CHAR_CONSTANT,
394 TOKEN_GUARD(utf16_char_constant, !isCharUserDefined(Tok))},
395 {rule::character_literal::UTF32_CHAR_CONSTANT,
396 TOKEN_GUARD(utf32_char_constant, !isCharUserDefined(Tok))},
397 {rule::character_literal::WIDE_CHAR_CONSTANT,
398 TOKEN_GUARD(wide_char_constant, !isCharUserDefined(Tok))},
399 // clang just has one NUMERIC_CONSTANT token for {ud,plain}x{float,int}
400 {rule::user_defined_integer_literal::NUMERIC_CONSTANT,
401 TOKEN_GUARD(numeric_constant, numKind(Tok) == (Integer | UserDefined))},
402 {rule::user_defined_floating_point_literal::NUMERIC_CONSTANT,
403 TOKEN_GUARD(numeric_constant, numKind(Tok) == (Floating | UserDefined))},
404 {rule::integer_literal::NUMERIC_CONSTANT,
405 TOKEN_GUARD(numeric_constant, numKind(Tok) == Integer)},
406 {rule::floating_point_literal::NUMERIC_CONSTANT,
407 TOKEN_GUARD(numeric_constant, numKind(Tok) == Floating)},
408 };
409#undef TOKEN_GUARD
410#undef SYMBOL_GUARD
411}
412
413Token::Index recoverBrackets(Token::Index Begin, const TokenStream &Tokens) {
414 assert(Begin > 0);
415 const Token &Left = Tokens.tokens()[Begin - 1];
416 assert(Left.Kind == tok::l_brace || Left.Kind == tok::l_paren ||
417 Left.Kind == tok::l_square);
418 if (const Token *Right = Left.pair()) {
419 assert(Tokens.index(*Right) > Begin - 1);
420 return Tokens.index(T: *Right);
421 }
422 return Token::Invalid;
423}
424
425llvm::DenseMap<ExtensionID, RecoveryStrategy> buildRecoveryStrategies() {
426 return {
427 {Extension::Brackets, recoverBrackets},
428 };
429}
430
431} // namespace
432
433const Language &getLanguage() {
434 static const auto &CXXLanguage = []() -> const Language & {
435 std::vector<std::string> Diags;
436 auto G = Grammar::parseBNF(BNF: CXXBNF, Diags&: Diags);
437 assert(Diags.empty());
438 LRTable Table = LRTable::buildSLR(G: G);
439 const Language *PL = new Language{
440 std::move(G),
441 std::move(Table),
442 buildGuards(),
443 buildRecoveryStrategies(),
444 };
445 return *PL;
446 }();
447 return CXXLanguage;
448}
449
450} // namespace cxx
451} // namespace pseudo
452} // namespace clang
453

// Source: clang-tools-extra/pseudo/lib/cxx/CXX.cpp