FormatTokenLexer.h source code [clang/lib/Format/FormatTokenLexer.h]

1	//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// This file contains FormatTokenLexer, which tokenizes a source file
11	/// into a token stream suitable for ClangFormat.
12	///
13	//===----------------------------------------------------------------------===//
14
15	#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
16	#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
17
18	#include "Encoding.h"
19	#include "FormatToken.h"
20	#include "llvm/ADT/MapVector.h"
21	#include "llvm/ADT/SmallPtrSet.h"
22	#include "llvm/ADT/StringSet.h"
23
24	#include <stack>
25
26	namespace clang {
27	namespace format {
28
// Lexing states. FormatTokenLexer keeps these on its StateStack member
// (a std::stack<LexerState>).
//
// NOTE(review): the per-enumerator descriptions below are inferred from the
// enumerator names and from the declarations in FormatTokenLexer; confirm
// them against FormatTokenLexer.cpp.
enum LexerState {
  // Presumably the default state: ordinary code is being lexed.
  NORMAL,
  // Presumably the lexer is inside a JavaScript template string (see
  // FormatTokenLexer::handleTemplateStrings).
  TEMPLATE_STRING,
  // Presumably a token has been held back and will be handed out by
  // FormatTokenLexer::getStashedToken.
  TOKEN_STASHED,
};
34
35	class FormatTokenLexer {
36	public:
37	FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
38	const FormatStyle &Style, encoding::Encoding Encoding,
39	llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
40	IdentifierTable &IdentTable);
41
42	ArrayRef<FormatToken *> lex();
43
44	const AdditionalKeywords &getKeywords() { return Keywords; }
45
46	private:
47	void tryMergePreviousTokens();
48
49	bool tryMergeLessLess();
50	bool tryMergeGreaterGreater();
51	bool tryMergeNSStringLiteral();
52	bool tryMergeJSPrivateIdentifier();
53	bool tryMergeCSharpStringLiteral();
54	bool tryMergeCSharpKeywordVariables();
55	bool tryMergeNullishCoalescingEqual();
56	bool tryTransformCSharpForEach();
57	bool tryMergeForEach();
58	bool tryTransformTryUsageForC();
59
60	// Merge the most recently lexed tokens into a single token if their kinds are
61	// correct.
62	bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
63	// Merge without checking their kinds.
64	bool tryMergeTokens(size_t Count, TokenType NewType);
65	// Merge if their kinds match any one of Kinds.
66	bool tryMergeTokensAny(ArrayRef<ArrayRef<tok::TokenKind>> Kinds,
67	TokenType NewType);
68
69	// Returns \c true if \p Tok can only be followed by an operand in JavaScript.
70	bool precedesOperand(FormatToken *Tok);
71
72	bool canPrecedeRegexLiteral(FormatToken *Prev);
73
74	// Tries to parse a JavaScript Regex literal starting at the current token,
75	// if that begins with a slash and is in a location where JavaScript allows
76	// regex literals. Changes the current token to a regex literal and updates
77	// its text if successful.
78	void tryParseJSRegexLiteral();
79
80	// Handles JavaScript template strings.
81	//
82	// JavaScript template strings use backticks ('`') as delimiters, and allow
83	// embedding expressions nested in ${expr-here}. Template strings can be
84	// nested recursively, i.e. expressions can contain template strings in turn.
85	//
86	// The code below parses starting from a backtick, up to a closing backtick or
87	// an opening ${. It also maintains a stack of lexing contexts to handle
88	// nested template parts by balancing curly braces.
89	void handleTemplateStrings();
90
91	void handleCSharpVerbatimAndInterpolatedStrings();
92
93	// Handles TableGen multiline strings. It has the form [{ ... }].
94	void handleTableGenMultilineString();
95	// Handles TableGen numeric like identifiers.
96	// They have a forms of [0-9][_a-zA-Z]([_a-zA-Z0-9]). But limited to the
97	// case it is not lexed as an integer.
98	void handleTableGenNumericLikeIdentifier();
99
100	void tryParsePythonComment();
101
102	bool tryMerge_TMacro();
103
104	bool tryMergeConflictMarkers();
105
106	void truncateToken(size_t NewLen);
107
108	FormatToken *getStashedToken();
109
110	FormatToken *getNextToken();
111
112	FormatToken *FormatTok;
113	bool IsFirstToken;
114	std::stack<LexerState> StateStack;
115	unsigned Column;
116	unsigned TrailingWhitespace;
117	std::unique_ptr<Lexer> Lex;
118	LangOptions LangOpts;
119	const SourceManager &SourceMgr;
120	FileID ID;
121	const FormatStyle &Style;
122	IdentifierTable &IdentTable;
123	AdditionalKeywords Keywords;
124	encoding::Encoding Encoding;
125	llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
126	// Index (in 'Tokens') of the last token that starts a new line.
127	unsigned FirstInLineIndex;
128	SmallVector<FormatToken *, `16`> Tokens;
129
130	llvm::SmallMapVector<IdentifierInfo *, TokenType, `8`> Macros;
131
132	llvm::SmallPtrSet<IdentifierInfo *, `8`> TypeNames;
133
134	bool FormattingDisabled;
135
136	llvm::Regex MacroBlockBeginRegex;
137	llvm::Regex MacroBlockEndRegex;
138
139	// Targets that may appear inside a C# attribute.
140	static const llvm::StringSet<> CSharpAttributeTargets;
141
142	/// Handle Verilog-specific tokens.
143	bool readRawTokenVerilogSpecific(Token &Tok);
144
145	void readRawToken(FormatToken &Tok);
146
147	void resetLexer(unsigned Offset);
148	};
149
150	} // namespace format
151	} // namespace clang
152
153	#endif
154

Provided by KDAB

Learn to use CMake with our Intro Training

Find out more

Definitions

source code of clang/lib/Format/FormatTokenLexer.h