tokenizer.h source code [qttools/src/qdoc/qdoc/tokenizer.h]

1	// Copyright (C) 2021 The Qt Company Ltd.
2	// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
3
4	#ifndef TOKENIZER_H
5	#define TOKENIZER_H
6
7	#include "location.h"
8
9	#include <QtCore/qfile.h>
10	#include <QtCore/qstack.h>
11	#include <QtCore/qstring.h>
12
13	QT_BEGIN_NAMESPACE
14
/*
    Token codes handed out by the tokenizer. The list opens with the
    all-purpose tokens and closes with the keywords.

    The keywords occupy one contiguous run, delimited by
    Tok_FirstKeyword and Tok_LastKeyword. When a keyword is added, the
    keyword array in tokenizer.cpp has to be updated to match, and
    Tok_FirstKeyword / Tok_LastKeyword may need to move if the new
    entry sits at either end of that run.

    The enumerators are numbered implicitly, so their order is
    significant: do not reorder them.
*/
enum {
    // All-purpose tokens.
    Tok_Eoi, Tok_Ampersand, Tok_Aster, Tok_Caret,
    Tok_LeftParen, Tok_RightParen, Tok_LeftParenAster,
    Tok_Equal,
    Tok_LeftBrace, Tok_RightBrace,
    Tok_Semicolon, Tok_Colon,
    Tok_LeftAngle, Tok_RightAngle,
    Tok_Comma, Tok_Ellipsis, Tok_Gulbrandsen,
    Tok_LeftBracket, Tok_RightBracket,
    Tok_Tilde, Tok_SomeOperator,
    Tok_Number, Tok_String, Tok_Doc, Tok_Comment, Tok_Ident, Tok_At,

    // Keywords.
    Tok_char, Tok_class, Tok_const, Tok_double, Tok_int, Tok_long,
    Tok_operator, Tok_short, Tok_signed, Tok_typename, Tok_unsigned,
    Tok_void, Tok_volatile, Tok_int64, Tok_QPrivateSignal,

    // Inclusive bounds of the keyword run above.
    Tok_FirstKeyword = Tok_char,
    Tok_LastKeyword = Tok_QPrivateSignal
};
69
70	/*
71	The Tokenizer class implements lexical analysis of C++ source
72	files.
73
74	Not every operator or keyword of C++ is recognized; only those
75	that are interesting to us. Some Qt keywords or macros are also
76	recognized.
77	*/
78
79	class Tokenizer
80	{
81	public:
82	Tokenizer(const Location &loc, QByteArray in);
83	Tokenizer(const Location &loc, QFile &file);
84
85	~Tokenizer();
86
87	int getToken();
88	void setParsingFnOrMacro(bool macro) { m_parsingMacro = macro; }
89
90	[[nodiscard]] const Location &location() const { return m_tokLoc; }
91	[[nodiscard]] QString previousLexeme() const;
92	[[nodiscard]] QString lexeme() const;
93	[[nodiscard]] QString version() const { return m_version; }
94	[[nodiscard]] int parenDepth() const { return m_parenDepth; }
95	[[nodiscard]] int bracketDepth() const { return m_bracketDepth; }
96
97	static void initialize();
98	static void terminate();
99	static bool isTrue(const QString &condition);
100
101	private:
102	void init();
103	void start(const Location &loc);
104	/*
105	Represents the maximum amount of characters that a token can be composed
106	of.
107
108	When a token with more characters than the maximum amount is encountered, a
109	warning is issued and parsing continues, discarding all characters from the
110	currently parsed token that don't fit into the buffer.
111	*/
112	enum { yyLexBufSize = `1048576` };
113
114	int getch() { return m_pos == m_in.size() ? EOF : m_in [m_pos++]; }
115
116	inline int getChar()
117	{
118	using namespace Qt::StringLiterals;
119
120	if (m_ch == EOF)
121	return EOF;
122	if (m_lexLen < yyLexBufSize - `1`) {
123	m_lex[m_lexLen++] = (char)m_ch;
124	m_lex[m_lexLen] = `'\0'`;
125	} else if (!token_too_long_warning_was_issued) {
126	location().warning(
127	message: u"The content is too long.\n"_s,
128	details: u"The maximum amount of characters for this content is %1.\n"_s.arg(a: yyLexBufSize) +
129	"Consider splitting it or reducing its size."
130	);
131
132	token_too_long_warning_was_issued = true;
133	}
134	m_curLoc.advance(ch: QChar (m_ch));
135	int ch = getch();
136	if (ch == EOF)
137	return EOF;
138	// cast explicitly to make sure the value of ch
139	// is in range [0..255] to avoid assert messages
140	// when using debug CRT that checks its input.
141	return int(uint(uchar(ch)));
142	}
143
144	int getTokenAfterPreprocessor();
145	void pushSkipping(bool skip);
146	bool popSkipping();
147
148	Location m_tokLoc;
149	Location m_curLoc;
150	char m_lexBuf1 { nullptr* };
151	char m_lexBuf2 { nullptr* };
152	char m_prevLex { nullptr* };
153	char m_lex { nullptr* };
154	size_t m_lexLen {};
155	QStack<bool> m_preprocessorSkipping;
156	int m_numPreprocessorSkipping {};
157	int m_braceDepth {};
158	int m_parenDepth {};
159	int m_bracketDepth {};
160	int m_ch {};
161
162	QString m_version {};
163	bool m_parsingMacro {};
164
165	// Used to ensure that the warning that is issued when a token is
166	// too long to fit into our fixed sized buffer is not repeated for each
167	// character of that token after the last saved one.
168	// The flag is reset whenever a new token is requested, so as to allow
169	// reporting all such tokens that are too long during a single execution.
170	bool token_too_long_warning_was_issued{false};
171
172	protected:
173	QByteArray m_in {};
174	int m_pos {};
175	};
176
177	QT_END_NAMESPACE
178
179	#endif
180

source code of qttools/src/qdoc/qdoc/tokenizer.h