1// Copyright (C) 2021 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
3
4#ifndef TOKENIZER_H
5#define TOKENIZER_H
6
7#include "location.h"
8
9#include <QtCore/qfile.h>
10#include <QtCore/qstack.h>
11#include <QtCore/qstring.h>
12
13QT_BEGIN_NAMESPACE
14
/*
  The C++ tokens recognized by the tokenizer. All-purpose tokens
  come first, followed by keywords.

  If you add a keyword, make sure to also update the keyword array in
  tokenizer.cpp and, if necessary, adjust Tok_FirstKeyword and
  Tok_LastKeyword.
*/
enum {
    // --- All-purpose tokens -------------------------------------------
    Tok_Eoi = 0, // first enumerator; numbering of everything below follows from it

    // Single-character punctuation and paired delimiters.
    Tok_Ampersand,
    Tok_Aster,
    Tok_Caret,
    Tok_LeftParen,
    Tok_RightParen,
    Tok_LeftParenAster,
    Tok_Equal,
    Tok_LeftBrace,
    Tok_RightBrace,
    Tok_Semicolon,
    Tok_Colon,
    Tok_LeftAngle,
    Tok_RightAngle,
    Tok_Comma,
    Tok_Ellipsis,
    // NOTE(review): the name does not reveal the lexeme; believed to be
    // the scope operator "::" - confirm against tokenizer.cpp.
    Tok_Gulbrandsen,
    Tok_LeftBracket,
    Tok_RightBracket,
    Tok_Tilde,
    Tok_SomeOperator,

    // Literals, comments and names.
    Tok_Number,
    Tok_String,
    Tok_Doc,
    Tok_Comment,
    Tok_Ident,
    Tok_At,

    // --- Keywords -----------------------------------------------------
    // This run must stay contiguous: it is delimited by the two aliases
    // at the end of the enum and mirrored by the keyword array in
    // tokenizer.cpp.
    Tok_char,
    Tok_class,
    Tok_const,
    Tok_double,
    Tok_int,
    Tok_long,
    Tok_operator,
    Tok_short,
    Tok_signed,
    Tok_typename,
    Tok_unsigned,
    Tok_void,
    Tok_volatile,
    Tok_int64,
    Tok_QPrivateSignal,

    // Inclusive bounds of the keyword run above. These are aliases, not
    // additional token kinds.
    Tok_FirstKeyword = Tok_char,
    Tok_LastKeyword = Tok_QPrivateSignal
};
69
/*
  The Tokenizer class implements lexical analysis of C++ source
  files.

  Not every C++ operator or keyword is recognized, only those
  that are of interest to qdoc. Some Qt keywords and macros are
  also recognized.
*/
78
79class Tokenizer
80{
81public:
82 Tokenizer(const Location &loc, QByteArray in);
83 Tokenizer(const Location &loc, QFile &file);
84
85 ~Tokenizer();
86
87 int getToken();
88 void setParsingFnOrMacro(bool macro) { m_parsingMacro = macro; }
89
90 [[nodiscard]] const Location &location() const { return m_tokLoc; }
91 [[nodiscard]] QString previousLexeme() const;
92 [[nodiscard]] QString lexeme() const;
93 [[nodiscard]] QString version() const { return m_version; }
94 [[nodiscard]] int parenDepth() const { return m_parenDepth; }
95 [[nodiscard]] int bracketDepth() const { return m_bracketDepth; }
96
97 static void initialize();
98 static void terminate();
99 static bool isTrue(const QString &condition);
100
101private:
102 void init();
103 void start(const Location &loc);
104 /*
105 Represents the maximum amount of characters that a token can be composed
106 of.
107
108 When a token with more characters than the maximum amount is encountered, a
109 warning is issued and parsing continues, discarding all characters from the
110 currently parsed token that don't fit into the buffer.
111 */
112 enum { yyLexBufSize = 1048576 };
113
114 int getch() { return m_pos == m_in.size() ? EOF : m_in[m_pos++]; }
115
116 inline int getChar()
117 {
118 using namespace Qt::StringLiterals;
119
120 if (m_ch == EOF)
121 return EOF;
122 if (m_lexLen < yyLexBufSize - 1) {
123 m_lex[m_lexLen++] = (char)m_ch;
124 m_lex[m_lexLen] = '\0';
125 } else if (!token_too_long_warning_was_issued) {
126 location().warning(
127 message: u"The content is too long.\n"_s,
128 details: u"The maximum amount of characters for this content is %1.\n"_s.arg(a: yyLexBufSize) +
129 "Consider splitting it or reducing its size."
130 );
131
132 token_too_long_warning_was_issued = true;
133 }
134 m_curLoc.advance(ch: QChar(m_ch));
135 int ch = getch();
136 if (ch == EOF)
137 return EOF;
138 // cast explicitly to make sure the value of ch
139 // is in range [0..255] to avoid assert messages
140 // when using debug CRT that checks its input.
141 return int(uint(uchar(ch)));
142 }
143
144 int getTokenAfterPreprocessor();
145 void pushSkipping(bool skip);
146 bool popSkipping();
147
148 Location m_tokLoc;
149 Location m_curLoc;
150 char *m_lexBuf1 { nullptr };
151 char *m_lexBuf2 { nullptr };
152 char *m_prevLex { nullptr };
153 char *m_lex { nullptr };
154 size_t m_lexLen {};
155 QStack<bool> m_preprocessorSkipping;
156 int m_numPreprocessorSkipping {};
157 int m_braceDepth {};
158 int m_parenDepth {};
159 int m_bracketDepth {};
160 int m_ch {};
161
162 QString m_version {};
163 bool m_parsingMacro {};
164
165 // Used to ensure that the warning that is issued when a token is
166 // too long to fit into our fixed sized buffer is not repeated for each
167 // character of that token after the last saved one.
168 // The flag is reset whenever a new token is requested, so as to allow
169 // reporting all such tokens that are too long during a single execution.
170 bool token_too_long_warning_was_issued{false};
171
172protected:
173 QByteArray m_in {};
174 int m_pos {};
175};
176
177QT_END_NAMESPACE
178
179#endif
180

// Source: qttools/src/qdoc/qdoc/tokenizer.h