1 | // Copyright (C) 2021 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 |
3 | |
4 | #include "tokenizer.h" |
5 | |
6 | #include "config.h" |
7 | #include "generator.h" |
8 | |
9 | #include <QtCore/qfile.h> |
10 | #include <QtCore/qhash.h> |
11 | #include <QtCore/qregularexpression.h> |
12 | #include <QtCore/qstring.h> |
13 | #include <QtCore/qstringconverter.h> |
14 | |
15 | #include <cctype> |
16 | #include <cstring> |
17 | #include <utility> |
18 | |
19 | QT_BEGIN_NAMESPACE |
20 | |
// Language name used as the leading component of the language-specific
// configuration keys that Tokenizer::initialize() builds (it is joined with
// Config::dot and CONFIG_IGNORETOKENS / CONFIG_IGNOREDIRECTIVES).
#define LANGUAGE_CPP "Cpp"
22 | |
23 | /* qmake ignore Q_OBJECT */ |
24 | |
/*
  Spellings of the keywords recognised by the tokenizer.

  Keep in sync with tokenizer.h: getToken() turns the 1-based position of a
  word in this table into a token code by adding it to Tok_FirstKeyword, and
  initialize() walks the table from Tok_FirstKeyword to Tok_LastKeyword, so
  neither the order nor the number of entries may change independently.
*/
static const char *kwords[] = {
    // C++ keywords
    "char", "class", "const", "double", "enum", "explicit", "friend", "inline",
    "int", "long", "namespace", "operator", "private", "protected", "public", "short",
    "signals", "signed", "slots", "static", "struct", "template", "typedef", "typename",
    "union", "unsigned", "using", "virtual", "void", "volatile", "__int64", "default",
    "delete", "final", "override",
    // Qt macros
    "Q_OBJECT", "Q_OVERRIDE", "Q_PROPERTY", "Q_PRIVATE_PROPERTY",
    "Q_DECLARE_SEQUENTIAL_ITERATOR", "Q_DECLARE_MUTABLE_SEQUENTIAL_ITERATOR",
    "Q_DECLARE_ASSOCIATIVE_ITERATOR", "Q_DECLARE_MUTABLE_ASSOCIATIVE_ITERATOR",
    "Q_DECLARE_FLAGS", "Q_SIGNALS", "Q_SLOTS",
    "QT_COMPAT", "QT_COMPAT_CONSTRUCTOR", "QT_DEPRECATED", "QT_MOC_COMPAT", "QT_MODULE",
    "QT3_SUPPORT", "QT3_SUPPORT_CONSTRUCTOR", "QT3_MOC_SUPPORT",
    "QDOC_PROPERTY", "QPrivateSignal"
};
84 | |
85 | static const int KwordHashTableSize = 4096; |
86 | static int kwordHashTable[KwordHashTableSize]; |
87 | |
88 | static QHash<QByteArray, bool> *ignoredTokensAndDirectives = nullptr; |
89 | |
90 | static QRegularExpression * = nullptr; |
91 | static QRegularExpression *versionX = nullptr; |
92 | static QRegularExpression *definedX = nullptr; |
93 | |
94 | static QRegularExpression *defines = nullptr; |
95 | static QRegularExpression *falsehoods = nullptr; |
96 | |
97 | static QStringDecoder sourceDecoder; |
98 | |
99 | /* |
100 | This function is a perfect hash function for the 37 keywords of C99 |
101 | (with a hash table size of 512). It should perform well on our |
102 | Qt-enhanced C++ subset. |
103 | */ |
104 | static int hashKword(const char *s, int len) |
105 | { |
106 | return (((uchar)s[0]) + (((uchar)s[2]) << 5) + (((uchar)s[len - 1]) << 3)) % KwordHashTableSize; |
107 | } |
108 | |
109 | static void insertKwordIntoHash(const char *s, int number) |
110 | { |
111 | int k = hashKword(s, len: int(strlen(s: s))); |
112 | while (kwordHashTable[k]) { |
113 | if (++k == KwordHashTableSize) |
114 | k = 0; |
115 | } |
116 | kwordHashTable[k] = number; |
117 | } |
118 | |
119 | Tokenizer::Tokenizer(const Location &loc, QFile &in) |
120 | { |
121 | init(); |
122 | m_in = in.readAll(); |
123 | m_pos = 0; |
124 | start(loc); |
125 | } |
126 | |
127 | Tokenizer::Tokenizer(const Location &loc, QByteArray in) : m_in(std::move(in)) |
128 | { |
129 | init(); |
130 | m_pos = 0; |
131 | start(loc); |
132 | } |
133 | |
134 | Tokenizer::~Tokenizer() |
135 | { |
136 | delete[] m_lexBuf1; |
137 | delete[] m_lexBuf2; |
138 | } |
139 | |
140 | int Tokenizer::getToken() |
141 | { |
142 | token_too_long_warning_was_issued = false; |
143 | |
144 | char *t = m_prevLex; |
145 | m_prevLex = m_lex; |
146 | m_lex = t; |
147 | |
148 | while (m_ch != EOF) { |
149 | m_tokLoc = m_curLoc; |
150 | m_lexLen = 0; |
151 | |
152 | if (isspace(m_ch)) { |
153 | do { |
154 | m_ch = getChar(); |
155 | } while (isspace(m_ch)); |
156 | } else if (isalpha(m_ch) || m_ch == '_') { |
157 | do { |
158 | m_ch = getChar(); |
159 | } while (isalnum(m_ch) || m_ch == '_'); |
160 | |
161 | int k = hashKword(s: m_lex, len: int(m_lexLen)); |
162 | for (;;) { |
163 | int i = kwordHashTable[k]; |
164 | if (i == 0) { |
165 | return Tok_Ident; |
166 | } else if (i == -1) { |
167 | if (!m_parsingMacro && ignoredTokensAndDirectives->contains(key: m_lex)) { |
168 | if (ignoredTokensAndDirectives->value(key: m_lex)) { // it's a directive |
169 | int parenDepth = 0; |
170 | while (m_ch != EOF && (m_ch != ')' || parenDepth > 1)) { |
171 | if (m_ch == '(') |
172 | ++parenDepth; |
173 | else if (m_ch == ')') |
174 | --parenDepth; |
175 | m_ch = getChar(); |
176 | } |
177 | if (m_ch == ')') |
178 | m_ch = getChar(); |
179 | } |
180 | break; |
181 | } |
182 | } else if (strcmp(s1: m_lex, s2: kwords[i - 1]) == 0) { |
183 | int ret = (int)Tok_FirstKeyword + i - 1; |
184 | if (ret != Tok_typename) |
185 | return ret; |
186 | break; |
187 | } |
188 | |
189 | if (++k == KwordHashTableSize) |
190 | k = 0; |
191 | } |
192 | } else if (isdigit(m_ch)) { |
193 | do { |
194 | m_ch = getChar(); |
195 | } while (isalnum(m_ch) || m_ch == '.' || m_ch == '+' || m_ch == '-'); |
196 | return Tok_Number; |
197 | } else { |
198 | switch (m_ch) { |
199 | case '!': |
200 | case '%': |
201 | m_ch = getChar(); |
202 | if (m_ch == '=') |
203 | m_ch = getChar(); |
204 | return Tok_SomeOperator; |
205 | case '"': |
206 | m_ch = getChar(); |
207 | |
208 | while (m_ch != EOF && m_ch != '"') { |
209 | if (m_ch == '\\') |
210 | m_ch = getChar(); |
211 | m_ch = getChar(); |
212 | } |
213 | m_ch = getChar(); |
214 | |
215 | if (m_ch == EOF) |
216 | m_tokLoc.warning( |
217 | QStringLiteral("Unterminated C++ string literal" ), |
218 | QStringLiteral("Maybe you forgot '/*!' at the beginning of the file?" )); |
219 | else |
220 | return Tok_String; |
221 | break; |
222 | case '#': |
223 | return getTokenAfterPreprocessor(); |
224 | case '&': |
225 | m_ch = getChar(); |
226 | /* |
227 | Removed check for '&&', only interpret '&=' as an operator. |
228 | '&&' is also used for an rvalue reference. QTBUG-32675 |
229 | */ |
230 | if (m_ch == '=') { |
231 | m_ch = getChar(); |
232 | return Tok_SomeOperator; |
233 | } else { |
234 | return Tok_Ampersand; |
235 | } |
236 | case '\'': |
237 | m_ch = getChar(); |
238 | /* |
239 | Allow empty character literal. QTBUG-25775 |
240 | */ |
241 | if (m_ch == '\'') { |
242 | m_ch = getChar(); |
243 | break; |
244 | } |
245 | if (m_ch == '\\') |
246 | m_ch = getChar(); |
247 | do { |
248 | m_ch = getChar(); |
249 | } while (m_ch != EOF && m_ch != '\''); |
250 | |
251 | if (m_ch == EOF) { |
252 | m_tokLoc.warning(QStringLiteral("Unterminated C++ character literal" )); |
253 | } else { |
254 | m_ch = getChar(); |
255 | return Tok_Number; |
256 | } |
257 | break; |
258 | case '(': |
259 | m_ch = getChar(); |
260 | if (m_numPreprocessorSkipping == 0) |
261 | m_parenDepth++; |
262 | if (isspace(m_ch)) { |
263 | do { |
264 | m_ch = getChar(); |
265 | } while (isspace(m_ch)); |
266 | m_lexLen = 1; |
267 | m_lex[1] = '\0'; |
268 | } |
269 | if (m_ch == '*') { |
270 | m_ch = getChar(); |
271 | return Tok_LeftParenAster; |
272 | } |
273 | return Tok_LeftParen; |
274 | case ')': |
275 | m_ch = getChar(); |
276 | if (m_numPreprocessorSkipping == 0) |
277 | m_parenDepth--; |
278 | return Tok_RightParen; |
279 | case '*': |
280 | m_ch = getChar(); |
281 | if (m_ch == '=') { |
282 | m_ch = getChar(); |
283 | return Tok_SomeOperator; |
284 | } else { |
285 | return Tok_Aster; |
286 | } |
287 | case '^': |
288 | m_ch = getChar(); |
289 | if (m_ch == '=') { |
290 | m_ch = getChar(); |
291 | return Tok_SomeOperator; |
292 | } else { |
293 | return Tok_Caret; |
294 | } |
295 | case '+': |
296 | m_ch = getChar(); |
297 | if (m_ch == '+' || m_ch == '=') |
298 | m_ch = getChar(); |
299 | return Tok_SomeOperator; |
300 | case ',': |
301 | m_ch = getChar(); |
302 | return Tok_Comma; |
303 | case '-': |
304 | m_ch = getChar(); |
305 | if (m_ch == '-' || m_ch == '=') { |
306 | m_ch = getChar(); |
307 | } else if (m_ch == '>') { |
308 | m_ch = getChar(); |
309 | if (m_ch == '*') |
310 | m_ch = getChar(); |
311 | } |
312 | return Tok_SomeOperator; |
313 | case '.': |
314 | m_ch = getChar(); |
315 | if (m_ch == '*') { |
316 | m_ch = getChar(); |
317 | } else if (m_ch == '.') { |
318 | do { |
319 | m_ch = getChar(); |
320 | } while (m_ch == '.'); |
321 | return Tok_Ellipsis; |
322 | } else if (isdigit(m_ch)) { |
323 | do { |
324 | m_ch = getChar(); |
325 | } while (isalnum(m_ch) || m_ch == '.' || m_ch == '+' || m_ch == '-'); |
326 | return Tok_Number; |
327 | } |
328 | return Tok_SomeOperator; |
329 | case '/': |
330 | m_ch = getChar(); |
331 | if (m_ch == '/') { |
332 | do { |
333 | m_ch = getChar(); |
334 | } while (m_ch != EOF && m_ch != '\n'); |
335 | } else if (m_ch == '*') { |
336 | bool metDoc = false; // empty doc is no doc |
337 | bool metSlashAsterBang = false; |
338 | bool metAster = false; |
339 | bool metAsterSlash = false; |
340 | |
341 | m_ch = getChar(); |
342 | if (m_ch == '!') |
343 | metSlashAsterBang = true; |
344 | |
345 | while (!metAsterSlash) { |
346 | if (m_ch == EOF) { |
347 | m_tokLoc.warning(QStringLiteral("Unterminated C++ comment" )); |
348 | break; |
349 | } else { |
350 | if (m_ch == '*') { |
351 | metAster = true; |
352 | } else if (metAster && m_ch == '/') { |
353 | metAsterSlash = true; |
354 | } else { |
355 | metAster = false; |
356 | if (isgraph(m_ch)) |
357 | metDoc = true; |
358 | } |
359 | } |
360 | m_ch = getChar(); |
361 | } |
362 | if (metSlashAsterBang && metDoc) |
363 | return Tok_Doc; |
364 | else if (m_parenDepth > 0) |
365 | return Tok_Comment; |
366 | } else { |
367 | if (m_ch == '=') |
368 | m_ch = getChar(); |
369 | return Tok_SomeOperator; |
370 | } |
371 | break; |
372 | case ':': |
373 | m_ch = getChar(); |
374 | if (m_ch == ':') { |
375 | m_ch = getChar(); |
376 | return Tok_Gulbrandsen; |
377 | } else { |
378 | return Tok_Colon; |
379 | } |
380 | case ';': |
381 | m_ch = getChar(); |
382 | return Tok_Semicolon; |
383 | case '<': |
384 | m_ch = getChar(); |
385 | if (m_ch == '<') { |
386 | m_ch = getChar(); |
387 | if (m_ch == '=') |
388 | m_ch = getChar(); |
389 | return Tok_SomeOperator; |
390 | } else if (m_ch == '=') { |
391 | m_ch = getChar(); |
392 | return Tok_SomeOperator; |
393 | } else { |
394 | return Tok_LeftAngle; |
395 | } |
396 | case '=': |
397 | m_ch = getChar(); |
398 | if (m_ch == '=') { |
399 | m_ch = getChar(); |
400 | return Tok_SomeOperator; |
401 | } else { |
402 | return Tok_Equal; |
403 | } |
404 | case '>': |
405 | m_ch = getChar(); |
406 | if (m_ch == '>') { |
407 | m_ch = getChar(); |
408 | if (m_ch == '=') |
409 | m_ch = getChar(); |
410 | return Tok_SomeOperator; |
411 | } else if (m_ch == '=') { |
412 | m_ch = getChar(); |
413 | return Tok_SomeOperator; |
414 | } else { |
415 | return Tok_RightAngle; |
416 | } |
417 | case '?': |
418 | m_ch = getChar(); |
419 | return Tok_SomeOperator; |
420 | case '[': |
421 | m_ch = getChar(); |
422 | if (m_numPreprocessorSkipping == 0) |
423 | m_bracketDepth++; |
424 | return Tok_LeftBracket; |
425 | case '\\': |
426 | m_ch = getChar(); |
427 | m_ch = getChar(); // skip one character |
428 | break; |
429 | case ']': |
430 | m_ch = getChar(); |
431 | if (m_numPreprocessorSkipping == 0) |
432 | m_bracketDepth--; |
433 | return Tok_RightBracket; |
434 | case '{': |
435 | m_ch = getChar(); |
436 | if (m_numPreprocessorSkipping == 0) |
437 | m_braceDepth++; |
438 | return Tok_LeftBrace; |
439 | case '}': |
440 | m_ch = getChar(); |
441 | if (m_numPreprocessorSkipping == 0) |
442 | m_braceDepth--; |
443 | return Tok_RightBrace; |
444 | case '|': |
445 | m_ch = getChar(); |
446 | if (m_ch == '|' || m_ch == '=') |
447 | m_ch = getChar(); |
448 | return Tok_SomeOperator; |
449 | case '~': |
450 | m_ch = getChar(); |
451 | return Tok_Tilde; |
452 | case '@': |
453 | m_ch = getChar(); |
454 | return Tok_At; |
455 | default: |
456 | // ### We should really prevent qdoc from looking at snippet files rather than |
457 | // ### suppress warnings when reading them. |
458 | if (m_numPreprocessorSkipping == 0 |
459 | && !(m_tokLoc.fileName().endsWith(s: ".qdoc" ) |
460 | || m_tokLoc.fileName().endsWith(s: ".js" ))) { |
461 | m_tokLoc.warning(QStringLiteral("Hostile character 0x%1 in C++ source" ) |
462 | .arg(a: (uchar)m_ch, fieldWidth: 1, base: 16)); |
463 | } |
464 | m_ch = getChar(); |
465 | } |
466 | } |
467 | } |
468 | |
469 | if (m_preprocessorSkipping.size() > 1) { |
470 | m_tokLoc.warning(QStringLiteral("Expected #endif before end of file" )); |
471 | // clear it out or we get an infinite loop! |
472 | while (!m_preprocessorSkipping.isEmpty()) { |
473 | popSkipping(); |
474 | } |
475 | } |
476 | |
477 | strcpy(dest: m_lex, src: "end-of-input" ); |
478 | m_lexLen = strlen(s: m_lex); |
479 | return Tok_Eoi; |
480 | } |
481 | |
482 | void Tokenizer::initialize() |
483 | { |
484 | Config &config = Config::instance(); |
485 | QString versionSym = config.get(CONFIG_VERSIONSYM).asString(); |
486 | const QLatin1String defaultEncoding("UTF-8" ); |
487 | |
488 | QString sourceEncoding = config.get(CONFIG_SOURCEENCODING).asString(defaultString: defaultEncoding); |
489 | if (!QStringConverter::encodingForName(name: sourceEncoding.toUtf8().constData())) { |
490 | Location().warning(QStringLiteral("Source encoding '%1' not supported, using '%2' as default." ) |
491 | .arg(args&: sourceEncoding, args: defaultEncoding)); |
492 | sourceEncoding = defaultEncoding; |
493 | } |
494 | sourceDecoder = QStringDecoder(sourceEncoding.toUtf8().constData()); |
495 | Q_ASSERT(sourceDecoder.isValid()); |
496 | |
497 | comment = new QRegularExpression("/(?:\\*.*\\*/|/.*\n|/[^\n]*$)" , QRegularExpression::InvertedGreedinessOption); |
498 | versionX = new QRegularExpression("$cannot possibly match^" ); |
499 | if (!versionSym.isEmpty()) |
500 | versionX->setPattern("^[ \t]*(?:" + QRegularExpression::escape(str: versionSym) |
501 | + ")[ \t]+\"([^\"]*)\"[ \t]*$" ); |
502 | definedX = new QRegularExpression("^defined ?\\(?([A-Z_0-9a-z]+) ?\\)?$" ); |
503 | |
504 | QStringList d{config.get(CONFIG_DEFINES).asStringList()}; |
505 | d += "qdoc" ; |
506 | defines = new QRegularExpression(QRegularExpression::anchoredPattern(expression: d.join(sep: '|'))); |
507 | falsehoods = new QRegularExpression(QRegularExpression::anchoredPattern( |
508 | expression: config.get(CONFIG_FALSEHOODS).asStringList().join(sep: '|'))); |
509 | |
510 | /* |
511 | The keyword hash table is always cleared before any words are inserted. |
512 | */ |
513 | memset(s: kwordHashTable, c: 0, n: sizeof(kwordHashTable)); |
514 | for (int i = 0; i < Tok_LastKeyword - Tok_FirstKeyword + 1; i++) |
515 | insertKwordIntoHash(s: kwords[i], number: i + 1); |
516 | |
517 | ignoredTokensAndDirectives = new QHash<QByteArray, bool>; |
518 | |
519 | const QStringList tokens{config.get(LANGUAGE_CPP |
520 | + Config::dot |
521 | + CONFIG_IGNORETOKENS).asStringList()}; |
522 | for (const auto &token : tokens) { |
523 | const QByteArray tb = token.toLatin1(); |
524 | ignoredTokensAndDirectives->insert(key: tb, value: false); |
525 | insertKwordIntoHash(s: tb.data(), number: -1); |
526 | } |
527 | |
528 | const QStringList directives{config.get(LANGUAGE_CPP |
529 | + Config::dot |
530 | + CONFIG_IGNOREDIRECTIVES).asStringList()}; |
531 | for (const auto &directive : directives) { |
532 | const QByteArray db = directive.toLatin1(); |
533 | ignoredTokensAndDirectives->insert(key: db, value: true); |
534 | insertKwordIntoHash(s: db.data(), number: -1); |
535 | } |
536 | } |
537 | |
538 | /*! |
539 | The heap allocated variables are freed here. The keyword |
540 | hash table is not cleared here, but it is cleared in the |
541 | initialize() function, before any keywords are inserted. |
542 | */ |
543 | void Tokenizer::terminate() |
544 | { |
545 | delete comment; |
546 | comment = nullptr; |
547 | delete versionX; |
548 | versionX = nullptr; |
549 | delete definedX; |
550 | definedX = nullptr; |
551 | delete defines; |
552 | defines = nullptr; |
553 | delete falsehoods; |
554 | falsehoods = nullptr; |
555 | delete ignoredTokensAndDirectives; |
556 | ignoredTokensAndDirectives = nullptr; |
557 | } |
558 | |
559 | void Tokenizer::init() |
560 | { |
561 | m_lexBuf1 = new char[(int)yyLexBufSize]; |
562 | m_lexBuf2 = new char[(int)yyLexBufSize]; |
563 | m_prevLex = m_lexBuf1; |
564 | m_prevLex[0] = '\0'; |
565 | m_lex = m_lexBuf2; |
566 | m_lex[0] = '\0'; |
567 | m_lexLen = 0; |
568 | m_preprocessorSkipping.push(t: false); |
569 | m_numPreprocessorSkipping = 0; |
570 | m_braceDepth = 0; |
571 | m_parenDepth = 0; |
572 | m_bracketDepth = 0; |
573 | m_ch = '\0'; |
574 | m_parsingMacro = false; |
575 | } |
576 | |
577 | void Tokenizer::start(const Location &loc) |
578 | { |
579 | m_tokLoc = loc; |
580 | m_curLoc = loc; |
581 | m_curLoc.start(); |
582 | strcpy(dest: m_prevLex, src: "beginning-of-input" ); |
583 | strcpy(dest: m_lex, src: "beginning-of-input" ); |
584 | m_lexLen = strlen(s: m_lex); |
585 | m_braceDepth = 0; |
586 | m_parenDepth = 0; |
587 | m_bracketDepth = 0; |
588 | m_ch = '\0'; |
589 | m_ch = getChar(); |
590 | } |
591 | |
592 | /* |
593 | Returns the next token, if # was met. This function interprets the |
594 | preprocessor directive, skips over any #ifdef'd out tokens, and returns the |
595 | token after all of that. |
596 | */ |
597 | int Tokenizer::getTokenAfterPreprocessor() |
598 | { |
599 | m_ch = getChar(); |
600 | while (isspace(m_ch) && m_ch != '\n') |
601 | m_ch = getChar(); |
602 | |
603 | /* |
604 | #directive condition |
605 | */ |
606 | QString directive; |
607 | QString condition; |
608 | |
609 | while (isalpha(m_ch)) { |
610 | directive += QChar(m_ch); |
611 | m_ch = getChar(); |
612 | } |
613 | if (!directive.isEmpty()) { |
614 | while (m_ch != EOF && m_ch != '\n') { |
615 | if (m_ch == '\\') { |
616 | m_ch = getChar(); |
617 | if (m_ch == '\r') |
618 | m_ch = getChar(); |
619 | } |
620 | condition += QChar(m_ch); |
621 | m_ch = getChar(); |
622 | } |
623 | condition.remove(re: *comment); |
624 | condition = condition.simplified(); |
625 | |
626 | /* |
627 | The #if, #ifdef, #ifndef, #elif, #else, and #endif |
628 | directives have an effect on the skipping stack. For |
629 | instance, if the code processed so far is |
630 | |
631 | #if 1 |
632 | #if 0 |
633 | #if 1 |
634 | // ... |
635 | #else |
636 | |
637 | the skipping stack contains, from bottom to top, false true |
638 | true (assuming 0 is false and 1 is true). If at least one |
639 | entry of the stack is true, the tokens are skipped. |
640 | |
641 | This mechanism is simple yet hard to understand. |
642 | */ |
643 | if (directive[0] == QChar('i')) { |
644 | if (directive == QString("if" )) |
645 | pushSkipping(skip: !isTrue(condition)); |
646 | else if (directive == QString("ifdef" )) |
647 | pushSkipping(skip: !defines->match(subject: condition).hasMatch()); |
648 | else if (directive == QString("ifndef" )) |
649 | pushSkipping(skip: defines->match(subject: condition).hasMatch()); |
650 | } else if (directive[0] == QChar('e')) { |
651 | if (directive == QString("elif" )) { |
652 | bool old = popSkipping(); |
653 | if (old) |
654 | pushSkipping(skip: !isTrue(condition)); |
655 | else |
656 | pushSkipping(skip: true); |
657 | } else if (directive == QString("else" )) { |
658 | pushSkipping(skip: !popSkipping()); |
659 | } else if (directive == QString("endif" )) { |
660 | popSkipping(); |
661 | } |
662 | } else if (directive == QString("define" )) { |
663 | auto match = versionX->match(subject: condition); |
664 | if (match.hasMatch()) |
665 | m_version = match.captured(nth: 1); |
666 | } |
667 | } |
668 | |
669 | int tok; |
670 | do { |
671 | /* |
672 | We set yyLex now, and after getToken() this will be |
673 | yyPrevLex. This way, we skip over the preprocessor |
674 | directive. |
675 | */ |
676 | qstrcpy(dst: m_lex, src: m_prevLex); |
677 | |
678 | /* |
679 | If getToken() meets another #, it will call |
680 | getTokenAfterPreprocessor() once again, which could in turn |
681 | call getToken() again, etc. Unless there are 10,000 or so |
682 | preprocessor directives in a row, this shouldn't overflow |
683 | the stack. |
684 | */ |
685 | tok = getToken(); |
686 | } while (m_numPreprocessorSkipping > 0 && tok != Tok_Eoi); |
687 | return tok; |
688 | } |
689 | |
690 | /* |
691 | Pushes a new skipping value onto the stack. This corresponds to entering a |
692 | new #if block. |
693 | */ |
694 | void Tokenizer::pushSkipping(bool skip) |
695 | { |
696 | m_preprocessorSkipping.push(t: skip); |
697 | if (skip) |
698 | m_numPreprocessorSkipping++; |
699 | } |
700 | |
701 | /* |
702 | Pops a skipping value from the stack. This corresponds to reaching a #endif. |
703 | */ |
704 | bool Tokenizer::popSkipping() |
705 | { |
706 | if (m_preprocessorSkipping.isEmpty()) { |
707 | m_tokLoc.warning(QStringLiteral("Unexpected #elif, #else or #endif" )); |
708 | return true; |
709 | } |
710 | |
711 | bool skip = m_preprocessorSkipping.pop(); |
712 | if (skip) |
713 | m_numPreprocessorSkipping--; |
714 | return skip; |
715 | } |
716 | |
717 | /* |
718 | Returns \c true if the condition evaluates as true, otherwise false. The |
719 | condition is represented by a string. Unsophisticated parsing techniques are |
720 | used. The preprocessing method could be named StriNg-Oriented PreProcessing, |
721 | as SNOBOL stands for StriNg-Oriented symBOlic Language. |
722 | */ |
723 | bool Tokenizer::isTrue(const QString &condition) |
724 | { |
725 | int firstOr = -1; |
726 | int firstAnd = -1; |
727 | int parenDepth = 0; |
728 | |
729 | /* |
730 | Find the first logical operator at top level, but be careful |
731 | about precedence. Examples: |
732 | |
733 | X || Y // the or |
734 | X || Y || Z // the leftmost or |
735 | X || Y && Z // the or |
736 | X && Y || Z // the or |
737 | (X || Y) && Z // the and |
738 | */ |
739 | for (int i = 0; i < condition.size() - 1; i++) { |
740 | QChar ch = condition[i]; |
741 | if (ch == QChar('(')) { |
742 | parenDepth++; |
743 | } else if (ch == QChar(')')) { |
744 | parenDepth--; |
745 | } else if (parenDepth == 0) { |
746 | if (condition[i + 1] == ch) { |
747 | if (ch == QChar('|')) { |
748 | firstOr = i; |
749 | break; |
750 | } else if (ch == QChar('&')) { |
751 | if (firstAnd == -1) |
752 | firstAnd = i; |
753 | } |
754 | } |
755 | } |
756 | } |
757 | if (firstOr != -1) |
758 | return isTrue(condition: condition.left(n: firstOr)) || isTrue(condition: condition.mid(position: firstOr + 2)); |
759 | if (firstAnd != -1) |
760 | return isTrue(condition: condition.left(n: firstAnd)) && isTrue(condition: condition.mid(position: firstAnd + 2)); |
761 | |
762 | QString t = condition.simplified(); |
763 | if (t.isEmpty()) |
764 | return true; |
765 | |
766 | if (t[0] == QChar('!')) |
767 | return !isTrue(condition: t.mid(position: 1)); |
768 | if (t[0] == QChar('(') && t.endsWith(c: QChar(')'))) |
769 | return isTrue(condition: t.mid(position: 1, n: t.size() - 2)); |
770 | |
771 | auto match = definedX->match(subject: t); |
772 | if (match.hasMatch()) |
773 | return defines->match(subject: match.captured(nth: 1)).hasMatch(); |
774 | else |
775 | return !falsehoods->match(subject: t).hasMatch(); |
776 | } |
777 | |
778 | QString Tokenizer::lexeme() const |
779 | { |
780 | return sourceDecoder(m_lex); |
781 | } |
782 | |
783 | QString Tokenizer::previousLexeme() const |
784 | { |
785 | return sourceDecoder(m_prevLex); |
786 | } |
787 | |
788 | QT_END_NAMESPACE |
789 | |