1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Copyright (C) 2014 Olivier Goffart <ogoffart@woboq.org>
5** Contact: https://www.qt.io/licensing/
6**
7** This file is part of the tools applications of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:GPL-EXCEPT$
10** Commercial License Usage
11** Licensees holding valid commercial Qt licenses may use this file in
12** accordance with the commercial license agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and The Qt Company. For licensing terms
15** and conditions see https://www.qt.io/terms-conditions. For further
16** information use the contact form at https://www.qt.io/contact-us.
17**
18** GNU General Public License Usage
19** Alternatively, this file may be used under the terms of the GNU
20** General Public License version 3 as published by the Free Software
21** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
22** included in the packaging of this file. Please review the following
23** information to ensure the GNU General Public License requirements will
24** be met: https://www.gnu.org/licenses/gpl-3.0.html.
25**
26** $QT_END_LICENSE$
27**
28****************************************************************************/
29
30#include "preprocessor.h"
31#include "utils.h"
32#include <qstringlist.h>
33#include <qfile.h>
34#include <qdir.h>
35#include <qfileinfo.h>
36
37QT_BEGIN_NAMESPACE
38
39#include "ppkeywords.cpp"
40#include "keywords.cpp"
41
42// transform \r\n into \n
43// \r into \n (os9 style)
44// backslash-newlines into newlines
45static QByteArray cleaned(const QByteArray &input)
46{
47 QByteArray result;
48 result.resize(size: input.size());
49 const char *data = input.constData();
50 const char *end = input.constData() + input.size();
51 char *output = result.data();
52
53 int newlines = 0;
54 while (data != end) {
55 while (data != end && is_space(s: *data))
56 ++data;
57 bool takeLine = (*data == '#');
58 if (*data == '%' && *(data+1) == ':') {
59 takeLine = true;
60 ++data;
61 }
62 if (takeLine) {
63 *output = '#';
64 ++output;
65 do ++data; while (data != end && is_space(s: *data));
66 }
67 while (data != end) {
68 // handle \\\n, \\\r\n and \\\r
69 if (*data == '\\') {
70 if (*(data + 1) == '\r') {
71 ++data;
72 }
73 if (data != end && (*(data + 1) == '\n' || (*data) == '\r')) {
74 ++newlines;
75 data += 1;
76 if (data != end && *data != '\r')
77 data += 1;
78 continue;
79 }
80 } else if (*data == '\r' && *(data + 1) == '\n') { // reduce \r\n to \n
81 ++data;
82 }
83 if (data == end)
84 break;
85
86 char ch = *data;
87 if (ch == '\r') // os9: replace \r with \n
88 ch = '\n';
89 *output = ch;
90 ++output;
91
92 if (*data == '\n') {
93 // output additional newlines to keep the correct line-numbering
94 // for the lines following the backslash-newline sequence(s)
95 while (newlines) {
96 *output = '\n';
97 ++output;
98 --newlines;
99 }
100 ++data;
101 break;
102 }
103 ++data;
104 }
105 }
106 result.resize(size: output - result.constData());
107 return result;
108}
109
110bool Preprocessor::preprocessOnly = false;
111void Preprocessor::skipUntilEndif()
112{
113 while(index < symbols.size() - 1 && symbols.at(i: index).token != PP_ENDIF){
114 switch (symbols.at(i: index).token) {
115 case PP_IF:
116 case PP_IFDEF:
117 case PP_IFNDEF:
118 ++index;
119 skipUntilEndif();
120 break;
121 default:
122 ;
123 }
124 ++index;
125 }
126}
127
128bool Preprocessor::skipBranch()
129{
130 while (index < symbols.size() - 1
131 && (symbols.at(i: index).token != PP_ENDIF
132 && symbols.at(i: index).token != PP_ELIF
133 && symbols.at(i: index).token != PP_ELSE)
134 ){
135 switch (symbols.at(i: index).token) {
136 case PP_IF:
137 case PP_IFDEF:
138 case PP_IFNDEF:
139 ++index;
140 skipUntilEndif();
141 break;
142 default:
143 ;
144 }
145 ++index;
146 }
147 return (index < symbols.size() - 1);
148}
149
150
151Symbols Preprocessor::tokenize(const QByteArray& input, int lineNum, Preprocessor::TokenizeMode mode)
152{
153 Symbols symbols;
154 // Preallocate some space to speed up the code below.
155 // The magic divisor value was found by calculating the average ratio between
156 // input size and the final size of symbols.
157 // This yielded a value of 16.x when compiling Qt Base.
158 symbols.reserve(asize: input.size() / 16);
159 const char *begin = input.constData();
160 const char *data = begin;
161 while (*data) {
162 if (mode == TokenizeCpp || mode == TokenizeDefine) {
163 int column = 0;
164
165 const char *lexem = data;
166 int state = 0;
167 Token token = NOTOKEN;
168 for (;;) {
169 if (static_cast<signed char>(*data) < 0) {
170 ++data;
171 continue;
172 }
173 int nextindex = keywords[state].next;
174 int next = 0;
175 if (*data == keywords[state].defchar)
176 next = keywords[state].defnext;
177 else if (!state || nextindex)
178 next = keyword_trans[nextindex][(int)*data];
179 if (!next)
180 break;
181 state = next;
182 token = keywords[state].token;
183 ++data;
184 }
185
186 // suboptimal, is_ident_char should use a table
187 if (keywords[state].ident && is_ident_char(s: *data))
188 token = keywords[state].ident;
189
190 if (token == NOTOKEN) {
191 if (*data)
192 ++data;
193 // an error really, but let's ignore this input
194 // to not confuse moc later. However in pre-processor
195 // only mode let's continue.
196 if (!Preprocessor::preprocessOnly)
197 continue;
198 }
199
200 ++column;
201
202 if (token > SPECIAL_TREATMENT_MARK) {
203 switch (token) {
204 case QUOTE:
205 data = skipQuote(data);
206 token = STRING_LITERAL;
207 // concatenate multi-line strings for easier
208 // STRING_LITERAL handling in moc
209 if (!Preprocessor::preprocessOnly
210 && !symbols.isEmpty()
211 && symbols.constLast().token == STRING_LITERAL) {
212
213 const QByteArray newString
214 = '\"'
215 + symbols.constLast().unquotedLexem()
216 + input.mid(index: lexem - begin + 1, len: data - lexem - 2)
217 + '\"';
218 symbols.last() = Symbol(symbols.constLast().lineNum,
219 STRING_LITERAL,
220 newString);
221 continue;
222 }
223 break;
224 case SINGLEQUOTE:
225 while (*data && (*data != '\''
226 || (*(data-1)=='\\'
227 && *(data-2)!='\\')))
228 ++data;
229 if (*data)
230 ++data;
231 token = CHARACTER_LITERAL;
232 break;
233 case LANGLE_SCOPE:
234 // split <:: into two tokens, < and ::
235 token = LANGLE;
236 data -= 2;
237 break;
238 case DIGIT:
239 while (is_digit_char(s: *data) || *data == '\'')
240 ++data;
241 if (!*data || *data != '.') {
242 token = INTEGER_LITERAL;
243 if (data - lexem == 1 &&
244 (*data == 'x' || *data == 'X'
245 || *data == 'b' || *data == 'B')
246 && *lexem == '0') {
247 ++data;
248 while (is_hex_char(s: *data) || *data == '\'')
249 ++data;
250 }
251 break;
252 }
253 token = FLOATING_LITERAL;
254 ++data;
255 Q_FALLTHROUGH();
256 case FLOATING_LITERAL:
257 while (is_digit_char(s: *data) || *data == '\'')
258 ++data;
259 if (*data == '+' || *data == '-')
260 ++data;
261 if (*data == 'e' || *data == 'E') {
262 ++data;
263 while (is_digit_char(s: *data) || *data == '\'')
264 ++data;
265 }
266 if (*data == 'f' || *data == 'F'
267 || *data == 'l' || *data == 'L')
268 ++data;
269 break;
270 case HASH:
271 if (column == 1 && mode == TokenizeCpp) {
272 mode = PreparePreprocessorStatement;
273 while (*data && (*data == ' ' || *data == '\t'))
274 ++data;
275 if (is_ident_char(s: *data))
276 mode = TokenizePreprocessorStatement;
277 continue;
278 }
279 break;
280 case PP_HASHHASH:
281 if (mode == TokenizeCpp)
282 continue;
283 break;
284 case NEWLINE:
285 ++lineNum;
286 if (mode == TokenizeDefine) {
287 mode = TokenizeCpp;
288 // emit the newline token
289 break;
290 }
291 continue;
292 case BACKSLASH:
293 {
294 const char *rewind = data;
295 while (*data && (*data == ' ' || *data == '\t'))
296 ++data;
297 if (*data && *data == '\n') {
298 ++data;
299 continue;
300 }
301 data = rewind;
302 } break;
303 case CHARACTER:
304 while (is_ident_char(s: *data))
305 ++data;
306 token = IDENTIFIER;
307 break;
308 case C_COMMENT:
309 if (*data) {
310 if (*data == '\n')
311 ++lineNum;
312 ++data;
313 if (*data) {
314 if (*data == '\n')
315 ++lineNum;
316 ++data;
317 }
318 }
319 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
320 if (*data == '\n')
321 ++lineNum;
322 ++data;
323 }
324 token = WHITESPACE; // one comment, one whitespace
325 Q_FALLTHROUGH();
326 case WHITESPACE:
327 if (column == 1)
328 column = 0;
329 while (*data && (*data == ' ' || *data == '\t'))
330 ++data;
331 if (Preprocessor::preprocessOnly) // tokenize whitespace
332 break;
333 continue;
334 case CPP_COMMENT:
335 while (*data && *data != '\n')
336 ++data;
337 continue; // ignore safely, the newline is a separator
338 default:
339 continue; //ignore
340 }
341 }
342#ifdef USE_LEXEM_STORE
343 if (!Preprocessor::preprocessOnly
344 && token != IDENTIFIER
345 && token != STRING_LITERAL
346 && token != FLOATING_LITERAL
347 && token != INTEGER_LITERAL)
348 symbols += Symbol(lineNum, token);
349 else
350#endif
351 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
352
353 } else { // Preprocessor
354
355 const char *lexem = data;
356 int state = 0;
357 Token token = NOTOKEN;
358 if (mode == TokenizePreprocessorStatement) {
359 state = pp_keyword_trans[0][(int)'#'];
360 mode = TokenizePreprocessor;
361 }
362 for (;;) {
363 if (static_cast<signed char>(*data) < 0) {
364 ++data;
365 continue;
366 }
367 int nextindex = pp_keywords[state].next;
368 int next = 0;
369 if (*data == pp_keywords[state].defchar)
370 next = pp_keywords[state].defnext;
371 else if (!state || nextindex)
372 next = pp_keyword_trans[nextindex][(int)*data];
373 if (!next)
374 break;
375 state = next;
376 token = pp_keywords[state].token;
377 ++data;
378 }
379 // suboptimal, is_ident_char should use a table
380 if (pp_keywords[state].ident && is_ident_char(s: *data))
381 token = pp_keywords[state].ident;
382
383 switch (token) {
384 case NOTOKEN:
385 if (*data)
386 ++data;
387 break;
388 case PP_DEFINE:
389 mode = PrepareDefine;
390 break;
391 case PP_IFDEF:
392 symbols += Symbol(lineNum, PP_IF);
393 symbols += Symbol(lineNum, PP_DEFINED);
394 continue;
395 case PP_IFNDEF:
396 symbols += Symbol(lineNum, PP_IF);
397 symbols += Symbol(lineNum, PP_NOT);
398 symbols += Symbol(lineNum, PP_DEFINED);
399 continue;
400 case PP_INCLUDE:
401 mode = TokenizeInclude;
402 break;
403 case PP_QUOTE:
404 data = skipQuote(data);
405 token = PP_STRING_LITERAL;
406 break;
407 case PP_SINGLEQUOTE:
408 while (*data && (*data != '\''
409 || (*(data-1)=='\\'
410 && *(data-2)!='\\')))
411 ++data;
412 if (*data)
413 ++data;
414 token = PP_CHARACTER_LITERAL;
415 break;
416 case PP_DIGIT:
417 while (is_digit_char(s: *data) || *data == '\'')
418 ++data;
419 if (!*data || *data != '.') {
420 token = PP_INTEGER_LITERAL;
421 if (data - lexem == 1 &&
422 (*data == 'x' || *data == 'X')
423 && *lexem == '0') {
424 ++data;
425 while (is_hex_char(s: *data) || *data == '\'')
426 ++data;
427 }
428 break;
429 }
430 token = PP_FLOATING_LITERAL;
431 ++data;
432 Q_FALLTHROUGH();
433 case PP_FLOATING_LITERAL:
434 while (is_digit_char(s: *data) || *data == '\'')
435 ++data;
436 if (*data == '+' || *data == '-')
437 ++data;
438 if (*data == 'e' || *data == 'E') {
439 ++data;
440 while (is_digit_char(s: *data) || *data == '\'')
441 ++data;
442 }
443 if (*data == 'f' || *data == 'F'
444 || *data == 'l' || *data == 'L')
445 ++data;
446 break;
447 case PP_CHARACTER:
448 if (mode == PreparePreprocessorStatement) {
449 // rewind entire token to begin
450 data = lexem;
451 mode = TokenizePreprocessorStatement;
452 continue;
453 }
454 while (is_ident_char(s: *data))
455 ++data;
456 token = PP_IDENTIFIER;
457
458 if (mode == PrepareDefine) {
459 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
460 // make sure we explicitly add the whitespace here if the next char
461 // is not an opening brace, so we can distinguish correctly between
462 // regular and function macros
463 if (*data != '(')
464 symbols += Symbol(lineNum, WHITESPACE);
465 mode = TokenizeDefine;
466 continue;
467 }
468 break;
469 case PP_C_COMMENT:
470 if (*data) {
471 if (*data == '\n')
472 ++lineNum;
473 ++data;
474 if (*data) {
475 if (*data == '\n')
476 ++lineNum;
477 ++data;
478 }
479 }
480 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
481 if (*data == '\n')
482 ++lineNum;
483 ++data;
484 }
485 token = PP_WHITESPACE; // one comment, one whitespace
486 Q_FALLTHROUGH();
487 case PP_WHITESPACE:
488 while (*data && (*data == ' ' || *data == '\t'))
489 ++data;
490 continue; // the preprocessor needs no whitespace
491 case PP_CPP_COMMENT:
492 while (*data && *data != '\n')
493 ++data;
494 continue; // ignore safely, the newline is a separator
495 case PP_NEWLINE:
496 ++lineNum;
497 mode = TokenizeCpp;
498 break;
499 case PP_BACKSLASH:
500 {
501 const char *rewind = data;
502 while (*data && (*data == ' ' || *data == '\t'))
503 ++data;
504 if (*data && *data == '\n') {
505 ++data;
506 continue;
507 }
508 data = rewind;
509 } break;
510 case PP_LANGLE:
511 if (mode != TokenizeInclude)
512 break;
513 token = PP_STRING_LITERAL;
514 while (*data && *data != '\n' && *(data-1) != '>')
515 ++data;
516 break;
517 default:
518 break;
519 }
520 if (mode == PreparePreprocessorStatement)
521 continue;
522#ifdef USE_LEXEM_STORE
523 if (token != PP_IDENTIFIER
524 && token != PP_STRING_LITERAL
525 && token != PP_FLOATING_LITERAL
526 && token != PP_INTEGER_LITERAL)
527 symbols += Symbol(lineNum, token);
528 else
529#endif
530 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
531 }
532 }
533 symbols += Symbol(); // eof symbol
534 return symbols;
535}
536
537void Preprocessor::macroExpand(Symbols *into, Preprocessor *that, const Symbols &toExpand, int &index,
538 int lineNum, bool one, const QSet<QByteArray> &excludeSymbols)
539{
540 SymbolStack symbols;
541 SafeSymbols sf;
542 sf.symbols = toExpand;
543 sf.index = index;
544 sf.excludedSymbols = excludeSymbols;
545 symbols.push(t: sf);
546
547 if (toExpand.isEmpty())
548 return;
549
550 for (;;) {
551 QByteArray macro;
552 Symbols newSyms = macroExpandIdentifier(that, symbols, lineNum, macroName: &macro);
553
554 if (macro.isEmpty()) {
555 // not a macro
556 Symbol s = symbols.symbol();
557 s.lineNum = lineNum;
558 *into += s;
559 } else {
560 SafeSymbols sf;
561 sf.symbols = newSyms;
562 sf.index = 0;
563 sf.expandedMacro = macro;
564 symbols.push(t: sf);
565 }
566 if (!symbols.hasNext() || (one && symbols.size() == 1))
567 break;
568 symbols.next();
569 }
570
571 if (symbols.size())
572 index = symbols.top().index;
573 else
574 index = toExpand.size();
575}
576
577
578Symbols Preprocessor::macroExpandIdentifier(Preprocessor *that, SymbolStack &symbols, int lineNum, QByteArray *macroName)
579{
580 Symbol s = symbols.symbol();
581
582 // not a macro
583 if (s.token != PP_IDENTIFIER || !that->macros.contains(akey: s) || symbols.dontReplaceSymbol(name: s.lexem())) {
584 return Symbols();
585 }
586
587 const Macro &macro = that->macros.value(akey: s);
588 *macroName = s.lexem();
589
590 Symbols expansion;
591 if (!macro.isFunction) {
592 expansion = macro.symbols;
593 } else {
594 bool haveSpace = false;
595 while (symbols.test(token: PP_WHITESPACE)) { haveSpace = true; }
596 if (!symbols.test(token: PP_LPAREN)) {
597 *macroName = QByteArray();
598 Symbols syms;
599 if (haveSpace)
600 syms += Symbol(lineNum, PP_WHITESPACE);
601 syms += s;
602 syms.last().lineNum = lineNum;
603 return syms;
604 }
605 QVarLengthArray<Symbols, 5> arguments;
606 while (symbols.hasNext()) {
607 Symbols argument;
608 // strip leading space
609 while (symbols.test(token: PP_WHITESPACE)) {}
610 int nesting = 0;
611 bool vararg = macro.isVariadic && (arguments.size() == macro.arguments.size() - 1);
612 while (symbols.hasNext()) {
613 Token t = symbols.next();
614 if (t == PP_LPAREN) {
615 ++nesting;
616 } else if (t == PP_RPAREN) {
617 --nesting;
618 if (nesting < 0)
619 break;
620 } else if (t == PP_COMMA && nesting == 0) {
621 if (!vararg)
622 break;
623 }
624 argument += symbols.symbol();
625 }
626 arguments += argument;
627
628 if (nesting < 0)
629 break;
630 else if (!symbols.hasNext())
631 that->error(msg: "missing ')' in macro usage");
632 }
633
634 // empty VA_ARGS
635 if (macro.isVariadic && arguments.size() == macro.arguments.size() - 1)
636 arguments += Symbols();
637
638 // now replace the macro arguments with the expanded arguments
639 enum Mode {
640 Normal,
641 Hash,
642 HashHash
643 } mode = Normal;
644
645 for (int i = 0; i < macro.symbols.size(); ++i) {
646 const Symbol &s = macro.symbols.at(i);
647 if (s.token == HASH || s.token == PP_HASHHASH) {
648 mode = (s.token == HASH ? Hash : HashHash);
649 continue;
650 }
651 int index = macro.arguments.indexOf(t: s);
652 if (mode == Normal) {
653 if (index >= 0 && index < arguments.size()) {
654 // each argument undoergoes macro expansion if it's not used as part of a # or ##
655 if (i == macro.symbols.size() - 1 || macro.symbols.at(i: i + 1).token != PP_HASHHASH) {
656 Symbols arg = arguments.at(idx: index);
657 int idx = 1;
658 macroExpand(into: &expansion, that, toExpand: arg, index&: idx, lineNum, one: false, excludeSymbols: symbols.excludeSymbols());
659 } else {
660 expansion += arguments.at(idx: index);
661 }
662 } else {
663 expansion += s;
664 }
665 } else if (mode == Hash) {
666 if (index < 0) {
667 that->error(msg: "'#' is not followed by a macro parameter");
668 continue;
669 } else if (index >= arguments.size()) {
670 that->error(msg: "Macro invoked with too few parameters for a use of '#'");
671 continue;
672 }
673
674 const Symbols &arg = arguments.at(idx: index);
675 QByteArray stringified;
676 for (int i = 0; i < arg.size(); ++i) {
677 stringified += arg.at(i).lexem();
678 }
679 stringified.replace(before: '"', c: "\\\"");
680 stringified.prepend(c: '"');
681 stringified.append(c: '"');
682 expansion += Symbol(lineNum, STRING_LITERAL, stringified);
683 } else if (mode == HashHash){
684 if (s.token == WHITESPACE)
685 continue;
686
687 while (expansion.size() && expansion.constLast().token == PP_WHITESPACE)
688 expansion.pop_back();
689
690 Symbol next = s;
691 if (index >= 0 && index < arguments.size()) {
692 const Symbols &arg = arguments.at(idx: index);
693 if (arg.size() == 0) {
694 mode = Normal;
695 continue;
696 }
697 next = arg.at(i: 0);
698 }
699
700 if (!expansion.isEmpty() && expansion.constLast().token == s.token
701 && expansion.constLast().token != STRING_LITERAL) {
702 Symbol last = expansion.takeLast();
703
704 QByteArray lexem = last.lexem() + next.lexem();
705 expansion += Symbol(lineNum, last.token, lexem);
706 } else {
707 expansion += next;
708 }
709
710 if (index >= 0 && index < arguments.size()) {
711 const Symbols &arg = arguments.at(idx: index);
712 for (int i = 1; i < arg.size(); ++i)
713 expansion += arg.at(i);
714 }
715 }
716 mode = Normal;
717 }
718 if (mode != Normal)
719 that->error(msg: "'#' or '##' found at the end of a macro argument");
720
721 }
722
723 return expansion;
724}
725
726void Preprocessor::substituteUntilNewline(Symbols &substituted)
727{
728 while (hasNext()) {
729 Token token = next();
730 if (token == PP_IDENTIFIER) {
731 macroExpand(into: &substituted, that: this, toExpand: symbols, index, lineNum: symbol().lineNum, one: true);
732 } else if (token == PP_DEFINED) {
733 bool braces = test(token: PP_LPAREN);
734 next(token: PP_IDENTIFIER);
735 Symbol definedOrNotDefined = symbol();
736 definedOrNotDefined.token = macros.contains(akey: definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE;
737 substituted += definedOrNotDefined;
738 if (braces)
739 test(token: PP_RPAREN);
740 continue;
741 } else if (token == PP_NEWLINE) {
742 substituted += symbol();
743 break;
744 } else {
745 substituted += symbol();
746 }
747 }
748}
749
750
751class PP_Expression : public Parser
752{
753public:
754 int value() { index = 0; return unary_expression_lookup() ? conditional_expression() : 0; }
755
756 int conditional_expression();
757 int logical_OR_expression();
758 int logical_AND_expression();
759 int inclusive_OR_expression();
760 int exclusive_OR_expression();
761 int AND_expression();
762 int equality_expression();
763 int relational_expression();
764 int shift_expression();
765 int additive_expression();
766 int multiplicative_expression();
767 int unary_expression();
768 bool unary_expression_lookup();
769 int primary_expression();
770 bool primary_expression_lookup();
771};
772
773int PP_Expression::conditional_expression()
774{
775 int value = logical_OR_expression();
776 if (test(token: PP_QUESTION)) {
777 int alt1 = conditional_expression();
778 int alt2 = test(token: PP_COLON) ? conditional_expression() : 0;
779 return value ? alt1 : alt2;
780 }
781 return value;
782}
783
784int PP_Expression::logical_OR_expression()
785{
786 int value = logical_AND_expression();
787 if (test(token: PP_OROR))
788 return logical_OR_expression() || value;
789 return value;
790}
791
792int PP_Expression::logical_AND_expression()
793{
794 int value = inclusive_OR_expression();
795 if (test(token: PP_ANDAND))
796 return logical_AND_expression() && value;
797 return value;
798}
799
800int PP_Expression::inclusive_OR_expression()
801{
802 int value = exclusive_OR_expression();
803 if (test(token: PP_OR))
804 return value | inclusive_OR_expression();
805 return value;
806}
807
808int PP_Expression::exclusive_OR_expression()
809{
810 int value = AND_expression();
811 if (test(token: PP_HAT))
812 return value ^ exclusive_OR_expression();
813 return value;
814}
815
816int PP_Expression::AND_expression()
817{
818 int value = equality_expression();
819 if (test(token: PP_AND))
820 return value & AND_expression();
821 return value;
822}
823
824int PP_Expression::equality_expression()
825{
826 int value = relational_expression();
827 switch (next()) {
828 case PP_EQEQ:
829 return value == equality_expression();
830 case PP_NE:
831 return value != equality_expression();
832 default:
833 prev();
834 return value;
835 }
836}
837
838int PP_Expression::relational_expression()
839{
840 int value = shift_expression();
841 switch (next()) {
842 case PP_LANGLE:
843 return value < relational_expression();
844 case PP_RANGLE:
845 return value > relational_expression();
846 case PP_LE:
847 return value <= relational_expression();
848 case PP_GE:
849 return value >= relational_expression();
850 default:
851 prev();
852 return value;
853 }
854}
855
856int PP_Expression::shift_expression()
857{
858 int value = additive_expression();
859 switch (next()) {
860 case PP_LTLT:
861 return value << shift_expression();
862 case PP_GTGT:
863 return value >> shift_expression();
864 default:
865 prev();
866 return value;
867 }
868}
869
870int PP_Expression::additive_expression()
871{
872 int value = multiplicative_expression();
873 switch (next()) {
874 case PP_PLUS:
875 return value + additive_expression();
876 case PP_MINUS:
877 return value - additive_expression();
878 default:
879 prev();
880 return value;
881 }
882}
883
884int PP_Expression::multiplicative_expression()
885{
886 int value = unary_expression();
887 switch (next()) {
888 case PP_STAR:
889 {
890 // get well behaved overflow behavior by converting to long
891 // and then back to int
892 // NOTE: A conformant preprocessor would need to work intmax_t/
893 // uintmax_t according to [cpp.cond], 19.1 §10
894 // But we're not compliant anyway
895 qint64 result = qint64(value) * qint64(multiplicative_expression());
896 return int(result);
897 }
898 case PP_PERCENT:
899 {
900 int remainder = multiplicative_expression();
901 return remainder ? value % remainder : 0;
902 }
903 case PP_SLASH:
904 {
905 int div = multiplicative_expression();
906 return div ? value / div : 0;
907 }
908 default:
909 prev();
910 return value;
911 };
912}
913
914int PP_Expression::unary_expression()
915{
916 switch (next()) {
917 case PP_PLUS:
918 return unary_expression();
919 case PP_MINUS:
920 return -unary_expression();
921 case PP_NOT:
922 return !unary_expression();
923 case PP_TILDE:
924 return ~unary_expression();
925 case PP_MOC_TRUE:
926 return 1;
927 case PP_MOC_FALSE:
928 return 0;
929 default:
930 prev();
931 return primary_expression();
932 }
933}
934
935bool PP_Expression::unary_expression_lookup()
936{
937 Token t = lookup();
938 return (primary_expression_lookup()
939 || t == PP_PLUS
940 || t == PP_MINUS
941 || t == PP_NOT
942 || t == PP_TILDE
943 || t == PP_DEFINED);
944}
945
946int PP_Expression::primary_expression()
947{
948 int value;
949 if (test(token: PP_LPAREN)) {
950 value = conditional_expression();
951 test(token: PP_RPAREN);
952 } else {
953 next();
954 value = lexem().toInt(ok: nullptr, base: 0);
955 }
956 return value;
957}
958
959bool PP_Expression::primary_expression_lookup()
960{
961 Token t = lookup();
962 return (t == PP_IDENTIFIER
963 || t == PP_INTEGER_LITERAL
964 || t == PP_FLOATING_LITERAL
965 || t == PP_MOC_TRUE
966 || t == PP_MOC_FALSE
967 || t == PP_LPAREN);
968}
969
970int Preprocessor::evaluateCondition()
971{
972 PP_Expression expression;
973 expression.currentFilenames = currentFilenames;
974
975 substituteUntilNewline(substituted&: expression.symbols);
976
977 return expression.value();
978}
979
980static QByteArray readOrMapFile(QFile *file)
981{
982 const qint64 size = file->size();
983 char *rawInput = reinterpret_cast<char*>(file->map(offset: 0, size));
984 return rawInput ? QByteArray::fromRawData(rawInput, size) : file->readAll();
985}
986
987static void mergeStringLiterals(Symbols *_symbols)
988{
989 Symbols &symbols = *_symbols;
990 for (Symbols::iterator i = symbols.begin(); i != symbols.end(); ++i) {
991 if (i->token == STRING_LITERAL) {
992 Symbols::Iterator mergeSymbol = i;
993 int literalsLength = mergeSymbol->len;
994 while (++i != symbols.end() && i->token == STRING_LITERAL)
995 literalsLength += i->len - 2; // no quotes
996
997 if (literalsLength != mergeSymbol->len) {
998 QByteArray mergeSymbolOriginalLexem = mergeSymbol->unquotedLexem();
999 QByteArray &mergeSymbolLexem = mergeSymbol->lex;
1000 mergeSymbolLexem.resize(size: 0);
1001 mergeSymbolLexem.reserve(asize: literalsLength);
1002 mergeSymbolLexem.append(c: '"');
1003 mergeSymbolLexem.append(a: mergeSymbolOriginalLexem);
1004 for (Symbols::iterator j = mergeSymbol + 1; j != i; ++j)
1005 mergeSymbolLexem.append(s: j->lex.constData() + j->from + 1, len: j->len - 2); // append j->unquotedLexem()
1006 mergeSymbolLexem.append(c: '"');
1007 mergeSymbol->len = mergeSymbol->lex.length();
1008 mergeSymbol->from = 0;
1009 i = symbols.erase(abegin: mergeSymbol + 1, aend: i);
1010 }
1011 if (i == symbols.end())
1012 break;
1013 }
1014 }
1015}
1016
1017static QByteArray searchIncludePaths(const QList<Parser::IncludePath> &includepaths,
1018 const QByteArray &include)
1019{
1020 QFileInfo fi;
1021 for (int j = 0; j < includepaths.size() && !fi.exists(); ++j) {
1022 const Parser::IncludePath &p = includepaths.at(i: j);
1023 if (p.isFrameworkPath) {
1024 const int slashPos = include.indexOf(c: '/');
1025 if (slashPos == -1)
1026 continue;
1027 fi.setFile(dir: QString::fromLocal8Bit(str: p.path + '/' + include.left(len: slashPos) + ".framework/Headers/"),
1028 file: QString::fromLocal8Bit(str: include.mid(index: slashPos + 1)));
1029 } else {
1030 fi.setFile(dir: QString::fromLocal8Bit(str: p.path), file: QString::fromLocal8Bit(str: include));
1031 }
1032 // try again, maybe there's a file later in the include paths with the same name
1033 // (186067)
1034 if (fi.isDir()) {
1035 fi = QFileInfo();
1036 continue;
1037 }
1038 }
1039
1040 if (!fi.exists() || fi.isDir())
1041 return QByteArray();
1042 return fi.canonicalFilePath().toLocal8Bit();
1043}
1044
1045QByteArray Preprocessor::resolveInclude(const QByteArray &include, const QByteArray &relativeTo)
1046{
1047 if (!relativeTo.isEmpty()) {
1048 QFileInfo fi;
1049 fi.setFile(dir: QFileInfo(QString::fromLocal8Bit(str: relativeTo)).dir(), file: QString::fromLocal8Bit(str: include));
1050 if (fi.exists() && !fi.isDir())
1051 return fi.canonicalFilePath().toLocal8Bit();
1052 }
1053
1054 auto it = nonlocalIncludePathResolutionCache.find(akey: include);
1055 if (it == nonlocalIncludePathResolutionCache.end())
1056 it = nonlocalIncludePathResolutionCache.insert(akey: include, avalue: searchIncludePaths(includepaths: includes, include));
1057 return it.value();
1058}
1059
1060void Preprocessor::preprocess(const QByteArray &filename, Symbols &preprocessed)
1061{
1062 currentFilenames.push(x: filename);
1063 preprocessed.reserve(asize: preprocessed.size() + symbols.size());
1064 while (hasNext()) {
1065 Token token = next();
1066
1067 switch (token) {
1068 case PP_INCLUDE:
1069 {
1070 int lineNum = symbol().lineNum;
1071 QByteArray include;
1072 bool local = false;
1073 if (test(token: PP_STRING_LITERAL)) {
1074 local = lexem().startsWith(c: '\"');
1075 include = unquotedLexem();
1076 } else
1077 continue;
1078 until(PP_NEWLINE);
1079
1080 include = resolveInclude(include, relativeTo: local ? filename : QByteArray());
1081 if (include.isNull())
1082 continue;
1083
1084 if (Preprocessor::preprocessedIncludes.contains(value: include))
1085 continue;
1086 Preprocessor::preprocessedIncludes.insert(value: include);
1087
1088 QFile file(QString::fromLocal8Bit(str: include.constData()));
1089 if (!file.open(flags: QFile::ReadOnly))
1090 continue;
1091
1092 QByteArray input = readOrMapFile(file: &file);
1093
1094 file.close();
1095 if (input.isEmpty())
1096 continue;
1097
1098 Symbols saveSymbols = symbols;
1099 int saveIndex = index;
1100
1101 // phase 1: get rid of backslash-newlines
1102 input = cleaned(input);
1103
1104 // phase 2: tokenize for the preprocessor
1105 symbols = tokenize(input);
1106 input.clear();
1107
1108 index = 0;
1109
1110 // phase 3: preprocess conditions and substitute macros
1111 preprocessed += Symbol(0, MOC_INCLUDE_BEGIN, include);
1112 preprocess(filename: include, preprocessed);
1113 preprocessed += Symbol(lineNum, MOC_INCLUDE_END, include);
1114
1115 symbols = saveSymbols;
1116 index = saveIndex;
1117 continue;
1118 }
1119 case PP_DEFINE:
1120 {
1121 next();
1122 QByteArray name = lexem();
1123 if (name.isEmpty() || !is_ident_start(s: name[0]))
1124 error();
1125 Macro macro;
1126 macro.isVariadic = false;
1127 if (test(token: LPAREN)) {
1128 // we have a function macro
1129 macro.isFunction = true;
1130 parseDefineArguments(m: &macro);
1131 } else {
1132 macro.isFunction = false;
1133 }
1134 int start = index;
1135 until(PP_NEWLINE);
1136 macro.symbols.reserve(asize: index - start - 1);
1137
1138 // remove whitespace where there shouldn't be any:
1139 // Before and after the macro, after a # and around ##
1140 Token lastToken = HASH; // skip shitespace at the beginning
1141 for (int i = start; i < index - 1; ++i) {
1142 Token token = symbols.at(i).token;
1143 if (token == WHITESPACE) {
1144 if (lastToken == PP_HASH || lastToken == HASH ||
1145 lastToken == PP_HASHHASH ||
1146 lastToken == WHITESPACE)
1147 continue;
1148 } else if (token == PP_HASHHASH) {
1149 if (!macro.symbols.isEmpty() &&
1150 lastToken == WHITESPACE)
1151 macro.symbols.pop_back();
1152 }
1153 macro.symbols.append(t: symbols.at(i));
1154 lastToken = token;
1155 }
1156 // remove trailing whitespace
1157 while (!macro.symbols.isEmpty() &&
1158 (macro.symbols.constLast().token == PP_WHITESPACE || macro.symbols.constLast().token == WHITESPACE))
1159 macro.symbols.pop_back();
1160
1161 if (!macro.symbols.isEmpty()) {
1162 if (macro.symbols.constFirst().token == PP_HASHHASH ||
1163 macro.symbols.constLast().token == PP_HASHHASH) {
1164 error(msg: "'##' cannot appear at either end of a macro expansion");
1165 }
1166 }
1167 macros.insert(akey: name, avalue: macro);
1168 continue;
1169 }
1170 case PP_UNDEF: {
1171 next();
1172 QByteArray name = lexem();
1173 until(PP_NEWLINE);
1174 macros.remove(akey: name);
1175 continue;
1176 }
1177 case PP_IDENTIFIER: {
1178 // substitute macros
1179 macroExpand(into: &preprocessed, that: this, toExpand: symbols, index, lineNum: symbol().lineNum, one: true);
1180 continue;
1181 }
1182 case PP_HASH:
1183 until(PP_NEWLINE);
1184 continue; // skip unknown preprocessor statement
1185 case PP_IFDEF:
1186 case PP_IFNDEF:
1187 case PP_IF:
1188 while (!evaluateCondition()) {
1189 if (!skipBranch())
1190 break;
1191 if (test(token: PP_ELIF)) {
1192 } else {
1193 until(PP_NEWLINE);
1194 break;
1195 }
1196 }
1197 continue;
1198 case PP_ELIF:
1199 case PP_ELSE:
1200 skipUntilEndif();
1201 Q_FALLTHROUGH();
1202 case PP_ENDIF:
1203 until(PP_NEWLINE);
1204 continue;
1205 case PP_NEWLINE:
1206 continue;
1207 case SIGNALS:
1208 case SLOTS: {
1209 Symbol sym = symbol();
1210 if (macros.contains(akey: "QT_NO_KEYWORDS"))
1211 sym.token = IDENTIFIER;
1212 else
1213 sym.token = (token == SIGNALS ? Q_SIGNALS_TOKEN : Q_SLOTS_TOKEN);
1214 preprocessed += sym;
1215 } continue;
1216 default:
1217 break;
1218 }
1219 preprocessed += symbol();
1220 }
1221
1222 currentFilenames.pop();
1223}
1224
1225Symbols Preprocessor::preprocessed(const QByteArray &filename, QFile *file)
1226{
1227 QByteArray input = readOrMapFile(file);
1228
1229 if (input.isEmpty())
1230 return symbols;
1231
1232 // phase 1: get rid of backslash-newlines
1233 input = cleaned(input);
1234
1235 // phase 2: tokenize for the preprocessor
1236 index = 0;
1237 symbols = tokenize(input);
1238
1239#if 0
1240 for (int j = 0; j < symbols.size(); ++j)
1241 fprintf(stderr, "line %d: %s(%s)\n",
1242 symbols[j].lineNum,
1243 symbols[j].lexem().constData(),
1244 tokenTypeName(symbols[j].token));
1245#endif
1246
1247 // phase 3: preprocess conditions and substitute macros
1248 Symbols result;
1249 // Preallocate some space to speed up the code below.
1250 // The magic value was found by logging the final size
1251 // and calculating an average when running moc over FOSS projects.
1252 result.reserve(asize: file->size() / 300000);
1253 preprocess(filename, preprocessed&: result);
1254 mergeStringLiterals(symbols: &result);
1255
1256#if 0
1257 for (int j = 0; j < result.size(); ++j)
1258 fprintf(stderr, "line %d: %s(%s)\n",
1259 result[j].lineNum,
1260 result[j].lexem().constData(),
1261 tokenTypeName(result[j].token));
1262#endif
1263
1264 return result;
1265}
1266
1267void Preprocessor::parseDefineArguments(Macro *m)
1268{
1269 Symbols arguments;
1270 while (hasNext()) {
1271 while (test(token: PP_WHITESPACE)) {}
1272 Token t = next();
1273 if (t == PP_RPAREN)
1274 break;
1275 if (t != PP_IDENTIFIER) {
1276 QByteArray l = lexem();
1277 if (l == "...") {
1278 m->isVariadic = true;
1279 arguments += Symbol(symbol().lineNum, PP_IDENTIFIER, "__VA_ARGS__");
1280 while (test(token: PP_WHITESPACE)) {}
1281 if (!test(token: PP_RPAREN))
1282 error(msg: "missing ')' in macro argument list");
1283 break;
1284 } else if (!is_identifier(s: l.constData(), len: l.length())) {
1285 error(msg: "Unexpected character in macro argument list.");
1286 }
1287 }
1288
1289 Symbol arg = symbol();
1290 if (arguments.contains(t: arg))
1291 error(msg: "Duplicate macro parameter.");
1292 arguments += symbol();
1293
1294 while (test(token: PP_WHITESPACE)) {}
1295 t = next();
1296 if (t == PP_RPAREN)
1297 break;
1298 if (t == PP_COMMA)
1299 continue;
1300 if (lexem() == "...") {
1301 //GCC extension: #define FOO(x, y...) x(y)
1302 // The last argument was already parsed. Just mark the macro as variadic.
1303 m->isVariadic = true;
1304 while (test(token: PP_WHITESPACE)) {}
1305 if (!test(token: PP_RPAREN))
1306 error(msg: "missing ')' in macro argument list");
1307 break;
1308 }
1309 error(msg: "Unexpected character in macro argument list.");
1310 }
1311 m->arguments = arguments;
1312 while (test(token: PP_WHITESPACE)) {}
1313}
1314
1315void Preprocessor::until(Token t)
1316{
1317 while(hasNext() && next() != t)
1318 ;
1319}
1320
1321QT_END_NAMESPACE
1322

source code of qtbase/src/tools/moc/preprocessor.cpp