// Copyright (C) 2016 The Qt Company Ltd.
// Copyright (C) 2014 Olivier Goffart <ogoffart@woboq.org>
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0

5#include "preprocessor.h"
6#include "utils.h"
7#include <qstringlist.h>
8#include <qfile.h>
9#include <qdir.h>
10#include <qfileinfo.h>
11#include <qvarlengtharray.h>
12
13QT_BEGIN_NAMESPACE
14
15#include "ppkeywords.cpp"
16#include "keywords.cpp"
17
18// transform \r\n into \n
19// \r into \n (os9 style)
20// backslash-newlines into newlines
21static QByteArray cleaned(const QByteArray &input)
22{
23 QByteArray result;
24 result.resize(size: input.size());
25 const char *data = input.constData();
26 const char *end = input.constData() + input.size();
27 char *output = result.data();
28
29 int newlines = 0;
30 while (data != end) {
31 while (data != end && is_space(s: *data))
32 ++data;
33 bool takeLine = (*data == '#');
34 if (*data == '%' && *(data+1) == ':') {
35 takeLine = true;
36 ++data;
37 }
38 if (takeLine) {
39 *output = '#';
40 ++output;
41 do ++data; while (data != end && is_space(s: *data));
42 }
43 while (data != end) {
44 // handle \\\n, \\\r\n and \\\r
45 if (*data == '\\') {
46 if (*(data + 1) == '\r') {
47 ++data;
48 }
49 if (data != end && (*(data + 1) == '\n' || (*data) == '\r')) {
50 ++newlines;
51 data += 1;
52 if (data != end && *data != '\r')
53 data += 1;
54 continue;
55 }
56 } else if (*data == '\r' && *(data + 1) == '\n') { // reduce \r\n to \n
57 ++data;
58 }
59 if (data == end)
60 break;
61
62 char ch = *data;
63 if (ch == '\r') // os9: replace \r with \n
64 ch = '\n';
65 *output = ch;
66 ++output;
67
68 if (*data == '\n') {
69 // output additional newlines to keep the correct line-numbering
70 // for the lines following the backslash-newline sequence(s)
71 while (newlines) {
72 *output = '\n';
73 ++output;
74 --newlines;
75 }
76 ++data;
77 break;
78 }
79 ++data;
80 }
81 }
82 result.resize(size: output - result.constData());
83 return result;
84}
85
86bool Preprocessor::preprocessOnly = false;
87void Preprocessor::skipUntilEndif()
88{
89 while(index < symbols.size() - 1 && symbols.at(i: index).token != PP_ENDIF){
90 switch (symbols.at(i: index).token) {
91 case PP_IF:
92 case PP_IFDEF:
93 case PP_IFNDEF:
94 ++index;
95 skipUntilEndif();
96 break;
97 default:
98 ;
99 }
100 ++index;
101 }
102}
103
104bool Preprocessor::skipBranch()
105{
106 while (index < symbols.size() - 1
107 && (symbols.at(i: index).token != PP_ENDIF
108 && symbols.at(i: index).token != PP_ELIF
109 && symbols.at(i: index).token != PP_ELSE)
110 ){
111 switch (symbols.at(i: index).token) {
112 case PP_IF:
113 case PP_IFDEF:
114 case PP_IFNDEF:
115 ++index;
116 skipUntilEndif();
117 break;
118 default:
119 ;
120 }
121 ++index;
122 }
123 return (index < symbols.size() - 1);
124}
125
126
127Symbols Preprocessor::tokenize(const QByteArray& input, int lineNum, Preprocessor::TokenizeMode mode)
128{
129 Symbols symbols;
130 // Preallocate some space to speed up the code below.
131 // The magic divisor value was found by calculating the average ratio between
132 // input size and the final size of symbols.
133 // This yielded a value of 16.x when compiling Qt Base.
134 symbols.reserve(asize: input.size() / 16);
135 const char *begin = input.constData();
136 const char *data = begin;
137 while (*data) {
138 if (mode == TokenizeCpp || mode == TokenizeDefine) {
139 int column = 0;
140
141 const char *lexem = data;
142 int state = 0;
143 Token token = NOTOKEN;
144 for (;;) {
145 if (static_cast<signed char>(*data) < 0) {
146 ++data;
147 continue;
148 }
149 int nextindex = keywords[state].next;
150 int next = 0;
151 if (*data == keywords[state].defchar)
152 next = keywords[state].defnext;
153 else if (!state || nextindex)
154 next = keyword_trans[nextindex][(int)*data];
155 if (!next)
156 break;
157 state = next;
158 token = keywords[state].token;
159 ++data;
160 }
161
162 // suboptimal, is_ident_char should use a table
163 if (keywords[state].ident && is_ident_char(s: *data))
164 token = keywords[state].ident;
165
166 if (token == NOTOKEN) {
167 if (*data)
168 ++data;
169 // an error really, but let's ignore this input
170 // to not confuse moc later. However in pre-processor
171 // only mode let's continue.
172 if (!Preprocessor::preprocessOnly)
173 continue;
174 }
175
176 ++column;
177
178 if (token > SPECIAL_TREATMENT_MARK) {
179 switch (token) {
180 case QUOTE:
181 data = skipQuote(data);
182 token = STRING_LITERAL;
183 // concatenate multi-line strings for easier
184 // STRING_LITERAL handling in moc
185 if (!Preprocessor::preprocessOnly
186 && !symbols.isEmpty()
187 && symbols.constLast().token == STRING_LITERAL) {
188
189 const QByteArray newString
190 = '\"'
191 + symbols.constLast().unquotedLexem()
192 + input.mid(index: lexem - begin + 1, len: data - lexem - 2)
193 + '\"';
194 symbols.last() = Symbol(symbols.constLast().lineNum,
195 STRING_LITERAL,
196 newString);
197 continue;
198 }
199 break;
200 case SINGLEQUOTE:
201 while (*data && (*data != '\''
202 || (*(data-1)=='\\'
203 && *(data-2)!='\\')))
204 ++data;
205 if (*data)
206 ++data;
207 token = CHARACTER_LITERAL;
208 break;
209 case LANGLE_SCOPE:
210 // split <:: into two tokens, < and ::
211 token = LANGLE;
212 data -= 2;
213 break;
214 case DIGIT:
215 while (is_digit_char(s: *data) || *data == '\'')
216 ++data;
217 if (!*data || *data != '.') {
218 token = INTEGER_LITERAL;
219 if (data - lexem == 1 &&
220 (*data == 'x' || *data == 'X'
221 || *data == 'b' || *data == 'B')
222 && *lexem == '0') {
223 ++data;
224 while (is_hex_char(s: *data) || *data == '\'')
225 ++data;
226 } else if (*data == 'L') // TODO: handle other suffixes
227 ++data;
228 break;
229 }
230 token = FLOATING_LITERAL;
231 ++data;
232 Q_FALLTHROUGH();
233 case FLOATING_LITERAL:
234 while (is_digit_char(s: *data) || *data == '\'')
235 ++data;
236 if (*data == '+' || *data == '-')
237 ++data;
238 if (*data == 'e' || *data == 'E') {
239 ++data;
240 while (is_digit_char(s: *data) || *data == '\'')
241 ++data;
242 }
243 if (*data == 'f' || *data == 'F'
244 || *data == 'l' || *data == 'L')
245 ++data;
246 break;
247 case HASH:
248 if (column == 1 && mode == TokenizeCpp) {
249 mode = PreparePreprocessorStatement;
250 while (*data && (*data == ' ' || *data == '\t'))
251 ++data;
252 if (is_ident_char(s: *data))
253 mode = TokenizePreprocessorStatement;
254 continue;
255 }
256 break;
257 case PP_HASHHASH:
258 if (mode == TokenizeCpp)
259 continue;
260 break;
261 case NEWLINE:
262 ++lineNum;
263 if (mode == TokenizeDefine) {
264 mode = TokenizeCpp;
265 // emit the newline token
266 break;
267 }
268 continue;
269 case BACKSLASH:
270 {
271 const char *rewind = data;
272 while (*data && (*data == ' ' || *data == '\t'))
273 ++data;
274 if (*data && *data == '\n') {
275 ++data;
276 continue;
277 }
278 data = rewind;
279 } break;
280 case CHARACTER:
281 while (is_ident_char(s: *data))
282 ++data;
283 token = IDENTIFIER;
284 break;
285 case C_COMMENT:
286 if (*data) {
287 if (*data == '\n')
288 ++lineNum;
289 ++data;
290 if (*data) {
291 if (*data == '\n')
292 ++lineNum;
293 ++data;
294 }
295 }
296 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
297 if (*data == '\n')
298 ++lineNum;
299 ++data;
300 }
301 token = WHITESPACE; // one comment, one whitespace
302 Q_FALLTHROUGH();
303 case WHITESPACE:
304 if (column == 1)
305 column = 0;
306 while (*data && (*data == ' ' || *data == '\t'))
307 ++data;
308 if (Preprocessor::preprocessOnly) // tokenize whitespace
309 break;
310 continue;
311 case CPP_COMMENT:
312 while (*data && *data != '\n')
313 ++data;
314 continue; // ignore safely, the newline is a separator
315 default:
316 continue; //ignore
317 }
318 }
319#ifdef USE_LEXEM_STORE
320 if (!Preprocessor::preprocessOnly
321 && token != IDENTIFIER
322 && token != STRING_LITERAL
323 && token != FLOATING_LITERAL
324 && token != INTEGER_LITERAL)
325 symbols += Symbol(lineNum, token);
326 else
327#endif
328 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
329
330 } else { // Preprocessor
331
332 const char *lexem = data;
333 int state = 0;
334 Token token = NOTOKEN;
335 if (mode == TokenizePreprocessorStatement) {
336 state = pp_keyword_trans[0][(int)'#'];
337 mode = TokenizePreprocessor;
338 }
339 for (;;) {
340 if (static_cast<signed char>(*data) < 0) {
341 ++data;
342 continue;
343 }
344 int nextindex = pp_keywords[state].next;
345 int next = 0;
346 if (*data == pp_keywords[state].defchar)
347 next = pp_keywords[state].defnext;
348 else if (!state || nextindex)
349 next = pp_keyword_trans[nextindex][(int)*data];
350 if (!next)
351 break;
352 state = next;
353 token = pp_keywords[state].token;
354 ++data;
355 }
356 // suboptimal, is_ident_char should use a table
357 if (pp_keywords[state].ident && is_ident_char(s: *data))
358 token = pp_keywords[state].ident;
359
360 switch (token) {
361 case NOTOKEN:
362 if (*data)
363 ++data;
364 break;
365 case PP_DEFINE:
366 mode = PrepareDefine;
367 break;
368 case PP_IFDEF:
369 symbols += Symbol(lineNum, PP_IF);
370 symbols += Symbol(lineNum, PP_DEFINED);
371 continue;
372 case PP_IFNDEF:
373 symbols += Symbol(lineNum, PP_IF);
374 symbols += Symbol(lineNum, PP_NOT);
375 symbols += Symbol(lineNum, PP_DEFINED);
376 continue;
377 case PP_INCLUDE:
378 mode = TokenizeInclude;
379 break;
380 case PP_QUOTE:
381 data = skipQuote(data);
382 token = PP_STRING_LITERAL;
383 break;
384 case PP_SINGLEQUOTE:
385 while (*data && (*data != '\''
386 || (*(data-1)=='\\'
387 && *(data-2)!='\\')))
388 ++data;
389 if (*data)
390 ++data;
391 token = PP_CHARACTER_LITERAL;
392 break;
393 case PP_DIGIT:
394 while (is_digit_char(s: *data) || *data == '\'')
395 ++data;
396 if (!*data || *data != '.') {
397 token = PP_INTEGER_LITERAL;
398 if (data - lexem == 1 &&
399 (*data == 'x' || *data == 'X')
400 && *lexem == '0') {
401 ++data;
402 while (is_hex_char(s: *data) || *data == '\'')
403 ++data;
404 } else if (*data == 'L') // TODO: handle other suffixes
405 ++data;
406 break;
407 }
408 token = PP_FLOATING_LITERAL;
409 ++data;
410 Q_FALLTHROUGH();
411 case PP_FLOATING_LITERAL:
412 while (is_digit_char(s: *data) || *data == '\'')
413 ++data;
414 if (*data == '+' || *data == '-')
415 ++data;
416 if (*data == 'e' || *data == 'E') {
417 ++data;
418 while (is_digit_char(s: *data) || *data == '\'')
419 ++data;
420 }
421 if (*data == 'f' || *data == 'F'
422 || *data == 'l' || *data == 'L')
423 ++data;
424 break;
425 case PP_CHARACTER:
426 if (mode == PreparePreprocessorStatement) {
427 // rewind entire token to begin
428 data = lexem;
429 mode = TokenizePreprocessorStatement;
430 continue;
431 }
432 while (is_ident_char(s: *data))
433 ++data;
434 token = PP_IDENTIFIER;
435
436 if (mode == PrepareDefine) {
437 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
438 // make sure we explicitly add the whitespace here if the next char
439 // is not an opening brace, so we can distinguish correctly between
440 // regular and function macros
441 if (*data != '(')
442 symbols += Symbol(lineNum, WHITESPACE);
443 mode = TokenizeDefine;
444 continue;
445 }
446 break;
447 case PP_C_COMMENT:
448 if (*data) {
449 if (*data == '\n')
450 ++lineNum;
451 ++data;
452 if (*data) {
453 if (*data == '\n')
454 ++lineNum;
455 ++data;
456 }
457 }
458 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
459 if (*data == '\n')
460 ++lineNum;
461 ++data;
462 }
463 token = PP_WHITESPACE; // one comment, one whitespace
464 Q_FALLTHROUGH();
465 case PP_WHITESPACE:
466 while (*data && (*data == ' ' || *data == '\t'))
467 ++data;
468 continue; // the preprocessor needs no whitespace
469 case PP_CPP_COMMENT:
470 while (*data && *data != '\n')
471 ++data;
472 continue; // ignore safely, the newline is a separator
473 case PP_NEWLINE:
474 ++lineNum;
475 mode = TokenizeCpp;
476 break;
477 case PP_BACKSLASH:
478 {
479 const char *rewind = data;
480 while (*data && (*data == ' ' || *data == '\t'))
481 ++data;
482 if (*data && *data == '\n') {
483 ++data;
484 continue;
485 }
486 data = rewind;
487 } break;
488 case PP_LANGLE:
489 if (mode != TokenizeInclude)
490 break;
491 token = PP_STRING_LITERAL;
492 while (*data && *data != '\n' && *(data-1) != '>')
493 ++data;
494 break;
495 default:
496 break;
497 }
498 if (mode == PreparePreprocessorStatement)
499 continue;
500#ifdef USE_LEXEM_STORE
501 if (token != PP_IDENTIFIER
502 && token != PP_STRING_LITERAL
503 && token != PP_FLOATING_LITERAL
504 && token != PP_INTEGER_LITERAL)
505 symbols += Symbol(lineNum, token);
506 else
507#endif
508 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
509 }
510 }
511 symbols += Symbol(); // eof symbol
512 return symbols;
513}
514
515void Preprocessor::macroExpand(Symbols *into, Preprocessor *that, const Symbols &toExpand, qsizetype &index,
516 int lineNum, bool one, const QSet<QByteArray> &excludeSymbols)
517{
518 SymbolStack symbols;
519 SafeSymbols sf;
520 sf.symbols = toExpand;
521 sf.index = index;
522 sf.excludedSymbols = excludeSymbols;
523 symbols.push(t: sf);
524
525 if (toExpand.isEmpty())
526 return;
527
528 for (;;) {
529 QByteArray macro;
530 Symbols newSyms = macroExpandIdentifier(that, symbols, lineNum, macroName: &macro);
531
532 if (macro.isEmpty()) {
533 // not a macro
534 Symbol s = symbols.symbol();
535 s.lineNum = lineNum;
536 *into += s;
537 } else {
538 SafeSymbols sf;
539 sf.symbols = newSyms;
540 sf.index = 0;
541 sf.expandedMacro = macro;
542 symbols.push(t: sf);
543 }
544 if (!symbols.hasNext() || (one && symbols.size() == 1))
545 break;
546 symbols.next();
547 }
548
549 if (symbols.size())
550 index = symbols.top().index;
551 else
552 index = toExpand.size();
553}
554
555
556Symbols Preprocessor::macroExpandIdentifier(Preprocessor *that, SymbolStack &symbols, int lineNum, QByteArray *macroName)
557{
558 Symbol s = symbols.symbol();
559
560 // not a macro
561 if (s.token != PP_IDENTIFIER || !that->macros.contains(key: s) || symbols.dontReplaceSymbol(name: s.lexem())) {
562 return Symbols();
563 }
564
565 const Macro &macro = that->macros.value(key: s);
566 *macroName = s.lexem();
567
568 Symbols expansion;
569 if (!macro.isFunction) {
570 expansion = macro.symbols;
571 } else {
572 bool haveSpace = false;
573 while (symbols.test(token: PP_WHITESPACE)) { haveSpace = true; }
574 if (!symbols.test(token: PP_LPAREN)) {
575 *macroName = QByteArray();
576 Symbols syms;
577 if (haveSpace)
578 syms += Symbol(lineNum, PP_WHITESPACE);
579 syms += s;
580 syms.last().lineNum = lineNum;
581 return syms;
582 }
583 QVarLengthArray<Symbols, 5> arguments;
584 while (symbols.hasNext()) {
585 Symbols argument;
586 // strip leading space
587 while (symbols.test(token: PP_WHITESPACE)) {}
588 int nesting = 0;
589 bool vararg = macro.isVariadic && (arguments.size() == macro.arguments.size() - 1);
590 while (symbols.hasNext()) {
591 Token t = symbols.next();
592 if (t == PP_LPAREN) {
593 ++nesting;
594 } else if (t == PP_RPAREN) {
595 --nesting;
596 if (nesting < 0)
597 break;
598 } else if (t == PP_COMMA && nesting == 0) {
599 if (!vararg)
600 break;
601 }
602 argument += symbols.symbol();
603 }
604 arguments += argument;
605
606 if (nesting < 0)
607 break;
608 else if (!symbols.hasNext())
609 that->error(msg: "missing ')' in macro usage");
610 }
611
612 // empty VA_ARGS
613 if (macro.isVariadic && arguments.size() == macro.arguments.size() - 1)
614 arguments += Symbols();
615
616 // now replace the macro arguments with the expanded arguments
617 enum Mode {
618 Normal,
619 Hash,
620 HashHash
621 } mode = Normal;
622
623 const auto end = macro.symbols.cend();
624 auto it = macro.symbols.cbegin();
625 const auto lastSym = std::prev(x: macro.symbols.cend(), n: !macro.symbols.isEmpty() ? 1 : 0);
626 for (; it != end; ++it) {
627 const Symbol &s = *it;
628 if (s.token == HASH || s.token == PP_HASHHASH) {
629 mode = (s.token == HASH ? Hash : HashHash);
630 continue;
631 }
632 const qsizetype index = macro.arguments.indexOf(t: s);
633 if (mode == Normal) {
634 if (index >= 0 && index < arguments.size()) {
635 // each argument undoergoes macro expansion if it's not used as part of a # or ##
636 if (it == lastSym || std::next(x: it)->token != PP_HASHHASH) {
637 Symbols arg = arguments.at(idx: index);
638 qsizetype idx = 1;
639 macroExpand(into: &expansion, that, toExpand: arg, index&: idx, lineNum, one: false, excludeSymbols: symbols.excludeSymbols());
640 } else {
641 expansion += arguments.at(idx: index);
642 }
643 } else {
644 expansion += s;
645 }
646 } else if (mode == Hash) {
647 if (index < 0) {
648 that->error(msg: "'#' is not followed by a macro parameter");
649 continue;
650 } else if (index >= arguments.size()) {
651 that->error(msg: "Macro invoked with too few parameters for a use of '#'");
652 continue;
653 }
654
655 const Symbols &arg = arguments.at(idx: index);
656 QByteArray stringified;
657 for (const Symbol &sym : arg)
658 stringified += sym.lexem();
659
660 stringified.replace(before: '"', after: "\\\"");
661 stringified.prepend(c: '"');
662 stringified.append(c: '"');
663 expansion += Symbol(lineNum, STRING_LITERAL, stringified);
664 } else if (mode == HashHash){
665 if (s.token == WHITESPACE)
666 continue;
667
668 while (expansion.size() && expansion.constLast().token == PP_WHITESPACE)
669 expansion.pop_back();
670
671 Symbol next = s;
672 if (index >= 0 && index < arguments.size()) {
673 const Symbols &arg = arguments.at(idx: index);
674 if (arg.size() == 0) {
675 mode = Normal;
676 continue;
677 }
678 next = arg.at(i: 0);
679 }
680
681 if (!expansion.isEmpty() && expansion.constLast().token == s.token
682 && expansion.constLast().token != STRING_LITERAL) {
683 Symbol last = expansion.takeLast();
684
685 QByteArray lexem = last.lexem() + next.lexem();
686 expansion += Symbol(lineNum, last.token, lexem);
687 } else {
688 expansion += next;
689 }
690
691 if (index >= 0 && index < arguments.size()) {
692 const Symbols &arg = arguments.at(idx: index);
693 if (!arg.isEmpty())
694 expansion.append(i1: arg.cbegin() + 1, i2: arg.cend());
695 }
696 }
697 mode = Normal;
698 }
699 if (mode != Normal)
700 that->error(msg: "'#' or '##' found at the end of a macro argument");
701
702 }
703
704 return expansion;
705}
706
707void Preprocessor::substituteUntilNewline(Symbols &substituted)
708{
709 while (hasNext()) {
710 Token token = next();
711 if (token == PP_IDENTIFIER) {
712 macroExpand(into: &substituted, that: this, toExpand: symbols, index, lineNum: symbol().lineNum, one: true);
713 } else if (token == PP_DEFINED) {
714 bool braces = test(token: PP_LPAREN);
715 next(token: PP_IDENTIFIER);
716 Symbol definedOrNotDefined = symbol();
717 definedOrNotDefined.token = macros.contains(key: definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE;
718 substituted += definedOrNotDefined;
719 if (braces)
720 test(token: PP_RPAREN);
721 continue;
722 } else if (token == PP_NEWLINE) {
723 substituted += symbol();
724 break;
725 } else {
726 substituted += symbol();
727 }
728 }
729}
730
731
732class PP_Expression : public Parser
733{
734public:
735 int value() { index = 0; return unary_expression_lookup() ? conditional_expression() : 0; }
736
737 int conditional_expression();
738 int logical_OR_expression();
739 int logical_AND_expression();
740 int inclusive_OR_expression();
741 int exclusive_OR_expression();
742 int AND_expression();
743 int equality_expression();
744 int relational_expression();
745 int shift_expression();
746 int additive_expression();
747 int multiplicative_expression();
748 int unary_expression();
749 bool unary_expression_lookup();
750 int primary_expression();
751 bool primary_expression_lookup();
752};
753
754int PP_Expression::conditional_expression()
755{
756 int value = logical_OR_expression();
757 if (test(token: PP_QUESTION)) {
758 int alt1 = conditional_expression();
759 int alt2 = test(token: PP_COLON) ? conditional_expression() : 0;
760 return value ? alt1 : alt2;
761 }
762 return value;
763}
764
765int PP_Expression::logical_OR_expression()
766{
767 int value = logical_AND_expression();
768 if (test(token: PP_OROR))
769 return logical_OR_expression() || value;
770 return value;
771}
772
773int PP_Expression::logical_AND_expression()
774{
775 int value = inclusive_OR_expression();
776 if (test(token: PP_ANDAND))
777 return logical_AND_expression() && value;
778 return value;
779}
780
781int PP_Expression::inclusive_OR_expression()
782{
783 int value = exclusive_OR_expression();
784 if (test(token: PP_OR))
785 return value | inclusive_OR_expression();
786 return value;
787}
788
789int PP_Expression::exclusive_OR_expression()
790{
791 int value = AND_expression();
792 if (test(token: PP_HAT))
793 return value ^ exclusive_OR_expression();
794 return value;
795}
796
797int PP_Expression::AND_expression()
798{
799 int value = equality_expression();
800 if (test(token: PP_AND))
801 return value & AND_expression();
802 return value;
803}
804
805int PP_Expression::equality_expression()
806{
807 int value = relational_expression();
808 switch (next()) {
809 case PP_EQEQ:
810 return value == equality_expression();
811 case PP_NE:
812 return value != equality_expression();
813 default:
814 prev();
815 return value;
816 }
817}
818
819int PP_Expression::relational_expression()
820{
821 int value = shift_expression();
822 switch (next()) {
823 case PP_LANGLE:
824 return value < relational_expression();
825 case PP_RANGLE:
826 return value > relational_expression();
827 case PP_LE:
828 return value <= relational_expression();
829 case PP_GE:
830 return value >= relational_expression();
831 default:
832 prev();
833 return value;
834 }
835}
836
837int PP_Expression::shift_expression()
838{
839 int value = additive_expression();
840 switch (next()) {
841 case PP_LTLT:
842 return value << shift_expression();
843 case PP_GTGT:
844 return value >> shift_expression();
845 default:
846 prev();
847 return value;
848 }
849}
850
851int PP_Expression::additive_expression()
852{
853 int value = multiplicative_expression();
854 switch (next()) {
855 case PP_PLUS:
856 return value + additive_expression();
857 case PP_MINUS:
858 return value - additive_expression();
859 default:
860 prev();
861 return value;
862 }
863}
864
865int PP_Expression::multiplicative_expression()
866{
867 int value = unary_expression();
868 switch (next()) {
869 case PP_STAR:
870 {
871 // get well behaved overflow behavior by converting to long
872 // and then back to int
873 // NOTE: A conformant preprocessor would need to work intmax_t/
874 // uintmax_t according to [cpp.cond], 19.1 §10
875 // But we're not compliant anyway
876 qint64 result = qint64(value) * qint64(multiplicative_expression());
877 return int(result);
878 }
879 case PP_PERCENT:
880 {
881 int remainder = multiplicative_expression();
882 return remainder ? value % remainder : 0;
883 }
884 case PP_SLASH:
885 {
886 int div = multiplicative_expression();
887 return div ? value / div : 0;
888 }
889 default:
890 prev();
891 return value;
892 };
893}
894
895int PP_Expression::unary_expression()
896{
897 switch (next()) {
898 case PP_PLUS:
899 return unary_expression();
900 case PP_MINUS:
901 return -unary_expression();
902 case PP_NOT:
903 return !unary_expression();
904 case PP_TILDE:
905 return ~unary_expression();
906 case PP_MOC_TRUE:
907 return 1;
908 case PP_MOC_FALSE:
909 return 0;
910 default:
911 prev();
912 return primary_expression();
913 }
914}
915
916bool PP_Expression::unary_expression_lookup()
917{
918 Token t = lookup();
919 return (primary_expression_lookup()
920 || t == PP_PLUS
921 || t == PP_MINUS
922 || t == PP_NOT
923 || t == PP_TILDE
924 || t == PP_DEFINED);
925}
926
927int PP_Expression::primary_expression()
928{
929 int value;
930 if (test(token: PP_LPAREN)) {
931 value = conditional_expression();
932 test(token: PP_RPAREN);
933 } else {
934 next();
935 const QByteArray &lex = lexem();
936 auto lexView = QByteArrayView(lex);
937 if (lex.endsWith(c: 'L'))
938 lexView.chop(n: 1);
939 value = lexView.toInt(ok: nullptr, base: 0);
940 }
941 return value;
942}
943
944bool PP_Expression::primary_expression_lookup()
945{
946 Token t = lookup();
947 return (t == PP_IDENTIFIER
948 || t == PP_INTEGER_LITERAL
949 || t == PP_FLOATING_LITERAL
950 || t == PP_MOC_TRUE
951 || t == PP_MOC_FALSE
952 || t == PP_LPAREN);
953}
954
955int Preprocessor::evaluateCondition()
956{
957 PP_Expression expression;
958 expression.currentFilenames = currentFilenames;
959
960 substituteUntilNewline(substituted&: expression.symbols);
961
962 return expression.value();
963}
964
965static QByteArray readOrMapFile(QFile *file)
966{
967 const qint64 size = file->size();
968 char *rawInput = reinterpret_cast<char*>(file->map(offset: 0, size));
969 return rawInput ? QByteArray::fromRawData(data: rawInput, size) : file->readAll();
970}
971
972static void mergeStringLiterals(Symbols *_symbols)
973{
974 Symbols &symbols = *_symbols;
975 for (Symbols::iterator i = symbols.begin(); i != symbols.end(); ++i) {
976 if (i->token == STRING_LITERAL) {
977 Symbols::Iterator mergeSymbol = i;
978 qsizetype literalsLength = mergeSymbol->len;
979 while (++i != symbols.end() && i->token == STRING_LITERAL)
980 literalsLength += i->len - 2; // no quotes
981
982 if (literalsLength != mergeSymbol->len) {
983 QByteArray mergeSymbolOriginalLexem = mergeSymbol->unquotedLexem();
984 QByteArray &mergeSymbolLexem = mergeSymbol->lex;
985 mergeSymbolLexem.resize(size: 0);
986 mergeSymbolLexem.reserve(asize: literalsLength);
987 mergeSymbolLexem.append(c: '"');
988 mergeSymbolLexem.append(a: mergeSymbolOriginalLexem);
989 for (Symbols::iterator j = mergeSymbol + 1; j != i; ++j)
990 mergeSymbolLexem.append(s: j->lex.constData() + j->from + 1, len: j->len - 2); // append j->unquotedLexem()
991 mergeSymbolLexem.append(c: '"');
992 mergeSymbol->len = mergeSymbol->lex.size();
993 mergeSymbol->from = 0;
994 i = symbols.erase(abegin: mergeSymbol + 1, aend: i);
995 }
996 if (i == symbols.end())
997 break;
998 }
999 }
1000}
1001
1002static QByteArray searchIncludePaths(const QList<Parser::IncludePath> &includepaths,
1003 const QByteArray &include,
1004 const bool debugIncludes)
1005{
1006 QFileInfo fi;
1007
1008 if (Q_UNLIKELY(debugIncludes)) {
1009 fprintf(stderr, format: "debug-includes: searching for '%s'\n", include.constData());
1010 }
1011
1012 for (const Parser::IncludePath &p : includepaths) {
1013 if (fi.exists())
1014 break;
1015
1016 if (p.isFrameworkPath) {
1017 const qsizetype slashPos = include.indexOf(c: '/');
1018 if (slashPos == -1)
1019 continue;
1020 fi.setFile(dir: QString::fromLocal8Bit(ba: p.path + '/' + include.left(len: slashPos) + ".framework/Headers/"),
1021 file: QString::fromLocal8Bit(ba: include.mid(index: slashPos + 1)));
1022 } else {
1023 fi.setFile(dir: QString::fromLocal8Bit(ba: p.path), file: QString::fromLocal8Bit(ba: include));
1024 }
1025
1026 if (Q_UNLIKELY(debugIncludes)) {
1027 const auto candidate = fi.filePath().toLocal8Bit();
1028 fprintf(stderr, format: "debug-includes: considering '%s'\n", candidate.constData());
1029 }
1030
1031 // try again, maybe there's a file later in the include paths with the same name
1032 // (186067)
1033 if (fi.isDir()) {
1034 fi = QFileInfo();
1035 continue;
1036 }
1037 }
1038
1039 if (!fi.exists() || fi.isDir()) {
1040 if (Q_UNLIKELY(debugIncludes)) {
1041 fprintf(stderr, format: "debug-includes: can't find '%s'\n", include.constData());
1042 }
1043 return QByteArray();
1044 }
1045
1046 const auto result = fi.canonicalFilePath().toLocal8Bit();
1047
1048 if (Q_UNLIKELY(debugIncludes)) {
1049 fprintf(stderr, format: "debug-includes: found '%s'\n", result.constData());
1050 }
1051
1052 return result;
1053}
1054
1055QByteArray Preprocessor::resolveInclude(const QByteArray &include, const QByteArray &relativeTo)
1056{
1057 if (!relativeTo.isEmpty()) {
1058 QFileInfo fi;
1059 fi.setFile(dir: QFileInfo(QString::fromLocal8Bit(ba: relativeTo)).dir(), file: QString::fromLocal8Bit(ba: include));
1060 if (fi.exists() && !fi.isDir())
1061 return fi.canonicalFilePath().toLocal8Bit();
1062 }
1063
1064 auto it = nonlocalIncludePathResolutionCache.find(key: include);
1065 if (it == nonlocalIncludePathResolutionCache.end())
1066 it = nonlocalIncludePathResolutionCache.insert(key: include,
1067 value: searchIncludePaths(
1068 includepaths: includes,
1069 include,
1070 debugIncludes));
1071 return it.value();
1072}
1073
1074void Preprocessor::preprocess(const QByteArray &filename, Symbols &preprocessed)
1075{
1076 currentFilenames.push(x: filename);
1077 preprocessed.reserve(asize: preprocessed.size() + symbols.size());
1078 while (hasNext()) {
1079 Token token = next();
1080
1081 switch (token) {
1082 case PP_INCLUDE:
1083 {
1084 int lineNum = symbol().lineNum;
1085 QByteArray include;
1086 bool local = false;
1087 if (test(token: PP_STRING_LITERAL)) {
1088 local = lexem().startsWith(c: '\"');
1089 include = unquotedLexem();
1090 } else
1091 continue;
1092 until(PP_NEWLINE);
1093
1094 include = resolveInclude(include, relativeTo: local ? filename : QByteArray());
1095 if (include.isNull())
1096 continue;
1097
1098 if (Preprocessor::preprocessedIncludes.contains(value: include))
1099 continue;
1100 Preprocessor::preprocessedIncludes.insert(value: include);
1101
1102 QFile file(QString::fromLocal8Bit(ba: include.constData()));
1103 if (!file.open(flags: QFile::ReadOnly))
1104 continue;
1105
1106 QByteArray input = readOrMapFile(file: &file);
1107
1108 file.close();
1109 if (input.isEmpty())
1110 continue;
1111
1112 Symbols saveSymbols = symbols;
1113 qsizetype saveIndex = index;
1114
1115 // phase 1: get rid of backslash-newlines
1116 input = cleaned(input);
1117
1118 // phase 2: tokenize for the preprocessor
1119 symbols = tokenize(input);
1120 input.clear();
1121
1122 index = 0;
1123
1124 // phase 3: preprocess conditions and substitute macros
1125 preprocessed += Symbol(0, MOC_INCLUDE_BEGIN, include);
1126 preprocess(filename: include, preprocessed);
1127 preprocessed += Symbol(lineNum, MOC_INCLUDE_END, include);
1128
1129 symbols = saveSymbols;
1130 index = saveIndex;
1131 continue;
1132 }
1133 case PP_DEFINE:
1134 {
1135 next();
1136 QByteArray name = lexem();
1137 if (name.isEmpty() || !is_ident_start(s: name[0]))
1138 error();
1139 Macro macro;
1140 macro.isVariadic = false;
1141 if (test(token: LPAREN)) {
1142 // we have a function macro
1143 macro.isFunction = true;
1144 parseDefineArguments(m: &macro);
1145 } else {
1146 macro.isFunction = false;
1147 }
1148 qsizetype start = index;
1149 until(PP_NEWLINE);
1150 macro.symbols.reserve(asize: index - start - 1);
1151
1152 // remove whitespace where there shouldn't be any:
1153 // Before and after the macro, after a # and around ##
1154 Token lastToken = HASH; // skip shitespace at the beginning
1155 for (qsizetype i = start; i < index - 1; ++i) {
1156 Token token = symbols.at(i).token;
1157 if (token == WHITESPACE) {
1158 if (lastToken == PP_HASH || lastToken == HASH ||
1159 lastToken == PP_HASHHASH ||
1160 lastToken == WHITESPACE)
1161 continue;
1162 } else if (token == PP_HASHHASH) {
1163 if (!macro.symbols.isEmpty() &&
1164 lastToken == WHITESPACE)
1165 macro.symbols.pop_back();
1166 }
1167 macro.symbols.append(t: symbols.at(i));
1168 lastToken = token;
1169 }
1170 // remove trailing whitespace
1171 while (!macro.symbols.isEmpty() &&
1172 (macro.symbols.constLast().token == PP_WHITESPACE || macro.symbols.constLast().token == WHITESPACE))
1173 macro.symbols.pop_back();
1174
1175 if (!macro.symbols.isEmpty()) {
1176 if (macro.symbols.constFirst().token == PP_HASHHASH ||
1177 macro.symbols.constLast().token == PP_HASHHASH) {
1178 error(msg: "'##' cannot appear at either end of a macro expansion");
1179 }
1180 }
1181 macros.insert(key: name, value: macro);
1182 continue;
1183 }
1184 case PP_UNDEF: {
1185 next();
1186 QByteArray name = lexem();
1187 until(PP_NEWLINE);
1188 macros.remove(key: name);
1189 continue;
1190 }
1191 case PP_IDENTIFIER: {
1192 // substitute macros
1193 macroExpand(into: &preprocessed, that: this, toExpand: symbols, index, lineNum: symbol().lineNum, one: true);
1194 continue;
1195 }
1196 case PP_HASH:
1197 until(PP_NEWLINE);
1198 continue; // skip unknown preprocessor statement
1199 case PP_IFDEF:
1200 case PP_IFNDEF:
1201 case PP_IF:
1202 while (!evaluateCondition()) {
1203 if (!skipBranch())
1204 break;
1205 if (test(token: PP_ELIF)) {
1206 } else {
1207 until(PP_NEWLINE);
1208 break;
1209 }
1210 }
1211 continue;
1212 case PP_ELIF:
1213 case PP_ELSE:
1214 skipUntilEndif();
1215 Q_FALLTHROUGH();
1216 case PP_ENDIF:
1217 until(PP_NEWLINE);
1218 continue;
1219 case PP_NEWLINE:
1220 continue;
1221 case SIGNALS:
1222 case SLOTS: {
1223 Symbol sym = symbol();
1224 if (macros.contains(key: "QT_NO_KEYWORDS"))
1225 sym.token = IDENTIFIER;
1226 else
1227 sym.token = (token == SIGNALS ? Q_SIGNALS_TOKEN : Q_SLOTS_TOKEN);
1228 preprocessed += sym;
1229 } continue;
1230 default:
1231 break;
1232 }
1233 preprocessed += symbol();
1234 }
1235
1236 currentFilenames.pop();
1237}
1238
1239Symbols Preprocessor::preprocessed(const QByteArray &filename, QFile *file)
1240{
1241 QByteArray input = readOrMapFile(file);
1242
1243 if (input.isEmpty())
1244 return symbols;
1245
1246 // phase 1: get rid of backslash-newlines
1247 input = cleaned(input);
1248
1249 // phase 2: tokenize for the preprocessor
1250 index = 0;
1251 symbols = tokenize(input);
1252
1253#if 0
1254 for (int j = 0; j < symbols.size(); ++j)
1255 fprintf(stderr, "line %d: %s(%s)\n",
1256 symbols[j].lineNum,
1257 symbols[j].lexem().constData(),
1258 tokenTypeName(symbols[j].token));
1259#endif
1260
1261 // phase 3: preprocess conditions and substitute macros
1262 Symbols result;
1263 // Preallocate some space to speed up the code below.
1264 // The magic value was found by logging the final size
1265 // and calculating an average when running moc over FOSS projects.
1266 result.reserve(asize: file->size() / 300000);
1267 preprocess(filename, preprocessed&: result);
1268 mergeStringLiterals(symbols: &result);
1269
1270#if 0
1271 for (int j = 0; j < result.size(); ++j)
1272 fprintf(stderr, "line %d: %s(%s)\n",
1273 result[j].lineNum,
1274 result[j].lexem().constData(),
1275 tokenTypeName(result[j].token));
1276#endif
1277
1278 return result;
1279}
1280
1281void Preprocessor::parseDefineArguments(Macro *m)
1282{
1283 Symbols arguments;
1284 while (hasNext()) {
1285 while (test(token: PP_WHITESPACE)) {}
1286 Token t = next();
1287 if (t == PP_RPAREN)
1288 break;
1289 if (t != PP_IDENTIFIER) {
1290 QByteArray l = lexem();
1291 if (l == "...") {
1292 m->isVariadic = true;
1293 arguments += Symbol(symbol().lineNum, PP_IDENTIFIER, "__VA_ARGS__");
1294 while (test(token: PP_WHITESPACE)) {}
1295 if (!test(token: PP_RPAREN))
1296 error(msg: "missing ')' in macro argument list");
1297 break;
1298 } else if (!is_identifier(s: l.constData(), len: l.size())) {
1299 error(msg: "Unexpected character in macro argument list.");
1300 }
1301 }
1302
1303 Symbol arg = symbol();
1304 if (arguments.contains(t: arg))
1305 error(msg: "Duplicate macro parameter.");
1306 arguments += symbol();
1307
1308 while (test(token: PP_WHITESPACE)) {}
1309 t = next();
1310 if (t == PP_RPAREN)
1311 break;
1312 if (t == PP_COMMA)
1313 continue;
1314 if (lexem() == "...") {
1315 //GCC extension: #define FOO(x, y...) x(y)
1316 // The last argument was already parsed. Just mark the macro as variadic.
1317 m->isVariadic = true;
1318 while (test(token: PP_WHITESPACE)) {}
1319 if (!test(token: PP_RPAREN))
1320 error(msg: "missing ')' in macro argument list");
1321 break;
1322 }
1323 error(msg: "Unexpected character in macro argument list.");
1324 }
1325 m->arguments = arguments;
1326 while (test(token: PP_WHITESPACE)) {}
1327}
1328
1329void Preprocessor::until(Token t)
1330{
1331 while(hasNext() && next() != t)
1332 ;
1333}
1334
1335void Preprocessor::setDebugIncludes(bool value)
1336{
1337 debugIncludes = value;
1338}
1339
1340
1341QT_END_NAMESPACE
1342

// source code of qtbase/src/tools/moc/preprocessor.cpp