1// Copyright (C) 2016 The Qt Company Ltd.
2// Copyright (C) 2014 Olivier Goffart <ogoffart@woboq.org>
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
4
5#include "preprocessor.h"
6#include "utils.h"
7#include <qstringlist.h>
8#include <qfile.h>
9#include <qdir.h>
10#include <qfileinfo.h>
11#include <qvarlengtharray.h>
12
13QT_BEGIN_NAMESPACE
14
15using namespace QtMiscUtils;
16
17#include "ppkeywords.cpp"
18#include "keywords.cpp"
19
20// transform \r\n into \n
21// \r into \n (os9 style)
22// backslash-newlines into newlines
23static QByteArray cleaned(const QByteArray &input)
24{
25 QByteArray result;
26 result.resize(size: input.size());
27 const char *data = input.constData();
28 const char *end = input.constData() + input.size();
29 char *output = result.data();
30
31 int newlines = 0;
32 while (data != end) {
33 while (data != end && is_space(s: *data))
34 ++data;
35 bool takeLine = (*data == '#');
36 if (*data == '%' && *(data+1) == ':') {
37 takeLine = true;
38 ++data;
39 }
40 if (takeLine) {
41 *output = '#';
42 ++output;
43 do ++data; while (data != end && is_space(s: *data));
44 }
45 while (data != end) {
46 // handle \\\n, \\\r\n and \\\r
47 if (*data == '\\') {
48 if (*(data + 1) == '\r') {
49 ++data;
50 }
51 if (data != end && (*(data + 1) == '\n' || (*data) == '\r')) {
52 ++newlines;
53 data += 1;
54 if (data != end && *data != '\r')
55 data += 1;
56 continue;
57 }
58 } else if (*data == '\r' && *(data + 1) == '\n') { // reduce \r\n to \n
59 ++data;
60 }
61 if (data == end)
62 break;
63
64 char ch = *data;
65 if (ch == '\r') // os9: replace \r with \n
66 ch = '\n';
67 *output = ch;
68 ++output;
69
70 if (*data == '\n') {
71 // output additional newlines to keep the correct line-numbering
72 // for the lines following the backslash-newline sequence(s)
73 while (newlines) {
74 *output = '\n';
75 ++output;
76 --newlines;
77 }
78 ++data;
79 break;
80 }
81 ++data;
82 }
83 }
84 result.resize(size: output - result.constData());
85 return result;
86}
87
88bool Preprocessor::preprocessOnly = false;
89void Preprocessor::skipUntilEndif()
90{
91 while(index < symbols.size() - 1 && symbols.at(i: index).token != PP_ENDIF){
92 switch (symbols.at(i: index).token) {
93 case PP_IF:
94 case PP_IFDEF:
95 case PP_IFNDEF:
96 ++index;
97 skipUntilEndif();
98 break;
99 default:
100 ;
101 }
102 ++index;
103 }
104}
105
106bool Preprocessor::skipBranch()
107{
108 while (index < symbols.size() - 1
109 && (symbols.at(i: index).token != PP_ENDIF
110 && symbols.at(i: index).token != PP_ELIF
111 && symbols.at(i: index).token != PP_ELSE)
112 ){
113 switch (symbols.at(i: index).token) {
114 case PP_IF:
115 case PP_IFDEF:
116 case PP_IFNDEF:
117 ++index;
118 skipUntilEndif();
119 break;
120 default:
121 ;
122 }
123 ++index;
124 }
125 return (index < symbols.size() - 1);
126}
127
128
129Symbols Preprocessor::tokenize(const QByteArray& input, int lineNum, Preprocessor::TokenizeMode mode)
130{
131 Symbols symbols;
132 // Preallocate some space to speed up the code below.
133 // The magic divisor value was found by calculating the average ratio between
134 // input size and the final size of symbols.
135 // This yielded a value of 16.x when compiling Qt Base.
136 symbols.reserve(asize: input.size() / 16);
137 const char *begin = input.constData();
138 const char *data = begin;
139 while (*data) {
140 if (mode == TokenizeCpp || mode == TokenizeDefine) {
141 int column = 0;
142
143 const char *lexem = data;
144 int state = 0;
145 Token token = NOTOKEN;
146 for (;;) {
147 if (static_cast<signed char>(*data) < 0) {
148 ++data;
149 continue;
150 }
151 int nextindex = keywords[state].next;
152 int next = 0;
153 if (*data == keywords[state].defchar)
154 next = keywords[state].defnext;
155 else if (!state || nextindex)
156 next = keyword_trans[nextindex][(int)*data];
157 if (!next)
158 break;
159 state = next;
160 token = keywords[state].token;
161 ++data;
162 }
163
164 // suboptimal, is_ident_char should use a table
165 if (keywords[state].ident && is_ident_char(s: *data))
166 token = keywords[state].ident;
167
168 if (token == NOTOKEN) {
169 if (*data)
170 ++data;
171 // an error really, but let's ignore this input
172 // to not confuse moc later. However in pre-processor
173 // only mode let's continue.
174 if (!Preprocessor::preprocessOnly)
175 continue;
176 }
177
178 ++column;
179
180 if (token > SPECIAL_TREATMENT_MARK) {
181 switch (token) {
182 case QUOTE:
183 data = skipQuote(data);
184 token = STRING_LITERAL;
185 // concatenate multi-line strings for easier
186 // STRING_LITERAL handling in moc
187 if (!Preprocessor::preprocessOnly
188 && !symbols.isEmpty()
189 && symbols.constLast().token == STRING_LITERAL) {
190
191 const QByteArray newString
192 = '\"'
193 + symbols.constLast().unquotedLexemView()
194 + input.mid(index: lexem - begin + 1, len: data - lexem - 2)
195 + '\"';
196 symbols.last() = Symbol(symbols.constLast().lineNum,
197 STRING_LITERAL,
198 newString);
199 continue;
200 }
201 break;
202 case SINGLEQUOTE:
203 while (*data && (*data != '\''
204 || (*(data-1)=='\\'
205 && *(data-2)!='\\')))
206 ++data;
207 if (*data)
208 ++data;
209 token = CHARACTER_LITERAL;
210 break;
211 case LANGLE_SCOPE:
212 // split <:: into two tokens, < and ::
213 token = LANGLE;
214 data -= 2;
215 break;
216 case DIGIT:
217 {
218 bool hasSeenTokenSeparator = false;;
219 while (isAsciiDigit(c: *data) || (hasSeenTokenSeparator = *data == '\''))
220 ++data;
221 if (!*data || *data != '.') {
222 token = INTEGER_LITERAL;
223 if (data - lexem == 1 &&
224 (*data == 'x' || *data == 'X'
225 || *data == 'b' || *data == 'B')
226 && *lexem == '0') {
227 ++data;
228 while (isHexDigit(c: *data) || (hasSeenTokenSeparator = *data == '\''))
229 ++data;
230 } else if (*data == 'L') // TODO: handle other suffixes
231 ++data;
232 if (!hasSeenTokenSeparator) {
233 while (is_ident_char(s: *data)) {
234 ++data;
235 token = IDENTIFIER;
236 }
237 }
238 break;
239 }
240 token = FLOATING_LITERAL;
241 ++data;
242 Q_FALLTHROUGH();
243 }
244 case FLOATING_LITERAL:
245 while (isAsciiDigit(c: *data) || *data == '\'')
246 ++data;
247 if (*data == '+' || *data == '-')
248 ++data;
249 if (*data == 'e' || *data == 'E') {
250 ++data;
251 while (isAsciiDigit(c: *data) || *data == '\'')
252 ++data;
253 }
254 if (*data == 'f' || *data == 'F'
255 || *data == 'l' || *data == 'L')
256 ++data;
257 break;
258 case HASH:
259 if (column == 1 && mode == TokenizeCpp) {
260 mode = PreparePreprocessorStatement;
261 while (*data && (*data == ' ' || *data == '\t'))
262 ++data;
263 if (is_ident_char(s: *data))
264 mode = TokenizePreprocessorStatement;
265 continue;
266 }
267 break;
268 case PP_HASHHASH:
269 if (mode == TokenizeCpp)
270 continue;
271 break;
272 case NEWLINE:
273 ++lineNum;
274 if (mode == TokenizeDefine) {
275 mode = TokenizeCpp;
276 // emit the newline token
277 break;
278 }
279 continue;
280 case BACKSLASH:
281 {
282 const char *rewind = data;
283 while (*data && (*data == ' ' || *data == '\t'))
284 ++data;
285 if (*data && *data == '\n') {
286 ++data;
287 continue;
288 }
289 data = rewind;
290 } break;
291 case CHARACTER:
292 while (is_ident_char(s: *data))
293 ++data;
294 token = IDENTIFIER;
295 break;
296 case C_COMMENT:
297 if (*data) {
298 if (*data == '\n')
299 ++lineNum;
300 ++data;
301 if (*data) {
302 if (*data == '\n')
303 ++lineNum;
304 ++data;
305 }
306 }
307 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
308 if (*data == '\n')
309 ++lineNum;
310 ++data;
311 }
312 token = WHITESPACE; // one comment, one whitespace
313 Q_FALLTHROUGH();
314 case WHITESPACE:
315 if (column == 1)
316 column = 0;
317 while (*data && (*data == ' ' || *data == '\t'))
318 ++data;
319 if (Preprocessor::preprocessOnly) // tokenize whitespace
320 break;
321 continue;
322 case CPP_COMMENT:
323 while (*data && *data != '\n')
324 ++data;
325 continue; // ignore safely, the newline is a separator
326 default:
327 continue; //ignore
328 }
329 }
330 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
331
332 } else { // Preprocessor
333
334 const char *lexem = data;
335 int state = 0;
336 Token token = NOTOKEN;
337 if (mode == TokenizePreprocessorStatement) {
338 state = pp_keyword_trans[0][(int)'#'];
339 mode = TokenizePreprocessor;
340 }
341 for (;;) {
342 if (static_cast<signed char>(*data) < 0) {
343 ++data;
344 continue;
345 }
346 int nextindex = pp_keywords[state].next;
347 int next = 0;
348 if (*data == pp_keywords[state].defchar)
349 next = pp_keywords[state].defnext;
350 else if (!state || nextindex)
351 next = pp_keyword_trans[nextindex][(int)*data];
352 if (!next)
353 break;
354 state = next;
355 token = pp_keywords[state].token;
356 ++data;
357 }
358 // suboptimal, is_ident_char should use a table
359 if (pp_keywords[state].ident && is_ident_char(s: *data))
360 token = pp_keywords[state].ident;
361
362 switch (token) {
363 case NOTOKEN:
364 if (*data)
365 ++data;
366 break;
367 case PP_DEFINE:
368 mode = PrepareDefine;
369 break;
370 case PP_IFDEF:
371 symbols += Symbol(lineNum, PP_IF);
372 symbols += Symbol(lineNum, PP_DEFINED);
373 continue;
374 case PP_IFNDEF:
375 symbols += Symbol(lineNum, PP_IF);
376 symbols += Symbol(lineNum, PP_NOT);
377 symbols += Symbol(lineNum, PP_DEFINED);
378 continue;
379 case PP_INCLUDE:
380 mode = TokenizeInclude;
381 break;
382 case PP_QUOTE:
383 data = skipQuote(data);
384 token = PP_STRING_LITERAL;
385 break;
386 case PP_SINGLEQUOTE:
387 while (*data && (*data != '\''
388 || (*(data-1)=='\\'
389 && *(data-2)!='\\')))
390 ++data;
391 if (*data)
392 ++data;
393 token = PP_CHARACTER_LITERAL;
394 break;
395 case PP_DIGIT:
396 while (isAsciiDigit(c: *data) || *data == '\'')
397 ++data;
398 if (!*data || *data != '.') {
399 token = PP_INTEGER_LITERAL;
400 if (data - lexem == 1 &&
401 (*data == 'x' || *data == 'X')
402 && *lexem == '0') {
403 ++data;
404 while (isHexDigit(c: *data) || *data == '\'')
405 ++data;
406 } else if (*data == 'L') // TODO: handle other suffixes
407 ++data;
408 break;
409 }
410 token = PP_FLOATING_LITERAL;
411 ++data;
412 Q_FALLTHROUGH();
413 case PP_FLOATING_LITERAL:
414 while (isAsciiDigit(c: *data) || *data == '\'')
415 ++data;
416 if (*data == '+' || *data == '-')
417 ++data;
418 if (*data == 'e' || *data == 'E') {
419 ++data;
420 while (isAsciiDigit(c: *data) || *data == '\'')
421 ++data;
422 }
423 if (*data == 'f' || *data == 'F'
424 || *data == 'l' || *data == 'L')
425 ++data;
426 break;
427 case PP_CHARACTER:
428 if (mode == PreparePreprocessorStatement) {
429 // rewind entire token to begin
430 data = lexem;
431 mode = TokenizePreprocessorStatement;
432 continue;
433 }
434 while (is_ident_char(s: *data))
435 ++data;
436 token = PP_IDENTIFIER;
437
438 if (mode == PrepareDefine) {
439 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
440 // make sure we explicitly add the whitespace here if the next char
441 // is not an opening brace, so we can distinguish correctly between
442 // regular and function macros
443 if (*data != '(')
444 symbols += Symbol(lineNum, WHITESPACE);
445 mode = TokenizeDefine;
446 continue;
447 }
448 break;
449 case PP_C_COMMENT:
450 if (*data) {
451 if (*data == '\n')
452 ++lineNum;
453 ++data;
454 if (*data) {
455 if (*data == '\n')
456 ++lineNum;
457 ++data;
458 }
459 }
460 while (*data && (*(data-1) != '/' || *(data-2) != '*')) {
461 if (*data == '\n')
462 ++lineNum;
463 ++data;
464 }
465 token = PP_WHITESPACE; // one comment, one whitespace
466 Q_FALLTHROUGH();
467 case PP_WHITESPACE:
468 while (*data && (*data == ' ' || *data == '\t'))
469 ++data;
470 continue; // the preprocessor needs no whitespace
471 case PP_CPP_COMMENT:
472 while (*data && *data != '\n')
473 ++data;
474 continue; // ignore safely, the newline is a separator
475 case PP_NEWLINE:
476 ++lineNum;
477 mode = TokenizeCpp;
478 break;
479 case PP_BACKSLASH:
480 {
481 const char *rewind = data;
482 while (*data && (*data == ' ' || *data == '\t'))
483 ++data;
484 if (*data && *data == '\n') {
485 ++data;
486 continue;
487 }
488 data = rewind;
489 } break;
490 case PP_LANGLE:
491 if (mode != TokenizeInclude)
492 break;
493 token = PP_STRING_LITERAL;
494 while (*data && *data != '\n' && *(data-1) != '>')
495 ++data;
496 break;
497 default:
498 break;
499 }
500 if (mode == PreparePreprocessorStatement)
501 continue;
502 symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem);
503 }
504 }
505 symbols += Symbol(); // eof symbol
506 return symbols;
507}
508
509void Preprocessor::macroExpand(Symbols *into, Preprocessor *that, const Symbols &toExpand, qsizetype &index,
510 int lineNum, bool one, const QSet<QByteArray> &excludeSymbols)
511{
512 SymbolStack symbols;
513 SafeSymbols sf;
514 sf.symbols = toExpand;
515 sf.index = index;
516 sf.excludedSymbols = excludeSymbols;
517 symbols.push(t: sf);
518
519 if (toExpand.isEmpty())
520 return;
521
522 for (;;) {
523 QByteArray macro;
524 Symbols newSyms = macroExpandIdentifier(that, symbols, lineNum, macroName: &macro);
525
526 if (macro.isEmpty()) {
527 // not a macro
528 Symbol s = symbols.symbol();
529 s.lineNum = lineNum;
530 *into += s;
531 } else {
532 SafeSymbols sf;
533 sf.symbols = newSyms;
534 sf.index = 0;
535 sf.expandedMacro = macro;
536 symbols.push(t: sf);
537 }
538 if (!symbols.hasNext() || (one && symbols.size() == 1))
539 break;
540 symbols.next();
541 }
542
543 if (symbols.size())
544 index = symbols.top().index;
545 else
546 index = toExpand.size();
547}
548
549
550Symbols Preprocessor::macroExpandIdentifier(Preprocessor *that, SymbolStack &symbols, int lineNum, QByteArray *macroName)
551{
552 Symbol s = symbols.symbol();
553
554 // not a macro
555 if (s.token != PP_IDENTIFIER || !that->macros.contains(key: s) || symbols.dontReplaceSymbol(name: s.lexem())) {
556 return Symbols();
557 }
558
559 const Macro &macro = that->macros.value(key: s);
560 *macroName = s.lexem();
561
562 Symbols expansion;
563 if (!macro.isFunction) {
564 expansion = macro.symbols;
565 } else {
566 bool haveSpace = false;
567 while (symbols.test(token: PP_WHITESPACE)) { haveSpace = true; }
568 if (!symbols.test(token: PP_LPAREN)) {
569 *macroName = QByteArray();
570 Symbols syms;
571 if (haveSpace)
572 syms += Symbol(lineNum, PP_WHITESPACE);
573 syms += s;
574 syms.last().lineNum = lineNum;
575 return syms;
576 }
577 QVarLengthArray<Symbols, 5> arguments;
578 while (symbols.hasNext()) {
579 Symbols argument;
580 // strip leading space
581 while (symbols.test(token: PP_WHITESPACE)) {}
582 int nesting = 0;
583 bool vararg = macro.isVariadic && (arguments.size() == macro.arguments.size() - 1);
584 while (symbols.hasNext()) {
585 Token t = symbols.next();
586 if (t == PP_LPAREN) {
587 ++nesting;
588 } else if (t == PP_RPAREN) {
589 --nesting;
590 if (nesting < 0)
591 break;
592 } else if (t == PP_COMMA && nesting == 0) {
593 if (!vararg)
594 break;
595 }
596 argument += symbols.symbol();
597 }
598 arguments += argument;
599
600 if (nesting < 0)
601 break;
602 else if (!symbols.hasNext())
603 that->error(msg: "missing ')' in macro usage");
604 }
605
606 // empty VA_ARGS
607 if (macro.isVariadic && arguments.size() == macro.arguments.size() - 1)
608 arguments += Symbols();
609
610 // now replace the macro arguments with the expanded arguments
611 enum Mode {
612 Normal,
613 Hash,
614 HashHash
615 } mode = Normal;
616
617 const auto end = macro.symbols.cend();
618 auto it = macro.symbols.cbegin();
619 const auto lastSym = std::prev(x: macro.symbols.cend(), n: !macro.symbols.isEmpty() ? 1 : 0);
620 for (; it != end; ++it) {
621 const Symbol &s = *it;
622 if (s.token == HASH || s.token == PP_HASHHASH) {
623 mode = (s.token == HASH ? Hash : HashHash);
624 continue;
625 }
626 const qsizetype index = macro.arguments.indexOf(t: s);
627 if (mode == Normal) {
628 if (index >= 0 && index < arguments.size()) {
629 // each argument undoergoes macro expansion if it's not used as part of a # or ##
630 if (it == lastSym || std::next(x: it)->token != PP_HASHHASH) {
631 Symbols arg = arguments.at(idx: index);
632 qsizetype idx = 1;
633 macroExpand(into: &expansion, that, toExpand: arg, index&: idx, lineNum, one: false, excludeSymbols: symbols.excludeSymbols());
634 } else {
635 expansion += arguments.at(idx: index);
636 }
637 } else {
638 expansion += s;
639 }
640 } else if (mode == Hash) {
641 if (index < 0) {
642 that->error(msg: "'#' is not followed by a macro parameter");
643 continue;
644 } else if (index >= arguments.size()) {
645 that->error(msg: "Macro invoked with too few parameters for a use of '#'");
646 continue;
647 }
648
649 const Symbols &arg = arguments.at(idx: index);
650 QByteArray stringified;
651 for (const Symbol &sym : arg)
652 stringified += sym.lexemView();
653
654 stringified.replace(before: '"', after: "\\\"");
655 stringified.prepend(c: '"');
656 stringified.append(c: '"');
657 expansion += Symbol(lineNum, STRING_LITERAL, stringified);
658 } else if (mode == HashHash){
659 if (s.token == WHITESPACE)
660 continue;
661
662 while (expansion.size() && expansion.constLast().token == PP_WHITESPACE)
663 expansion.pop_back();
664
665 Symbol next = s;
666 if (index >= 0 && index < arguments.size()) {
667 const Symbols &arg = arguments.at(idx: index);
668 if (arg.size() == 0) {
669 mode = Normal;
670 continue;
671 }
672 next = arg.at(i: 0);
673 }
674
675 if (!expansion.isEmpty() && expansion.constLast().token == s.token
676 && expansion.constLast().token != STRING_LITERAL) {
677 Symbol last = expansion.takeLast();
678
679 QByteArray lexem = last.lexem() + next.lexem();
680 expansion += Symbol(lineNum, last.token, lexem);
681 } else {
682 expansion += next;
683 }
684
685 if (index >= 0 && index < arguments.size()) {
686 const Symbols &arg = arguments.at(idx: index);
687 if (!arg.isEmpty())
688 expansion.append(i1: arg.cbegin() + 1, i2: arg.cend());
689 }
690 }
691 mode = Normal;
692 }
693 if (mode != Normal)
694 that->error(msg: "'#' or '##' found at the end of a macro argument");
695
696 }
697
698 return expansion;
699}
700
701void Preprocessor::substituteUntilNewline(Symbols &substituted)
702{
703 while (hasNext()) {
704 Token token = next();
705 if (token == PP_IDENTIFIER) {
706 macroExpand(into: &substituted, that: this, toExpand: symbols, index, lineNum: symbol().lineNum, one: true);
707 } else if (token == PP_DEFINED) {
708 bool braces = test(token: PP_LPAREN);
709 if (test(token: PP_HAS_INCLUDE)) {
710 // __has_include is always supported
711 Symbol definedOrNotDefined = symbol();
712 definedOrNotDefined.token = PP_MOC_TRUE;
713 substituted += definedOrNotDefined;
714 } else {
715 next(token: PP_IDENTIFIER);
716 Symbol definedOrNotDefined = symbol();
717 definedOrNotDefined.token = macros.contains(key: definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE;
718 substituted += definedOrNotDefined;
719 }
720 if (braces)
721 test(token: PP_RPAREN);
722 continue;
723 } else if (token == PP_NEWLINE) {
724 substituted += symbol();
725 break;
726 } else if (token == PP_HAS_INCLUDE) {
727 next(token: LPAREN);
728 Token tok = next(); // quote or LANGLE
729 bool usesAngleInclude = false;
730 QByteArray includeAsString;
731 Symbols innerSymbols;
732 if (tok == PP_LANGLE) {
733 usesAngleInclude = true;
734 next();
735 do {
736 Symbol currentSymbol = symbol();
737 includeAsString += currentSymbol.lexem();
738 if (currentSymbol.token == PP_IDENTIFIER)
739 macroExpand(into: &innerSymbols, that: this, toExpand: symbols, index, lineNum: symbol().lineNum, one: true);
740 else
741 innerSymbols.append(t: currentSymbol);
742 } while (next() != PP_RANGLE);
743 } else {
744 includeAsString = unquotedLexem();
745 }
746 next(token: RPAREN);
747 const QByteArray &relative = usesAngleInclude ? QByteArray() : currentFilenames.top();
748 bool result = !resolveInclude(filename: includeAsString, relativeTo: relative).isNull();
749 if (usesAngleInclude && !result) {
750 // try with expansion
751 includeAsString = {};
752 for (const auto &innerSymbol: innerSymbols)
753 includeAsString.append(a: innerSymbol.lexem());
754 result = !resolveInclude(filename: includeAsString, relativeTo: relative).isNull();
755 }
756 Symbol definedOrNotDefined = symbol();
757 definedOrNotDefined.token = result ? PP_MOC_TRUE : PP_MOC_FALSE;
758 substituted += definedOrNotDefined;
759 } else {
760 substituted += symbol();
761 }
762 }
763}
764
765
766class PP_Expression : public Parser
767{
768public:
769 int value() { index = 0; return unary_expression_lookup() ? conditional_expression() : 0; }
770
771 int conditional_expression();
772 int logical_OR_expression();
773 int logical_AND_expression();
774 int inclusive_OR_expression();
775 int exclusive_OR_expression();
776 int AND_expression();
777 int equality_expression();
778 int relational_expression();
779 int shift_expression();
780 int additive_expression();
781 int multiplicative_expression();
782 int unary_expression();
783 bool unary_expression_lookup();
784 int primary_expression();
785 bool primary_expression_lookup();
786};
787
788int PP_Expression::conditional_expression()
789{
790 int value = logical_OR_expression();
791 if (test(token: PP_QUESTION)) {
792 int alt1 = conditional_expression();
793 int alt2 = test(token: PP_COLON) ? conditional_expression() : 0;
794 return value ? alt1 : alt2;
795 }
796 return value;
797}
798
799int PP_Expression::logical_OR_expression()
800{
801 int value = logical_AND_expression();
802 if (test(token: PP_OROR))
803 return logical_OR_expression() || value;
804 return value;
805}
806
807int PP_Expression::logical_AND_expression()
808{
809 int value = inclusive_OR_expression();
810 if (test(token: PP_ANDAND))
811 return logical_AND_expression() && value;
812 return value;
813}
814
815int PP_Expression::inclusive_OR_expression()
816{
817 int value = exclusive_OR_expression();
818 if (test(token: PP_OR))
819 return value | inclusive_OR_expression();
820 return value;
821}
822
823int PP_Expression::exclusive_OR_expression()
824{
825 int value = AND_expression();
826 if (test(token: PP_HAT))
827 return value ^ exclusive_OR_expression();
828 return value;
829}
830
831int PP_Expression::AND_expression()
832{
833 int value = equality_expression();
834 if (test(token: PP_AND))
835 return value & AND_expression();
836 return value;
837}
838
839int PP_Expression::equality_expression()
840{
841 int value = relational_expression();
842 switch (next()) {
843 case PP_EQEQ:
844 return value == equality_expression();
845 case PP_NE:
846 return value != equality_expression();
847 default:
848 prev();
849 return value;
850 }
851}
852
853int PP_Expression::relational_expression()
854{
855 int value = shift_expression();
856 switch (next()) {
857 case PP_LANGLE:
858 return value < relational_expression();
859 case PP_RANGLE:
860 return value > relational_expression();
861 case PP_LE:
862 return value <= relational_expression();
863 case PP_GE:
864 return value >= relational_expression();
865 default:
866 prev();
867 return value;
868 }
869}
870
871int PP_Expression::shift_expression()
872{
873 int value = additive_expression();
874 switch (next()) {
875 case PP_LTLT:
876 return value << shift_expression();
877 case PP_GTGT:
878 return value >> shift_expression();
879 default:
880 prev();
881 return value;
882 }
883}
884
885int PP_Expression::additive_expression()
886{
887 int value = multiplicative_expression();
888 switch (next()) {
889 case PP_PLUS:
890 return value + additive_expression();
891 case PP_MINUS:
892 return value - additive_expression();
893 default:
894 prev();
895 return value;
896 }
897}
898
899int PP_Expression::multiplicative_expression()
900{
901 int value = unary_expression();
902 switch (next()) {
903 case PP_STAR:
904 {
905 // get well behaved overflow behavior by converting to long
906 // and then back to int
907 // NOTE: A conformant preprocessor would need to work intmax_t/
908 // uintmax_t according to [cpp.cond], 19.1 §10
909 // But we're not compliant anyway
910 qint64 result = qint64(value) * qint64(multiplicative_expression());
911 return int(result);
912 }
913 case PP_PERCENT:
914 {
915 int remainder = multiplicative_expression();
916 return remainder ? value % remainder : 0;
917 }
918 case PP_SLASH:
919 {
920 int div = multiplicative_expression();
921 return div ? value / div : 0;
922 }
923 default:
924 prev();
925 return value;
926 };
927}
928
929int PP_Expression::unary_expression()
930{
931 switch (next()) {
932 case PP_PLUS:
933 return unary_expression();
934 case PP_MINUS:
935 return -unary_expression();
936 case PP_NOT:
937 return !unary_expression();
938 case PP_TILDE:
939 return ~unary_expression();
940 case PP_MOC_TRUE:
941 return 1;
942 case PP_MOC_FALSE:
943 return 0;
944 default:
945 prev();
946 return primary_expression();
947 }
948}
949
950bool PP_Expression::unary_expression_lookup()
951{
952 Token t = lookup();
953 return (primary_expression_lookup()
954 || t == PP_PLUS
955 || t == PP_MINUS
956 || t == PP_NOT
957 || t == PP_TILDE
958 || t == PP_DEFINED);
959}
960
961int PP_Expression::primary_expression()
962{
963 int value;
964 if (test(token: PP_LPAREN)) {
965 value = conditional_expression();
966 test(token: PP_RPAREN);
967 } else {
968 next();
969 const QByteArray &lex = lexem();
970 auto lexView = QByteArrayView(lex);
971 if (lex.endsWith(c: 'L'))
972 lexView.chop(n: 1);
973 value = lexView.toInt(ok: nullptr, base: 0);
974 }
975 return value;
976}
977
978bool PP_Expression::primary_expression_lookup()
979{
980 Token t = lookup();
981 return (t == PP_IDENTIFIER
982 || t == PP_INTEGER_LITERAL
983 || t == PP_FLOATING_LITERAL
984 || t == PP_MOC_TRUE
985 || t == PP_MOC_FALSE
986 || t == PP_LPAREN);
987}
988
989int Preprocessor::evaluateCondition()
990{
991 PP_Expression expression;
992 expression.currentFilenames = currentFilenames;
993
994 substituteUntilNewline(substituted&: expression.symbols);
995
996 return expression.value();
997}
998
999static QByteArray readOrMapFile(QFile *file)
1000{
1001 const qint64 size = file->size();
1002 char *rawInput = reinterpret_cast<char*>(file->map(offset: 0, size));
1003 return rawInput ? QByteArray::fromRawData(data: rawInput, size) : file->readAll();
1004}
1005
1006void Symbol::mergeStringLiteral(const Symbol &next)
1007{
1008 Q_ASSERT(len >= 2); // at least `""`
1009 Q_ASSERT(from + len <= lex.size());
1010 Q_ASSERT(next.len >= 2); // at least `""`
1011 Q_ASSERT(next.from + next.len <= next.lex.size());
1012
1013 if (len != lex.size()) {
1014 // "rubbish" around lexem() in `lex`: clean up (`lex` may be the whole file)
1015 QByteArray l = lexemView().chopped(len: 1) % next.lexemView().sliced(pos: 1);
1016 lex = std::move(l); // lexemView() aliases `lex`; only clobber it now
1017 from = 0;
1018 } else {
1019 // like QByteArray::append(), but dealing with the "" around each lexem:
1020 const auto unquoted = next.unquotedLexemView();
1021 lex.insert(i: from + len - 1, // before closing `"`
1022 data: unquoted);
1023 }
1024 len = lex.size();
1025}
1026
1027static void mergeStringLiterals(Symbols &symbols)
1028{
1029 // like std::unique, but merges instead of skips adjacent STRING_LITERALs:
1030
1031 const auto mergeable = [](const Symbol &lhs, const Symbol &rhs) {
1032 return lhs.token == STRING_LITERAL && rhs.token == STRING_LITERAL;
1033 };
1034
1035 auto end = symbols.end();
1036 auto it = std::adjacent_find(first: symbols.begin(), last: symbols.end(), binary_pred: mergeable);
1037 if (it == end) // none found
1038 return;
1039
1040 // we know `it`, `it + 1` are both STRING_LITERAL (adjacent_find post-condition)
1041 // in particular: it + 1 < end
1042
1043 auto dst = it;
1044 auto lit = dst;
1045 ++it;
1046 lit->mergeStringLiteral(next: *it);
1047
1048 while (++it != end) {
1049 // Loop Invariants:
1050 // - [begin(), dst] is already processed
1051 // - `lit` is the last string literal
1052 // - we can merge if lit == dst
1053 // - [it, end[ still to be checked
1054 if (it->token == STRING_LITERAL) {
1055 if (lit == dst) { // can merge
1056 lit->mergeStringLiteral(next: *it);
1057 } else { // can't merge: not adjacent to previous STRING_LITERAL
1058 *++dst = std::move(*it);
1059 lit = dst; // remember that this was a literal
1060 }
1061 } else {
1062 *++dst = std::move(*it);
1063 }
1064 }
1065
1066 ++dst;
1067
1068 symbols.erase(abegin: dst, aend: end);
1069}
1070
1071static QByteArray searchIncludePaths(const QList<Parser::IncludePath> &includepaths,
1072 const QByteArray &include,
1073 const bool debugIncludes)
1074{
1075 QFileInfo fi;
1076
1077 if (Q_UNLIKELY(debugIncludes)) {
1078 fprintf(stderr, format: "debug-includes: searching for '%s'\n", include.constData());
1079 }
1080
1081 for (const Parser::IncludePath &p : includepaths) {
1082 if (fi.exists())
1083 break;
1084
1085 if (p.isFrameworkPath) {
1086 const qsizetype slashPos = include.indexOf(ch: '/');
1087 if (slashPos == -1)
1088 continue;
1089 fi.setFile(dir: QString::fromLocal8Bit(ba: p.path + '/' + include.left(n: slashPos) + ".framework/Headers/"),
1090 file: QString::fromLocal8Bit(ba: include.mid(index: slashPos + 1)));
1091 } else {
1092 fi.setFile(dir: QString::fromLocal8Bit(ba: p.path), file: QString::fromLocal8Bit(ba: include));
1093 }
1094
1095 if (Q_UNLIKELY(debugIncludes)) {
1096 const auto candidate = fi.filePath().toLocal8Bit();
1097 fprintf(stderr, format: "debug-includes: considering '%s'\n", candidate.constData());
1098 }
1099
1100 // try again, maybe there's a file later in the include paths with the same name
1101 // (186067)
1102 if (fi.isDir()) {
1103 fi = QFileInfo();
1104 continue;
1105 }
1106 }
1107
1108 if (!fi.exists() || fi.isDir()) {
1109 if (Q_UNLIKELY(debugIncludes)) {
1110 fprintf(stderr, format: "debug-includes: can't find '%s'\n", include.constData());
1111 }
1112 return QByteArray();
1113 }
1114
1115 const auto result = fi.canonicalFilePath().toLocal8Bit();
1116
1117 if (Q_UNLIKELY(debugIncludes)) {
1118 fprintf(stderr, format: "debug-includes: found '%s'\n", result.constData());
1119 }
1120
1121 return result;
1122}
1123
1124QByteArray Preprocessor::resolveInclude(const QByteArray &include, const QByteArray &relativeTo)
1125{
1126 if (!relativeTo.isEmpty()) {
1127 QFileInfo fi;
1128 fi.setFile(dir: QFileInfo(QString::fromLocal8Bit(ba: relativeTo)).dir(), file: QString::fromLocal8Bit(ba: include));
1129 if (fi.exists() && !fi.isDir())
1130 return fi.canonicalFilePath().toLocal8Bit();
1131 }
1132
1133 auto it = nonlocalIncludePathResolutionCache.find(key: include);
1134 if (it == nonlocalIncludePathResolutionCache.end())
1135 it = nonlocalIncludePathResolutionCache.insert(key: include,
1136 value: searchIncludePaths(
1137 includepaths: includes,
1138 include,
1139 debugIncludes));
1140 return it.value();
1141}
1142
1143void Preprocessor::preprocess(const QByteArray &filename, Symbols &preprocessed)
1144{
1145 currentFilenames.push(x: filename);
1146 preprocessed.reserve(asize: preprocessed.size() + symbols.size());
1147 while (hasNext()) {
1148 Token token = next();
1149
1150 switch (token) {
1151 case PP_INCLUDE:
1152 {
1153 int lineNum = symbol().lineNum;
1154 QByteArray include;
1155 bool local = false;
1156 if (test(token: PP_STRING_LITERAL)) {
1157 local = lexem().startsWith(c: '\"');
1158 include = unquotedLexem();
1159 } else
1160 continue;
1161 until(PP_NEWLINE);
1162
1163 include = resolveInclude(include, relativeTo: local ? filename : QByteArray());
1164 if (include.isNull())
1165 continue;
1166
1167 if (Preprocessor::preprocessedIncludes.contains(value: include))
1168 continue;
1169 Preprocessor::preprocessedIncludes.insert(value: include);
1170
1171 QFile file(QString::fromLocal8Bit(ba: include.constData()));
1172 if (!file.open(flags: QFile::ReadOnly))
1173 continue;
1174
1175 QByteArray input = readOrMapFile(file: &file);
1176
1177 file.close();
1178 if (input.isEmpty())
1179 continue;
1180
1181 Symbols saveSymbols = symbols;
1182 qsizetype saveIndex = index;
1183
1184 // phase 1: get rid of backslash-newlines
1185 input = cleaned(input);
1186
1187 // phase 2: tokenize for the preprocessor
1188 symbols = tokenize(input);
1189 input.clear();
1190
1191 index = 0;
1192
1193 // phase 3: preprocess conditions and substitute macros
1194 preprocessed += Symbol(0, MOC_INCLUDE_BEGIN, include);
1195 preprocess(filename: include, preprocessed);
1196 preprocessed += Symbol(lineNum, MOC_INCLUDE_END, include);
1197
1198 symbols = saveSymbols;
1199 index = saveIndex;
1200 continue;
1201 }
1202 case PP_DEFINE:
1203 {
1204 next();
1205 QByteArray name = lexem();
1206 if (name.isEmpty() || !is_ident_start(s: name[0]))
1207 error();
1208 Macro macro;
1209 macro.isVariadic = false;
1210 if (test(token: LPAREN)) {
1211 // we have a function macro
1212 macro.isFunction = true;
1213 parseDefineArguments(m: &macro);
1214 } else {
1215 macro.isFunction = false;
1216 }
1217 qsizetype start = index;
1218 until(PP_NEWLINE);
1219 macro.symbols.reserve(asize: index - start - 1);
1220
1221 // remove whitespace where there shouldn't be any:
1222 // Before and after the macro, after a # and around ##
1223 Token lastToken = HASH; // skip shitespace at the beginning
1224 for (qsizetype i = start; i < index - 1; ++i) {
1225 Token token = symbols.at(i).token;
1226 if (token == WHITESPACE) {
1227 if (lastToken == PP_HASH || lastToken == HASH ||
1228 lastToken == PP_HASHHASH ||
1229 lastToken == WHITESPACE)
1230 continue;
1231 } else if (token == PP_HASHHASH) {
1232 if (!macro.symbols.isEmpty() &&
1233 lastToken == WHITESPACE)
1234 macro.symbols.pop_back();
1235 }
1236 macro.symbols.append(t: symbols.at(i));
1237 lastToken = token;
1238 }
1239 // remove trailing whitespace
1240 while (!macro.symbols.isEmpty() &&
1241 (macro.symbols.constLast().token == PP_WHITESPACE || macro.symbols.constLast().token == WHITESPACE))
1242 macro.symbols.pop_back();
1243
1244 if (!macro.symbols.isEmpty()) {
1245 if (macro.symbols.constFirst().token == PP_HASHHASH ||
1246 macro.symbols.constLast().token == PP_HASHHASH) {
1247 error(msg: "'##' cannot appear at either end of a macro expansion");
1248 }
1249 }
1250 macros.insert(key: name, value: macro);
1251 continue;
1252 }
1253 case PP_UNDEF: {
1254 next();
1255 QByteArray name = lexem();
1256 until(PP_NEWLINE);
1257 macros.remove(key: name);
1258 continue;
1259 }
1260 case PP_IDENTIFIER: {
1261 // substitute macros
1262 macroExpand(into: &preprocessed, that: this, toExpand: symbols, index, lineNum: symbol().lineNum, one: true);
1263 continue;
1264 }
1265 case PP_HASH:
1266 until(PP_NEWLINE);
1267 continue; // skip unknown preprocessor statement
1268 case PP_IFDEF:
1269 case PP_IFNDEF:
1270 case PP_IF:
1271 while (!evaluateCondition()) {
1272 if (!skipBranch())
1273 break;
1274 if (test(token: PP_ELIF)) {
1275 } else {
1276 until(PP_NEWLINE);
1277 break;
1278 }
1279 }
1280 continue;
1281 case PP_ELIF:
1282 case PP_ELSE:
1283 skipUntilEndif();
1284 Q_FALLTHROUGH();
1285 case PP_ENDIF:
1286 until(PP_NEWLINE);
1287 continue;
1288 case PP_NEWLINE:
1289 continue;
1290 case SIGNALS:
1291 case SLOTS: {
1292 Symbol sym = symbol();
1293 if (macros.contains(key: "QT_NO_KEYWORDS"))
1294 sym.token = IDENTIFIER;
1295 else
1296 sym.token = (token == SIGNALS ? Q_SIGNALS_TOKEN : Q_SLOTS_TOKEN);
1297 preprocessed += sym;
1298 } continue;
1299 default:
1300 break;
1301 }
1302 preprocessed += symbol();
1303 }
1304
1305 currentFilenames.pop();
1306}
1307
1308Symbols Preprocessor::preprocessed(const QByteArray &filename, QFile *file)
1309{
1310 QByteArray input = readOrMapFile(file);
1311
1312 if (input.isEmpty())
1313 return symbols;
1314
1315 // phase 1: get rid of backslash-newlines
1316 input = cleaned(input);
1317
1318 // phase 2: tokenize for the preprocessor
1319 index = 0;
1320 symbols = tokenize(input);
1321
1322#if 0
1323 for (int j = 0; j < symbols.size(); ++j)
1324 fprintf(stderr, "line %d: %s(%s)\n",
1325 symbols[j].lineNum,
1326 symbols[j].lexem().constData(),
1327 tokenTypeName(symbols[j].token));
1328#endif
1329
1330 // phase 3: preprocess conditions and substitute macros
1331 Symbols result;
1332 // Preallocate some space to speed up the code below.
1333 // The magic value was found by logging the final size
1334 // and calculating an average when running moc over FOSS projects.
1335 result.reserve(asize: file->size() / 300000);
1336 preprocess(filename, preprocessed&: result);
1337 mergeStringLiterals(symbols&: result);
1338
1339#if 0
1340 for (int j = 0; j < result.size(); ++j)
1341 fprintf(stderr, "line %d: %s(%s)\n",
1342 result[j].lineNum,
1343 result[j].lexem().constData(),
1344 tokenTypeName(result[j].token));
1345#endif
1346
1347 return result;
1348}
1349
1350void Preprocessor::parseDefineArguments(Macro *m)
1351{
1352 Symbols arguments;
1353 while (hasNext()) {
1354 while (test(token: PP_WHITESPACE)) {}
1355 Token t = next();
1356 if (t == PP_RPAREN)
1357 break;
1358 if (t != PP_IDENTIFIER) {
1359 QByteArray l = lexem();
1360 if (l == "...") {
1361 m->isVariadic = true;
1362 arguments += Symbol(symbol().lineNum, PP_IDENTIFIER, "__VA_ARGS__");
1363 while (test(token: PP_WHITESPACE)) {}
1364 if (!test(token: PP_RPAREN))
1365 error(msg: "missing ')' in macro argument list");
1366 break;
1367 } else if (!is_identifier(s: l.constData(), len: l.size())) {
1368 error(msg: "Unexpected character in macro argument list.");
1369 }
1370 }
1371
1372 Symbol arg = symbol();
1373 if (arguments.contains(t: arg))
1374 error(msg: "Duplicate macro parameter.");
1375 arguments += symbol();
1376
1377 while (test(token: PP_WHITESPACE)) {}
1378 t = next();
1379 if (t == PP_RPAREN)
1380 break;
1381 if (t == PP_COMMA)
1382 continue;
1383 if (lexem() == "...") {
1384 //GCC extension: #define FOO(x, y...) x(y)
1385 // The last argument was already parsed. Just mark the macro as variadic.
1386 m->isVariadic = true;
1387 while (test(token: PP_WHITESPACE)) {}
1388 if (!test(token: PP_RPAREN))
1389 error(msg: "missing ')' in macro argument list");
1390 break;
1391 }
1392 error(msg: "Unexpected character in macro argument list.");
1393 }
1394 m->arguments = arguments;
1395 while (test(token: PP_WHITESPACE)) {}
1396}
1397
1398void Preprocessor::until(Token t)
1399{
1400 while(hasNext() && next() != t)
1401 ;
1402}
1403
1404void Preprocessor::setDebugIncludes(bool value)
1405{
1406 debugIncludes = value;
1407}
1408
1409
1410QT_END_NAMESPACE
1411

// Source: qtbase/src/tools/moc/preprocessor.cpp