1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Copyright (C) 2014 Olivier Goffart <ogoffart@woboq.org> |
5 | ** Contact: https://www.qt.io/licensing/ |
6 | ** |
7 | ** This file is part of the tools applications of the Qt Toolkit. |
8 | ** |
9 | ** $QT_BEGIN_LICENSE:GPL-EXCEPT$ |
10 | ** Commercial License Usage |
11 | ** Licensees holding valid commercial Qt licenses may use this file in |
12 | ** accordance with the commercial license agreement provided with the |
13 | ** Software or, alternatively, in accordance with the terms contained in |
14 | ** a written agreement between you and The Qt Company. For licensing terms |
15 | ** and conditions see https://www.qt.io/terms-conditions. For further |
16 | ** information use the contact form at https://www.qt.io/contact-us. |
17 | ** |
18 | ** GNU General Public License Usage |
19 | ** Alternatively, this file may be used under the terms of the GNU |
20 | ** General Public License version 3 as published by the Free Software |
21 | ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT |
22 | ** included in the packaging of this file. Please review the following |
23 | ** information to ensure the GNU General Public License requirements will |
24 | ** be met: https://www.gnu.org/licenses/gpl-3.0.html. |
25 | ** |
26 | ** $QT_END_LICENSE$ |
27 | ** |
28 | ****************************************************************************/ |
29 | |
30 | #include "preprocessor.h" |
31 | #include "utils.h" |
32 | #include <qstringlist.h> |
33 | #include <qfile.h> |
34 | #include <qdir.h> |
35 | #include <qfileinfo.h> |
36 | |
37 | QT_BEGIN_NAMESPACE |
38 | |
39 | #include "ppkeywords.cpp" |
40 | #include "keywords.cpp" |
41 | |
42 | // transform \r\n into \n |
43 | // \r into \n (os9 style) |
44 | // backslash-newlines into newlines |
45 | static QByteArray cleaned(const QByteArray &input) |
46 | { |
47 | QByteArray result; |
48 | result.resize(size: input.size()); |
49 | const char *data = input.constData(); |
50 | const char *end = input.constData() + input.size(); |
51 | char *output = result.data(); |
52 | |
53 | int newlines = 0; |
54 | while (data != end) { |
55 | while (data != end && is_space(s: *data)) |
56 | ++data; |
57 | bool takeLine = (*data == '#'); |
58 | if (*data == '%' && *(data+1) == ':') { |
59 | takeLine = true; |
60 | ++data; |
61 | } |
62 | if (takeLine) { |
63 | *output = '#'; |
64 | ++output; |
65 | do ++data; while (data != end && is_space(s: *data)); |
66 | } |
67 | while (data != end) { |
68 | // handle \\\n, \\\r\n and \\\r |
69 | if (*data == '\\') { |
70 | if (*(data + 1) == '\r') { |
71 | ++data; |
72 | } |
73 | if (data != end && (*(data + 1) == '\n' || (*data) == '\r')) { |
74 | ++newlines; |
75 | data += 1; |
76 | if (data != end && *data != '\r') |
77 | data += 1; |
78 | continue; |
79 | } |
80 | } else if (*data == '\r' && *(data + 1) == '\n') { // reduce \r\n to \n |
81 | ++data; |
82 | } |
83 | if (data == end) |
84 | break; |
85 | |
86 | char ch = *data; |
87 | if (ch == '\r') // os9: replace \r with \n |
88 | ch = '\n'; |
89 | *output = ch; |
90 | ++output; |
91 | |
92 | if (*data == '\n') { |
93 | // output additional newlines to keep the correct line-numbering |
94 | // for the lines following the backslash-newline sequence(s) |
95 | while (newlines) { |
96 | *output = '\n'; |
97 | ++output; |
98 | --newlines; |
99 | } |
100 | ++data; |
101 | break; |
102 | } |
103 | ++data; |
104 | } |
105 | } |
106 | result.resize(size: output - result.constData()); |
107 | return result; |
108 | } |
109 | |
110 | bool Preprocessor::preprocessOnly = false; |
111 | void Preprocessor::skipUntilEndif() |
112 | { |
113 | while(index < symbols.size() - 1 && symbols.at(i: index).token != PP_ENDIF){ |
114 | switch (symbols.at(i: index).token) { |
115 | case PP_IF: |
116 | case PP_IFDEF: |
117 | case PP_IFNDEF: |
118 | ++index; |
119 | skipUntilEndif(); |
120 | break; |
121 | default: |
122 | ; |
123 | } |
124 | ++index; |
125 | } |
126 | } |
127 | |
128 | bool Preprocessor::skipBranch() |
129 | { |
130 | while (index < symbols.size() - 1 |
131 | && (symbols.at(i: index).token != PP_ENDIF |
132 | && symbols.at(i: index).token != PP_ELIF |
133 | && symbols.at(i: index).token != PP_ELSE) |
134 | ){ |
135 | switch (symbols.at(i: index).token) { |
136 | case PP_IF: |
137 | case PP_IFDEF: |
138 | case PP_IFNDEF: |
139 | ++index; |
140 | skipUntilEndif(); |
141 | break; |
142 | default: |
143 | ; |
144 | } |
145 | ++index; |
146 | } |
147 | return (index < symbols.size() - 1); |
148 | } |
149 | |
150 | |
151 | Symbols Preprocessor::tokenize(const QByteArray& input, int lineNum, Preprocessor::TokenizeMode mode) |
152 | { |
153 | Symbols symbols; |
154 | // Preallocate some space to speed up the code below. |
155 | // The magic divisor value was found by calculating the average ratio between |
156 | // input size and the final size of symbols. |
157 | // This yielded a value of 16.x when compiling Qt Base. |
158 | symbols.reserve(asize: input.size() / 16); |
159 | const char *begin = input.constData(); |
160 | const char *data = begin; |
161 | while (*data) { |
162 | if (mode == TokenizeCpp || mode == TokenizeDefine) { |
163 | int column = 0; |
164 | |
165 | const char *lexem = data; |
166 | int state = 0; |
167 | Token token = NOTOKEN; |
168 | for (;;) { |
169 | if (static_cast<signed char>(*data) < 0) { |
170 | ++data; |
171 | continue; |
172 | } |
173 | int nextindex = keywords[state].next; |
174 | int next = 0; |
175 | if (*data == keywords[state].defchar) |
176 | next = keywords[state].defnext; |
177 | else if (!state || nextindex) |
178 | next = keyword_trans[nextindex][(int)*data]; |
179 | if (!next) |
180 | break; |
181 | state = next; |
182 | token = keywords[state].token; |
183 | ++data; |
184 | } |
185 | |
186 | // suboptimal, is_ident_char should use a table |
187 | if (keywords[state].ident && is_ident_char(s: *data)) |
188 | token = keywords[state].ident; |
189 | |
190 | if (token == NOTOKEN) { |
191 | if (*data) |
192 | ++data; |
193 | // an error really, but let's ignore this input |
194 | // to not confuse moc later. However in pre-processor |
195 | // only mode let's continue. |
196 | if (!Preprocessor::preprocessOnly) |
197 | continue; |
198 | } |
199 | |
200 | ++column; |
201 | |
202 | if (token > SPECIAL_TREATMENT_MARK) { |
203 | switch (token) { |
204 | case QUOTE: |
205 | data = skipQuote(data); |
206 | token = STRING_LITERAL; |
207 | // concatenate multi-line strings for easier |
208 | // STRING_LITERAL handling in moc |
209 | if (!Preprocessor::preprocessOnly |
210 | && !symbols.isEmpty() |
211 | && symbols.constLast().token == STRING_LITERAL) { |
212 | |
213 | const QByteArray newString |
214 | = '\"' |
215 | + symbols.constLast().unquotedLexem() |
216 | + input.mid(index: lexem - begin + 1, len: data - lexem - 2) |
217 | + '\"'; |
218 | symbols.last() = Symbol(symbols.constLast().lineNum, |
219 | STRING_LITERAL, |
220 | newString); |
221 | continue; |
222 | } |
223 | break; |
224 | case SINGLEQUOTE: |
225 | while (*data && (*data != '\'' |
226 | || (*(data-1)=='\\' |
227 | && *(data-2)!='\\'))) |
228 | ++data; |
229 | if (*data) |
230 | ++data; |
231 | token = CHARACTER_LITERAL; |
232 | break; |
233 | case LANGLE_SCOPE: |
234 | // split <:: into two tokens, < and :: |
235 | token = LANGLE; |
236 | data -= 2; |
237 | break; |
238 | case DIGIT: |
239 | while (is_digit_char(s: *data) || *data == '\'') |
240 | ++data; |
241 | if (!*data || *data != '.') { |
242 | token = INTEGER_LITERAL; |
243 | if (data - lexem == 1 && |
244 | (*data == 'x' || *data == 'X' |
245 | || *data == 'b' || *data == 'B') |
246 | && *lexem == '0') { |
247 | ++data; |
248 | while (is_hex_char(s: *data) || *data == '\'') |
249 | ++data; |
250 | } |
251 | break; |
252 | } |
253 | token = FLOATING_LITERAL; |
254 | ++data; |
255 | Q_FALLTHROUGH(); |
256 | case FLOATING_LITERAL: |
257 | while (is_digit_char(s: *data) || *data == '\'') |
258 | ++data; |
259 | if (*data == '+' || *data == '-') |
260 | ++data; |
261 | if (*data == 'e' || *data == 'E') { |
262 | ++data; |
263 | while (is_digit_char(s: *data) || *data == '\'') |
264 | ++data; |
265 | } |
266 | if (*data == 'f' || *data == 'F' |
267 | || *data == 'l' || *data == 'L') |
268 | ++data; |
269 | break; |
270 | case HASH: |
271 | if (column == 1 && mode == TokenizeCpp) { |
272 | mode = PreparePreprocessorStatement; |
273 | while (*data && (*data == ' ' || *data == '\t')) |
274 | ++data; |
275 | if (is_ident_char(s: *data)) |
276 | mode = TokenizePreprocessorStatement; |
277 | continue; |
278 | } |
279 | break; |
280 | case PP_HASHHASH: |
281 | if (mode == TokenizeCpp) |
282 | continue; |
283 | break; |
284 | case NEWLINE: |
285 | ++lineNum; |
286 | if (mode == TokenizeDefine) { |
287 | mode = TokenizeCpp; |
288 | // emit the newline token |
289 | break; |
290 | } |
291 | continue; |
292 | case BACKSLASH: |
293 | { |
294 | const char *rewind = data; |
295 | while (*data && (*data == ' ' || *data == '\t')) |
296 | ++data; |
297 | if (*data && *data == '\n') { |
298 | ++data; |
299 | continue; |
300 | } |
301 | data = rewind; |
302 | } break; |
303 | case CHARACTER: |
304 | while (is_ident_char(s: *data)) |
305 | ++data; |
306 | token = IDENTIFIER; |
307 | break; |
308 | case C_COMMENT: |
309 | if (*data) { |
310 | if (*data == '\n') |
311 | ++lineNum; |
312 | ++data; |
313 | if (*data) { |
314 | if (*data == '\n') |
315 | ++lineNum; |
316 | ++data; |
317 | } |
318 | } |
319 | while (*data && (*(data-1) != '/' || *(data-2) != '*')) { |
320 | if (*data == '\n') |
321 | ++lineNum; |
322 | ++data; |
323 | } |
324 | token = WHITESPACE; // one comment, one whitespace |
325 | Q_FALLTHROUGH(); |
326 | case WHITESPACE: |
327 | if (column == 1) |
328 | column = 0; |
329 | while (*data && (*data == ' ' || *data == '\t')) |
330 | ++data; |
331 | if (Preprocessor::preprocessOnly) // tokenize whitespace |
332 | break; |
333 | continue; |
334 | case CPP_COMMENT: |
335 | while (*data && *data != '\n') |
336 | ++data; |
337 | continue; // ignore safely, the newline is a separator |
338 | default: |
339 | continue; //ignore |
340 | } |
341 | } |
342 | #ifdef USE_LEXEM_STORE |
343 | if (!Preprocessor::preprocessOnly |
344 | && token != IDENTIFIER |
345 | && token != STRING_LITERAL |
346 | && token != FLOATING_LITERAL |
347 | && token != INTEGER_LITERAL) |
348 | symbols += Symbol(lineNum, token); |
349 | else |
350 | #endif |
351 | symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem); |
352 | |
353 | } else { // Preprocessor |
354 | |
355 | const char *lexem = data; |
356 | int state = 0; |
357 | Token token = NOTOKEN; |
358 | if (mode == TokenizePreprocessorStatement) { |
359 | state = pp_keyword_trans[0][(int)'#']; |
360 | mode = TokenizePreprocessor; |
361 | } |
362 | for (;;) { |
363 | if (static_cast<signed char>(*data) < 0) { |
364 | ++data; |
365 | continue; |
366 | } |
367 | int nextindex = pp_keywords[state].next; |
368 | int next = 0; |
369 | if (*data == pp_keywords[state].defchar) |
370 | next = pp_keywords[state].defnext; |
371 | else if (!state || nextindex) |
372 | next = pp_keyword_trans[nextindex][(int)*data]; |
373 | if (!next) |
374 | break; |
375 | state = next; |
376 | token = pp_keywords[state].token; |
377 | ++data; |
378 | } |
379 | // suboptimal, is_ident_char should use a table |
380 | if (pp_keywords[state].ident && is_ident_char(s: *data)) |
381 | token = pp_keywords[state].ident; |
382 | |
383 | switch (token) { |
384 | case NOTOKEN: |
385 | if (*data) |
386 | ++data; |
387 | break; |
388 | case PP_DEFINE: |
389 | mode = PrepareDefine; |
390 | break; |
391 | case PP_IFDEF: |
392 | symbols += Symbol(lineNum, PP_IF); |
393 | symbols += Symbol(lineNum, PP_DEFINED); |
394 | continue; |
395 | case PP_IFNDEF: |
396 | symbols += Symbol(lineNum, PP_IF); |
397 | symbols += Symbol(lineNum, PP_NOT); |
398 | symbols += Symbol(lineNum, PP_DEFINED); |
399 | continue; |
400 | case PP_INCLUDE: |
401 | mode = TokenizeInclude; |
402 | break; |
403 | case PP_QUOTE: |
404 | data = skipQuote(data); |
405 | token = PP_STRING_LITERAL; |
406 | break; |
407 | case PP_SINGLEQUOTE: |
408 | while (*data && (*data != '\'' |
409 | || (*(data-1)=='\\' |
410 | && *(data-2)!='\\'))) |
411 | ++data; |
412 | if (*data) |
413 | ++data; |
414 | token = PP_CHARACTER_LITERAL; |
415 | break; |
416 | case PP_DIGIT: |
417 | while (is_digit_char(s: *data) || *data == '\'') |
418 | ++data; |
419 | if (!*data || *data != '.') { |
420 | token = PP_INTEGER_LITERAL; |
421 | if (data - lexem == 1 && |
422 | (*data == 'x' || *data == 'X') |
423 | && *lexem == '0') { |
424 | ++data; |
425 | while (is_hex_char(s: *data) || *data == '\'') |
426 | ++data; |
427 | } |
428 | break; |
429 | } |
430 | token = PP_FLOATING_LITERAL; |
431 | ++data; |
432 | Q_FALLTHROUGH(); |
433 | case PP_FLOATING_LITERAL: |
434 | while (is_digit_char(s: *data) || *data == '\'') |
435 | ++data; |
436 | if (*data == '+' || *data == '-') |
437 | ++data; |
438 | if (*data == 'e' || *data == 'E') { |
439 | ++data; |
440 | while (is_digit_char(s: *data) || *data == '\'') |
441 | ++data; |
442 | } |
443 | if (*data == 'f' || *data == 'F' |
444 | || *data == 'l' || *data == 'L') |
445 | ++data; |
446 | break; |
447 | case PP_CHARACTER: |
448 | if (mode == PreparePreprocessorStatement) { |
449 | // rewind entire token to begin |
450 | data = lexem; |
451 | mode = TokenizePreprocessorStatement; |
452 | continue; |
453 | } |
454 | while (is_ident_char(s: *data)) |
455 | ++data; |
456 | token = PP_IDENTIFIER; |
457 | |
458 | if (mode == PrepareDefine) { |
459 | symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem); |
460 | // make sure we explicitly add the whitespace here if the next char |
461 | // is not an opening brace, so we can distinguish correctly between |
462 | // regular and function macros |
463 | if (*data != '(') |
464 | symbols += Symbol(lineNum, WHITESPACE); |
465 | mode = TokenizeDefine; |
466 | continue; |
467 | } |
468 | break; |
469 | case PP_C_COMMENT: |
470 | if (*data) { |
471 | if (*data == '\n') |
472 | ++lineNum; |
473 | ++data; |
474 | if (*data) { |
475 | if (*data == '\n') |
476 | ++lineNum; |
477 | ++data; |
478 | } |
479 | } |
480 | while (*data && (*(data-1) != '/' || *(data-2) != '*')) { |
481 | if (*data == '\n') |
482 | ++lineNum; |
483 | ++data; |
484 | } |
485 | token = PP_WHITESPACE; // one comment, one whitespace |
486 | Q_FALLTHROUGH(); |
487 | case PP_WHITESPACE: |
488 | while (*data && (*data == ' ' || *data == '\t')) |
489 | ++data; |
490 | continue; // the preprocessor needs no whitespace |
491 | case PP_CPP_COMMENT: |
492 | while (*data && *data != '\n') |
493 | ++data; |
494 | continue; // ignore safely, the newline is a separator |
495 | case PP_NEWLINE: |
496 | ++lineNum; |
497 | mode = TokenizeCpp; |
498 | break; |
499 | case PP_BACKSLASH: |
500 | { |
501 | const char *rewind = data; |
502 | while (*data && (*data == ' ' || *data == '\t')) |
503 | ++data; |
504 | if (*data && *data == '\n') { |
505 | ++data; |
506 | continue; |
507 | } |
508 | data = rewind; |
509 | } break; |
510 | case PP_LANGLE: |
511 | if (mode != TokenizeInclude) |
512 | break; |
513 | token = PP_STRING_LITERAL; |
514 | while (*data && *data != '\n' && *(data-1) != '>') |
515 | ++data; |
516 | break; |
517 | default: |
518 | break; |
519 | } |
520 | if (mode == PreparePreprocessorStatement) |
521 | continue; |
522 | #ifdef USE_LEXEM_STORE |
523 | if (token != PP_IDENTIFIER |
524 | && token != PP_STRING_LITERAL |
525 | && token != PP_FLOATING_LITERAL |
526 | && token != PP_INTEGER_LITERAL) |
527 | symbols += Symbol(lineNum, token); |
528 | else |
529 | #endif |
530 | symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem); |
531 | } |
532 | } |
533 | symbols += Symbol(); // eof symbol |
534 | return symbols; |
535 | } |
536 | |
537 | void Preprocessor::macroExpand(Symbols *into, Preprocessor *that, const Symbols &toExpand, int &index, |
538 | int lineNum, bool one, const QSet<QByteArray> &excludeSymbols) |
539 | { |
540 | SymbolStack symbols; |
541 | SafeSymbols sf; |
542 | sf.symbols = toExpand; |
543 | sf.index = index; |
544 | sf.excludedSymbols = excludeSymbols; |
545 | symbols.push(t: sf); |
546 | |
547 | if (toExpand.isEmpty()) |
548 | return; |
549 | |
550 | for (;;) { |
551 | QByteArray macro; |
552 | Symbols newSyms = macroExpandIdentifier(that, symbols, lineNum, macroName: ¯o); |
553 | |
554 | if (macro.isEmpty()) { |
555 | // not a macro |
556 | Symbol s = symbols.symbol(); |
557 | s.lineNum = lineNum; |
558 | *into += s; |
559 | } else { |
560 | SafeSymbols sf; |
561 | sf.symbols = newSyms; |
562 | sf.index = 0; |
563 | sf.expandedMacro = macro; |
564 | symbols.push(t: sf); |
565 | } |
566 | if (!symbols.hasNext() || (one && symbols.size() == 1)) |
567 | break; |
568 | symbols.next(); |
569 | } |
570 | |
571 | if (symbols.size()) |
572 | index = symbols.top().index; |
573 | else |
574 | index = toExpand.size(); |
575 | } |
576 | |
577 | |
578 | Symbols Preprocessor::macroExpandIdentifier(Preprocessor *that, SymbolStack &symbols, int lineNum, QByteArray *macroName) |
579 | { |
580 | Symbol s = symbols.symbol(); |
581 | |
582 | // not a macro |
583 | if (s.token != PP_IDENTIFIER || !that->macros.contains(akey: s) || symbols.dontReplaceSymbol(name: s.lexem())) { |
584 | return Symbols(); |
585 | } |
586 | |
587 | const Macro ¯o = that->macros.value(akey: s); |
588 | *macroName = s.lexem(); |
589 | |
590 | Symbols expansion; |
591 | if (!macro.isFunction) { |
592 | expansion = macro.symbols; |
593 | } else { |
594 | bool haveSpace = false; |
595 | while (symbols.test(token: PP_WHITESPACE)) { haveSpace = true; } |
596 | if (!symbols.test(token: PP_LPAREN)) { |
597 | *macroName = QByteArray(); |
598 | Symbols syms; |
599 | if (haveSpace) |
600 | syms += Symbol(lineNum, PP_WHITESPACE); |
601 | syms += s; |
602 | syms.last().lineNum = lineNum; |
603 | return syms; |
604 | } |
605 | QVarLengthArray<Symbols, 5> arguments; |
606 | while (symbols.hasNext()) { |
607 | Symbols argument; |
608 | // strip leading space |
609 | while (symbols.test(token: PP_WHITESPACE)) {} |
610 | int nesting = 0; |
611 | bool vararg = macro.isVariadic && (arguments.size() == macro.arguments.size() - 1); |
612 | while (symbols.hasNext()) { |
613 | Token t = symbols.next(); |
614 | if (t == PP_LPAREN) { |
615 | ++nesting; |
616 | } else if (t == PP_RPAREN) { |
617 | --nesting; |
618 | if (nesting < 0) |
619 | break; |
620 | } else if (t == PP_COMMA && nesting == 0) { |
621 | if (!vararg) |
622 | break; |
623 | } |
624 | argument += symbols.symbol(); |
625 | } |
626 | arguments += argument; |
627 | |
628 | if (nesting < 0) |
629 | break; |
630 | else if (!symbols.hasNext()) |
631 | that->error(msg: "missing ')' in macro usage" ); |
632 | } |
633 | |
634 | // empty VA_ARGS |
635 | if (macro.isVariadic && arguments.size() == macro.arguments.size() - 1) |
636 | arguments += Symbols(); |
637 | |
638 | // now replace the macro arguments with the expanded arguments |
639 | enum Mode { |
640 | Normal, |
641 | Hash, |
642 | HashHash |
643 | } mode = Normal; |
644 | |
645 | for (int i = 0; i < macro.symbols.size(); ++i) { |
646 | const Symbol &s = macro.symbols.at(i); |
647 | if (s.token == HASH || s.token == PP_HASHHASH) { |
648 | mode = (s.token == HASH ? Hash : HashHash); |
649 | continue; |
650 | } |
651 | int index = macro.arguments.indexOf(t: s); |
652 | if (mode == Normal) { |
653 | if (index >= 0 && index < arguments.size()) { |
654 | // each argument undoergoes macro expansion if it's not used as part of a # or ## |
655 | if (i == macro.symbols.size() - 1 || macro.symbols.at(i: i + 1).token != PP_HASHHASH) { |
656 | Symbols arg = arguments.at(idx: index); |
657 | int idx = 1; |
658 | macroExpand(into: &expansion, that, toExpand: arg, index&: idx, lineNum, one: false, excludeSymbols: symbols.excludeSymbols()); |
659 | } else { |
660 | expansion += arguments.at(idx: index); |
661 | } |
662 | } else { |
663 | expansion += s; |
664 | } |
665 | } else if (mode == Hash) { |
666 | if (index < 0) { |
667 | that->error(msg: "'#' is not followed by a macro parameter" ); |
668 | continue; |
669 | } else if (index >= arguments.size()) { |
670 | that->error(msg: "Macro invoked with too few parameters for a use of '#'" ); |
671 | continue; |
672 | } |
673 | |
674 | const Symbols &arg = arguments.at(idx: index); |
675 | QByteArray stringified; |
676 | for (int i = 0; i < arg.size(); ++i) { |
677 | stringified += arg.at(i).lexem(); |
678 | } |
679 | stringified.replace(before: '"', c: "\\\"" ); |
680 | stringified.prepend(c: '"'); |
681 | stringified.append(c: '"'); |
682 | expansion += Symbol(lineNum, STRING_LITERAL, stringified); |
683 | } else if (mode == HashHash){ |
684 | if (s.token == WHITESPACE) |
685 | continue; |
686 | |
687 | while (expansion.size() && expansion.constLast().token == PP_WHITESPACE) |
688 | expansion.pop_back(); |
689 | |
690 | Symbol next = s; |
691 | if (index >= 0 && index < arguments.size()) { |
692 | const Symbols &arg = arguments.at(idx: index); |
693 | if (arg.size() == 0) { |
694 | mode = Normal; |
695 | continue; |
696 | } |
697 | next = arg.at(i: 0); |
698 | } |
699 | |
700 | if (!expansion.isEmpty() && expansion.constLast().token == s.token |
701 | && expansion.constLast().token != STRING_LITERAL) { |
702 | Symbol last = expansion.takeLast(); |
703 | |
704 | QByteArray lexem = last.lexem() + next.lexem(); |
705 | expansion += Symbol(lineNum, last.token, lexem); |
706 | } else { |
707 | expansion += next; |
708 | } |
709 | |
710 | if (index >= 0 && index < arguments.size()) { |
711 | const Symbols &arg = arguments.at(idx: index); |
712 | for (int i = 1; i < arg.size(); ++i) |
713 | expansion += arg.at(i); |
714 | } |
715 | } |
716 | mode = Normal; |
717 | } |
718 | if (mode != Normal) |
719 | that->error(msg: "'#' or '##' found at the end of a macro argument" ); |
720 | |
721 | } |
722 | |
723 | return expansion; |
724 | } |
725 | |
726 | void Preprocessor::substituteUntilNewline(Symbols &substituted) |
727 | { |
728 | while (hasNext()) { |
729 | Token token = next(); |
730 | if (token == PP_IDENTIFIER) { |
731 | macroExpand(into: &substituted, that: this, toExpand: symbols, index, lineNum: symbol().lineNum, one: true); |
732 | } else if (token == PP_DEFINED) { |
733 | bool braces = test(token: PP_LPAREN); |
734 | next(token: PP_IDENTIFIER); |
735 | Symbol definedOrNotDefined = symbol(); |
736 | definedOrNotDefined.token = macros.contains(akey: definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE; |
737 | substituted += definedOrNotDefined; |
738 | if (braces) |
739 | test(token: PP_RPAREN); |
740 | continue; |
741 | } else if (token == PP_NEWLINE) { |
742 | substituted += symbol(); |
743 | break; |
744 | } else { |
745 | substituted += symbol(); |
746 | } |
747 | } |
748 | } |
749 | |
750 | |
751 | class PP_Expression : public Parser |
752 | { |
753 | public: |
754 | int value() { index = 0; return unary_expression_lookup() ? conditional_expression() : 0; } |
755 | |
756 | int conditional_expression(); |
757 | int logical_OR_expression(); |
758 | int logical_AND_expression(); |
759 | int inclusive_OR_expression(); |
760 | int exclusive_OR_expression(); |
761 | int AND_expression(); |
762 | int equality_expression(); |
763 | int relational_expression(); |
764 | int shift_expression(); |
765 | int additive_expression(); |
766 | int multiplicative_expression(); |
767 | int unary_expression(); |
768 | bool unary_expression_lookup(); |
769 | int primary_expression(); |
770 | bool primary_expression_lookup(); |
771 | }; |
772 | |
773 | int PP_Expression::conditional_expression() |
774 | { |
775 | int value = logical_OR_expression(); |
776 | if (test(token: PP_QUESTION)) { |
777 | int alt1 = conditional_expression(); |
778 | int alt2 = test(token: PP_COLON) ? conditional_expression() : 0; |
779 | return value ? alt1 : alt2; |
780 | } |
781 | return value; |
782 | } |
783 | |
784 | int PP_Expression::logical_OR_expression() |
785 | { |
786 | int value = logical_AND_expression(); |
787 | if (test(token: PP_OROR)) |
788 | return logical_OR_expression() || value; |
789 | return value; |
790 | } |
791 | |
792 | int PP_Expression::logical_AND_expression() |
793 | { |
794 | int value = inclusive_OR_expression(); |
795 | if (test(token: PP_ANDAND)) |
796 | return logical_AND_expression() && value; |
797 | return value; |
798 | } |
799 | |
800 | int PP_Expression::inclusive_OR_expression() |
801 | { |
802 | int value = exclusive_OR_expression(); |
803 | if (test(token: PP_OR)) |
804 | return value | inclusive_OR_expression(); |
805 | return value; |
806 | } |
807 | |
808 | int PP_Expression::exclusive_OR_expression() |
809 | { |
810 | int value = AND_expression(); |
811 | if (test(token: PP_HAT)) |
812 | return value ^ exclusive_OR_expression(); |
813 | return value; |
814 | } |
815 | |
816 | int PP_Expression::AND_expression() |
817 | { |
818 | int value = equality_expression(); |
819 | if (test(token: PP_AND)) |
820 | return value & AND_expression(); |
821 | return value; |
822 | } |
823 | |
824 | int PP_Expression::equality_expression() |
825 | { |
826 | int value = relational_expression(); |
827 | switch (next()) { |
828 | case PP_EQEQ: |
829 | return value == equality_expression(); |
830 | case PP_NE: |
831 | return value != equality_expression(); |
832 | default: |
833 | prev(); |
834 | return value; |
835 | } |
836 | } |
837 | |
838 | int PP_Expression::relational_expression() |
839 | { |
840 | int value = shift_expression(); |
841 | switch (next()) { |
842 | case PP_LANGLE: |
843 | return value < relational_expression(); |
844 | case PP_RANGLE: |
845 | return value > relational_expression(); |
846 | case PP_LE: |
847 | return value <= relational_expression(); |
848 | case PP_GE: |
849 | return value >= relational_expression(); |
850 | default: |
851 | prev(); |
852 | return value; |
853 | } |
854 | } |
855 | |
856 | int PP_Expression::shift_expression() |
857 | { |
858 | int value = additive_expression(); |
859 | switch (next()) { |
860 | case PP_LTLT: |
861 | return value << shift_expression(); |
862 | case PP_GTGT: |
863 | return value >> shift_expression(); |
864 | default: |
865 | prev(); |
866 | return value; |
867 | } |
868 | } |
869 | |
870 | int PP_Expression::additive_expression() |
871 | { |
872 | int value = multiplicative_expression(); |
873 | switch (next()) { |
874 | case PP_PLUS: |
875 | return value + additive_expression(); |
876 | case PP_MINUS: |
877 | return value - additive_expression(); |
878 | default: |
879 | prev(); |
880 | return value; |
881 | } |
882 | } |
883 | |
884 | int PP_Expression::multiplicative_expression() |
885 | { |
886 | int value = unary_expression(); |
887 | switch (next()) { |
888 | case PP_STAR: |
889 | { |
890 | // get well behaved overflow behavior by converting to long |
891 | // and then back to int |
892 | // NOTE: A conformant preprocessor would need to work intmax_t/ |
893 | // uintmax_t according to [cpp.cond], 19.1 §10 |
894 | // But we're not compliant anyway |
895 | qint64 result = qint64(value) * qint64(multiplicative_expression()); |
896 | return int(result); |
897 | } |
898 | case PP_PERCENT: |
899 | { |
900 | int remainder = multiplicative_expression(); |
901 | return remainder ? value % remainder : 0; |
902 | } |
903 | case PP_SLASH: |
904 | { |
905 | int div = multiplicative_expression(); |
906 | return div ? value / div : 0; |
907 | } |
908 | default: |
909 | prev(); |
910 | return value; |
911 | }; |
912 | } |
913 | |
914 | int PP_Expression::unary_expression() |
915 | { |
916 | switch (next()) { |
917 | case PP_PLUS: |
918 | return unary_expression(); |
919 | case PP_MINUS: |
920 | return -unary_expression(); |
921 | case PP_NOT: |
922 | return !unary_expression(); |
923 | case PP_TILDE: |
924 | return ~unary_expression(); |
925 | case PP_MOC_TRUE: |
926 | return 1; |
927 | case PP_MOC_FALSE: |
928 | return 0; |
929 | default: |
930 | prev(); |
931 | return primary_expression(); |
932 | } |
933 | } |
934 | |
935 | bool PP_Expression::unary_expression_lookup() |
936 | { |
937 | Token t = lookup(); |
938 | return (primary_expression_lookup() |
939 | || t == PP_PLUS |
940 | || t == PP_MINUS |
941 | || t == PP_NOT |
942 | || t == PP_TILDE |
943 | || t == PP_DEFINED); |
944 | } |
945 | |
946 | int PP_Expression::primary_expression() |
947 | { |
948 | int value; |
949 | if (test(token: PP_LPAREN)) { |
950 | value = conditional_expression(); |
951 | test(token: PP_RPAREN); |
952 | } else { |
953 | next(); |
954 | value = lexem().toInt(ok: nullptr, base: 0); |
955 | } |
956 | return value; |
957 | } |
958 | |
959 | bool PP_Expression::primary_expression_lookup() |
960 | { |
961 | Token t = lookup(); |
962 | return (t == PP_IDENTIFIER |
963 | || t == PP_INTEGER_LITERAL |
964 | || t == PP_FLOATING_LITERAL |
965 | || t == PP_MOC_TRUE |
966 | || t == PP_MOC_FALSE |
967 | || t == PP_LPAREN); |
968 | } |
969 | |
970 | int Preprocessor::evaluateCondition() |
971 | { |
972 | PP_Expression expression; |
973 | expression.currentFilenames = currentFilenames; |
974 | |
975 | substituteUntilNewline(substituted&: expression.symbols); |
976 | |
977 | return expression.value(); |
978 | } |
979 | |
980 | static QByteArray readOrMapFile(QFile *file) |
981 | { |
982 | const qint64 size = file->size(); |
983 | char *rawInput = reinterpret_cast<char*>(file->map(offset: 0, size)); |
984 | return rawInput ? QByteArray::fromRawData(rawInput, size) : file->readAll(); |
985 | } |
986 | |
987 | static void mergeStringLiterals(Symbols *_symbols) |
988 | { |
989 | Symbols &symbols = *_symbols; |
990 | for (Symbols::iterator i = symbols.begin(); i != symbols.end(); ++i) { |
991 | if (i->token == STRING_LITERAL) { |
992 | Symbols::Iterator mergeSymbol = i; |
993 | int literalsLength = mergeSymbol->len; |
994 | while (++i != symbols.end() && i->token == STRING_LITERAL) |
995 | literalsLength += i->len - 2; // no quotes |
996 | |
997 | if (literalsLength != mergeSymbol->len) { |
998 | QByteArray mergeSymbolOriginalLexem = mergeSymbol->unquotedLexem(); |
999 | QByteArray &mergeSymbolLexem = mergeSymbol->lex; |
1000 | mergeSymbolLexem.resize(size: 0); |
1001 | mergeSymbolLexem.reserve(asize: literalsLength); |
1002 | mergeSymbolLexem.append(c: '"'); |
1003 | mergeSymbolLexem.append(a: mergeSymbolOriginalLexem); |
1004 | for (Symbols::iterator j = mergeSymbol + 1; j != i; ++j) |
1005 | mergeSymbolLexem.append(s: j->lex.constData() + j->from + 1, len: j->len - 2); // append j->unquotedLexem() |
1006 | mergeSymbolLexem.append(c: '"'); |
1007 | mergeSymbol->len = mergeSymbol->lex.length(); |
1008 | mergeSymbol->from = 0; |
1009 | i = symbols.erase(abegin: mergeSymbol + 1, aend: i); |
1010 | } |
1011 | if (i == symbols.end()) |
1012 | break; |
1013 | } |
1014 | } |
1015 | } |
1016 | |
1017 | static QByteArray searchIncludePaths(const QList<Parser::IncludePath> &includepaths, |
1018 | const QByteArray &include) |
1019 | { |
1020 | QFileInfo fi; |
1021 | for (int j = 0; j < includepaths.size() && !fi.exists(); ++j) { |
1022 | const Parser::IncludePath &p = includepaths.at(i: j); |
1023 | if (p.isFrameworkPath) { |
1024 | const int slashPos = include.indexOf(c: '/'); |
1025 | if (slashPos == -1) |
1026 | continue; |
1027 | fi.setFile(dir: QString::fromLocal8Bit(str: p.path + '/' + include.left(len: slashPos) + ".framework/Headers/" ), |
1028 | file: QString::fromLocal8Bit(str: include.mid(index: slashPos + 1))); |
1029 | } else { |
1030 | fi.setFile(dir: QString::fromLocal8Bit(str: p.path), file: QString::fromLocal8Bit(str: include)); |
1031 | } |
1032 | // try again, maybe there's a file later in the include paths with the same name |
1033 | // (186067) |
1034 | if (fi.isDir()) { |
1035 | fi = QFileInfo(); |
1036 | continue; |
1037 | } |
1038 | } |
1039 | |
1040 | if (!fi.exists() || fi.isDir()) |
1041 | return QByteArray(); |
1042 | return fi.canonicalFilePath().toLocal8Bit(); |
1043 | } |
1044 | |
1045 | QByteArray Preprocessor::resolveInclude(const QByteArray &include, const QByteArray &relativeTo) |
1046 | { |
1047 | if (!relativeTo.isEmpty()) { |
1048 | QFileInfo fi; |
1049 | fi.setFile(dir: QFileInfo(QString::fromLocal8Bit(str: relativeTo)).dir(), file: QString::fromLocal8Bit(str: include)); |
1050 | if (fi.exists() && !fi.isDir()) |
1051 | return fi.canonicalFilePath().toLocal8Bit(); |
1052 | } |
1053 | |
1054 | auto it = nonlocalIncludePathResolutionCache.find(akey: include); |
1055 | if (it == nonlocalIncludePathResolutionCache.end()) |
1056 | it = nonlocalIncludePathResolutionCache.insert(akey: include, avalue: searchIncludePaths(includepaths: includes, include)); |
1057 | return it.value(); |
1058 | } |
1059 | |
1060 | void Preprocessor::preprocess(const QByteArray &filename, Symbols &preprocessed) |
1061 | { |
1062 | currentFilenames.push(x: filename); |
1063 | preprocessed.reserve(asize: preprocessed.size() + symbols.size()); |
1064 | while (hasNext()) { |
1065 | Token token = next(); |
1066 | |
1067 | switch (token) { |
1068 | case PP_INCLUDE: |
1069 | { |
1070 | int lineNum = symbol().lineNum; |
1071 | QByteArray include; |
1072 | bool local = false; |
1073 | if (test(token: PP_STRING_LITERAL)) { |
1074 | local = lexem().startsWith(c: '\"'); |
1075 | include = unquotedLexem(); |
1076 | } else |
1077 | continue; |
1078 | until(PP_NEWLINE); |
1079 | |
1080 | include = resolveInclude(include, relativeTo: local ? filename : QByteArray()); |
1081 | if (include.isNull()) |
1082 | continue; |
1083 | |
1084 | if (Preprocessor::preprocessedIncludes.contains(value: include)) |
1085 | continue; |
1086 | Preprocessor::preprocessedIncludes.insert(value: include); |
1087 | |
1088 | QFile file(QString::fromLocal8Bit(str: include.constData())); |
1089 | if (!file.open(flags: QFile::ReadOnly)) |
1090 | continue; |
1091 | |
1092 | QByteArray input = readOrMapFile(file: &file); |
1093 | |
1094 | file.close(); |
1095 | if (input.isEmpty()) |
1096 | continue; |
1097 | |
1098 | Symbols saveSymbols = symbols; |
1099 | int saveIndex = index; |
1100 | |
1101 | // phase 1: get rid of backslash-newlines |
1102 | input = cleaned(input); |
1103 | |
1104 | // phase 2: tokenize for the preprocessor |
1105 | symbols = tokenize(input); |
1106 | input.clear(); |
1107 | |
1108 | index = 0; |
1109 | |
1110 | // phase 3: preprocess conditions and substitute macros |
1111 | preprocessed += Symbol(0, MOC_INCLUDE_BEGIN, include); |
1112 | preprocess(filename: include, preprocessed); |
1113 | preprocessed += Symbol(lineNum, MOC_INCLUDE_END, include); |
1114 | |
1115 | symbols = saveSymbols; |
1116 | index = saveIndex; |
1117 | continue; |
1118 | } |
1119 | case PP_DEFINE: |
1120 | { |
1121 | next(); |
1122 | QByteArray name = lexem(); |
1123 | if (name.isEmpty() || !is_ident_start(s: name[0])) |
1124 | error(); |
1125 | Macro macro; |
1126 | macro.isVariadic = false; |
1127 | if (test(token: LPAREN)) { |
1128 | // we have a function macro |
1129 | macro.isFunction = true; |
1130 | parseDefineArguments(m: ¯o); |
1131 | } else { |
1132 | macro.isFunction = false; |
1133 | } |
1134 | int start = index; |
1135 | until(PP_NEWLINE); |
1136 | macro.symbols.reserve(asize: index - start - 1); |
1137 | |
1138 | // remove whitespace where there shouldn't be any: |
1139 | // Before and after the macro, after a # and around ## |
1140 | Token lastToken = HASH; // skip shitespace at the beginning |
1141 | for (int i = start; i < index - 1; ++i) { |
1142 | Token token = symbols.at(i).token; |
1143 | if (token == WHITESPACE) { |
1144 | if (lastToken == PP_HASH || lastToken == HASH || |
1145 | lastToken == PP_HASHHASH || |
1146 | lastToken == WHITESPACE) |
1147 | continue; |
1148 | } else if (token == PP_HASHHASH) { |
1149 | if (!macro.symbols.isEmpty() && |
1150 | lastToken == WHITESPACE) |
1151 | macro.symbols.pop_back(); |
1152 | } |
1153 | macro.symbols.append(t: symbols.at(i)); |
1154 | lastToken = token; |
1155 | } |
1156 | // remove trailing whitespace |
1157 | while (!macro.symbols.isEmpty() && |
1158 | (macro.symbols.constLast().token == PP_WHITESPACE || macro.symbols.constLast().token == WHITESPACE)) |
1159 | macro.symbols.pop_back(); |
1160 | |
1161 | if (!macro.symbols.isEmpty()) { |
1162 | if (macro.symbols.constFirst().token == PP_HASHHASH || |
1163 | macro.symbols.constLast().token == PP_HASHHASH) { |
1164 | error(msg: "'##' cannot appear at either end of a macro expansion" ); |
1165 | } |
1166 | } |
1167 | macros.insert(akey: name, avalue: macro); |
1168 | continue; |
1169 | } |
1170 | case PP_UNDEF: { |
1171 | next(); |
1172 | QByteArray name = lexem(); |
1173 | until(PP_NEWLINE); |
1174 | macros.remove(akey: name); |
1175 | continue; |
1176 | } |
1177 | case PP_IDENTIFIER: { |
1178 | // substitute macros |
1179 | macroExpand(into: &preprocessed, that: this, toExpand: symbols, index, lineNum: symbol().lineNum, one: true); |
1180 | continue; |
1181 | } |
1182 | case PP_HASH: |
1183 | until(PP_NEWLINE); |
1184 | continue; // skip unknown preprocessor statement |
1185 | case PP_IFDEF: |
1186 | case PP_IFNDEF: |
1187 | case PP_IF: |
1188 | while (!evaluateCondition()) { |
1189 | if (!skipBranch()) |
1190 | break; |
1191 | if (test(token: PP_ELIF)) { |
1192 | } else { |
1193 | until(PP_NEWLINE); |
1194 | break; |
1195 | } |
1196 | } |
1197 | continue; |
1198 | case PP_ELIF: |
1199 | case PP_ELSE: |
1200 | skipUntilEndif(); |
1201 | Q_FALLTHROUGH(); |
1202 | case PP_ENDIF: |
1203 | until(PP_NEWLINE); |
1204 | continue; |
1205 | case PP_NEWLINE: |
1206 | continue; |
1207 | case SIGNALS: |
1208 | case SLOTS: { |
1209 | Symbol sym = symbol(); |
1210 | if (macros.contains(akey: "QT_NO_KEYWORDS" )) |
1211 | sym.token = IDENTIFIER; |
1212 | else |
1213 | sym.token = (token == SIGNALS ? Q_SIGNALS_TOKEN : Q_SLOTS_TOKEN); |
1214 | preprocessed += sym; |
1215 | } continue; |
1216 | default: |
1217 | break; |
1218 | } |
1219 | preprocessed += symbol(); |
1220 | } |
1221 | |
1222 | currentFilenames.pop(); |
1223 | } |
1224 | |
1225 | Symbols Preprocessor::preprocessed(const QByteArray &filename, QFile *file) |
1226 | { |
1227 | QByteArray input = readOrMapFile(file); |
1228 | |
1229 | if (input.isEmpty()) |
1230 | return symbols; |
1231 | |
1232 | // phase 1: get rid of backslash-newlines |
1233 | input = cleaned(input); |
1234 | |
1235 | // phase 2: tokenize for the preprocessor |
1236 | index = 0; |
1237 | symbols = tokenize(input); |
1238 | |
1239 | #if 0 |
1240 | for (int j = 0; j < symbols.size(); ++j) |
1241 | fprintf(stderr, "line %d: %s(%s)\n" , |
1242 | symbols[j].lineNum, |
1243 | symbols[j].lexem().constData(), |
1244 | tokenTypeName(symbols[j].token)); |
1245 | #endif |
1246 | |
1247 | // phase 3: preprocess conditions and substitute macros |
1248 | Symbols result; |
1249 | // Preallocate some space to speed up the code below. |
1250 | // The magic value was found by logging the final size |
1251 | // and calculating an average when running moc over FOSS projects. |
1252 | result.reserve(asize: file->size() / 300000); |
1253 | preprocess(filename, preprocessed&: result); |
1254 | mergeStringLiterals(symbols: &result); |
1255 | |
1256 | #if 0 |
1257 | for (int j = 0; j < result.size(); ++j) |
1258 | fprintf(stderr, "line %d: %s(%s)\n" , |
1259 | result[j].lineNum, |
1260 | result[j].lexem().constData(), |
1261 | tokenTypeName(result[j].token)); |
1262 | #endif |
1263 | |
1264 | return result; |
1265 | } |
1266 | |
1267 | void Preprocessor::parseDefineArguments(Macro *m) |
1268 | { |
1269 | Symbols arguments; |
1270 | while (hasNext()) { |
1271 | while (test(token: PP_WHITESPACE)) {} |
1272 | Token t = next(); |
1273 | if (t == PP_RPAREN) |
1274 | break; |
1275 | if (t != PP_IDENTIFIER) { |
1276 | QByteArray l = lexem(); |
1277 | if (l == "..." ) { |
1278 | m->isVariadic = true; |
1279 | arguments += Symbol(symbol().lineNum, PP_IDENTIFIER, "__VA_ARGS__" ); |
1280 | while (test(token: PP_WHITESPACE)) {} |
1281 | if (!test(token: PP_RPAREN)) |
1282 | error(msg: "missing ')' in macro argument list" ); |
1283 | break; |
1284 | } else if (!is_identifier(s: l.constData(), len: l.length())) { |
1285 | error(msg: "Unexpected character in macro argument list." ); |
1286 | } |
1287 | } |
1288 | |
1289 | Symbol arg = symbol(); |
1290 | if (arguments.contains(t: arg)) |
1291 | error(msg: "Duplicate macro parameter." ); |
1292 | arguments += symbol(); |
1293 | |
1294 | while (test(token: PP_WHITESPACE)) {} |
1295 | t = next(); |
1296 | if (t == PP_RPAREN) |
1297 | break; |
1298 | if (t == PP_COMMA) |
1299 | continue; |
1300 | if (lexem() == "..." ) { |
1301 | //GCC extension: #define FOO(x, y...) x(y) |
1302 | // The last argument was already parsed. Just mark the macro as variadic. |
1303 | m->isVariadic = true; |
1304 | while (test(token: PP_WHITESPACE)) {} |
1305 | if (!test(token: PP_RPAREN)) |
1306 | error(msg: "missing ')' in macro argument list" ); |
1307 | break; |
1308 | } |
1309 | error(msg: "Unexpected character in macro argument list." ); |
1310 | } |
1311 | m->arguments = arguments; |
1312 | while (test(token: PP_WHITESPACE)) {} |
1313 | } |
1314 | |
1315 | void Preprocessor::until(Token t) |
1316 | { |
1317 | while(hasNext() && next() != t) |
1318 | ; |
1319 | } |
1320 | |
1321 | QT_END_NAMESPACE |
1322 | |