1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Copyright (C) 2014 Olivier Goffart <ogoffart@woboq.org> |
5 | ** Contact: https://www.qt.io/licensing/ |
6 | ** |
7 | ** This file is part of the tools applications of the Qt Toolkit. |
8 | ** |
9 | ** $QT_BEGIN_LICENSE:GPL-EXCEPT$ |
10 | ** Commercial License Usage |
11 | ** Licensees holding valid commercial Qt licenses may use this file in |
12 | ** accordance with the commercial license agreement provided with the |
13 | ** Software or, alternatively, in accordance with the terms contained in |
14 | ** a written agreement between you and The Qt Company. For licensing terms |
15 | ** and conditions see https://www.qt.io/terms-conditions. For further |
16 | ** information use the contact form at https://www.qt.io/contact-us. |
17 | ** |
18 | ** GNU General Public License Usage |
19 | ** Alternatively, this file may be used under the terms of the GNU |
20 | ** General Public License version 3 as published by the Free Software |
21 | ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT |
22 | ** included in the packaging of this file. Please review the following |
23 | ** information to ensure the GNU General Public License requirements will |
24 | ** be met: https://www.gnu.org/licenses/gpl-3.0.html. |
25 | ** |
26 | ** $QT_END_LICENSE$ |
27 | ** |
28 | ****************************************************************************/ |
29 | |
30 | #include "preprocessor.h" |
31 | #include "utils.h" |
32 | #include <qstringlist.h> |
33 | #include <qfile.h> |
34 | #include <qdir.h> |
35 | #include <qfileinfo.h> |
36 | |
37 | QT_BEGIN_NAMESPACE |
38 | |
39 | #include "ppkeywords.cpp" |
40 | #include "keywords.cpp" |
41 | |
42 | // transform \r\n into \n |
43 | // \r into \n (os9 style) |
44 | // backslash-newlines into newlines |
45 | static QByteArray cleaned(const QByteArray &input) |
46 | { |
47 | QByteArray result; |
48 | result.resize(size: input.size()); |
49 | const char *data = input.constData(); |
50 | const char *end = input.constData() + input.size(); |
51 | char *output = result.data(); |
52 | |
53 | int newlines = 0; |
54 | while (data != end) { |
55 | while (data != end && is_space(s: *data)) |
56 | ++data; |
57 | bool takeLine = (*data == '#'); |
58 | if (*data == '%' && *(data+1) == ':') { |
59 | takeLine = true; |
60 | ++data; |
61 | } |
62 | if (takeLine) { |
63 | *output = '#'; |
64 | ++output; |
65 | do ++data; while (data != end && is_space(s: *data)); |
66 | } |
67 | while (data != end) { |
68 | // handle \\\n, \\\r\n and \\\r |
69 | if (*data == '\\') { |
70 | if (*(data + 1) == '\r') { |
71 | ++data; |
72 | } |
73 | if (data != end && (*(data + 1) == '\n' || (*data) == '\r')) { |
74 | ++newlines; |
75 | data += 1; |
76 | if (data != end && *data != '\r') |
77 | data += 1; |
78 | continue; |
79 | } |
80 | } else if (*data == '\r' && *(data + 1) == '\n') { // reduce \r\n to \n |
81 | ++data; |
82 | } |
83 | if (data == end) |
84 | break; |
85 | |
86 | char ch = *data; |
87 | if (ch == '\r') // os9: replace \r with \n |
88 | ch = '\n'; |
89 | *output = ch; |
90 | ++output; |
91 | |
92 | if (*data == '\n') { |
93 | // output additional newlines to keep the correct line-numbering |
94 | // for the lines following the backslash-newline sequence(s) |
95 | while (newlines) { |
96 | *output = '\n'; |
97 | ++output; |
98 | --newlines; |
99 | } |
100 | ++data; |
101 | break; |
102 | } |
103 | ++data; |
104 | } |
105 | } |
106 | result.resize(size: output - result.constData()); |
107 | return result; |
108 | } |
109 | |
110 | bool Preprocessor::preprocessOnly = false; |
111 | void Preprocessor::skipUntilEndif() |
112 | { |
113 | while(index < symbols.size() - 1 && symbols.at(i: index).token != PP_ENDIF){ |
114 | switch (symbols.at(i: index).token) { |
115 | case PP_IF: |
116 | case PP_IFDEF: |
117 | case PP_IFNDEF: |
118 | ++index; |
119 | skipUntilEndif(); |
120 | break; |
121 | default: |
122 | ; |
123 | } |
124 | ++index; |
125 | } |
126 | } |
127 | |
128 | bool Preprocessor::skipBranch() |
129 | { |
130 | while (index < symbols.size() - 1 |
131 | && (symbols.at(i: index).token != PP_ENDIF |
132 | && symbols.at(i: index).token != PP_ELIF |
133 | && symbols.at(i: index).token != PP_ELSE) |
134 | ){ |
135 | switch (symbols.at(i: index).token) { |
136 | case PP_IF: |
137 | case PP_IFDEF: |
138 | case PP_IFNDEF: |
139 | ++index; |
140 | skipUntilEndif(); |
141 | break; |
142 | default: |
143 | ; |
144 | } |
145 | ++index; |
146 | } |
147 | return (index < symbols.size() - 1); |
148 | } |
149 | |
150 | |
151 | Symbols Preprocessor::tokenize(const QByteArray& input, int lineNum, Preprocessor::TokenizeMode mode) |
152 | { |
153 | Symbols symbols; |
154 | // Preallocate some space to speed up the code below. |
155 | // The magic divisor value was found by calculating the average ratio between |
156 | // input size and the final size of symbols. |
157 | // This yielded a value of 16.x when compiling Qt Base. |
158 | symbols.reserve(asize: input.size() / 16); |
159 | const char *begin = input.constData(); |
160 | const char *data = begin; |
161 | while (*data) { |
162 | if (mode == TokenizeCpp || mode == TokenizeDefine) { |
163 | int column = 0; |
164 | |
165 | const char *lexem = data; |
166 | int state = 0; |
167 | Token token = NOTOKEN; |
168 | for (;;) { |
169 | if (static_cast<signed char>(*data) < 0) { |
170 | ++data; |
171 | continue; |
172 | } |
173 | int nextindex = keywords[state].next; |
174 | int next = 0; |
175 | if (*data == keywords[state].defchar) |
176 | next = keywords[state].defnext; |
177 | else if (!state || nextindex) |
178 | next = keyword_trans[nextindex][(int)*data]; |
179 | if (!next) |
180 | break; |
181 | state = next; |
182 | token = keywords[state].token; |
183 | ++data; |
184 | } |
185 | |
186 | // suboptimal, is_ident_char should use a table |
187 | if (keywords[state].ident && is_ident_char(s: *data)) |
188 | token = keywords[state].ident; |
189 | |
190 | if (token == NOTOKEN) { |
191 | if (*data) |
192 | ++data; |
193 | // an error really, but let's ignore this input |
194 | // to not confuse moc later. However in pre-processor |
195 | // only mode let's continue. |
196 | if (!Preprocessor::preprocessOnly) |
197 | continue; |
198 | } |
199 | |
200 | ++column; |
201 | |
202 | if (token > SPECIAL_TREATMENT_MARK) { |
203 | switch (token) { |
204 | case QUOTE: |
205 | data = skipQuote(data); |
206 | token = STRING_LITERAL; |
207 | // concatenate multi-line strings for easier |
208 | // STRING_LITERAL handling in moc |
209 | if (!Preprocessor::preprocessOnly |
210 | && !symbols.isEmpty() |
211 | && symbols.constLast().token == STRING_LITERAL) { |
212 | |
213 | const QByteArray newString |
214 | = '\"' |
215 | + symbols.constLast().unquotedLexem() |
216 | + input.mid(index: lexem - begin + 1, len: data - lexem - 2) |
217 | + '\"'; |
218 | symbols.last() = Symbol(symbols.constLast().lineNum, |
219 | STRING_LITERAL, |
220 | newString); |
221 | continue; |
222 | } |
223 | break; |
224 | case SINGLEQUOTE: |
225 | while (*data && (*data != '\'' |
226 | || (*(data-1)=='\\' |
227 | && *(data-2)!='\\'))) |
228 | ++data; |
229 | if (*data) |
230 | ++data; |
231 | token = CHARACTER_LITERAL; |
232 | break; |
233 | case LANGLE_SCOPE: |
234 | // split <:: into two tokens, < and :: |
235 | token = LANGLE; |
236 | data -= 2; |
237 | break; |
238 | case DIGIT: |
239 | while (is_digit_char(s: *data) || *data == '\'') |
240 | ++data; |
241 | if (!*data || *data != '.') { |
242 | token = INTEGER_LITERAL; |
243 | if (data - lexem == 1 && |
244 | (*data == 'x' || *data == 'X') |
245 | && *lexem == '0') { |
246 | ++data; |
247 | while (is_hex_char(s: *data) || *data == '\'') |
248 | ++data; |
249 | } |
250 | break; |
251 | } |
252 | token = FLOATING_LITERAL; |
253 | ++data; |
254 | Q_FALLTHROUGH(); |
255 | case FLOATING_LITERAL: |
256 | while (is_digit_char(s: *data) || *data == '\'') |
257 | ++data; |
258 | if (*data == '+' || *data == '-') |
259 | ++data; |
260 | if (*data == 'e' || *data == 'E') { |
261 | ++data; |
262 | while (is_digit_char(s: *data) || *data == '\'') |
263 | ++data; |
264 | } |
265 | if (*data == 'f' || *data == 'F' |
266 | || *data == 'l' || *data == 'L') |
267 | ++data; |
268 | break; |
269 | case HASH: |
270 | if (column == 1 && mode == TokenizeCpp) { |
271 | mode = PreparePreprocessorStatement; |
272 | while (*data && (*data == ' ' || *data == '\t')) |
273 | ++data; |
274 | if (is_ident_char(s: *data)) |
275 | mode = TokenizePreprocessorStatement; |
276 | continue; |
277 | } |
278 | break; |
279 | case PP_HASHHASH: |
280 | if (mode == TokenizeCpp) |
281 | continue; |
282 | break; |
283 | case NEWLINE: |
284 | ++lineNum; |
285 | if (mode == TokenizeDefine) { |
286 | mode = TokenizeCpp; |
287 | // emit the newline token |
288 | break; |
289 | } |
290 | continue; |
291 | case BACKSLASH: |
292 | { |
293 | const char *rewind = data; |
294 | while (*data && (*data == ' ' || *data == '\t')) |
295 | ++data; |
296 | if (*data && *data == '\n') { |
297 | ++data; |
298 | continue; |
299 | } |
300 | data = rewind; |
301 | } break; |
302 | case CHARACTER: |
303 | while (is_ident_char(s: *data)) |
304 | ++data; |
305 | token = IDENTIFIER; |
306 | break; |
307 | case C_COMMENT: |
308 | if (*data) { |
309 | if (*data == '\n') |
310 | ++lineNum; |
311 | ++data; |
312 | if (*data) { |
313 | if (*data == '\n') |
314 | ++lineNum; |
315 | ++data; |
316 | } |
317 | } |
318 | while (*data && (*(data-1) != '/' || *(data-2) != '*')) { |
319 | if (*data == '\n') |
320 | ++lineNum; |
321 | ++data; |
322 | } |
323 | token = WHITESPACE; // one comment, one whitespace |
324 | Q_FALLTHROUGH(); |
325 | case WHITESPACE: |
326 | if (column == 1) |
327 | column = 0; |
328 | while (*data && (*data == ' ' || *data == '\t')) |
329 | ++data; |
330 | if (Preprocessor::preprocessOnly) // tokenize whitespace |
331 | break; |
332 | continue; |
333 | case CPP_COMMENT: |
334 | while (*data && *data != '\n') |
335 | ++data; |
336 | continue; // ignore safely, the newline is a separator |
337 | default: |
338 | continue; //ignore |
339 | } |
340 | } |
341 | #ifdef USE_LEXEM_STORE |
342 | if (!Preprocessor::preprocessOnly |
343 | && token != IDENTIFIER |
344 | && token != STRING_LITERAL |
345 | && token != FLOATING_LITERAL |
346 | && token != INTEGER_LITERAL) |
347 | symbols += Symbol(lineNum, token); |
348 | else |
349 | #endif |
350 | symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem); |
351 | |
352 | } else { // Preprocessor |
353 | |
354 | const char *lexem = data; |
355 | int state = 0; |
356 | Token token = NOTOKEN; |
357 | if (mode == TokenizePreprocessorStatement) { |
358 | state = pp_keyword_trans[0][(int)'#']; |
359 | mode = TokenizePreprocessor; |
360 | } |
361 | for (;;) { |
362 | if (static_cast<signed char>(*data) < 0) { |
363 | ++data; |
364 | continue; |
365 | } |
366 | int nextindex = pp_keywords[state].next; |
367 | int next = 0; |
368 | if (*data == pp_keywords[state].defchar) |
369 | next = pp_keywords[state].defnext; |
370 | else if (!state || nextindex) |
371 | next = pp_keyword_trans[nextindex][(int)*data]; |
372 | if (!next) |
373 | break; |
374 | state = next; |
375 | token = pp_keywords[state].token; |
376 | ++data; |
377 | } |
378 | // suboptimal, is_ident_char should use a table |
379 | if (pp_keywords[state].ident && is_ident_char(s: *data)) |
380 | token = pp_keywords[state].ident; |
381 | |
382 | switch (token) { |
383 | case NOTOKEN: |
384 | if (*data) |
385 | ++data; |
386 | break; |
387 | case PP_DEFINE: |
388 | mode = PrepareDefine; |
389 | break; |
390 | case PP_IFDEF: |
391 | symbols += Symbol(lineNum, PP_IF); |
392 | symbols += Symbol(lineNum, PP_DEFINED); |
393 | continue; |
394 | case PP_IFNDEF: |
395 | symbols += Symbol(lineNum, PP_IF); |
396 | symbols += Symbol(lineNum, PP_NOT); |
397 | symbols += Symbol(lineNum, PP_DEFINED); |
398 | continue; |
399 | case PP_INCLUDE: |
400 | mode = TokenizeInclude; |
401 | break; |
402 | case PP_QUOTE: |
403 | data = skipQuote(data); |
404 | token = PP_STRING_LITERAL; |
405 | break; |
406 | case PP_SINGLEQUOTE: |
407 | while (*data && (*data != '\'' |
408 | || (*(data-1)=='\\' |
409 | && *(data-2)!='\\'))) |
410 | ++data; |
411 | if (*data) |
412 | ++data; |
413 | token = PP_CHARACTER_LITERAL; |
414 | break; |
415 | case PP_DIGIT: |
416 | while (is_digit_char(s: *data) || *data == '\'') |
417 | ++data; |
418 | if (!*data || *data != '.') { |
419 | token = PP_INTEGER_LITERAL; |
420 | if (data - lexem == 1 && |
421 | (*data == 'x' || *data == 'X') |
422 | && *lexem == '0') { |
423 | ++data; |
424 | while (is_hex_char(s: *data) || *data == '\'') |
425 | ++data; |
426 | } |
427 | break; |
428 | } |
429 | token = PP_FLOATING_LITERAL; |
430 | ++data; |
431 | Q_FALLTHROUGH(); |
432 | case PP_FLOATING_LITERAL: |
433 | while (is_digit_char(s: *data) || *data == '\'') |
434 | ++data; |
435 | if (*data == '+' || *data == '-') |
436 | ++data; |
437 | if (*data == 'e' || *data == 'E') { |
438 | ++data; |
439 | while (is_digit_char(s: *data) || *data == '\'') |
440 | ++data; |
441 | } |
442 | if (*data == 'f' || *data == 'F' |
443 | || *data == 'l' || *data == 'L') |
444 | ++data; |
445 | break; |
446 | case PP_CHARACTER: |
447 | if (mode == PreparePreprocessorStatement) { |
448 | // rewind entire token to begin |
449 | data = lexem; |
450 | mode = TokenizePreprocessorStatement; |
451 | continue; |
452 | } |
453 | while (is_ident_char(s: *data)) |
454 | ++data; |
455 | token = PP_IDENTIFIER; |
456 | |
457 | if (mode == PrepareDefine) { |
458 | symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem); |
459 | // make sure we explicitly add the whitespace here if the next char |
460 | // is not an opening brace, so we can distinguish correctly between |
461 | // regular and function macros |
462 | if (*data != '(') |
463 | symbols += Symbol(lineNum, WHITESPACE); |
464 | mode = TokenizeDefine; |
465 | continue; |
466 | } |
467 | break; |
468 | case PP_C_COMMENT: |
469 | if (*data) { |
470 | if (*data == '\n') |
471 | ++lineNum; |
472 | ++data; |
473 | if (*data) { |
474 | if (*data == '\n') |
475 | ++lineNum; |
476 | ++data; |
477 | } |
478 | } |
479 | while (*data && (*(data-1) != '/' || *(data-2) != '*')) { |
480 | if (*data == '\n') |
481 | ++lineNum; |
482 | ++data; |
483 | } |
484 | token = PP_WHITESPACE; // one comment, one whitespace |
485 | Q_FALLTHROUGH(); |
486 | case PP_WHITESPACE: |
487 | while (*data && (*data == ' ' || *data == '\t')) |
488 | ++data; |
489 | continue; // the preprocessor needs no whitespace |
490 | case PP_CPP_COMMENT: |
491 | while (*data && *data != '\n') |
492 | ++data; |
493 | continue; // ignore safely, the newline is a separator |
494 | case PP_NEWLINE: |
495 | ++lineNum; |
496 | mode = TokenizeCpp; |
497 | break; |
498 | case PP_BACKSLASH: |
499 | { |
500 | const char *rewind = data; |
501 | while (*data && (*data == ' ' || *data == '\t')) |
502 | ++data; |
503 | if (*data && *data == '\n') { |
504 | ++data; |
505 | continue; |
506 | } |
507 | data = rewind; |
508 | } break; |
509 | case PP_LANGLE: |
510 | if (mode != TokenizeInclude) |
511 | break; |
512 | token = PP_STRING_LITERAL; |
513 | while (*data && *data != '\n' && *(data-1) != '>') |
514 | ++data; |
515 | break; |
516 | default: |
517 | break; |
518 | } |
519 | if (mode == PreparePreprocessorStatement) |
520 | continue; |
521 | #ifdef USE_LEXEM_STORE |
522 | if (token != PP_IDENTIFIER |
523 | && token != PP_STRING_LITERAL |
524 | && token != PP_FLOATING_LITERAL |
525 | && token != PP_INTEGER_LITERAL) |
526 | symbols += Symbol(lineNum, token); |
527 | else |
528 | #endif |
529 | symbols += Symbol(lineNum, token, input, lexem-begin, data-lexem); |
530 | } |
531 | } |
532 | symbols += Symbol(); // eof symbol |
533 | return symbols; |
534 | } |
535 | |
536 | void Preprocessor::macroExpand(Symbols *into, Preprocessor *that, const Symbols &toExpand, int &index, |
537 | int lineNum, bool one, const QSet<QByteArray> &excludeSymbols) |
538 | { |
539 | SymbolStack symbols; |
540 | SafeSymbols sf; |
541 | sf.symbols = toExpand; |
542 | sf.index = index; |
543 | sf.excludedSymbols = excludeSymbols; |
544 | symbols.push(t: sf); |
545 | |
546 | if (toExpand.isEmpty()) |
547 | return; |
548 | |
549 | for (;;) { |
550 | QByteArray macro; |
551 | Symbols newSyms = macroExpandIdentifier(that, symbols, lineNum, macroName: ¯o); |
552 | |
553 | if (macro.isEmpty()) { |
554 | // not a macro |
555 | Symbol s = symbols.symbol(); |
556 | s.lineNum = lineNum; |
557 | *into += s; |
558 | } else { |
559 | SafeSymbols sf; |
560 | sf.symbols = newSyms; |
561 | sf.index = 0; |
562 | sf.expandedMacro = macro; |
563 | symbols.push(t: sf); |
564 | } |
565 | if (!symbols.hasNext() || (one && symbols.size() == 1)) |
566 | break; |
567 | symbols.next(); |
568 | } |
569 | |
570 | if (symbols.size()) |
571 | index = symbols.top().index; |
572 | else |
573 | index = toExpand.size(); |
574 | } |
575 | |
576 | |
577 | Symbols Preprocessor::macroExpandIdentifier(Preprocessor *that, SymbolStack &symbols, int lineNum, QByteArray *macroName) |
578 | { |
579 | Symbol s = symbols.symbol(); |
580 | |
581 | // not a macro |
582 | if (s.token != PP_IDENTIFIER || !that->macros.contains(akey: s) || symbols.dontReplaceSymbol(name: s.lexem())) { |
583 | return Symbols(); |
584 | } |
585 | |
586 | const Macro ¯o = that->macros.value(akey: s); |
587 | *macroName = s.lexem(); |
588 | |
589 | Symbols expansion; |
590 | if (!macro.isFunction) { |
591 | expansion = macro.symbols; |
592 | } else { |
593 | bool haveSpace = false; |
594 | while (symbols.test(token: PP_WHITESPACE)) { haveSpace = true; } |
595 | if (!symbols.test(token: PP_LPAREN)) { |
596 | *macroName = QByteArray(); |
597 | Symbols syms; |
598 | if (haveSpace) |
599 | syms += Symbol(lineNum, PP_WHITESPACE); |
600 | syms += s; |
601 | syms.last().lineNum = lineNum; |
602 | return syms; |
603 | } |
604 | QVarLengthArray<Symbols, 5> arguments; |
605 | while (symbols.hasNext()) { |
606 | Symbols argument; |
607 | // strip leading space |
608 | while (symbols.test(token: PP_WHITESPACE)) {} |
609 | int nesting = 0; |
610 | bool vararg = macro.isVariadic && (arguments.size() == macro.arguments.size() - 1); |
611 | while (symbols.hasNext()) { |
612 | Token t = symbols.next(); |
613 | if (t == PP_LPAREN) { |
614 | ++nesting; |
615 | } else if (t == PP_RPAREN) { |
616 | --nesting; |
617 | if (nesting < 0) |
618 | break; |
619 | } else if (t == PP_COMMA && nesting == 0) { |
620 | if (!vararg) |
621 | break; |
622 | } |
623 | argument += symbols.symbol(); |
624 | } |
625 | arguments += argument; |
626 | |
627 | if (nesting < 0) |
628 | break; |
629 | else if (!symbols.hasNext()) |
630 | that->error(msg: "missing ')' in macro usage" ); |
631 | } |
632 | |
633 | // empty VA_ARGS |
634 | if (macro.isVariadic && arguments.size() == macro.arguments.size() - 1) |
635 | arguments += Symbols(); |
636 | |
637 | // now replace the macro arguments with the expanded arguments |
638 | enum Mode { |
639 | Normal, |
640 | Hash, |
641 | HashHash |
642 | } mode = Normal; |
643 | |
644 | for (int i = 0; i < macro.symbols.size(); ++i) { |
645 | const Symbol &s = macro.symbols.at(i); |
646 | if (s.token == HASH || s.token == PP_HASHHASH) { |
647 | mode = (s.token == HASH ? Hash : HashHash); |
648 | continue; |
649 | } |
650 | int index = macro.arguments.indexOf(t: s); |
651 | if (mode == Normal) { |
652 | if (index >= 0 && index < arguments.size()) { |
653 | // each argument undoergoes macro expansion if it's not used as part of a # or ## |
654 | if (i == macro.symbols.size() - 1 || macro.symbols.at(i: i + 1).token != PP_HASHHASH) { |
655 | Symbols arg = arguments.at(idx: index); |
656 | int idx = 1; |
657 | macroExpand(into: &expansion, that, toExpand: arg, index&: idx, lineNum, one: false, excludeSymbols: symbols.excludeSymbols()); |
658 | } else { |
659 | expansion += arguments.at(idx: index); |
660 | } |
661 | } else { |
662 | expansion += s; |
663 | } |
664 | } else if (mode == Hash) { |
665 | if (index < 0) { |
666 | that->error(msg: "'#' is not followed by a macro parameter" ); |
667 | continue; |
668 | } else if (index >= arguments.size()) { |
669 | that->error(msg: "Macro invoked with too few parameters for a use of '#'" ); |
670 | continue; |
671 | } |
672 | |
673 | const Symbols &arg = arguments.at(idx: index); |
674 | QByteArray stringified; |
675 | for (int i = 0; i < arg.size(); ++i) { |
676 | stringified += arg.at(i).lexem(); |
677 | } |
678 | stringified.replace(before: '"', c: "\\\"" ); |
679 | stringified.prepend(c: '"'); |
680 | stringified.append(c: '"'); |
681 | expansion += Symbol(lineNum, STRING_LITERAL, stringified); |
682 | } else if (mode == HashHash){ |
683 | if (s.token == WHITESPACE) |
684 | continue; |
685 | |
686 | while (expansion.size() && expansion.constLast().token == PP_WHITESPACE) |
687 | expansion.pop_back(); |
688 | |
689 | Symbol next = s; |
690 | if (index >= 0 && index < arguments.size()) { |
691 | const Symbols &arg = arguments.at(idx: index); |
692 | if (arg.size() == 0) { |
693 | mode = Normal; |
694 | continue; |
695 | } |
696 | next = arg.at(i: 0); |
697 | } |
698 | |
699 | if (!expansion.isEmpty() && expansion.constLast().token == s.token |
700 | && expansion.constLast().token != STRING_LITERAL) { |
701 | Symbol last = expansion.takeLast(); |
702 | |
703 | QByteArray lexem = last.lexem() + next.lexem(); |
704 | expansion += Symbol(lineNum, last.token, lexem); |
705 | } else { |
706 | expansion += next; |
707 | } |
708 | |
709 | if (index >= 0 && index < arguments.size()) { |
710 | const Symbols &arg = arguments.at(idx: index); |
711 | for (int i = 1; i < arg.size(); ++i) |
712 | expansion += arg.at(i); |
713 | } |
714 | } |
715 | mode = Normal; |
716 | } |
717 | if (mode != Normal) |
718 | that->error(msg: "'#' or '##' found at the end of a macro argument" ); |
719 | |
720 | } |
721 | |
722 | return expansion; |
723 | } |
724 | |
725 | void Preprocessor::substituteUntilNewline(Symbols &substituted) |
726 | { |
727 | while (hasNext()) { |
728 | Token token = next(); |
729 | if (token == PP_IDENTIFIER) { |
730 | macroExpand(into: &substituted, that: this, toExpand: symbols, index, lineNum: symbol().lineNum, one: true); |
731 | } else if (token == PP_DEFINED) { |
732 | bool braces = test(token: PP_LPAREN); |
733 | next(token: PP_IDENTIFIER); |
734 | Symbol definedOrNotDefined = symbol(); |
735 | definedOrNotDefined.token = macros.contains(akey: definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE; |
736 | substituted += definedOrNotDefined; |
737 | if (braces) |
738 | test(token: PP_RPAREN); |
739 | continue; |
740 | } else if (token == PP_NEWLINE) { |
741 | substituted += symbol(); |
742 | break; |
743 | } else { |
744 | substituted += symbol(); |
745 | } |
746 | } |
747 | } |
748 | |
749 | |
750 | class PP_Expression : public Parser |
751 | { |
752 | public: |
753 | int value() { index = 0; return unary_expression_lookup() ? conditional_expression() : 0; } |
754 | |
755 | int conditional_expression(); |
756 | int logical_OR_expression(); |
757 | int logical_AND_expression(); |
758 | int inclusive_OR_expression(); |
759 | int exclusive_OR_expression(); |
760 | int AND_expression(); |
761 | int equality_expression(); |
762 | int relational_expression(); |
763 | int shift_expression(); |
764 | int additive_expression(); |
765 | int multiplicative_expression(); |
766 | int unary_expression(); |
767 | bool unary_expression_lookup(); |
768 | int primary_expression(); |
769 | bool primary_expression_lookup(); |
770 | }; |
771 | |
772 | int PP_Expression::conditional_expression() |
773 | { |
774 | int value = logical_OR_expression(); |
775 | if (test(token: PP_QUESTION)) { |
776 | int alt1 = conditional_expression(); |
777 | int alt2 = test(token: PP_COLON) ? conditional_expression() : 0; |
778 | return value ? alt1 : alt2; |
779 | } |
780 | return value; |
781 | } |
782 | |
783 | int PP_Expression::logical_OR_expression() |
784 | { |
785 | int value = logical_AND_expression(); |
786 | if (test(token: PP_OROR)) |
787 | return logical_OR_expression() || value; |
788 | return value; |
789 | } |
790 | |
791 | int PP_Expression::logical_AND_expression() |
792 | { |
793 | int value = inclusive_OR_expression(); |
794 | if (test(token: PP_ANDAND)) |
795 | return logical_AND_expression() && value; |
796 | return value; |
797 | } |
798 | |
799 | int PP_Expression::inclusive_OR_expression() |
800 | { |
801 | int value = exclusive_OR_expression(); |
802 | if (test(token: PP_OR)) |
803 | return value | inclusive_OR_expression(); |
804 | return value; |
805 | } |
806 | |
807 | int PP_Expression::exclusive_OR_expression() |
808 | { |
809 | int value = AND_expression(); |
810 | if (test(token: PP_HAT)) |
811 | return value ^ exclusive_OR_expression(); |
812 | return value; |
813 | } |
814 | |
815 | int PP_Expression::AND_expression() |
816 | { |
817 | int value = equality_expression(); |
818 | if (test(token: PP_AND)) |
819 | return value & AND_expression(); |
820 | return value; |
821 | } |
822 | |
823 | int PP_Expression::equality_expression() |
824 | { |
825 | int value = relational_expression(); |
826 | switch (next()) { |
827 | case PP_EQEQ: |
828 | return value == equality_expression(); |
829 | case PP_NE: |
830 | return value != equality_expression(); |
831 | default: |
832 | prev(); |
833 | return value; |
834 | } |
835 | } |
836 | |
837 | int PP_Expression::relational_expression() |
838 | { |
839 | int value = shift_expression(); |
840 | switch (next()) { |
841 | case PP_LANGLE: |
842 | return value < relational_expression(); |
843 | case PP_RANGLE: |
844 | return value > relational_expression(); |
845 | case PP_LE: |
846 | return value <= relational_expression(); |
847 | case PP_GE: |
848 | return value >= relational_expression(); |
849 | default: |
850 | prev(); |
851 | return value; |
852 | } |
853 | } |
854 | |
855 | int PP_Expression::shift_expression() |
856 | { |
857 | int value = additive_expression(); |
858 | switch (next()) { |
859 | case PP_LTLT: |
860 | return value << shift_expression(); |
861 | case PP_GTGT: |
862 | return value >> shift_expression(); |
863 | default: |
864 | prev(); |
865 | return value; |
866 | } |
867 | } |
868 | |
869 | int PP_Expression::additive_expression() |
870 | { |
871 | int value = multiplicative_expression(); |
872 | switch (next()) { |
873 | case PP_PLUS: |
874 | return value + additive_expression(); |
875 | case PP_MINUS: |
876 | return value - additive_expression(); |
877 | default: |
878 | prev(); |
879 | return value; |
880 | } |
881 | } |
882 | |
883 | int PP_Expression::multiplicative_expression() |
884 | { |
885 | int value = unary_expression(); |
886 | switch (next()) { |
887 | case PP_STAR: |
888 | return value * multiplicative_expression(); |
889 | case PP_PERCENT: |
890 | { |
891 | int remainder = multiplicative_expression(); |
892 | return remainder ? value % remainder : 0; |
893 | } |
894 | case PP_SLASH: |
895 | { |
896 | int div = multiplicative_expression(); |
897 | return div ? value / div : 0; |
898 | } |
899 | default: |
900 | prev(); |
901 | return value; |
902 | }; |
903 | } |
904 | |
905 | int PP_Expression::unary_expression() |
906 | { |
907 | switch (next()) { |
908 | case PP_PLUS: |
909 | return unary_expression(); |
910 | case PP_MINUS: |
911 | return -unary_expression(); |
912 | case PP_NOT: |
913 | return !unary_expression(); |
914 | case PP_TILDE: |
915 | return ~unary_expression(); |
916 | case PP_MOC_TRUE: |
917 | return 1; |
918 | case PP_MOC_FALSE: |
919 | return 0; |
920 | default: |
921 | prev(); |
922 | return primary_expression(); |
923 | } |
924 | } |
925 | |
926 | bool PP_Expression::unary_expression_lookup() |
927 | { |
928 | Token t = lookup(); |
929 | return (primary_expression_lookup() |
930 | || t == PP_PLUS |
931 | || t == PP_MINUS |
932 | || t == PP_NOT |
933 | || t == PP_TILDE |
934 | || t == PP_DEFINED); |
935 | } |
936 | |
937 | int PP_Expression::primary_expression() |
938 | { |
939 | int value; |
940 | if (test(token: PP_LPAREN)) { |
941 | value = conditional_expression(); |
942 | test(token: PP_RPAREN); |
943 | } else { |
944 | next(); |
945 | value = lexem().toInt(ok: 0, base: 0); |
946 | } |
947 | return value; |
948 | } |
949 | |
950 | bool PP_Expression::primary_expression_lookup() |
951 | { |
952 | Token t = lookup(); |
953 | return (t == PP_IDENTIFIER |
954 | || t == PP_INTEGER_LITERAL |
955 | || t == PP_FLOATING_LITERAL |
956 | || t == PP_MOC_TRUE |
957 | || t == PP_MOC_FALSE |
958 | || t == PP_LPAREN); |
959 | } |
960 | |
961 | int Preprocessor::evaluateCondition() |
962 | { |
963 | PP_Expression expression; |
964 | expression.currentFilenames = currentFilenames; |
965 | |
966 | substituteUntilNewline(substituted&: expression.symbols); |
967 | |
968 | return expression.value(); |
969 | } |
970 | |
971 | static QByteArray readOrMapFile(QFile *file) |
972 | { |
973 | const qint64 size = file->size(); |
974 | char *rawInput = reinterpret_cast<char*>(file->map(offset: 0, size)); |
975 | return rawInput ? QByteArray::fromRawData(rawInput, size) : file->readAll(); |
976 | } |
977 | |
978 | static void mergeStringLiterals(Symbols *_symbols) |
979 | { |
980 | Symbols &symbols = *_symbols; |
981 | for (Symbols::iterator i = symbols.begin(); i != symbols.end(); ++i) { |
982 | if (i->token == STRING_LITERAL) { |
983 | Symbols::Iterator mergeSymbol = i; |
984 | int literalsLength = mergeSymbol->len; |
985 | while (++i != symbols.end() && i->token == STRING_LITERAL) |
986 | literalsLength += i->len - 2; // no quotes |
987 | |
988 | if (literalsLength != mergeSymbol->len) { |
989 | QByteArray mergeSymbolOriginalLexem = mergeSymbol->unquotedLexem(); |
990 | QByteArray &mergeSymbolLexem = mergeSymbol->lex; |
991 | mergeSymbolLexem.resize(size: 0); |
992 | mergeSymbolLexem.reserve(asize: literalsLength); |
993 | mergeSymbolLexem.append(c: '"'); |
994 | mergeSymbolLexem.append(a: mergeSymbolOriginalLexem); |
995 | for (Symbols::const_iterator j = mergeSymbol + 1; j != i; ++j) |
996 | mergeSymbolLexem.append(s: j->lex.constData() + j->from + 1, len: j->len - 2); // append j->unquotedLexem() |
997 | mergeSymbolLexem.append(c: '"'); |
998 | mergeSymbol->len = mergeSymbol->lex.length(); |
999 | mergeSymbol->from = 0; |
1000 | i = symbols.erase(abegin: mergeSymbol + 1, aend: i); |
1001 | } |
1002 | if (i == symbols.end()) |
1003 | break; |
1004 | } |
1005 | } |
1006 | } |
1007 | |
1008 | static QByteArray searchIncludePaths(const QList<Parser::IncludePath> &includepaths, |
1009 | const QByteArray &include) |
1010 | { |
1011 | QFileInfo fi; |
1012 | for (int j = 0; j < includepaths.size() && !fi.exists(); ++j) { |
1013 | const Parser::IncludePath &p = includepaths.at(i: j); |
1014 | if (p.isFrameworkPath) { |
1015 | const int slashPos = include.indexOf(c: '/'); |
1016 | if (slashPos == -1) |
1017 | continue; |
1018 | fi.setFile(dir: QString::fromLocal8Bit(str: p.path + '/' + include.left(len: slashPos) + ".framework/Headers/" ), |
1019 | file: QString::fromLocal8Bit(str: include.mid(index: slashPos + 1))); |
1020 | } else { |
1021 | fi.setFile(dir: QString::fromLocal8Bit(str: p.path), file: QString::fromLocal8Bit(str: include)); |
1022 | } |
1023 | // try again, maybe there's a file later in the include paths with the same name |
1024 | // (186067) |
1025 | if (fi.isDir()) { |
1026 | fi = QFileInfo(); |
1027 | continue; |
1028 | } |
1029 | } |
1030 | |
1031 | if (!fi.exists() || fi.isDir()) |
1032 | return QByteArray(); |
1033 | return fi.canonicalFilePath().toLocal8Bit(); |
1034 | } |
1035 | |
1036 | QByteArray Preprocessor::resolveInclude(const QByteArray &include, const QByteArray &relativeTo) |
1037 | { |
1038 | if (!relativeTo.isEmpty()) { |
1039 | QFileInfo fi; |
1040 | fi.setFile(dir: QFileInfo(QString::fromLocal8Bit(str: relativeTo)).dir(), file: QString::fromLocal8Bit(str: include)); |
1041 | if (fi.exists() && !fi.isDir()) |
1042 | return fi.canonicalFilePath().toLocal8Bit(); |
1043 | } |
1044 | |
1045 | auto it = nonlocalIncludePathResolutionCache.find(akey: include); |
1046 | if (it == nonlocalIncludePathResolutionCache.end()) |
1047 | it = nonlocalIncludePathResolutionCache.insert(akey: include, avalue: searchIncludePaths(includepaths: includes, include)); |
1048 | return it.value(); |
1049 | } |
1050 | |
1051 | void Preprocessor::preprocess(const QByteArray &filename, Symbols &preprocessed) |
1052 | { |
1053 | currentFilenames.push(x: filename); |
1054 | preprocessed.reserve(asize: preprocessed.size() + symbols.size()); |
1055 | while (hasNext()) { |
1056 | Token token = next(); |
1057 | |
1058 | switch (token) { |
1059 | case PP_INCLUDE: |
1060 | { |
1061 | int lineNum = symbol().lineNum; |
1062 | QByteArray include; |
1063 | bool local = false; |
1064 | if (test(token: PP_STRING_LITERAL)) { |
1065 | local = lexem().startsWith(c: '\"'); |
1066 | include = unquotedLexem(); |
1067 | } else |
1068 | continue; |
1069 | until(PP_NEWLINE); |
1070 | |
1071 | include = resolveInclude(include, relativeTo: local ? filename : QByteArray()); |
1072 | if (include.isNull()) |
1073 | continue; |
1074 | |
1075 | if (Preprocessor::preprocessedIncludes.contains(value: include)) |
1076 | continue; |
1077 | Preprocessor::preprocessedIncludes.insert(value: include); |
1078 | |
1079 | QFile file(QString::fromLocal8Bit(str: include.constData())); |
1080 | if (!file.open(flags: QFile::ReadOnly)) |
1081 | continue; |
1082 | |
1083 | QByteArray input = readOrMapFile(file: &file); |
1084 | |
1085 | file.close(); |
1086 | if (input.isEmpty()) |
1087 | continue; |
1088 | |
1089 | Symbols saveSymbols = symbols; |
1090 | int saveIndex = index; |
1091 | |
1092 | // phase 1: get rid of backslash-newlines |
1093 | input = cleaned(input); |
1094 | |
1095 | // phase 2: tokenize for the preprocessor |
1096 | symbols = tokenize(input); |
1097 | input.clear(); |
1098 | |
1099 | index = 0; |
1100 | |
1101 | // phase 3: preprocess conditions and substitute macros |
1102 | preprocessed += Symbol(0, MOC_INCLUDE_BEGIN, include); |
1103 | preprocess(filename: include, preprocessed); |
1104 | preprocessed += Symbol(lineNum, MOC_INCLUDE_END, include); |
1105 | |
1106 | symbols = saveSymbols; |
1107 | index = saveIndex; |
1108 | continue; |
1109 | } |
1110 | case PP_DEFINE: |
1111 | { |
1112 | next(token: IDENTIFIER); |
1113 | QByteArray name = lexem(); |
1114 | Macro macro; |
1115 | macro.isVariadic = false; |
1116 | Token t = next(); |
1117 | if (t == LPAREN) { |
1118 | // we have a function macro |
1119 | macro.isFunction = true; |
1120 | parseDefineArguments(m: ¯o); |
1121 | } else if (t == PP_WHITESPACE){ |
1122 | macro.isFunction = false; |
1123 | } else { |
1124 | error(msg: "Moc: internal error" ); |
1125 | } |
1126 | int start = index; |
1127 | until(PP_NEWLINE); |
1128 | macro.symbols.reserve(asize: index - start - 1); |
1129 | |
1130 | // remove whitespace where there shouldn't be any: |
1131 | // Before and after the macro, after a # and around ## |
1132 | Token lastToken = HASH; // skip shitespace at the beginning |
1133 | for (int i = start; i < index - 1; ++i) { |
1134 | Token token = symbols.at(i).token; |
1135 | if (token == PP_WHITESPACE || token == WHITESPACE) { |
1136 | if (lastToken == PP_HASH || lastToken == HASH || |
1137 | lastToken == PP_HASHHASH || |
1138 | lastToken == PP_WHITESPACE || lastToken == WHITESPACE) |
1139 | continue; |
1140 | } else if (token == PP_HASHHASH) { |
1141 | if (!macro.symbols.isEmpty() && |
1142 | (lastToken == PP_WHITESPACE || lastToken == WHITESPACE)) |
1143 | macro.symbols.pop_back(); |
1144 | } |
1145 | macro.symbols.append(t: symbols.at(i)); |
1146 | lastToken = token; |
1147 | } |
1148 | // remove trailing whitespace |
1149 | while (!macro.symbols.isEmpty() && |
1150 | (macro.symbols.constLast().token == PP_WHITESPACE || macro.symbols.constLast().token == WHITESPACE)) |
1151 | macro.symbols.pop_back(); |
1152 | |
1153 | if (!macro.symbols.isEmpty()) { |
1154 | if (macro.symbols.constFirst().token == PP_HASHHASH || |
1155 | macro.symbols.constLast().token == PP_HASHHASH) { |
1156 | error(msg: "'##' cannot appear at either end of a macro expansion" ); |
1157 | } |
1158 | } |
1159 | macros.insert(akey: name, avalue: macro); |
1160 | continue; |
1161 | } |
1162 | case PP_UNDEF: { |
1163 | next(token: IDENTIFIER); |
1164 | QByteArray name = lexem(); |
1165 | until(PP_NEWLINE); |
1166 | macros.remove(akey: name); |
1167 | continue; |
1168 | } |
1169 | case PP_IDENTIFIER: { |
1170 | // substitute macros |
1171 | macroExpand(into: &preprocessed, that: this, toExpand: symbols, index, lineNum: symbol().lineNum, one: true); |
1172 | continue; |
1173 | } |
1174 | case PP_HASH: |
1175 | until(PP_NEWLINE); |
1176 | continue; // skip unknown preprocessor statement |
1177 | case PP_IFDEF: |
1178 | case PP_IFNDEF: |
1179 | case PP_IF: |
1180 | while (!evaluateCondition()) { |
1181 | if (!skipBranch()) |
1182 | break; |
1183 | if (test(token: PP_ELIF)) { |
1184 | } else { |
1185 | until(PP_NEWLINE); |
1186 | break; |
1187 | } |
1188 | } |
1189 | continue; |
1190 | case PP_ELIF: |
1191 | case PP_ELSE: |
1192 | skipUntilEndif(); |
1193 | Q_FALLTHROUGH(); |
1194 | case PP_ENDIF: |
1195 | until(PP_NEWLINE); |
1196 | continue; |
1197 | case PP_NEWLINE: |
1198 | continue; |
1199 | case SIGNALS: |
1200 | case SLOTS: { |
1201 | Symbol sym = symbol(); |
1202 | if (macros.contains(akey: "QT_NO_KEYWORDS" )) |
1203 | sym.token = IDENTIFIER; |
1204 | else |
1205 | sym.token = (token == SIGNALS ? Q_SIGNALS_TOKEN : Q_SLOTS_TOKEN); |
1206 | preprocessed += sym; |
1207 | } continue; |
1208 | default: |
1209 | break; |
1210 | } |
1211 | preprocessed += symbol(); |
1212 | } |
1213 | |
1214 | currentFilenames.pop(); |
1215 | } |
1216 | |
1217 | Symbols Preprocessor::preprocessed(const QByteArray &filename, QFile *file) |
1218 | { |
1219 | QByteArray input = readOrMapFile(file); |
1220 | |
1221 | if (input.isEmpty()) |
1222 | return symbols; |
1223 | |
1224 | // phase 1: get rid of backslash-newlines |
1225 | input = cleaned(input); |
1226 | |
1227 | // phase 2: tokenize for the preprocessor |
1228 | index = 0; |
1229 | symbols = tokenize(input); |
1230 | |
1231 | #if 0 |
1232 | for (int j = 0; j < symbols.size(); ++j) |
1233 | fprintf(stderr, "line %d: %s(%s)\n" , |
1234 | symbols[j].lineNum, |
1235 | symbols[j].lexem().constData(), |
1236 | tokenTypeName(symbols[j].token)); |
1237 | #endif |
1238 | |
1239 | // phase 3: preprocess conditions and substitute macros |
1240 | Symbols result; |
1241 | // Preallocate some space to speed up the code below. |
1242 | // The magic value was found by logging the final size |
1243 | // and calculating an average when running moc over FOSS projects. |
1244 | result.reserve(asize: file->size() / 300000); |
1245 | preprocess(filename, preprocessed&: result); |
1246 | mergeStringLiterals(symbols: &result); |
1247 | |
1248 | #if 0 |
1249 | for (int j = 0; j < result.size(); ++j) |
1250 | fprintf(stderr, "line %d: %s(%s)\n" , |
1251 | result[j].lineNum, |
1252 | result[j].lexem().constData(), |
1253 | tokenTypeName(result[j].token)); |
1254 | #endif |
1255 | |
1256 | return result; |
1257 | } |
1258 | |
1259 | void Preprocessor::parseDefineArguments(Macro *m) |
1260 | { |
1261 | Symbols arguments; |
1262 | while (hasNext()) { |
1263 | while (test(token: PP_WHITESPACE)) {} |
1264 | Token t = next(); |
1265 | if (t == PP_RPAREN) |
1266 | break; |
1267 | if (t != PP_IDENTIFIER) { |
1268 | QByteArray l = lexem(); |
1269 | if (l == "..." ) { |
1270 | m->isVariadic = true; |
1271 | arguments += Symbol(symbol().lineNum, PP_IDENTIFIER, "__VA_ARGS__" ); |
1272 | while (test(token: PP_WHITESPACE)) {} |
1273 | if (!test(token: PP_RPAREN)) |
1274 | error(msg: "missing ')' in macro argument list" ); |
1275 | break; |
1276 | } else if (!is_identifier(s: l.constData(), len: l.length())) { |
1277 | error(msg: "Unexpected character in macro argument list." ); |
1278 | } |
1279 | } |
1280 | |
1281 | Symbol arg = symbol(); |
1282 | if (arguments.contains(t: arg)) |
1283 | error(msg: "Duplicate macro parameter." ); |
1284 | arguments += symbol(); |
1285 | |
1286 | while (test(token: PP_WHITESPACE)) {} |
1287 | t = next(); |
1288 | if (t == PP_RPAREN) |
1289 | break; |
1290 | if (t == PP_COMMA) |
1291 | continue; |
1292 | if (lexem() == "..." ) { |
1293 | //GCC extension: #define FOO(x, y...) x(y) |
1294 | // The last argument was already parsed. Just mark the macro as variadic. |
1295 | m->isVariadic = true; |
1296 | while (test(token: PP_WHITESPACE)) {} |
1297 | if (!test(token: PP_RPAREN)) |
1298 | error(msg: "missing ')' in macro argument list" ); |
1299 | break; |
1300 | } |
1301 | error(msg: "Unexpected character in macro argument list." ); |
1302 | } |
1303 | m->arguments = arguments; |
1304 | while (test(token: PP_WHITESPACE)) {} |
1305 | } |
1306 | |
1307 | void Preprocessor::until(Token t) |
1308 | { |
1309 | while(hasNext() && next() != t) |
1310 | ; |
1311 | } |
1312 | |
1313 | QT_END_NAMESPACE |
1314 | |