preprocessor.cpp source code [qtbase/src/tools/moc/preprocessor.cpp]

1	// Copyright (C) 2016 The Qt Company Ltd.
2	// Copyright (C) 2014 Olivier Goffart <ogoffart@woboq.org>
3	// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
4
5	#include "preprocessor.h"
6	#include "utils.h"
7	#include <qstringlist.h>
8	#include <qfile.h>
9	#include <qdir.h>
10	#include <qfileinfo.h>
11	#include <qvarlengtharray.h>
12
13	QT_BEGIN_NAMESPACE
14
15	using namespace QtMiscUtils;
16
17	#include "ppkeywords.cpp"
18	#include "keywords.cpp"
19
20	// transform \r\n into \n
21	// \r into \n (os9 style)
22	// backslash-newlines into newlines
23	static QByteArray cleaned(const QByteArray &input)
24	{
25	QByteArray result;
26	result.resize(size: input.size());
27	const char *data = input.constData();
28	const char *end = input.constData() + input.size();
29	char *output = result.data();
30
31	int newlines = `0`;
32	while (data != end) {
33	while (data != end && is_space(s: *data))
34	++data;
35	bool takeLine = (*data == `'#'`);
36	if (data == `'%'` && (data+`1`) == `':'`) {
37	takeLine = true;
38	++data;
39	}
40	if (takeLine) {
41	*output = `'#'`;
42	++output;
43	do ++data; while (data != end && is_space(s: *data));
44	}
45	while (data != end) {
46	// handle \\\n, \\\r\n and \\\r
47	if (*data == `'\\'`) {
48	if (*(data + `1`) == `'\r'`) {
49	++data;
50	}
51	if (data != end && ((data + `1`) == `'\n'` \|\| (data) == `'\r'`)) {
52	++newlines;
53	data += `1`;
54	if (data != end && *data != `'\r'`)
55	data += `1`;
56	continue;
57	}
58	} else if (data == `'\r'` && (data + `1`) == `'\n'`) { // reduce \r\n to \n
59	++data;
60	}
61	if (data == end)
62	break;
63
64	char ch = *data;
65	if (ch == `'\r'`) // os9: replace \r with \n
66	ch = `'\n'`;
67	*output = ch;
68	++output;
69
70	if (*data == `'\n'`) {
71	// output additional newlines to keep the correct line-numbering
72	// for the lines following the backslash-newline sequence(s)
73	while (newlines) {
74	*output = `'\n'`;
75	++output;
76	--newlines;
77	}
78	++data;
79	break;
80	}
81	++data;
82	}
83	}
84	result.resize(size: output - result.constData());
85	return result;
86	}
87
88	bool Preprocessor::preprocessOnly = false;
89	void Preprocessor::skipUntilEndif()
90	{
91	while(index < symbols.size() - `1` && symbols.at(i: index).token != PP_ENDIF){
92	switch (symbols.at(i: index).token) {
93	case PP_IF:
94	case PP_IFDEF:
95	case PP_IFNDEF:
96	++index;
97	skipUntilEndif();
98	break;
99	default:
100	;
101	}
102	++index;
103	}
104	}
105
106	bool Preprocessor::skipBranch()
107	{
108	while (index < symbols.size() - `1`
109	&& (symbols.at(i: index).token != PP_ENDIF
110	&& symbols.at(i: index).token != PP_ELIF
111	&& symbols.at(i: index).token != PP_ELSE)
112	){
113	switch (symbols.at(i: index).token) {
114	case PP_IF:
115	case PP_IFDEF:
116	case PP_IFNDEF:
117	++index;
118	skipUntilEndif();
119	break;
120	default:
121	;
122	}
123	++index;
124	}
125	return (index < symbols.size() - `1`);
126	}
127
128
129	Symbols Preprocessor::tokenize(const QByteArray& input, int lineNum, Preprocessor::TokenizeMode mode)
130	{
131	Symbols symbols;
132	// Preallocate some space to speed up the code below.
133	// The magic divisor value was found by calculating the average ratio between
134	// input size and the final size of symbols.
135	// This yielded a value of 16.x when compiling Qt Base.
136	symbols.reserve(asize: input.size() / `16`);
137	const char *begin = input.constData();
138	const char *data = begin;
139	while (*data) {
140	if (mode == TokenizeCpp \|\| mode == TokenizeDefine) {
141	int column = `0`;
142
143	const char *lexem = data;
144	int state = `0`;
145	Token token = NOTOKEN;
146	for (;;) {
147	if (static_cast<signed char>(*data) < `0`) {
148	++data;
149	continue;
150	}
151	int nextindex = keywords[state].next;
152	int next = `0`;
153	if (*data == keywords[state].defchar)
154	next = keywords[state].defnext;
155	else if (!state \|\| nextindex)
156	next = keyword_trans[nextindex][(int)*data];
157	if (!next)
158	break;
159	state = next;
160	token = keywords[state].token;
161	++data;
162	}
163
164	// suboptimal, is_ident_char should use a table
165	if (keywords[state].ident && is_ident_char(s: *data))
166	token = keywords[state].ident;
167
168	if (token == NOTOKEN) {
169	if (*data)
170	++data;
171	// an error really, but let's ignore this input
172	// to not confuse moc later. However in pre-processor
173	// only mode let's continue.
174	if (!Preprocessor::preprocessOnly)
175	continue;
176	}
177
178	++column;
179
180	if (token > SPECIAL_TREATMENT_MARK) {
181	switch (token) {
182	case QUOTE:
183	data = skipQuote(data);
184	token = STRING_LITERAL;
185	// concatenate multi-line strings for easier
186	// STRING_LITERAL handling in moc
187	if (!Preprocessor::preprocessOnly
188	&& !symbols.isEmpty()
189	&& symbols.constLast().token == STRING_LITERAL) {
190
191	const QByteArray newString
192	= `'\"'`
193	+ symbols.constLast().unquotedLexem()
194	+ input.mid(index: lexem - begin + `1`, len: data - lexem - `2`)
195	+ `'\"'`;
196	symbols.last() = Symbol (symbols.constLast().lineNum,
197	STRING_LITERAL,
198	newString);
199	continue;
200	}
201	break;
202	case SINGLEQUOTE:
203	while (data && (data != `'\''`
204	\|\| (*(data-`1`)==`'\\'`
205	&& *(data-`2`)!=`'\\'`)))
206	++data;
207	if (*data)
208	++data;
209	token = CHARACTER_LITERAL;
210	break;
211	case LANGLE_SCOPE:
212	// split <:: into two tokens, < and ::
213	token = LANGLE;
214	data -= `2`;
215	break;
216	case DIGIT:
217	{
218	bool hasSeenTokenSeparator = false;;
219	while (isAsciiDigit(c: data) \|\| (hasSeenTokenSeparator = data == `'\''`))
220	++data;
221	if (!data \|\| data != `'.'`) {
222	token = INTEGER_LITERAL;
223	if (data - lexem == `1` &&
224	(data == `'x'` \|\| data == `'X'`
225	\|\| data == `'b'` \|\| data == `'B'`)
226	&& *lexem == `'0'`) {
227	++data;
228	while (isHexDigit(c: data) \|\| (hasSeenTokenSeparator = data == `'\''`))
229	++data;
230	} else if (data == `'L'`) // TODO: handle other suffixes*
231	++data;
232	if (!hasSeenTokenSeparator) {
233	while (is_ident_char(s: *data)) {
234	++data;
235	token = IDENTIFIER;
236	}
237	}
238	break;
239	}
240	token = FLOATING_LITERAL;
241	++data;
242	Q_FALLTHROUGH();
243	}
244	case FLOATING_LITERAL:
245	while (isAsciiDigit(c: data) \|\| data == `'\''`)
246	++data;
247	if (data == `'+'` \|\| data == `'-'`)
248	++data;
249	if (data == `'e'` \|\| data == `'E'`) {
250	++data;
251	while (isAsciiDigit(c: data) \|\| data == `'\''`)
252	++data;
253	}
254	if (data == `'f'` \|\| data == `'F'`
255	\|\| data == `'l'` \|\| data == `'L'`)
256	++data;
257	break;
258	case HASH:
259	if (column == `1` && mode == TokenizeCpp) {
260	mode = PreparePreprocessorStatement;
261	while (data && (data == `' '` \|\| *data == `'\t'`))
262	++data;
263	if (is_ident_char(s: *data))
264	mode = TokenizePreprocessorStatement;
265	continue;
266	}
267	break;
268	case PP_HASHHASH:
269	if (mode == TokenizeCpp)
270	continue;
271	break;
272	case NEWLINE:
273	++lineNum;
274	if (mode == TokenizeDefine) {
275	mode = TokenizeCpp;
276	// emit the newline token
277	break;
278	}
279	continue;
280	case BACKSLASH:
281	{
282	const char *rewind = data;
283	while (data && (data == `' '` \|\| *data == `'\t'`))
284	++data;
285	if (data && data == `'\n'`) {
286	++data;
287	continue;
288	}
289	data = rewind;
290	} break;
291	case CHARACTER:
292	while (is_ident_char(s: *data))
293	++data;
294	token = IDENTIFIER;
295	break;
296	case C_COMMENT:
297	if (*data) {
298	if (*data == `'\n'`)
299	++lineNum;
300	++data;
301	if (*data) {
302	if (*data == `'\n'`)
303	++lineNum;
304	++data;
305	}
306	}
307	while (data && ((data-`1`) != `'/'` \|\| (data-`2`) != `''`)) {
308	if (*data == `'\n'`)
309	++lineNum;
310	++data;
311	}
312	token = WHITESPACE; // one comment, one whitespace
313	Q_FALLTHROUGH();
314	case WHITESPACE:
315	if (column == `1`)
316	column = `0`;
317	while (data && (data == `' '` \|\| *data == `'\t'`))
318	++data;
319	if (Preprocessor::preprocessOnly) // tokenize whitespace
320	break;
321	continue;
322	case CPP_COMMENT:
323	while (data && data != `'\n'`)
324	++data;
325	continue; // ignore safely, the newline is a separator
326	default:
327	continue; //ignore
328	}
329	}
330	symbols += Symbol (lineNum, token, input, lexem-begin, data-lexem);
331
332	} else { // Preprocessor
333
334	const char *lexem = data;
335	int state = `0`;
336	Token token = NOTOKEN;
337	if (mode == TokenizePreprocessorStatement) {
338	state = pp_keyword_trans[`0`][(int)`'#'`];
339	mode = TokenizePreprocessor;
340	}
341	for (;;) {
342	if (static_cast<signed char>(*data) < `0`) {
343	++data;
344	continue;
345	}
346	int nextindex = pp_keywords[state].next;
347	int next = `0`;
348	if (*data == pp_keywords[state].defchar)
349	next = pp_keywords[state].defnext;
350	else if (!state \|\| nextindex)
351	next = pp_keyword_trans[nextindex][(int)*data];
352	if (!next)
353	break;
354	state = next;
355	token = pp_keywords[state].token;
356	++data;
357	}
358	// suboptimal, is_ident_char should use a table
359	if (pp_keywords[state].ident && is_ident_char(s: *data))
360	token = pp_keywords[state].ident;
361
362	switch (token) {
363	case NOTOKEN:
364	if (*data)
365	++data;
366	break;
367	case PP_DEFINE:
368	mode = PrepareDefine;
369	break;
370	case PP_IFDEF:
371	symbols += Symbol (lineNum, PP_IF);
372	symbols += Symbol (lineNum, PP_DEFINED);
373	continue;
374	case PP_IFNDEF:
375	symbols += Symbol (lineNum, PP_IF);
376	symbols += Symbol (lineNum, PP_NOT);
377	symbols += Symbol (lineNum, PP_DEFINED);
378	continue;
379	case PP_INCLUDE:
380	mode = TokenizeInclude;
381	break;
382	case PP_QUOTE:
383	data = skipQuote(data);
384	token = PP_STRING_LITERAL;
385	break;
386	case PP_SINGLEQUOTE:
387	while (data && (data != `'\''`
388	\|\| (*(data-`1`)==`'\\'`
389	&& *(data-`2`)!=`'\\'`)))
390	++data;
391	if (*data)
392	++data;
393	token = PP_CHARACTER_LITERAL;
394	break;
395	case PP_DIGIT:
396	while (isAsciiDigit(c: data) \|\| data == `'\''`)
397	++data;
398	if (!data \|\| data != `'.'`) {
399	token = PP_INTEGER_LITERAL;
400	if (data - lexem == `1` &&
401	(data == `'x'` \|\| data == `'X'`)
402	&& *lexem == `'0'`) {
403	++data;
404	while (isHexDigit(c: data) \|\| data == `'\''`)
405	++data;
406	} else if (data == `'L'`) // TODO: handle other suffixes*
407	++data;
408	break;
409	}
410	token = PP_FLOATING_LITERAL;
411	++data;
412	Q_FALLTHROUGH();
413	case PP_FLOATING_LITERAL:
414	while (isAsciiDigit(c: data) \|\| data == `'\''`)
415	++data;
416	if (data == `'+'` \|\| data == `'-'`)
417	++data;
418	if (data == `'e'` \|\| data == `'E'`) {
419	++data;
420	while (isAsciiDigit(c: data) \|\| data == `'\''`)
421	++data;
422	}
423	if (data == `'f'` \|\| data == `'F'`
424	\|\| data == `'l'` \|\| data == `'L'`)
425	++data;
426	break;
427	case PP_CHARACTER:
428	if (mode == PreparePreprocessorStatement) {
429	// rewind entire token to begin
430	data = lexem;
431	mode = TokenizePreprocessorStatement;
432	continue;
433	}
434	while (is_ident_char(s: *data))
435	++data;
436	token = PP_IDENTIFIER;
437
438	if (mode == PrepareDefine) {
439	symbols += Symbol (lineNum, token, input, lexem-begin, data-lexem);
440	// make sure we explicitly add the whitespace here if the next char
441	// is not an opening brace, so we can distinguish correctly between
442	// regular and function macros
443	if (*data != `'('`)
444	symbols += Symbol (lineNum, WHITESPACE);
445	mode = TokenizeDefine;
446	continue;
447	}
448	break;
449	case PP_C_COMMENT:
450	if (*data) {
451	if (*data == `'\n'`)
452	++lineNum;
453	++data;
454	if (*data) {
455	if (*data == `'\n'`)
456	++lineNum;
457	++data;
458	}
459	}
460	while (data && ((data-`1`) != `'/'` \|\| (data-`2`) != `''`)) {
461	if (*data == `'\n'`)
462	++lineNum;
463	++data;
464	}
465	token = PP_WHITESPACE; // one comment, one whitespace
466	Q_FALLTHROUGH();
467	case PP_WHITESPACE:
468	while (data && (data == `' '` \|\| *data == `'\t'`))
469	++data;
470	continue; // the preprocessor needs no whitespace
471	case PP_CPP_COMMENT:
472	while (data && data != `'\n'`)
473	++data;
474	continue; // ignore safely, the newline is a separator
475	case PP_NEWLINE:
476	++lineNum;
477	mode = TokenizeCpp;
478	break;
479	case PP_BACKSLASH:
480	{
481	const char *rewind = data;
482	while (data && (data == `' '` \|\| *data == `'\t'`))
483	++data;
484	if (data && data == `'\n'`) {
485	++data;
486	continue;
487	}
488	data = rewind;
489	} break;
490	case PP_LANGLE:
491	if (mode != TokenizeInclude)
492	break;
493	token = PP_STRING_LITERAL;
494	while (data && data != `'\n'` && *(data-`1`) != `'>'`)
495	++data;
496	break;
497	default:
498	break;
499	}
500	if (mode == PreparePreprocessorStatement)
501	continue;
502	symbols += Symbol (lineNum, token, input, lexem-begin, data-lexem);
503	}
504	}
505	symbols += Symbol (); // eof symbol
506	return symbols;
507	}
508
509	void Preprocessor::macroExpand(Symbols into, Preprocessor that, const Symbols &toExpand, qsizetype &index,
510	int lineNum, bool one, const QSet<QByteArray> &excludeSymbols)
511	{
512	SymbolStack symbols;
513	SafeSymbols sf;
514	sf.symbols = toExpand;
515	sf.index = index;
516	sf.excludedSymbols = excludeSymbols;
517	symbols.push(t: sf);
518
519	if (toExpand.isEmpty())
520	return;
521
522	for (;;) {
523	QByteArray macro;
524	Symbols newSyms = macroExpandIdentifier(that, symbols, lineNum, macroName: &macro);
525
526	if (macro.isEmpty()) {
527	// not a macro
528	Symbol s = symbols.symbol();
529	s.lineNum = lineNum;
530	*into += s;
531	} else {
532	SafeSymbols sf;
533	sf.symbols = newSyms;
534	sf.index = `0`;
535	sf.expandedMacro = macro;
536	symbols.push(t: sf);
537	}
538	if (!symbols.hasNext() \|\| (one && symbols.size() == `1`))
539	break;
540	symbols.next();
541	}
542
543	if (symbols.size())
544	index = symbols.top().index;
545	else
546	index = toExpand.size();
547	}
548
549
550	Symbols Preprocessor::macroExpandIdentifier(Preprocessor that, SymbolStack &symbols, int* lineNum, QByteArray *macroName)
551	{
552	Symbol s = symbols.symbol();
553
554	// not a macro
555	if (s.token != PP_IDENTIFIER \|\| !that->macros.contains(key: s) \|\| symbols.dontReplaceSymbol(name: s.lexem())) {
556	return Symbols ();
557	}
558
559	const Macro &macro = that->macros.value(key: s);
560	*macroName = s.lexem();
561
562	Symbols expansion;
563	if (!macro.isFunction) {
564	expansion = macro.symbols;
565	} else {
566	bool haveSpace = false;
567	while (symbols.test(token: PP_WHITESPACE)) { haveSpace = true; }
568	if (!symbols.test(token: PP_LPAREN)) {
569	*macroName = QByteArray ();
570	Symbols syms;
571	if (haveSpace)
572	syms += Symbol (lineNum, PP_WHITESPACE);
573	syms += s;
574	syms.last().lineNum = lineNum;
575	return syms;
576	}
577	QVarLengthArray<Symbols, `5`> arguments;
578	while (symbols.hasNext()) {
579	Symbols argument;
580	// strip leading space
581	while (symbols.test(token: PP_WHITESPACE)) {}
582	int nesting = `0`;
583	bool vararg = macro.isVariadic && (arguments.size() == macro.arguments.size() - `1`);
584	while (symbols.hasNext()) {
585	Token t = symbols.next();
586	if (t == PP_LPAREN) {
587	++nesting;
588	} else if (t == PP_RPAREN) {
589	--nesting;
590	if (nesting < `0`)
591	break;
592	} else if (t == PP_COMMA && nesting == `0`) {
593	if (!vararg)
594	break;
595	}
596	argument += symbols.symbol();
597	}
598	arguments += argument;
599
600	if (nesting < `0`)
601	break;
602	else if (!symbols.hasNext())
603	that->error(msg: "missing ')' in macro usage");
604	}
605
606	// empty VA_ARGS
607	if (macro.isVariadic && arguments.size() == macro.arguments.size() - `1`)
608	arguments += Symbols ();
609
610	// now replace the macro arguments with the expanded arguments
611	enum Mode {
612	Normal,
613	Hash,
614	HashHash
615	} mode = Normal;
616
617	const auto end = macro.symbols.cend();
618	auto it = macro.symbols.cbegin();
619	const auto lastSym = std::prev(x: macro.symbols.cend(), n: !macro.symbols.isEmpty() ? `1` : `0`);
620	for (; it != end; ++it) {
621	const Symbol &s = *it;
622	if (s.token == HASH \|\| s.token == PP_HASHHASH) {
623	mode = (s.token == HASH ? Hash : HashHash);
624	continue;
625	}
626	const qsizetype index = macro.arguments.indexOf(t: s);
627	if (mode == Normal) {
628	if (index >= `0` && index < arguments.size()) {
629	// each argument undoergoes macro expansion if it's not used as part of a # or ##
630	if (it == lastSym \|\| std::next(x: it)->token != PP_HASHHASH) {
631	Symbols arg = arguments.at(idx: index);
632	qsizetype idx = `1`;
633	macroExpand(into: &expansion, that, toExpand: arg, index&: idx, lineNum, one: false, excludeSymbols: symbols.excludeSymbols());
634	} else {
635	expansion += arguments.at(idx: index);
636	}
637	} else {
638	expansion += s;
639	}
640	} else if (mode == Hash) {
641	if (index < `0`) {
642	that->error(msg: "'#' is not followed by a macro parameter");
643	continue;
644	} else if (index >= arguments.size()) {
645	that->error(msg: "Macro invoked with too few parameters for a use of '#'");
646	continue;
647	}
648
649	const Symbols &arg = arguments.at(idx: index);
650	QByteArray stringified;
651	for (const Symbol &sym : arg)
652	stringified += sym.lexem();
653
654	stringified.replace(before: `'"'`, after: "\\\"");
655	stringified.prepend(c: `'"'`);
656	stringified.append(c: `'"'`);
657	expansion += Symbol (lineNum, STRING_LITERAL, stringified);
658	} else if (mode == HashHash){
659	if (s.token == WHITESPACE)
660	continue;
661
662	while (expansion.size() && expansion.constLast().token == PP_WHITESPACE)
663	expansion.pop_back();
664
665	Symbol next = s;
666	if (index >= `0` && index < arguments.size()) {
667	const Symbols &arg = arguments.at(idx: index);
668	if (arg.size() == `0`) {
669	mode = Normal;
670	continue;
671	}
672	next = arg.at(i: `0`);
673	}
674
675	if (!expansion.isEmpty() && expansion.constLast().token == s.token
676	&& expansion.constLast().token != STRING_LITERAL) {
677	Symbol last = expansion.takeLast();
678
679	QByteArray lexem = last.lexem() + next.lexem();
680	expansion += Symbol (lineNum, last.token, lexem);
681	} else {
682	expansion += next;
683	}
684
685	if (index >= `0` && index < arguments.size()) {
686	const Symbols &arg = arguments.at(idx: index);
687	if (!arg.isEmpty())
688	expansion.append(i1: arg.cbegin() + `1`, i2: arg.cend());
689	}
690	}
691	mode = Normal;
692	}
693	if (mode != Normal)
694	that->error(msg: "'#' or '##' found at the end of a macro argument");
695
696	}
697
698	return expansion;
699	}
700
701	void Preprocessor::substituteUntilNewline(Symbols &substituted)
702	{
703	while (hasNext()) {
704	Token token = next();
705	if (token == PP_IDENTIFIER) {
706	macroExpand(into: &substituted, that: this, toExpand: symbols, index, lineNum: symbol().lineNum, one: true);
707	} else if (token == PP_DEFINED) {
708	bool braces = test(token: PP_LPAREN);
709	next(token: PP_IDENTIFIER);
710	Symbol definedOrNotDefined = symbol();
711	definedOrNotDefined.token = macros.contains(key: definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE;
712	substituted += definedOrNotDefined;
713	if (braces)
714	test(token: PP_RPAREN);
715	continue;
716	} else if (token == PP_NEWLINE) {
717	substituted += symbol();
718	break;
719	} else {
720	substituted += symbol();
721	}
722	}
723	}
724
725
726	class PP_Expression : public Parser
727	{
728	public:
729	int value() { index = `0`; return unary_expression_lookup() ? conditional_expression() : `0`; }
730
731	int conditional_expression();
732	int logical_OR_expression();
733	int logical_AND_expression();
734	int inclusive_OR_expression();
735	int exclusive_OR_expression();
736	int AND_expression();
737	int equality_expression();
738	int relational_expression();
739	int shift_expression();
740	int additive_expression();
741	int multiplicative_expression();
742	int unary_expression();
743	bool unary_expression_lookup();
744	int primary_expression();
745	bool primary_expression_lookup();
746	};
747
748	int PP_Expression::conditional_expression()
749	{
750	int value = logical_OR_expression();
751	if (test(token: PP_QUESTION)) {
752	int alt1 = conditional_expression();
753	int alt2 = test(token: PP_COLON) ? conditional_expression() : `0`;
754	return value ? alt1 : alt2;
755	}
756	return value;
757	}
758
759	int PP_Expression::logical_OR_expression()
760	{
761	int value = logical_AND_expression();
762	if (test(token: PP_OROR))
763	return logical_OR_expression() \|\| value;
764	return value;
765	}
766
767	int PP_Expression::logical_AND_expression()
768	{
769	int value = inclusive_OR_expression();
770	if (test(token: PP_ANDAND))
771	return logical_AND_expression() && value;
772	return value;
773	}
774
775	int PP_Expression::inclusive_OR_expression()
776	{
777	int value = exclusive_OR_expression();
778	if (test(token: PP_OR))
779	return value \| inclusive_OR_expression();
780	return value;
781	}
782
783	int PP_Expression::exclusive_OR_expression()
784	{
785	int value = AND_expression();
786	if (test(token: PP_HAT))
787	return value ^ exclusive_OR_expression();
788	return value;
789	}
790
791	int PP_Expression::AND_expression()
792	{
793	int value = equality_expression();
794	if (test(token: PP_AND))
795	return value & AND_expression();
796	return value;
797	}
798
799	int PP_Expression::equality_expression()
800	{
801	int value = relational_expression();
802	switch (next()) {
803	case PP_EQEQ:
804	return value == equality_expression();
805	case PP_NE:
806	return value != equality_expression();
807	default:
808	prev();
809	return value;
810	}
811	}
812
813	int PP_Expression::relational_expression()
814	{
815	int value = shift_expression();
816	switch (next()) {
817	case PP_LANGLE:
818	return value < relational_expression();
819	case PP_RANGLE:
820	return value > relational_expression();
821	case PP_LE:
822	return value <= relational_expression();
823	case PP_GE:
824	return value >= relational_expression();
825	default:
826	prev();
827	return value;
828	}
829	}
830
831	int PP_Expression::shift_expression()
832	{
833	int value = additive_expression();
834	switch (next()) {
835	case PP_LTLT:
836	return value << shift_expression();
837	case PP_GTGT:
838	return value >> shift_expression();
839	default:
840	prev();
841	return value;
842	}
843	}
844
845	int PP_Expression::additive_expression()
846	{
847	int value = multiplicative_expression();
848	switch (next()) {
849	case PP_PLUS:
850	return value + additive_expression();
851	case PP_MINUS:
852	return value - additive_expression();
853	default:
854	prev();
855	return value;
856	}
857	}
858
859	int PP_Expression::multiplicative_expression()
860	{
861	int value = unary_expression();
862	switch (next()) {
863	case PP_STAR:
864	{
865	// get well behaved overflow behavior by converting to long
866	// and then back to int
867	// NOTE: A conformant preprocessor would need to work intmax_t/
868	// uintmax_t according to [cpp.cond], 19.1 §10
869	// But we're not compliant anyway
870	qint64 result = qint64(value) * qint64(multiplicative_expression());
871	return int(result);
872	}
873	case PP_PERCENT:
874	{
875	int remainder = multiplicative_expression();
876	return remainder ? value % remainder : `0`;
877	}
878	case PP_SLASH:
879	{
880	int div = multiplicative_expression();
881	return div ? value / div : `0`;
882	}
883	default:
884	prev();
885	return value;
886	};
887	}
888
889	int PP_Expression::unary_expression()
890	{
891	switch (next()) {
892	case PP_PLUS:
893	return unary_expression();
894	case PP_MINUS:
895	return -unary_expression();
896	case PP_NOT:
897	return !unary_expression();
898	case PP_TILDE:
899	return ~unary_expression();
900	case PP_MOC_TRUE:
901	return `1`;
902	case PP_MOC_FALSE:
903	return `0`;
904	default:
905	prev();
906	return primary_expression();
907	}
908	}
909
910	bool PP_Expression::unary_expression_lookup()
911	{
912	Token t = lookup();
913	return (primary_expression_lookup()
914	\|\| t == PP_PLUS
915	\|\| t == PP_MINUS
916	\|\| t == PP_NOT
917	\|\| t == PP_TILDE
918	\|\| t == PP_DEFINED);
919	}
920
921	int PP_Expression::primary_expression()
922	{
923	int value;
924	if (test(token: PP_LPAREN)) {
925	value = conditional_expression();
926	test(token: PP_RPAREN);
927	} else {
928	next();
929	const QByteArray &lex = lexem();
930	auto lexView = QByteArrayView (lex);
931	if (lex.endsWith(c: `'L'`))
932	lexView.chop(n: `1`);
933	value = lexView.toInt(ok: nullptr, base: `0`);
934	}
935	return value;
936	}
937
938	bool PP_Expression::primary_expression_lookup()
939	{
940	Token t = lookup();
941	return (t == PP_IDENTIFIER
942	\|\| t == PP_INTEGER_LITERAL
943	\|\| t == PP_FLOATING_LITERAL
944	\|\| t == PP_MOC_TRUE
945	\|\| t == PP_MOC_FALSE
946	\|\| t == PP_LPAREN);
947	}
948
949	int Preprocessor::evaluateCondition()
950	{
951	PP_Expression expression;
952	expression.currentFilenames = currentFilenames;
953
954	substituteUntilNewline(substituted&: expression.symbols);
955
956	return expression.value();
957	}
958
959	static QByteArray readOrMapFile(QFile *file)
960	{
961	const qint64 size = file->size();
962	char rawInput = reinterpret_cast<char**>(file->map(offset: `0`, size));
963	return rawInput ? QByteArray::fromRawData(data: rawInput, size) : file->readAll();
964	}
965
966	static void mergeStringLiterals(Symbols *_symbols)
967	{
968	Symbols &symbols = *_symbols;
969	for (Symbols::iterator i = symbols.begin(); i != symbols.end(); ++i) {
970	if (i ->token == STRING_LITERAL) {
971	Symbols::Iterator mergeSymbol = i;
972	qsizetype literalsLength = mergeSymbol ->len;
973	while (++i != symbols.end() && i ->token == STRING_LITERAL)
974	literalsLength += i ->len - `2`; // no quotes
975
976	if (literalsLength != mergeSymbol ->len) {
977	QByteArray mergeSymbolOriginalLexem = mergeSymbol ->unquotedLexem();
978	QByteArray &mergeSymbolLexem = mergeSymbol ->lex;
979	mergeSymbolLexem.resize(size: `0`);
980	mergeSymbolLexem.reserve(asize: literalsLength);
981	mergeSymbolLexem.append(c: `'"'`);
982	mergeSymbolLexem.append(a: mergeSymbolOriginalLexem);
983	for (Symbols::iterator j = mergeSymbol + `1`; j != i; ++j)
984	mergeSymbolLexem.append(s: j ->lex.constData() + j ->from + `1`, len: j ->len - `2`); // append j->unquotedLexem()
985	mergeSymbolLexem.append(c: `'"'`);
986	mergeSymbol ->len = mergeSymbol ->lex.size();
987	mergeSymbol ->from = `0`;
988	i = symbols.erase(abegin: mergeSymbol + `1`, aend: i);
989	}
990	if (i == symbols.end())
991	break;
992	}
993	}
994	}
995
996	static QByteArray searchIncludePaths(const QList<Parser::IncludePath> &includepaths,
997	const QByteArray &include,
998	const bool debugIncludes)
999	{
1000	QFileInfo fi;
1001
1002	if (Q_UNLIKELY(debugIncludes)) {
1003	fprintf(stderr, format: "debug-includes: searching for '%s'\n", include.constData());
1004	}
1005
1006	for (const Parser::IncludePath &p : includepaths) {
1007	if (fi.exists())
1008	break;
1009
1010	if (p.isFrameworkPath) {
1011	const qsizetype slashPos = include.indexOf(ch: `'/'`);
1012	if (slashPos == -`1`)
1013	continue;
1014	fi.setFile(dir: QString::fromLocal8Bit(ba: p.path + `'/'` + include.left(n: slashPos) + ".framework/Headers/"),
1015	file: QString::fromLocal8Bit(ba: include.mid(index: slashPos + `1`)));
1016	} else {
1017	fi.setFile(dir: QString::fromLocal8Bit(ba: p.path), file: QString::fromLocal8Bit(ba: include));
1018	}
1019
1020	if (Q_UNLIKELY(debugIncludes)) {
1021	const auto candidate = fi.filePath().toLocal8Bit();
1022	fprintf(stderr, format: "debug-includes: considering '%s'\n", candidate.constData());
1023	}
1024
1025	// try again, maybe there's a file later in the include paths with the same name
1026	// (186067)
1027	if (fi.isDir()) {
1028	fi = QFileInfo ();
1029	continue;
1030	}
1031	}
1032
1033	if (!fi.exists() \|\| fi.isDir()) {
1034	if (Q_UNLIKELY(debugIncludes)) {
1035	fprintf(stderr, format: "debug-includes: can't find '%s'\n", include.constData());
1036	}
1037	return QByteArray ();
1038	}
1039
1040	const auto result = fi.canonicalFilePath().toLocal8Bit();
1041
1042	if (Q_UNLIKELY(debugIncludes)) {
1043	fprintf(stderr, format: "debug-includes: found '%s'\n", result.constData());
1044	}
1045
1046	return result;
1047	}
1048
1049	QByteArray Preprocessor::resolveInclude(const QByteArray &include, const QByteArray &relativeTo)
1050	{
1051	if (!relativeTo.isEmpty()) {
1052	QFileInfo fi;
1053	fi.setFile(dir: QFileInfo (QString::fromLocal8Bit(ba: relativeTo)).dir(), file: QString::fromLocal8Bit(ba: include));
1054	if (fi.exists() && !fi.isDir())
1055	return fi.canonicalFilePath().toLocal8Bit();
1056	}
1057
1058	auto it = nonlocalIncludePathResolutionCache.find(key: include);
1059	if (it == nonlocalIncludePathResolutionCache.end())
1060	it = nonlocalIncludePathResolutionCache.insert(key: include,
1061	value: searchIncludePaths(
1062	includepaths: includes,
1063	include,
1064	debugIncludes));
1065	return it.value();
1066	}
1067
1068	void Preprocessor::preprocess(const QByteArray &filename, Symbols &preprocessed)
1069	{
1070	currentFilenames.push(x: filename);
1071	preprocessed.reserve(asize: preprocessed.size() + symbols.size());
1072	while (hasNext()) {
1073	Token token = next();
1074
1075	switch (token) {
1076	case PP_INCLUDE:
1077	{
1078	int lineNum = symbol().lineNum;
1079	QByteArray include;
1080	bool local = false;
1081	if (test(token: PP_STRING_LITERAL)) {
1082	local = lexem().startsWith(c: `'\"'`);
1083	include = unquotedLexem();
1084	} else
1085	continue;
1086	until(PP_NEWLINE);
1087
1088	include = resolveInclude(include, relativeTo: local ? filename : QByteArray ());
1089	if (include.isNull())
1090	continue;
1091
1092	if (Preprocessor::preprocessedIncludes.contains(value: include))
1093	continue;
1094	Preprocessor::preprocessedIncludes.insert(value: include);
1095
1096	QFile file(QString::fromLocal8Bit(ba: include.constData()));
1097	if (!file.open(flags: QFile::ReadOnly))
1098	continue;
1099
1100	QByteArray input = readOrMapFile(file: &file);
1101
1102	file.close();
1103	if (input.isEmpty())
1104	continue;
1105
1106	Symbols saveSymbols = symbols;
1107	qsizetype saveIndex = index;
1108
1109	// phase 1: get rid of backslash-newlines
1110	input = cleaned(input);
1111
1112	// phase 2: tokenize for the preprocessor
1113	symbols = tokenize(input);
1114	input.clear();
1115
1116	index = `0`;
1117
1118	// phase 3: preprocess conditions and substitute macros
1119	preprocessed += Symbol (`0`, MOC_INCLUDE_BEGIN, include);
1120	preprocess(filename: include, preprocessed);
1121	preprocessed += Symbol (lineNum, MOC_INCLUDE_END, include);
1122
1123	symbols = saveSymbols;
1124	index = saveIndex;
1125	continue;
1126	}
1127	case PP_DEFINE:
1128	{
1129	next();
1130	QByteArray name = lexem();
1131	if (name.isEmpty() \|\| !is_ident_start(s: name [`0`]))
1132	error();
1133	Macro macro;
1134	macro.isVariadic = false;
1135	if (test(token: LPAREN)) {
1136	// we have a function macro
1137	macro.isFunction = true;
1138	parseDefineArguments(m: &macro);
1139	} else {
1140	macro.isFunction = false;
1141	}
1142	qsizetype start = index;
1143	until(PP_NEWLINE);
1144	macro.symbols.reserve(asize: index - start - `1`);
1145
1146	// remove whitespace where there shouldn't be any:
1147	// Before and after the macro, after a # and around ##
1148	Token lastToken = HASH; // skip shitespace at the beginning
1149	for (qsizetype i = start; i < index - `1`; ++i) {
1150	Token token = symbols.at(i).token;
1151	if (token == WHITESPACE) {
1152	if (lastToken == PP_HASH \|\| lastToken == HASH \|\|
1153	lastToken == PP_HASHHASH \|\|
1154	lastToken == WHITESPACE)
1155	continue;
1156	} else if (token == PP_HASHHASH) {
1157	if (!macro.symbols.isEmpty() &&
1158	lastToken == WHITESPACE)
1159	macro.symbols.pop_back();
1160	}
1161	macro.symbols.append(t: symbols.at(i));
1162	lastToken = token;
1163	}
1164	// remove trailing whitespace
1165	while (!macro.symbols.isEmpty() &&
1166	(macro.symbols.constLast().token == PP_WHITESPACE \|\| macro.symbols.constLast().token == WHITESPACE))
1167	macro.symbols.pop_back();
1168
1169	if (!macro.symbols.isEmpty()) {
1170	if (macro.symbols.constFirst().token == PP_HASHHASH \|\|
1171	macro.symbols.constLast().token == PP_HASHHASH) {
1172	error(msg: "'##' cannot appear at either end of a macro expansion");
1173	}
1174	}
1175	macros.insert(key: name, value: macro);
1176	continue;
1177	}
1178	case PP_UNDEF: {
1179	next();
1180	QByteArray name = lexem();
1181	until(PP_NEWLINE);
1182	macros.remove(key: name);
1183	continue;
1184	}
1185	case PP_IDENTIFIER: {
1186	// substitute macros
1187	macroExpand(into: &preprocessed, that: this, toExpand: symbols, index, lineNum: symbol().lineNum, one: true);
1188	continue;
1189	}
1190	case PP_HASH:
1191	until(PP_NEWLINE);
1192	continue; // skip unknown preprocessor statement
1193	case PP_IFDEF:
1194	case PP_IFNDEF:
1195	case PP_IF:
1196	while (!evaluateCondition()) {
1197	if (!skipBranch())
1198	break;
1199	if (test(token: PP_ELIF)) {
1200	} else {
1201	until(PP_NEWLINE);
1202	break;
1203	}
1204	}
1205	continue;
1206	case PP_ELIF:
1207	case PP_ELSE:
1208	skipUntilEndif();
1209	Q_FALLTHROUGH();
1210	case PP_ENDIF:
1211	until(PP_NEWLINE);
1212	continue;
1213	case PP_NEWLINE:
1214	continue;
1215	case SIGNALS:
1216	case SLOTS: {
1217	Symbol sym = symbol();
1218	if (macros.contains(key: "QT_NO_KEYWORDS"))
1219	sym.token = IDENTIFIER;
1220	else
1221	sym.token = (token == SIGNALS ? Q_SIGNALS_TOKEN : Q_SLOTS_TOKEN);
1222	preprocessed += sym;
1223	} continue;
1224	default:
1225	break;
1226	}
1227	preprocessed += symbol();
1228	}
1229
1230	currentFilenames.pop();
1231	}
1232
1233	Symbols Preprocessor::preprocessed(const QByteArray &filename, QFile *file)
1234	{
1235	QByteArray input = readOrMapFile(file);
1236
1237	if (input.isEmpty())
1238	return symbols;
1239
1240	// phase 1: get rid of backslash-newlines
1241	input = cleaned(input);
1242
1243	// phase 2: tokenize for the preprocessor
1244	index = `0`;
1245	symbols = tokenize(input);
1246
1247	#if 0
1248	for (int j = `0`; j < symbols.size(); ++j)
1249	fprintf(stderr, "line %d: %s(%s)\n",
1250	symbols[j].lineNum,
1251	symbols[j].lexem().constData(),
1252	tokenTypeName(symbols[j].token));
1253	#endif
1254
1255	// phase 3: preprocess conditions and substitute macros
1256	Symbols result;
1257	// Preallocate some space to speed up the code below.
1258	// The magic value was found by logging the final size
1259	// and calculating an average when running moc over FOSS projects.
1260	result.reserve(asize: file->size() / `300000`);
1261	preprocess(filename, preprocessed&: result);
1262	mergeStringLiterals(symbols: &result);
1263
1264	#if 0
1265	for (int j = `0`; j < result.size(); ++j)
1266	fprintf(stderr, "line %d: %s(%s)\n",
1267	result[j].lineNum,
1268	result[j].lexem().constData(),
1269	tokenTypeName(result[j].token));
1270	#endif
1271
1272	return result;
1273	}
1274
1275	void Preprocessor::parseDefineArguments(Macro *m)
1276	{
1277	Symbols arguments;
1278	while (hasNext()) {
1279	while (test(token: PP_WHITESPACE)) {}
1280	Token t = next();
1281	if (t == PP_RPAREN)
1282	break;
1283	if (t != PP_IDENTIFIER) {
1284	QByteArray l = lexem();
1285	if (l == "...") {
1286	m->isVariadic = true;
1287	arguments += Symbol (symbol().lineNum, PP_IDENTIFIER, "__VA_ARGS__");
1288	while (test(token: PP_WHITESPACE)) {}
1289	if (!test(token: PP_RPAREN))
1290	error(msg: "missing ')' in macro argument list");
1291	break;
1292	} else if (!is_identifier(s: l.constData(), len: l.size())) {
1293	error(msg: "Unexpected character in macro argument list.");
1294	}
1295	}
1296
1297	Symbol arg = symbol();
1298	if (arguments.contains(t: arg))
1299	error(msg: "Duplicate macro parameter.");
1300	arguments += symbol();
1301
1302	while (test(token: PP_WHITESPACE)) {}
1303	t = next();
1304	if (t == PP_RPAREN)
1305	break;
1306	if (t == PP_COMMA)
1307	continue;
1308	if (lexem() == "...") {
1309	//GCC extension: #define FOO(x, y...) x(y)
1310	// The last argument was already parsed. Just mark the macro as variadic.
1311	m->isVariadic = true;
1312	while (test(token: PP_WHITESPACE)) {}
1313	if (!test(token: PP_RPAREN))
1314	error(msg: "missing ')' in macro argument list");
1315	break;
1316	}
1317	error(msg: "Unexpected character in macro argument list.");
1318	}
1319	m->arguments = arguments;
1320	while (test(token: PP_WHITESPACE)) {}
1321	}
1322
1323	void Preprocessor::until(Token t)
1324	{
1325	while(hasNext() && next() != t)
1326	;
1327	}
1328
1329	void Preprocessor::setDebugIncludes(bool value)
1330	{
1331	debugIncludes = value;
1332	}
1333
1334
1335	QT_END_NAMESPACE
1336

source code of qtbase/src/tools/moc/preprocessor.cpp