preprocessor.cpp source code [qtbase/src/tools/moc/preprocessor.cpp]

1	/****************************************************************************
2	**
3	** Copyright (C) 2016 The Qt Company Ltd.
4	** Copyright (C) 2014 Olivier Goffart <ogoffart@woboq.org>
5	** Contact: https://www.qt.io/licensing/
6	**
7	** This file is part of the tools applications of the Qt Toolkit.
8	**
9	** $QT_BEGIN_LICENSE:GPL-EXCEPT$
10	** Commercial License Usage
11	** Licensees holding valid commercial Qt licenses may use this file in
12	** accordance with the commercial license agreement provided with the
13	** Software or, alternatively, in accordance with the terms contained in
14	** a written agreement between you and The Qt Company. For licensing terms
15	** and conditions see https://www.qt.io/terms-conditions. For further
16	** information use the contact form at https://www.qt.io/contact-us.
17	**
18	** GNU General Public License Usage
19	** Alternatively, this file may be used under the terms of the GNU
20	** General Public License version 3 as published by the Free Software
21	** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
22	** included in the packaging of this file. Please review the following
23	** information to ensure the GNU General Public License requirements will
24	** be met: https://www.gnu.org/licenses/gpl-3.0.html.
25	**
26	** $QT_END_LICENSE$
27	**
28	****************************************************************************/
29
30	#include "preprocessor.h"
31	#include "utils.h"
32	#include <qstringlist.h>
33	#include <qfile.h>
34	#include <qdir.h>
35	#include <qfileinfo.h>
36
37	QT_BEGIN_NAMESPACE
38
39	#include "ppkeywords.cpp"
40	#include "keywords.cpp"
41
42	// transform \r\n into \n
43	// \r into \n (os9 style)
44	// backslash-newlines into newlines
45	static QByteArray cleaned(const QByteArray &input)
46	{
47	QByteArray result;
48	result.resize(size: input.size());
49	const char *data = input.constData();
50	const char *end = input.constData() + input.size();
51	char *output = result.data();
52
53	int newlines = `0`;
54	while (data != end) {
55	while (data != end && is_space(s: *data))
56	++data;
57	bool takeLine = (*data == `'#'`);
58	if (data == `'%'` && (data+`1`) == `':'`) {
59	takeLine = true;
60	++data;
61	}
62	if (takeLine) {
63	*output = `'#'`;
64	++output;
65	do ++data; while (data != end && is_space(s: *data));
66	}
67	while (data != end) {
68	// handle \\\n, \\\r\n and \\\r
69	if (*data == `'\\'`) {
70	if (*(data + `1`) == `'\r'`) {
71	++data;
72	}
73	if (data != end && ((data + `1`) == `'\n'` \|\| (data) == `'\r'`)) {
74	++newlines;
75	data += `1`;
76	if (data != end && *data != `'\r'`)
77	data += `1`;
78	continue;
79	}
80	} else if (data == `'\r'` && (data + `1`) == `'\n'`) { // reduce \r\n to \n
81	++data;
82	}
83	if (data == end)
84	break;
85
86	char ch = *data;
87	if (ch == `'\r'`) // os9: replace \r with \n
88	ch = `'\n'`;
89	*output = ch;
90	++output;
91
92	if (*data == `'\n'`) {
93	// output additional newlines to keep the correct line-numbering
94	// for the lines following the backslash-newline sequence(s)
95	while (newlines) {
96	*output = `'\n'`;
97	++output;
98	--newlines;
99	}
100	++data;
101	break;
102	}
103	++data;
104	}
105	}
106	result.resize(size: output - result.constData());
107	return result;
108	}
109
110	bool Preprocessor::preprocessOnly = false;
111	void Preprocessor::skipUntilEndif()
112	{
113	while(index < symbols.size() - `1` && symbols.at(i: index).token != PP_ENDIF){
114	switch (symbols.at(i: index).token) {
115	case PP_IF:
116	case PP_IFDEF:
117	case PP_IFNDEF:
118	++index;
119	skipUntilEndif();
120	break;
121	default:
122	;
123	}
124	++index;
125	}
126	}
127
128	bool Preprocessor::skipBranch()
129	{
130	while (index < symbols.size() - `1`
131	&& (symbols.at(i: index).token != PP_ENDIF
132	&& symbols.at(i: index).token != PP_ELIF
133	&& symbols.at(i: index).token != PP_ELSE)
134	){
135	switch (symbols.at(i: index).token) {
136	case PP_IF:
137	case PP_IFDEF:
138	case PP_IFNDEF:
139	++index;
140	skipUntilEndif();
141	break;
142	default:
143	;
144	}
145	++index;
146	}
147	return (index < symbols.size() - `1`);
148	}
149
150
151	Symbols Preprocessor::tokenize(const QByteArray& input, int lineNum, Preprocessor::TokenizeMode mode)
152	{
153	Symbols symbols;
154	// Preallocate some space to speed up the code below.
155	// The magic divisor value was found by calculating the average ratio between
156	// input size and the final size of symbols.
157	// This yielded a value of 16.x when compiling Qt Base.
158	symbols.reserve(asize: input.size() / `16`);
159	const char *begin = input.constData();
160	const char *data = begin;
161	while (*data) {
162	if (mode == TokenizeCpp \|\| mode == TokenizeDefine) {
163	int column = `0`;
164
165	const char *lexem = data;
166	int state = `0`;
167	Token token = NOTOKEN;
168	for (;;) {
169	if (static_cast<signed char>(*data) < `0`) {
170	++data;
171	continue;
172	}
173	int nextindex = keywords[state].next;
174	int next = `0`;
175	if (*data == keywords[state].defchar)
176	next = keywords[state].defnext;
177	else if (!state \|\| nextindex)
178	next = keyword_trans[nextindex][(int)*data];
179	if (!next)
180	break;
181	state = next;
182	token = keywords[state].token;
183	++data;
184	}
185
186	// suboptimal, is_ident_char should use a table
187	if (keywords[state].ident && is_ident_char(s: *data))
188	token = keywords[state].ident;
189
190	if (token == NOTOKEN) {
191	if (*data)
192	++data;
193	// an error really, but let's ignore this input
194	// to not confuse moc later. However in pre-processor
195	// only mode let's continue.
196	if (!Preprocessor::preprocessOnly)
197	continue;
198	}
199
200	++column;
201
202	if (token > SPECIAL_TREATMENT_MARK) {
203	switch (token) {
204	case QUOTE:
205	data = skipQuote(data);
206	token = STRING_LITERAL;
207	// concatenate multi-line strings for easier
208	// STRING_LITERAL handling in moc
209	if (!Preprocessor::preprocessOnly
210	&& !symbols.isEmpty()
211	&& symbols.constLast().token == STRING_LITERAL) {
212
213	const QByteArray newString
214	= `'\"'`
215	+ symbols.constLast().unquotedLexem()
216	+ input.mid(index: lexem - begin + `1`, len: data - lexem - `2`)
217	+ `'\"'`;
218	symbols.last() = Symbol (symbols.constLast().lineNum,
219	STRING_LITERAL,
220	newString);
221	continue;
222	}
223	break;
224	case SINGLEQUOTE:
225	while (data && (data != `'\''`
226	\|\| (*(data-`1`)==`'\\'`
227	&& *(data-`2`)!=`'\\'`)))
228	++data;
229	if (*data)
230	++data;
231	token = CHARACTER_LITERAL;
232	break;
233	case LANGLE_SCOPE:
234	// split <:: into two tokens, < and ::
235	token = LANGLE;
236	data -= `2`;
237	break;
238	case DIGIT:
239	while (is_digit_char(s: data) \|\| data == `'\''`)
240	++data;
241	if (!data \|\| data != `'.'`) {
242	token = INTEGER_LITERAL;
243	if (data - lexem == `1` &&
244	(data == `'x'` \|\| data == `'X'`
245	\|\| data == `'b'` \|\| data == `'B'`)
246	&& *lexem == `'0'`) {
247	++data;
248	while (is_hex_char(s: data) \|\| data == `'\''`)
249	++data;
250	}
251	break;
252	}
253	token = FLOATING_LITERAL;
254	++data;
255	Q_FALLTHROUGH();
256	case FLOATING_LITERAL:
257	while (is_digit_char(s: data) \|\| data == `'\''`)
258	++data;
259	if (data == `'+'` \|\| data == `'-'`)
260	++data;
261	if (data == `'e'` \|\| data == `'E'`) {
262	++data;
263	while (is_digit_char(s: data) \|\| data == `'\''`)
264	++data;
265	}
266	if (data == `'f'` \|\| data == `'F'`
267	\|\| data == `'l'` \|\| data == `'L'`)
268	++data;
269	break;
270	case HASH:
271	if (column == `1` && mode == TokenizeCpp) {
272	mode = PreparePreprocessorStatement;
273	while (data && (data == `' '` \|\| *data == `'\t'`))
274	++data;
275	if (is_ident_char(s: *data))
276	mode = TokenizePreprocessorStatement;
277	continue;
278	}
279	break;
280	case PP_HASHHASH:
281	if (mode == TokenizeCpp)
282	continue;
283	break;
284	case NEWLINE:
285	++lineNum;
286	if (mode == TokenizeDefine) {
287	mode = TokenizeCpp;
288	// emit the newline token
289	break;
290	}
291	continue;
292	case BACKSLASH:
293	{
294	const char *rewind = data;
295	while (data && (data == `' '` \|\| *data == `'\t'`))
296	++data;
297	if (data && data == `'\n'`) {
298	++data;
299	continue;
300	}
301	data = rewind;
302	} break;
303	case CHARACTER:
304	while (is_ident_char(s: *data))
305	++data;
306	token = IDENTIFIER;
307	break;
308	case C_COMMENT:
309	if (*data) {
310	if (*data == `'\n'`)
311	++lineNum;
312	++data;
313	if (*data) {
314	if (*data == `'\n'`)
315	++lineNum;
316	++data;
317	}
318	}
319	while (data && ((data-`1`) != `'/'` \|\| (data-`2`) != `''`)) {
320	if (*data == `'\n'`)
321	++lineNum;
322	++data;
323	}
324	token = WHITESPACE; // one comment, one whitespace
325	Q_FALLTHROUGH();
326	case WHITESPACE:
327	if (column == `1`)
328	column = `0`;
329	while (data && (data == `' '` \|\| *data == `'\t'`))
330	++data;
331	if (Preprocessor::preprocessOnly) // tokenize whitespace
332	break;
333	continue;
334	case CPP_COMMENT:
335	while (data && data != `'\n'`)
336	++data;
337	continue; // ignore safely, the newline is a separator
338	default:
339	continue; //ignore
340	}
341	}
342	#ifdef USE_LEXEM_STORE
343	if (!Preprocessor::preprocessOnly
344	&& token != IDENTIFIER
345	&& token != STRING_LITERAL
346	&& token != FLOATING_LITERAL
347	&& token != INTEGER_LITERAL)
348	symbols += Symbol(lineNum, token);
349	else
350	#endif
351	symbols += Symbol (lineNum, token, input, lexem-begin, data-lexem);
352
353	} else { // Preprocessor
354
355	const char *lexem = data;
356	int state = `0`;
357	Token token = NOTOKEN;
358	if (mode == TokenizePreprocessorStatement) {
359	state = pp_keyword_trans[`0`][(int)`'#'`];
360	mode = TokenizePreprocessor;
361	}
362	for (;;) {
363	if (static_cast<signed char>(*data) < `0`) {
364	++data;
365	continue;
366	}
367	int nextindex = pp_keywords[state].next;
368	int next = `0`;
369	if (*data == pp_keywords[state].defchar)
370	next = pp_keywords[state].defnext;
371	else if (!state \|\| nextindex)
372	next = pp_keyword_trans[nextindex][(int)*data];
373	if (!next)
374	break;
375	state = next;
376	token = pp_keywords[state].token;
377	++data;
378	}
379	// suboptimal, is_ident_char should use a table
380	if (pp_keywords[state].ident && is_ident_char(s: *data))
381	token = pp_keywords[state].ident;
382
383	switch (token) {
384	case NOTOKEN:
385	if (*data)
386	++data;
387	break;
388	case PP_DEFINE:
389	mode = PrepareDefine;
390	break;
391	case PP_IFDEF:
392	symbols += Symbol (lineNum, PP_IF);
393	symbols += Symbol (lineNum, PP_DEFINED);
394	continue;
395	case PP_IFNDEF:
396	symbols += Symbol (lineNum, PP_IF);
397	symbols += Symbol (lineNum, PP_NOT);
398	symbols += Symbol (lineNum, PP_DEFINED);
399	continue;
400	case PP_INCLUDE:
401	mode = TokenizeInclude;
402	break;
403	case PP_QUOTE:
404	data = skipQuote(data);
405	token = PP_STRING_LITERAL;
406	break;
407	case PP_SINGLEQUOTE:
408	while (data && (data != `'\''`
409	\|\| (*(data-`1`)==`'\\'`
410	&& *(data-`2`)!=`'\\'`)))
411	++data;
412	if (*data)
413	++data;
414	token = PP_CHARACTER_LITERAL;
415	break;
416	case PP_DIGIT:
417	while (is_digit_char(s: data) \|\| data == `'\''`)
418	++data;
419	if (!data \|\| data != `'.'`) {
420	token = PP_INTEGER_LITERAL;
421	if (data - lexem == `1` &&
422	(data == `'x'` \|\| data == `'X'`)
423	&& *lexem == `'0'`) {
424	++data;
425	while (is_hex_char(s: data) \|\| data == `'\''`)
426	++data;
427	}
428	break;
429	}
430	token = PP_FLOATING_LITERAL;
431	++data;
432	Q_FALLTHROUGH();
433	case PP_FLOATING_LITERAL:
434	while (is_digit_char(s: data) \|\| data == `'\''`)
435	++data;
436	if (data == `'+'` \|\| data == `'-'`)
437	++data;
438	if (data == `'e'` \|\| data == `'E'`) {
439	++data;
440	while (is_digit_char(s: data) \|\| data == `'\''`)
441	++data;
442	}
443	if (data == `'f'` \|\| data == `'F'`
444	\|\| data == `'l'` \|\| data == `'L'`)
445	++data;
446	break;
447	case PP_CHARACTER:
448	if (mode == PreparePreprocessorStatement) {
449	// rewind entire token to begin
450	data = lexem;
451	mode = TokenizePreprocessorStatement;
452	continue;
453	}
454	while (is_ident_char(s: *data))
455	++data;
456	token = PP_IDENTIFIER;
457
458	if (mode == PrepareDefine) {
459	symbols += Symbol (lineNum, token, input, lexem-begin, data-lexem);
460	// make sure we explicitly add the whitespace here if the next char
461	// is not an opening brace, so we can distinguish correctly between
462	// regular and function macros
463	if (*data != `'('`)
464	symbols += Symbol (lineNum, WHITESPACE);
465	mode = TokenizeDefine;
466	continue;
467	}
468	break;
469	case PP_C_COMMENT:
470	if (*data) {
471	if (*data == `'\n'`)
472	++lineNum;
473	++data;
474	if (*data) {
475	if (*data == `'\n'`)
476	++lineNum;
477	++data;
478	}
479	}
480	while (data && ((data-`1`) != `'/'` \|\| (data-`2`) != `''`)) {
481	if (*data == `'\n'`)
482	++lineNum;
483	++data;
484	}
485	token = PP_WHITESPACE; // one comment, one whitespace
486	Q_FALLTHROUGH();
487	case PP_WHITESPACE:
488	while (data && (data == `' '` \|\| *data == `'\t'`))
489	++data;
490	continue; // the preprocessor needs no whitespace
491	case PP_CPP_COMMENT:
492	while (data && data != `'\n'`)
493	++data;
494	continue; // ignore safely, the newline is a separator
495	case PP_NEWLINE:
496	++lineNum;
497	mode = TokenizeCpp;
498	break;
499	case PP_BACKSLASH:
500	{
501	const char *rewind = data;
502	while (data && (data == `' '` \|\| *data == `'\t'`))
503	++data;
504	if (data && data == `'\n'`) {
505	++data;
506	continue;
507	}
508	data = rewind;
509	} break;
510	case PP_LANGLE:
511	if (mode != TokenizeInclude)
512	break;
513	token = PP_STRING_LITERAL;
514	while (data && data != `'\n'` && *(data-`1`) != `'>'`)
515	++data;
516	break;
517	default:
518	break;
519	}
520	if (mode == PreparePreprocessorStatement)
521	continue;
522	#ifdef USE_LEXEM_STORE
523	if (token != PP_IDENTIFIER
524	&& token != PP_STRING_LITERAL
525	&& token != PP_FLOATING_LITERAL
526	&& token != PP_INTEGER_LITERAL)
527	symbols += Symbol(lineNum, token);
528	else
529	#endif
530	symbols += Symbol (lineNum, token, input, lexem-begin, data-lexem);
531	}
532	}
533	symbols += Symbol (); // eof symbol
534	return symbols;
535	}
536
537	void Preprocessor::macroExpand(Symbols into, Preprocessor that, const Symbols &toExpand, int &index,
538	int lineNum, bool one, const QSet<QByteArray> &excludeSymbols)
539	{
540	SymbolStack symbols;
541	SafeSymbols sf;
542	sf.symbols = toExpand;
543	sf.index = index;
544	sf.excludedSymbols = excludeSymbols;
545	symbols.push(t: sf);
546
547	if (toExpand.isEmpty())
548	return;
549
550	for (;;) {
551	QByteArray macro;
552	Symbols newSyms = macroExpandIdentifier(that, symbols, lineNum, macroName: &macro);
553
554	if (macro.isEmpty()) {
555	// not a macro
556	Symbol s = symbols.symbol();
557	s.lineNum = lineNum;
558	*into += s;
559	} else {
560	SafeSymbols sf;
561	sf.symbols = newSyms;
562	sf.index = `0`;
563	sf.expandedMacro = macro;
564	symbols.push(t: sf);
565	}
566	if (!symbols.hasNext() \|\| (one && symbols.size() == `1`))
567	break;
568	symbols.next();
569	}
570
571	if (symbols.size())
572	index = symbols.top().index;
573	else
574	index = toExpand.size();
575	}
576
577
578	Symbols Preprocessor::macroExpandIdentifier(Preprocessor that, SymbolStack &symbols, int* lineNum, QByteArray *macroName)
579	{
580	Symbol s = symbols.symbol();
581
582	// not a macro
583	if (s.token != PP_IDENTIFIER \|\| !that->macros.contains(akey: s) \|\| symbols.dontReplaceSymbol(name: s.lexem())) {
584	return Symbols ();
585	}
586
587	const Macro &macro = that->macros.value(akey: s);
588	*macroName = s.lexem();
589
590	Symbols expansion;
591	if (!macro.isFunction) {
592	expansion = macro.symbols;
593	} else {
594	bool haveSpace = false;
595	while (symbols.test(token: PP_WHITESPACE)) { haveSpace = true; }
596	if (!symbols.test(token: PP_LPAREN)) {
597	*macroName = QByteArray ();
598	Symbols syms;
599	if (haveSpace)
600	syms += Symbol (lineNum, PP_WHITESPACE);
601	syms += s;
602	syms.last().lineNum = lineNum;
603	return syms;
604	}
605	QVarLengthArray<Symbols, `5`> arguments;
606	while (symbols.hasNext()) {
607	Symbols argument;
608	// strip leading space
609	while (symbols.test(token: PP_WHITESPACE)) {}
610	int nesting = `0`;
611	bool vararg = macro.isVariadic && (arguments.size() == macro.arguments.size() - `1`);
612	while (symbols.hasNext()) {
613	Token t = symbols.next();
614	if (t == PP_LPAREN) {
615	++nesting;
616	} else if (t == PP_RPAREN) {
617	--nesting;
618	if (nesting < `0`)
619	break;
620	} else if (t == PP_COMMA && nesting == `0`) {
621	if (!vararg)
622	break;
623	}
624	argument += symbols.symbol();
625	}
626	arguments += argument;
627
628	if (nesting < `0`)
629	break;
630	else if (!symbols.hasNext())
631	that->error(msg: "missing ')' in macro usage");
632	}
633
634	// empty VA_ARGS
635	if (macro.isVariadic && arguments.size() == macro.arguments.size() - `1`)
636	arguments += Symbols ();
637
638	// now replace the macro arguments with the expanded arguments
639	enum Mode {
640	Normal,
641	Hash,
642	HashHash
643	} mode = Normal;
644
645	for (int i = `0`; i < macro.symbols.size(); ++i) {
646	const Symbol &s = macro.symbols.at(i);
647	if (s.token == HASH \|\| s.token == PP_HASHHASH) {
648	mode = (s.token == HASH ? Hash : HashHash);
649	continue;
650	}
651	int index = macro.arguments.indexOf(t: s);
652	if (mode == Normal) {
653	if (index >= `0` && index < arguments.size()) {
654	// each argument undoergoes macro expansion if it's not used as part of a # or ##
655	if (i == macro.symbols.size() - `1` \|\| macro.symbols.at(i: i + `1`).token != PP_HASHHASH) {
656	Symbols arg = arguments.at(idx: index);
657	int idx = `1`;
658	macroExpand(into: &expansion, that, toExpand: arg, index&: idx, lineNum, one: false, excludeSymbols: symbols.excludeSymbols());
659	} else {
660	expansion += arguments.at(idx: index);
661	}
662	} else {
663	expansion += s;
664	}
665	} else if (mode == Hash) {
666	if (index < `0`) {
667	that->error(msg: "'#' is not followed by a macro parameter");
668	continue;
669	} else if (index >= arguments.size()) {
670	that->error(msg: "Macro invoked with too few parameters for a use of '#'");
671	continue;
672	}
673
674	const Symbols &arg = arguments.at(idx: index);
675	QByteArray stringified;
676	for (int i = `0`; i < arg.size(); ++i) {
677	stringified += arg.at(i).lexem();
678	}
679	stringified.replace(before: `'"'`, c: "\\\"");
680	stringified.prepend(c: `'"'`);
681	stringified.append(c: `'"'`);
682	expansion += Symbol (lineNum, STRING_LITERAL, stringified);
683	} else if (mode == HashHash){
684	if (s.token == WHITESPACE)
685	continue;
686
687	while (expansion.size() && expansion.constLast().token == PP_WHITESPACE)
688	expansion.pop_back();
689
690	Symbol next = s;
691	if (index >= `0` && index < arguments.size()) {
692	const Symbols &arg = arguments.at(idx: index);
693	if (arg.size() == `0`) {
694	mode = Normal;
695	continue;
696	}
697	next = arg.at(i: `0`);
698	}
699
700	if (!expansion.isEmpty() && expansion.constLast().token == s.token
701	&& expansion.constLast().token != STRING_LITERAL) {
702	Symbol last = expansion.takeLast();
703
704	QByteArray lexem = last.lexem() + next.lexem();
705	expansion += Symbol (lineNum, last.token, lexem);
706	} else {
707	expansion += next;
708	}
709
710	if (index >= `0` && index < arguments.size()) {
711	const Symbols &arg = arguments.at(idx: index);
712	for (int i = `1`; i < arg.size(); ++i)
713	expansion += arg.at(i);
714	}
715	}
716	mode = Normal;
717	}
718	if (mode != Normal)
719	that->error(msg: "'#' or '##' found at the end of a macro argument");
720
721	}
722
723	return expansion;
724	}
725
726	void Preprocessor::substituteUntilNewline(Symbols &substituted)
727	{
728	while (hasNext()) {
729	Token token = next();
730	if (token == PP_IDENTIFIER) {
731	macroExpand(into: &substituted, that: this, toExpand: symbols, index, lineNum: symbol().lineNum, one: true);
732	} else if (token == PP_DEFINED) {
733	bool braces = test(token: PP_LPAREN);
734	next(token: PP_IDENTIFIER);
735	Symbol definedOrNotDefined = symbol();
736	definedOrNotDefined.token = macros.contains(akey: definedOrNotDefined)? PP_MOC_TRUE : PP_MOC_FALSE;
737	substituted += definedOrNotDefined;
738	if (braces)
739	test(token: PP_RPAREN);
740	continue;
741	} else if (token == PP_NEWLINE) {
742	substituted += symbol();
743	break;
744	} else {
745	substituted += symbol();
746	}
747	}
748	}
749
750
751	class PP_Expression : public Parser
752	{
753	public:
754	int value() { index = `0`; return unary_expression_lookup() ? conditional_expression() : `0`; }
755
756	int conditional_expression();
757	int logical_OR_expression();
758	int logical_AND_expression();
759	int inclusive_OR_expression();
760	int exclusive_OR_expression();
761	int AND_expression();
762	int equality_expression();
763	int relational_expression();
764	int shift_expression();
765	int additive_expression();
766	int multiplicative_expression();
767	int unary_expression();
768	bool unary_expression_lookup();
769	int primary_expression();
770	bool primary_expression_lookup();
771	};
772
773	int PP_Expression::conditional_expression()
774	{
775	int value = logical_OR_expression();
776	if (test(token: PP_QUESTION)) {
777	int alt1 = conditional_expression();
778	int alt2 = test(token: PP_COLON) ? conditional_expression() : `0`;
779	return value ? alt1 : alt2;
780	}
781	return value;
782	}
783
784	int PP_Expression::logical_OR_expression()
785	{
786	int value = logical_AND_expression();
787	if (test(token: PP_OROR))
788	return logical_OR_expression() \|\| value;
789	return value;
790	}
791
792	int PP_Expression::logical_AND_expression()
793	{
794	int value = inclusive_OR_expression();
795	if (test(token: PP_ANDAND))
796	return logical_AND_expression() && value;
797	return value;
798	}
799
800	int PP_Expression::inclusive_OR_expression()
801	{
802	int value = exclusive_OR_expression();
803	if (test(token: PP_OR))
804	return value \| inclusive_OR_expression();
805	return value;
806	}
807
808	int PP_Expression::exclusive_OR_expression()
809	{
810	int value = AND_expression();
811	if (test(token: PP_HAT))
812	return value ^ exclusive_OR_expression();
813	return value;
814	}
815
816	int PP_Expression::AND_expression()
817	{
818	int value = equality_expression();
819	if (test(token: PP_AND))
820	return value & AND_expression();
821	return value;
822	}
823
824	int PP_Expression::equality_expression()
825	{
826	int value = relational_expression();
827	switch (next()) {
828	case PP_EQEQ:
829	return value == equality_expression();
830	case PP_NE:
831	return value != equality_expression();
832	default:
833	prev();
834	return value;
835	}
836	}
837
838	int PP_Expression::relational_expression()
839	{
840	int value = shift_expression();
841	switch (next()) {
842	case PP_LANGLE:
843	return value < relational_expression();
844	case PP_RANGLE:
845	return value > relational_expression();
846	case PP_LE:
847	return value <= relational_expression();
848	case PP_GE:
849	return value >= relational_expression();
850	default:
851	prev();
852	return value;
853	}
854	}
855
856	int PP_Expression::shift_expression()
857	{
858	int value = additive_expression();
859	switch (next()) {
860	case PP_LTLT:
861	return value << shift_expression();
862	case PP_GTGT:
863	return value >> shift_expression();
864	default:
865	prev();
866	return value;
867	}
868	}
869
870	int PP_Expression::additive_expression()
871	{
872	int value = multiplicative_expression();
873	switch (next()) {
874	case PP_PLUS:
875	return value + additive_expression();
876	case PP_MINUS:
877	return value - additive_expression();
878	default:
879	prev();
880	return value;
881	}
882	}
883
884	int PP_Expression::multiplicative_expression()
885	{
886	int value = unary_expression();
887	switch (next()) {
888	case PP_STAR:
889	{
890	// get well behaved overflow behavior by converting to long
891	// and then back to int
892	// NOTE: A conformant preprocessor would need to work intmax_t/
893	// uintmax_t according to [cpp.cond], 19.1 §10
894	// But we're not compliant anyway
895	qint64 result = qint64(value) * qint64(multiplicative_expression());
896	return int(result);
897	}
898	case PP_PERCENT:
899	{
900	int remainder = multiplicative_expression();
901	return remainder ? value % remainder : `0`;
902	}
903	case PP_SLASH:
904	{
905	int div = multiplicative_expression();
906	return div ? value / div : `0`;
907	}
908	default:
909	prev();
910	return value;
911	};
912	}
913
914	int PP_Expression::unary_expression()
915	{
916	switch (next()) {
917	case PP_PLUS:
918	return unary_expression();
919	case PP_MINUS:
920	return -unary_expression();
921	case PP_NOT:
922	return !unary_expression();
923	case PP_TILDE:
924	return ~unary_expression();
925	case PP_MOC_TRUE:
926	return `1`;
927	case PP_MOC_FALSE:
928	return `0`;
929	default:
930	prev();
931	return primary_expression();
932	}
933	}
934
935	bool PP_Expression::unary_expression_lookup()
936	{
937	Token t = lookup();
938	return (primary_expression_lookup()
939	\|\| t == PP_PLUS
940	\|\| t == PP_MINUS
941	\|\| t == PP_NOT
942	\|\| t == PP_TILDE
943	\|\| t == PP_DEFINED);
944	}
945
946	int PP_Expression::primary_expression()
947	{
948	int value;
949	if (test(token: PP_LPAREN)) {
950	value = conditional_expression();
951	test(token: PP_RPAREN);
952	} else {
953	next();
954	value = lexem().toInt(ok: nullptr, base: `0`);
955	}
956	return value;
957	}
958
959	bool PP_Expression::primary_expression_lookup()
960	{
961	Token t = lookup();
962	return (t == PP_IDENTIFIER
963	\|\| t == PP_INTEGER_LITERAL
964	\|\| t == PP_FLOATING_LITERAL
965	\|\| t == PP_MOC_TRUE
966	\|\| t == PP_MOC_FALSE
967	\|\| t == PP_LPAREN);
968	}
969
970	int Preprocessor::evaluateCondition()
971	{
972	PP_Expression expression;
973	expression.currentFilenames = currentFilenames;
974
975	substituteUntilNewline(substituted&: expression.symbols);
976
977	return expression.value();
978	}
979
980	static QByteArray readOrMapFile(QFile *file)
981	{
982	const qint64 size = file->size();
983	char rawInput = reinterpret_cast<char**>(file->map(offset: `0`, size));
984	return rawInput ? QByteArray::fromRawData(rawInput, size) : file->readAll();
985	}
986
987	static void mergeStringLiterals(Symbols *_symbols)
988	{
989	Symbols &symbols = *_symbols;
990	for (Symbols::iterator i = symbols.begin(); i != symbols.end(); ++i) {
991	if (i->token == STRING_LITERAL) {
992	Symbols::Iterator mergeSymbol = i;
993	int literalsLength = mergeSymbol->len;
994	while (++i != symbols.end() && i->token == STRING_LITERAL)
995	literalsLength += i->len - `2`; // no quotes
996
997	if (literalsLength != mergeSymbol->len) {
998	QByteArray mergeSymbolOriginalLexem = mergeSymbol->unquotedLexem();
999	QByteArray &mergeSymbolLexem = mergeSymbol->lex;
1000	mergeSymbolLexem.resize(size: `0`);
1001	mergeSymbolLexem.reserve(asize: literalsLength);
1002	mergeSymbolLexem.append(c: `'"'`);
1003	mergeSymbolLexem.append(a: mergeSymbolOriginalLexem);
1004	for (Symbols::iterator j = mergeSymbol + `1`; j != i; ++j)
1005	mergeSymbolLexem.append(s: j->lex.constData() + j->from + `1`, len: j->len - `2`); // append j->unquotedLexem()
1006	mergeSymbolLexem.append(c: `'"'`);
1007	mergeSymbol->len = mergeSymbol->lex.length();
1008	mergeSymbol->from = `0`;
1009	i = symbols.erase(abegin: mergeSymbol + `1`, aend: i);
1010	}
1011	if (i == symbols.end())
1012	break;
1013	}
1014	}
1015	}
1016
1017	static QByteArray searchIncludePaths(const QList<Parser::IncludePath> &includepaths,
1018	const QByteArray &include)
1019	{
1020	QFileInfo fi;
1021	for (int j = `0`; j < includepaths.size() && !fi.exists(); ++j) {
1022	const Parser::IncludePath &p = includepaths.at(i: j);
1023	if (p.isFrameworkPath) {
1024	const int slashPos = include.indexOf(c: `'/'`);
1025	if (slashPos == -`1`)
1026	continue;
1027	fi.setFile(dir: QString::fromLocal8Bit(str: p.path + `'/'` + include.left(len: slashPos) + ".framework/Headers/"),
1028	file: QString::fromLocal8Bit(str: include.mid(index: slashPos + `1`)));
1029	} else {
1030	fi.setFile(dir: QString::fromLocal8Bit(str: p.path), file: QString::fromLocal8Bit(str: include));
1031	}
1032	// try again, maybe there's a file later in the include paths with the same name
1033	// (186067)
1034	if (fi.isDir()) {
1035	fi = QFileInfo ();
1036	continue;
1037	}
1038	}
1039
1040	if (!fi.exists() \|\| fi.isDir())
1041	return QByteArray ();
1042	return fi.canonicalFilePath().toLocal8Bit();
1043	}
1044
1045	QByteArray Preprocessor::resolveInclude(const QByteArray &include, const QByteArray &relativeTo)
1046	{
1047	if (!relativeTo.isEmpty()) {
1048	QFileInfo fi;
1049	fi.setFile(dir: QFileInfo (QString::fromLocal8Bit(str: relativeTo)).dir(), file: QString::fromLocal8Bit(str: include));
1050	if (fi.exists() && !fi.isDir())
1051	return fi.canonicalFilePath().toLocal8Bit();
1052	}
1053
1054	auto it = nonlocalIncludePathResolutionCache.find(akey: include);
1055	if (it == nonlocalIncludePathResolutionCache.end())
1056	it = nonlocalIncludePathResolutionCache.insert(akey: include, avalue: searchIncludePaths(includepaths: includes, include));
1057	return it.value();
1058	}
1059
1060	void Preprocessor::preprocess(const QByteArray &filename, Symbols &preprocessed)
1061	{
1062	currentFilenames.push(x: filename);
1063	preprocessed.reserve(asize: preprocessed.size() + symbols.size());
1064	while (hasNext()) {
1065	Token token = next();
1066
1067	switch (token) {
1068	case PP_INCLUDE:
1069	{
1070	int lineNum = symbol().lineNum;
1071	QByteArray include;
1072	bool local = false;
1073	if (test(token: PP_STRING_LITERAL)) {
1074	local = lexem().startsWith(c: `'\"'`);
1075	include = unquotedLexem();
1076	} else
1077	continue;
1078	until(PP_NEWLINE);
1079
1080	include = resolveInclude(include, relativeTo: local ? filename : QByteArray ());
1081	if (include.isNull())
1082	continue;
1083
1084	if (Preprocessor::preprocessedIncludes.contains(value: include))
1085	continue;
1086	Preprocessor::preprocessedIncludes.insert(value: include);
1087
1088	QFile file(QString::fromLocal8Bit(str: include.constData()));
1089	if (!file.open(flags: QFile::ReadOnly))
1090	continue;
1091
1092	QByteArray input = readOrMapFile(file: &file);
1093
1094	file.close();
1095	if (input.isEmpty())
1096	continue;
1097
1098	Symbols saveSymbols = symbols;
1099	int saveIndex = index;
1100
1101	// phase 1: get rid of backslash-newlines
1102	input = cleaned(input);
1103
1104	// phase 2: tokenize for the preprocessor
1105	symbols = tokenize(input);
1106	input.clear();
1107
1108	index = `0`;
1109
1110	// phase 3: preprocess conditions and substitute macros
1111	preprocessed += Symbol (`0`, MOC_INCLUDE_BEGIN, include);
1112	preprocess(filename: include, preprocessed);
1113	preprocessed += Symbol (lineNum, MOC_INCLUDE_END, include);
1114
1115	symbols = saveSymbols;
1116	index = saveIndex;
1117	continue;
1118	}
1119	case PP_DEFINE:
1120	{
1121	next();
1122	QByteArray name = lexem();
1123	if (name.isEmpty() \|\| !is_ident_start(s: name [`0`]))
1124	error();
1125	Macro macro;
1126	macro.isVariadic = false;
1127	if (test(token: LPAREN)) {
1128	// we have a function macro
1129	macro.isFunction = true;
1130	parseDefineArguments(m: &macro);
1131	} else {
1132	macro.isFunction = false;
1133	}
1134	int start = index;
1135	until(PP_NEWLINE);
1136	macro.symbols.reserve(asize: index - start - `1`);
1137
1138	// remove whitespace where there shouldn't be any:
1139	// Before and after the macro, after a # and around ##
1140	Token lastToken = HASH; // skip shitespace at the beginning
1141	for (int i = start; i < index - `1`; ++i) {
1142	Token token = symbols.at(i).token;
1143	if (token == WHITESPACE) {
1144	if (lastToken == PP_HASH \|\| lastToken == HASH \|\|
1145	lastToken == PP_HASHHASH \|\|
1146	lastToken == WHITESPACE)
1147	continue;
1148	} else if (token == PP_HASHHASH) {
1149	if (!macro.symbols.isEmpty() &&
1150	lastToken == WHITESPACE)
1151	macro.symbols.pop_back();
1152	}
1153	macro.symbols.append(t: symbols.at(i));
1154	lastToken = token;
1155	}
1156	// remove trailing whitespace
1157	while (!macro.symbols.isEmpty() &&
1158	(macro.symbols.constLast().token == PP_WHITESPACE \|\| macro.symbols.constLast().token == WHITESPACE))
1159	macro.symbols.pop_back();
1160
1161	if (!macro.symbols.isEmpty()) {
1162	if (macro.symbols.constFirst().token == PP_HASHHASH \|\|
1163	macro.symbols.constLast().token == PP_HASHHASH) {
1164	error(msg: "'##' cannot appear at either end of a macro expansion");
1165	}
1166	}
1167	macros.insert(akey: name, avalue: macro);
1168	continue;
1169	}
1170	case PP_UNDEF: {
1171	next();
1172	QByteArray name = lexem();
1173	until(PP_NEWLINE);
1174	macros.remove(akey: name);
1175	continue;
1176	}
1177	case PP_IDENTIFIER: {
1178	// substitute macros
1179	macroExpand(into: &preprocessed, that: this, toExpand: symbols, index, lineNum: symbol().lineNum, one: true);
1180	continue;
1181	}
1182	case PP_HASH:
1183	until(PP_NEWLINE);
1184	continue; // skip unknown preprocessor statement
1185	case PP_IFDEF:
1186	case PP_IFNDEF:
1187	case PP_IF:
1188	while (!evaluateCondition()) {
1189	if (!skipBranch())
1190	break;
1191	if (test(token: PP_ELIF)) {
1192	} else {
1193	until(PP_NEWLINE);
1194	break;
1195	}
1196	}
1197	continue;
1198	case PP_ELIF:
1199	case PP_ELSE:
1200	skipUntilEndif();
1201	Q_FALLTHROUGH();
1202	case PP_ENDIF:
1203	until(PP_NEWLINE);
1204	continue;
1205	case PP_NEWLINE:
1206	continue;
1207	case SIGNALS:
1208	case SLOTS: {
1209	Symbol sym = symbol();
1210	if (macros.contains(akey: "QT_NO_KEYWORDS"))
1211	sym.token = IDENTIFIER;
1212	else
1213	sym.token = (token == SIGNALS ? Q_SIGNALS_TOKEN : Q_SLOTS_TOKEN);
1214	preprocessed += sym;
1215	} continue;
1216	default:
1217	break;
1218	}
1219	preprocessed += symbol();
1220	}
1221
1222	currentFilenames.pop();
1223	}
1224
1225	Symbols Preprocessor::preprocessed(const QByteArray &filename, QFile *file)
1226	{
1227	QByteArray input = readOrMapFile(file);
1228
1229	if (input.isEmpty())
1230	return symbols;
1231
1232	// phase 1: get rid of backslash-newlines
1233	input = cleaned(input);
1234
1235	// phase 2: tokenize for the preprocessor
1236	index = `0`;
1237	symbols = tokenize(input);
1238
1239	#if 0
1240	for (int j = `0`; j < symbols.size(); ++j)
1241	fprintf(stderr, "line %d: %s(%s)\n",
1242	symbols[j].lineNum,
1243	symbols[j].lexem().constData(),
1244	tokenTypeName(symbols[j].token));
1245	#endif
1246
1247	// phase 3: preprocess conditions and substitute macros
1248	Symbols result;
1249	// Preallocate some space to speed up the code below.
1250	// The magic value was found by logging the final size
1251	// and calculating an average when running moc over FOSS projects.
1252	result.reserve(asize: file->size() / `300000`);
1253	preprocess(filename, preprocessed&: result);
1254	mergeStringLiterals(symbols: &result);
1255
1256	#if 0
1257	for (int j = `0`; j < result.size(); ++j)
1258	fprintf(stderr, "line %d: %s(%s)\n",
1259	result[j].lineNum,
1260	result[j].lexem().constData(),
1261	tokenTypeName(result[j].token));
1262	#endif
1263
1264	return result;
1265	}
1266
1267	void Preprocessor::parseDefineArguments(Macro *m)
1268	{
1269	Symbols arguments;
1270	while (hasNext()) {
1271	while (test(token: PP_WHITESPACE)) {}
1272	Token t = next();
1273	if (t == PP_RPAREN)
1274	break;
1275	if (t != PP_IDENTIFIER) {
1276	QByteArray l = lexem();
1277	if (l == "...") {
1278	m->isVariadic = true;
1279	arguments += Symbol (symbol().lineNum, PP_IDENTIFIER, "__VA_ARGS__");
1280	while (test(token: PP_WHITESPACE)) {}
1281	if (!test(token: PP_RPAREN))
1282	error(msg: "missing ')' in macro argument list");
1283	break;
1284	} else if (!is_identifier(s: l.constData(), len: l.length())) {
1285	error(msg: "Unexpected character in macro argument list.");
1286	}
1287	}
1288
1289	Symbol arg = symbol();
1290	if (arguments.contains(t: arg))
1291	error(msg: "Duplicate macro parameter.");
1292	arguments += symbol();
1293
1294	while (test(token: PP_WHITESPACE)) {}
1295	t = next();
1296	if (t == PP_RPAREN)
1297	break;
1298	if (t == PP_COMMA)
1299	continue;
1300	if (lexem() == "...") {
1301	//GCC extension: #define FOO(x, y...) x(y)
1302	// The last argument was already parsed. Just mark the macro as variadic.
1303	m->isVariadic = true;
1304	while (test(token: PP_WHITESPACE)) {}
1305	if (!test(token: PP_RPAREN))
1306	error(msg: "missing ')' in macro argument list");
1307	break;
1308	}
1309	error(msg: "Unexpected character in macro argument list.");
1310	}
1311	m->arguments = arguments;
1312	while (test(token: PP_WHITESPACE)) {}
1313	}
1314
1315	void Preprocessor::until(Token t)
1316	{
1317	while(hasNext() && next() != t)
1318	;
1319	}
1320
1321	QT_END_NAMESPACE
1322

source code of qtbase/src/tools/moc/preprocessor.cpp