lex.cc source code [libcpp/lex.cc]

/* CPP Library - lexical analysis.
   Copyright (C) 2000-2025 Free Software Foundation, Inc.
   Contributed by Per Bothner, 1994-95.
   Based on CCCP program by Paul Rubin, June 1986
   Adapted to ANSI C, Richard Stallman, Jan 1987
   Broken out to separate file, Zack Weinberg, Mar 2000

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
21
22	#include "config.h"
23	#include "system.h"
24	#include "cpplib.h"
25	#include "internal.h"
26
27	enum spell_type
28	{
29	SPELL_OPERATOR = `0`,
30	SPELL_IDENT,
31	SPELL_LITERAL,
32	SPELL_NONE
33	};
34
35	struct token_spelling
36	{
37	enum spell_type category;
38	const unsigned char *name;
39	};
40
41	static const unsigned char *const digraph_spellings[] =
42	{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
43
44	#define OP(e, s) { SPELL_OPERATOR, UC s },
45	#define TK(e, s) { SPELL_ ## s, UC #e },
46	static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
47	#undef OP
48	#undef TK
49
50	#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51	#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
52
53	/ ISO 10646 defines the UCS codespace as the range 0-0x10FFFF inclusive. /
54	#define UCS_LIMIT 0x10FFFF
55
56	static void add_line_note (cpp_buffer , const* uchar , unsigned* int);
57	static int skip_line_comment (cpp_reader *);
58	static void skip_whitespace (cpp_reader *, cppchar_t);
59	static void lex_string (cpp_reader , cpp_token , const uchar *);
60	static void save_comment (cpp_reader , cpp_token , const uchar *, cppchar_t);
61	static void store_comment (cpp_reader , cpp_token );
62	static void create_literal (cpp_reader , cpp_token , const uchar *,
63	unsigned int, enum cpp_ttype);
64	static bool warn_in_comment (cpp_reader , _cpp_line_note );
65	static int name_p (cpp_reader , const* cpp_string *);
66	static tokenrun next_tokenrun (tokenrun );
67
68	static _cpp_buff *new_buff (size_t);
69
70
71	/ Utility routine:*
72
73	Compares, the token TOKEN to the NUL-terminated string STRING.
74	TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. /*
75	int
76	cpp_ideq (const cpp_token token, const* char *string)
77	{
78	if (token->type != CPP_NAME)
79	return `0`;
80
81	return !ustrcmp (NODE_NAME (token->val.node.node), s2: (const uchar *) string);
82	}
83
84	/ Record a note TYPE at byte POS into the current cleaned logical*
85	line. /*
86	static void
87	add_line_note (cpp_buffer buffer, const* uchar pos, unsigned* int type)
88	{
89	if (buffer->notes_used == buffer->notes_cap)
90	{
91	buffer->notes_cap = buffer->notes_cap * `2` + `200`;
92	buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
93	buffer->notes_cap);
94	}
95
96	buffer->notes[buffer->notes_used].pos = pos;
97	buffer->notes[buffer->notes_used].type = type;
98	buffer->notes_used++;
99	}
100
101
/* Fast path to find line special characters using optimized character
   scanning algorithms.  Anything complicated falls back to the slow
   path below.  Since this loop is very hot it's worth doing these kinds
   of optimizations.

   One of the paths through the ifdefs should provide

     const uchar *search_line_fast (const uchar *s, const uchar *end);

   Between S and END, search for \n, \r, \\, ?.  Return a pointer to
   the found character.

   Note that the last character of the buffer is *always* a newline,
   as forced by _cpp_convert_input.  This fact can be used to avoid
   explicitly looking for the end of the buffer.  */
117
/* Configure gives us an ifdef test.  */
#ifndef WORDS_BIGENDIAN
#define WORDS_BIGENDIAN 0
#endif

/* We'd like the largest integer that fits into a register.  There's nothing
   in <stdint.h> that gives us that.  For most hosts this is unsigned long,
   but MS decided on an LLP64 model.  Thankfully when building with GCC we
   can get the "real" word size.  */
#ifdef __GNUC__
typedef unsigned int word_type __attribute__((__mode__(__word__)));
#else
typedef unsigned long word_type;
#endif

/* The code below is only expecting sizes 4 or 8.
   Die at compile-time if this expectation is violated: the array
   bound evaluates to 1 when the size is acceptable and to -1 (an
   invalid bound) otherwise.  */
typedef char check_word_type_size
  [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
137
138	/ Return X with the first N bytes forced to values that won't match one*
139	of the interesting characters. Note that NUL is not interesting. /*
140
141	static inline word_type
142	acc_char_mask_misalign (word_type val, unsigned int n)
143	{
144	word_type mask = -`1`;
145	if (WORDS_BIGENDIAN)
146	mask >>= n * `8`;
147	else
148	mask <<= n * `8`;
149	return val & mask;
150	}
151
152	/ Return X replicated to all byte positions within WORD_TYPE. /
153
154	static inline word_type
155	acc_char_replicate (uchar x)
156	{
157	word_type ret;
158
159	ret = (x << `24`) \| (x << `16`) \| (x << `8`) \| x;
160	if (sizeof(word_type) == `8`)
161	ret = (ret << `16` << `16`) \| ret;
162	return ret;
163	}
164
165	/ Return non-zero if some byte of VAL is (probably) C. /
166
167	static inline word_type
168	acc_char_cmp (word_type val, word_type c)
169	{
170	#if defined(__GNUC__) && defined(__alpha__)
171	/ We can get exact results using a compare-bytes instruction.*
172	Get (val == c) via (0 >= (val ^ c)). /*
173	return __builtin_alpha_cmpbge (`0`, val ^ c);
174	#else
175	word_type magic = `0x7efefefeU`;
176	if (sizeof(word_type) == `8`)
177	magic = (magic << `16` << `16`) \| `0xfefefefeU`;
178	magic \|= `1`;
179
180	val ^= c;
181	return ((val + magic) ^ ~val) & ~magic;
182	#endif
183	}
184
185	/ Given the result of acc_char_cmp is non-zero, return the index of*
186	the found character. If this was a false positive, return -1. /*
187
188	static inline int
189	acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
190	word_type val ATTRIBUTE_UNUSED)
191	{
192	#if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
193	/ The cmpbge instruction sets bits of the result corresponding to*
194	matches in the bytes with no false positives. /*
195	return __builtin_ctzl (cmp);
196	#else
197	unsigned int i;
198
199	/ ??? It would be nice to force unrolling here,*
200	and have all of these constants folded. /*
201	for (i = `0`; i < sizeof(word_type); ++i)
202	{
203	uchar c;
204	if (WORDS_BIGENDIAN)
205	c = (val >> (sizeof(word_type) - i - `1`) * `8`) & `0xff`;
206	else
207	c = (val >> i * `8`) & `0xff`;
208
209	if (c == `'\n'` \|\| c == `'\r'` \|\| c == `'\\'` \|\| c == `'?'`)
210	return i;
211	}
212
213	return -`1`;
214	#endif
215	}
216
217	/ A version of the fast scanner using bit fiddling techniques.*
218
219	For 32-bit words, one would normally perform 16 comparisons and
220	16 branches. With this algorithm one performs 24 arithmetic
221	operations and one branch. Whether this is faster with a 32-bit
222	word size is going to be somewhat system dependent.
223
224	For 64-bit words, we eliminate twice the number of comparisons
225	and branches without increasing the number of arithmetic operations.
226	It's almost certainly going to be a win with 64-bit word size. /*
227
228	static inline const uchar *
229	search_line_acc_char (const uchar s, const* uchar *end ATTRIBUTE_UNUSED)
230	{
231	const word_type repl_nl = acc_char_replicate (x: `'\n'`);
232	const word_type repl_cr = acc_char_replicate (x: `'\r'`);
233	const word_type repl_bs = acc_char_replicate (x: `'\\'`);
234	const word_type repl_qm = acc_char_replicate (x: `'?'`);
235
236	unsigned int misalign;
237	const word_type *p;
238	word_type val, t;
239
240	/ Align the buffer. Mask out any bytes from before the beginning. /
241	p = (word_type )((uintptr_t)s & -sizeof*(word_type));
242	val = *p;
243	misalign = (uintptr_t)s & (sizeof(word_type) - `1`);
244	if (misalign)
245	val = acc_char_mask_misalign (val, n: misalign);
246
247	/ Main loop. /
248	while (`1`)
249	{
250	t = acc_char_cmp (val, c: repl_nl);
251	t \|= acc_char_cmp (val, c: repl_cr);
252	t \|= acc_char_cmp (val, c: repl_bs);
253	t \|= acc_char_cmp (val, c: repl_qm);
254
255	if (__builtin_expect (t != `0`, `0`))
256	{
257	int i = acc_char_index (cmp: t, val);
258	if (i >= `0`)
259	return (const uchar *)p + i;
260	}
261
262	val = *++p;
263	}
264	}
265
266	/ Disable on Solaris 2/x86 until the following problem can be properly*
267	autoconfed:
268
269	The Solaris 10+ assembler tags objects with the instruction set
270	extensions used, so SSE4.2 executables cannot run on machines that
271	don't support that extension. /*
272
273	#if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 \|\| !defined(__PIC__)) && (defined(__i386__) \|\| defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
274
275	/ Replicated character data to be shared between implementations.*
276	Recall that outside of a context with vector support we can't
277	define compatible vector types, therefore these are all defined
278	in terms of raw characters. /*
279	static const char repl_chars[`4`][`16`] __attribute__((aligned(`16`))) = {
280	{ `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`,
281	`'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'` },
282	{ `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`,
283	`'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'` },
284	{ `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`,
285	`'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'` },
286	{ `'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`,
287	`'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'` },
288	};
289
290
291	/ A version of the fast scanner using SSE2 vectorized byte compare insns. /
292
293	static inline const uchar *
294	#ifndef __SSE2__
295	__attribute__((__target__("sse2")))
296	#endif
297	search_line_sse2 (const uchar s, const* uchar *end ATTRIBUTE_UNUSED)
298	{
299	typedef char v16qi __attribute__ ((__vector_size__ (`16`)));
300
301	const v16qi repl_nl = (const* v16qi *)repl_chars[`0`];
302	const v16qi repl_cr = (const* v16qi *)repl_chars[`1`];
303	const v16qi repl_bs = (const* v16qi *)repl_chars[`2`];
304	const v16qi repl_qm = (const* v16qi *)repl_chars[`3`];
305
306	unsigned int misalign, found, mask;
307	const v16qi *p;
308	v16qi data, t;
309
310	/ Align the source pointer. /
311	misalign = (uintptr_t)s & `15`;
312	p = (const v16qi *)((uintptr_t)s & -`16`);
313	data = *p;
314
315	/ Create a mask for the bytes that are valid within the first*
316	16-byte block. The Idea here is that the AND with the mask
317	within the loop is "free", since we need some AND or TEST
318	insn in order to set the flags for the branch anyway. /*
319	mask = -`1u` << misalign;
320
321	/ Main loop processing 16 bytes at a time. /
322	goto start;
323	do
324	{
325	data = *++p;
326	mask = -`1`;
327
328	start:
329	t = data == repl_nl;
330	t \|= data == repl_cr;
331	t \|= data == repl_bs;
332	t \|= data == repl_qm;
333	found = __builtin_ia32_pmovmskb128 (t);
334	found &= mask;
335	}
336	while (!found);
337
338	/ FOUND contains 1 in bits for which we matched a relevant*
339	character. Conversion to the byte index is trivial. /*
340	found = __builtin_ctz(found);
341	return (const uchar *)p + found;
342	}
343
#ifdef HAVE_SSSE3
/* A version of the fast scanner using SSSE3 shuffle (PSHUFB) insns.
   Returns a pointer to the first \n, \r, \\ or ? at or after S; END is
   unused.  */

static inline const uchar *
#ifndef __SSSE3__
__attribute__((__target__("ssse3")))
#endif
search_line_ssse3 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
  typedef v16qi v16qi_u __attribute__ ((__aligned__ (1)));
  /* Helper vector for pshufb-based matching:
     each character C we're searching for is at position (C % 16).  */
  v16qi lut = { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, '\n', 0, '\\', '\r', 0, '?' };
  static_assert ('\n' == 10 && '\r' == 13 && '\\' == 92 && '?' == 63,
                 "host character encoding is ASCII");

  v16qi d1, d2, t1, t2;
  /* Unaligned loads, potentially using padding after the final newline.  */
  static_assert (CPP_BUFFER_PADDING >= 64, "");
  d1 = *(const v16qi_u *)s;
  d2 = *(const v16qi_u *)(s + 16);
  unsigned m1, m2, found;
  /* Process two 16-byte chunks per iteration.  */
  do
    {
      t1 = __builtin_ia32_pshufb128 (lut, d1);
      t2 = __builtin_ia32_pshufb128 (lut, d2);
      m1 = __builtin_ia32_pmovmskb128 (t1 == d1);
      m2 = __builtin_ia32_pmovmskb128 (t2 == d2);
      s += 32;
      d1 = *(const v16qi_u *)s;
      d2 = *(const v16qi_u *)(s + 16);
      found = m1 + (m2 << 16);
    }
  while (!found);
  /* Prefer to compute 's - 32' here, not spend an extra instruction
     to make a copy of the previous value of 's' in the loop.  */
  __asm__ ("" : "+r"(s));
  return s - 32 + __builtin_ctz (found);
}

#else
/* Work around out-dated assemblers without SSSE3 support.  */
#define search_line_ssse3 search_line_sse2
#endif
390
391	#ifdef __SSSE3__
392	/ No need for CPU probing, just use the best available variant. /
393	#define search_line_fast search_line_ssse3
394	#else
395	/ Check the CPU capabilities. /
396
397	#include "../gcc/config/i386/cpuid.h"
398
399	typedef const uchar * (search_line_fast_type) (const* uchar , const* uchar *);
400	static search_line_fast_type search_line_fast
401	#if defined(__SSE2__)
402	= search_line_sse2;
403	#else
404	= search_line_acc_char;
405	#endif
406
407	#define HAVE_init_vectorized_lexer 1
408	static inline void
409	init_vectorized_lexer (void)
410	{
411	unsigned ax, bx, cx, dx;
412
413	if (!__get_cpuid (`1`, &ax, &bx, &cx, &dx))
414	return;
415
416	if (cx & bit_SSSE3)
417	search_line_fast = search_line_ssse3;
418	else if (dx & bit_SSE2)
419	search_line_fast = search_line_sse2;
420	}
421	#endif
422
423	#elif (GCC_VERSION >= 4005) && defined(_ARCH_PWR8) && defined(__ALTIVEC__)
424
425	/ A vection of the fast scanner using AltiVec vectorized byte compares*
426	and VSX unaligned loads (when VSX is available). This is otherwise
427	the same as the AltiVec version. /*
428
429	ATTRIBUTE_NO_SANITIZE_UNDEFINED
430	static const uchar *
431	search_line_fast (const uchar s, const* uchar *end ATTRIBUTE_UNUSED)
432	{
433	typedef __attribute__((altivec(vector))) unsigned char vc;
434
435	const vc repl_nl = {
436	`'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`,
437	`'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`
438	};
439	const vc repl_cr = {
440	`'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`,
441	`'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`
442	};
443	const vc repl_bs = {
444	`'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`,
445	`'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`
446	};
447	const vc repl_qm = {
448	`'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`,
449	`'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`,
450	};
451	const vc zero = { `0` };
452
453	vc data, t;
454
455	/ Main loop processing 16 bytes at a time. /
456	do
457	{
458	vc m_nl, m_cr, m_bs, m_qm;
459
460	data = __builtin_vec_vsx_ld (`0`, s);
461	s += `16`;
462
463	m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
464	m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
465	m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
466	m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
467	t = (m_nl \| m_cr) \| (m_bs \| m_qm);
468
469	/ T now contains 0xff in bytes for which we matched one of the relevant*
470	characters. We want to exit the loop if any byte in T is non-zero.
471	Below is the expansion of vec_any_ne(t, zero). /*
472	}
473	while (!__builtin_vec_vcmpeq_p(/__CR6_LT_REV/`3`, t, zero));
474
475	/ Restore s to to point to the 16 bytes we just processed. /
476	s -= `16`;
477
478	{
479	#define N (sizeof(vc) / sizeof(long))
480
481	union {
482	vc v;
483	/ Statically assert that N is 2 or 4. /
484	unsigned long l[(N == `2` \|\| N == `4`) ? N : -`1`];
485	} u;
486	unsigned long l, i = `0`;
487
488	u.v = t;
489
490	/ Find the first word of T that is non-zero. /
491	switch (N)
492	{
493	case `4`:
494	l = u.l[i++];
495	if (l != `0`)
496	break;
497	s += sizeof(unsigned long);
498	l = u.l[i++];
499	if (l != `0`)
500	break;
501	s += sizeof(unsigned long);
502	/ FALLTHRU /
503	case `2`:
504	l = u.l[i++];
505	if (l != `0`)
506	break;
507	s += sizeof(unsigned long);
508	l = u.l[i];
509	}
510
511	/ L now contains 0xff in bytes for which we matched one of the*
512	relevant characters. We can find the byte index by finding
513	its bit index and dividing by 8. /*
514	#ifdef __BIG_ENDIAN__
515	l = __builtin_clzl(l) >> `3`;
516	#else
517	l = __builtin_ctzl(l) >> `3`;
518	#endif
519	return s + l;
520
521	#undef N
522	}
523	}
524
525	#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
526
527	/ A vection of the fast scanner using AltiVec vectorized byte compares.*
528	This cannot be used for little endian because vec_lvsl/lvsr are
529	deprecated for little endian and the code won't work properly. /*
530	/ ??? Unfortunately, attribute(target("altivec")) is not yet supported,*
531	so we can't compile this function without -maltivec on the command line
532	(or implied by some other switch). /*
533
534	static const uchar *
535	search_line_fast (const uchar s, const* uchar *end ATTRIBUTE_UNUSED)
536	{
537	typedef __attribute__((altivec(vector))) unsigned char vc;
538
539	const vc repl_nl = {
540	`'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`,
541	`'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`, `'\n'`
542	};
543	const vc repl_cr = {
544	`'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`,
545	`'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`, `'\r'`
546	};
547	const vc repl_bs = {
548	`'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`,
549	`'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`, `'\\'`
550	};
551	const vc repl_qm = {
552	`'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`,
553	`'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`, `'?'`,
554	};
555	const vc ones = {
556	-`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`,
557	-`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`,
558	};
559	const vc zero = { `0` };
560
561	vc data, mask, t;
562
563	/ Altivec loads automatically mask addresses with -16. This lets us*
564	issue the first load as early as possible. /*
565	data = __builtin_vec_ld(`0`, (const vc *)s);
566
567	/ Discard bytes before the beginning of the buffer. Do this by*
568	beginning with all ones and shifting in zeros according to the
569	mis-alignment. The LVSR instruction pulls the exact shift we
570	want from the address. /*
571	mask = __builtin_vec_lvsr(`0`, s);
572	mask = __builtin_vec_perm(zero, ones, mask);
573	data &= mask;
574
575	/ While altivec loads mask addresses, we still need to align S so*
576	that the offset we compute at the end is correct. /*
577	s = (const uchar *)((uintptr_t)s & -`16`);
578
579	/ Main loop processing 16 bytes at a time. /
580	goto start;
581	do
582	{
583	vc m_nl, m_cr, m_bs, m_qm;
584
585	s += `16`;
586	data = __builtin_vec_ld(`0`, (const vc *)s);
587
588	start:
589	m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
590	m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
591	m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
592	m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
593	t = (m_nl \| m_cr) \| (m_bs \| m_qm);
594
595	/ T now contains 0xff in bytes for which we matched one of the relevant*
596	characters. We want to exit the loop if any byte in T is non-zero.
597	Below is the expansion of vec_any_ne(t, zero). /*
598	}
599	while (!__builtin_vec_vcmpeq_p(/__CR6_LT_REV/`3`, t, zero));
600
601	{
602	#define N (sizeof(vc) / sizeof(long))
603
604	union {
605	vc v;
606	/ Statically assert that N is 2 or 4. /
607	unsigned long l[(N == `2` \|\| N == `4`) ? N : -`1`];
608	} u;
609	unsigned long l, i = `0`;
610
611	u.v = t;
612
613	/ Find the first word of T that is non-zero. /
614	switch (N)
615	{
616	case `4`:
617	l = u.l[i++];
618	if (l != `0`)
619	break;
620	s += sizeof(unsigned long);
621	l = u.l[i++];
622	if (l != `0`)
623	break;
624	s += sizeof(unsigned long);
625	/ FALLTHROUGH /
626	case `2`:
627	l = u.l[i++];
628	if (l != `0`)
629	break;
630	s += sizeof(unsigned long);
631	l = u.l[i];
632	}
633
634	/ L now contains 0xff in bytes for which we matched one of the*
635	relevant characters. We can find the byte index by finding
636	its bit index and dividing by 8. /*
637	l = __builtin_clzl(l) >> `3`;
638	return s + l;
639
640	#undef N
641	}
642	}
643
644	#elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE)
645	#include "arm_neon.h"
646
647	/ This doesn't have to be the exact page size, but no system may use*
648	a size smaller than this. ARMv8 requires a minimum page size of
649	4k. The impact of being conservative here is a small number of
650	cases will take the slightly slower entry path into the main
651	loop. /*
652
653	#define AARCH64_MIN_PAGE_SIZE 4096
654
655	static const uchar *
656	search_line_fast (const uchar s, const* uchar *end ATTRIBUTE_UNUSED)
657	{
658	const uint8x16_t repl_nl = vdupq_n_u8 (`'\n'`);
659	const uint8x16_t repl_cr = vdupq_n_u8 (`'\r'`);
660	const uint8x16_t repl_bs = vdupq_n_u8 (`'\\'`);
661	const uint8x16_t repl_qm = vdupq_n_u8 (`'?'`);
662	const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (`0x8040201008040201ULL`);
663
664	#ifdef __ARM_BIG_ENDIAN
665	const int16x8_t shift = {`8`, `8`, `8`, `8`, `0`, `0`, `0`, `0`};
666	#else
667	const int16x8_t shift = {`0`, `0`, `0`, `0`, `8`, `8`, `8`, `8`};
668	#endif
669
670	unsigned int found;
671	const uint8_t *p;
672	uint8x16_t data;
673	uint8x16_t t;
674	uint16x8_t m;
675	uint8x16_t u, v, w;
676
677	/ Align the source pointer. /
678	p = (const uint8_t *)((uintptr_t)s & -`16`);
679
680	/ Assuming random string start positions, with a 4k page size we'll take*
681	the slow path about 0.37% of the time. /*
682	if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE
683	- (((uintptr_t) s) & (AARCH64_MIN_PAGE_SIZE - `1`)))
684	< `16`, `0`))
685	{
686	/ Slow path: the string starts near a possible page boundary. /
687	uint32_t misalign, mask;
688
689	misalign = (uintptr_t)s & `15`;
690	mask = (-`1u` << misalign) & `0xffff`;
691	data = vld1q_u8 (p);
692	t = vceqq_u8 (data, repl_nl);
693	u = vceqq_u8 (data, repl_cr);
694	v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
695	w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
696	t = vorrq_u8 (v, w);
697	t = vandq_u8 (t, xmask);
698	m = vpaddlq_u8 (t);
699	m = vshlq_u16 (m, shift);
700	found = vaddvq_u16 (m);
701	found &= mask;
702	if (found)
703	return (const uchar*)p + __builtin_ctz (found);
704	}
705	else
706	{
707	data = vld1q_u8 ((const uint8_t *) s);
708	t = vceqq_u8 (data, repl_nl);
709	u = vceqq_u8 (data, repl_cr);
710	v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
711	w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
712	t = vorrq_u8 (v, w);
713	if (__builtin_expect (vpaddd_u64 ((uint64x2_t)t) != `0`, `0`))
714	goto done;
715	}
716
717	do
718	{
719	p += `16`;
720	data = vld1q_u8 (p);
721	t = vceqq_u8 (data, repl_nl);
722	u = vceqq_u8 (data, repl_cr);
723	v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
724	w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
725	t = vorrq_u8 (v, w);
726	} while (!vpaddd_u64 ((uint64x2_t)t));
727
728	done:
729	/ Now that we've found the terminating substring, work out precisely where*
730	we need to stop. /*
731	t = vandq_u8 (t, xmask);
732	m = vpaddlq_u8 (t);
733	m = vshlq_u16 (m, shift);
734	found = vaddvq_u16 (m);
735	return (((((uintptr_t) p) < (uintptr_t) s) ? s : (const uchar *)p)
736	+ __builtin_ctz (found));
737	}
738
739	#elif defined (__ARM_NEON)
740	#include "arm_neon.h"
741
742	static const uchar *
743	search_line_fast (const uchar s, const* uchar *end ATTRIBUTE_UNUSED)
744	{
745	const uint8x16_t repl_nl = vdupq_n_u8 (`'\n'`);
746	const uint8x16_t repl_cr = vdupq_n_u8 (`'\r'`);
747	const uint8x16_t repl_bs = vdupq_n_u8 (`'\\'`);
748	const uint8x16_t repl_qm = vdupq_n_u8 (`'?'`);
749	const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (`0x8040201008040201ULL`);
750
751	unsigned int misalign, found, mask;
752	const uint8_t *p;
753	uint8x16_t data;
754
755	/ Align the source pointer. /
756	misalign = (uintptr_t)s & `15`;
757	p = (const uint8_t *)((uintptr_t)s & -`16`);
758	data = vld1q_u8 (p);
759
760	/ Create a mask for the bytes that are valid within the first*
761	16-byte block. The Idea here is that the AND with the mask
762	within the loop is "free", since we need some AND or TEST
763	insn in order to set the flags for the branch anyway. /*
764	mask = (-`1u` << misalign) & `0xffff`;
765
766	/ Main loop, processing 16 bytes at a time. /
767	goto start;
768
769	do
770	{
771	uint8x8_t l;
772	uint16x4_t m;
773	uint32x2_t n;
774	uint8x16_t t, u, v, w;
775
776	p += `16`;
777	data = vld1q_u8 (p);
778	mask = `0xffff`;
779
780	start:
781	t = vceqq_u8 (data, repl_nl);
782	u = vceqq_u8 (data, repl_cr);
783	v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
784	w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
785	t = vandq_u8 (vorrq_u8 (v, w), xmask);
786	l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
787	m = vpaddl_u8 (l);
788	n = vpaddl_u16 (m);
789
790	found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
791	vshr_n_u64 ((uint64x1_t) n, `24`)), `0`);
792	found &= mask;
793	}
794	while (!found);
795
796	/ FOUND contains 1 in bits for which we matched a relevant*
797	character. Conversion to the byte index is trivial. /*
798	found = __builtin_ctz (found);
799	return (const uchar *)p + found;
800	}
801
802	#else
803
804	/ We only have one accelerated alternative. Use a direct call so that*
805	we encourage inlining. /*
806
807	#define search_line_fast search_line_acc_char
808
809	#endif
810
/* Initialize the lexer if needed.  This only does work on hosts where
   HAVE_init_vectorized_lexer is defined; elsewhere it is a no-op.  */

void
_cpp_init_lexer (void)
{
#ifdef HAVE_init_vectorized_lexer
  init_vectorized_lexer ();
#endif
}
820
821	/ Look for leading whitespace style issues on lines which don't contain*
822	just whitespace.
823	For -Wleading-whitespace=spaces report if such lines contain leading
824	whitespace other than spaces.
825	For -Wleading-whitespace=tabs report if such lines contain leading
826	whitespace other than tabs.
827	For -Wleading-whitespace=blanks report if such lines contain leading
828	whitespace other than spaces+tabs, or contain in it tab after space,
829	or -ftabstop= or more consecutive spaces. /*
830
831	static void
832	find_leading_whitespace_issues (cpp_reader pfile, const* uchar *s)
833	{
834	const unsigned char *p = NULL;
835	uchar type = `'L'`;
836	switch (CPP_OPTION (pfile, cpp_warn_leading_whitespace))
837	{
838	case `1`: / spaces /
839	while (*s == `' '`)
840	++s;
841	break;
842	case `2`: / tabs /
843	while (*s == `'\t'`)
844	++s;
845	break;
846	case `3`: / blanks /
847	while (*s == `'\t'`)
848	++s;
849	int n;
850	n = CPP_OPTION (pfile, cpp_tabstop);
851	while (*s == `' '`)
852	{
853	if (--n == `0`)
854	break;
855	++s;
856	}
857	if (*s == `'\t'`)
858	type = `'T'`; / Tab after space. /
859	else if (*s == `' '`)
860	type = `'S'`; / Too many spaces. /
861	break;
862	default:
863	abort ();
864	}
865	if (!IS_NVSPACE (*s))
866	return;
867	p = s++;
868	while (IS_NVSPACE (*s))
869	++s;
870	if (s != `'\n'` && s != `'\r'`)
871	add_line_note (buffer: pfile->buffer, pos: p, type);
872	}
873
874	/ Returns with a logical line that contains no escaped newlines or*
875	trigraphs. This is a time-critical inner loop. /*
876	void
877	_cpp_clean_line (cpp_reader *pfile)
878	{
879	cpp_buffer *buffer;
880	const uchar *s;
881	uchar c, d, p;
882
883	buffer = pfile->buffer;
884	buffer->cur_note = buffer->notes_used = `0`;
885	buffer->cur = buffer->line_base = buffer->next_line;
886	buffer->need_line = false;
887	s = buffer->next_line;
888
889	if (!buffer->from_stage3)
890	{
891	const uchar *pbackslash = NULL;
892	bool leading_ws_done = true;
893
894	if (CPP_OPTION (pfile, cpp_warn_leading_whitespace))
895	find_leading_whitespace_issues (pfile, s);
896
897	/ Fast path. This is the common case of an un-escaped line with*
898	no trigraphs. The primary win here is by not writing any
899	data back to memory until we have to. /*
900	while (`1`)
901	{
902	/ Perform an optimized search for \n, \r, \\, ?. /
903	s = search_line_fast (s, end: buffer->rlimit);
904
905	c = *s;
906	if (c == `'\\'`)
907	{
908	/ Record the location of the backslash and continue. /
909	pbackslash = s++;
910	}
911	else if (__builtin_expect (c == `'?'`, `0`))
912	{
913	if (__builtin_expect (s[`1`] == `'?'`, false)
914	&& _cpp_trigraph_map[s[`2`]])
915	{
916	/ Have a trigraph. We may or may not have to convert*
917	it. Add a line note regardless, for -Wtrigraphs. /*
918	add_line_note (buffer, pos: s, type: s[`2`]);
919	if (CPP_OPTION (pfile, trigraphs))
920	{
921	/ We do, and that means we have to switch to the*
922	slow path. /*
923	d = (uchar *) s;
924	*d = _cpp_trigraph_map[s[`2`]];
925	s += `2`;
926	goto slow_path;
927	}
928	}
929	/ Not a trigraph. Continue on fast-path. /
930	s++;
931	}
932	else
933	break;
934	}
935
936	/ This must be \r or \n. We're either done, or we'll be forced*
937	to write back to the buffer and continue on the slow path. /*
938	d = (uchar *) s;
939
940	if (__builtin_expect (s == buffer->rlimit, false))
941	goto done;
942
943	/ DOS line ending? /
944	if (__builtin_expect (c == `'\r'`, false) && s[`1`] == `'\n'`)
945	{
946	s++;
947	if (s == buffer->rlimit)
948	goto done;
949	}
950
951	if (__builtin_expect (pbackslash == NULL, true))
952	goto done;
953
954	/ Check for escaped newline. /
955	p = d;
956	while (is_nvspace (p[-`1`]))
957	p--;
958	if (p - `1` != pbackslash)
959	goto done;
960
961	/ Have an escaped newline; process it and proceed to*
962	the slow path. /*
963	add_line_note (buffer, pos: p - `1`, type: p != d ? `' '` : `'\\'`);
964	d = p - `2`;
965	buffer->next_line = p - `1`;
966	leading_ws_done = false;
967
968	slow_path:
969	while (`1`)
970	{
971	c = *++s;
972	*++d = c;
973
974	if (c == `'\n'` \|\| c == `'\r'`)
975	{
976	if (CPP_OPTION (pfile, cpp_warn_leading_whitespace)
977	&& !leading_ws_done)
978	find_leading_whitespace_issues (pfile, s: buffer->next_line);
979
980	/ Handle DOS line endings. /
981	if (c == `'\r'` && s != buffer->rlimit && s[`1`] == `'\n'`)
982	s++;
983	if (s == buffer->rlimit)
984	break;
985
986	/ Escaped? /
987	p = d;
988	while (p != buffer->next_line && is_nvspace (p[-`1`]))
989	p--;
990	if (p == buffer->next_line \|\| p[-`1`] != `'\\'`)
991	break;
992
993	add_line_note (buffer, pos: p - `1`, type: p != d ? `' '` : `'\\'`);
994	d = p - `2`;
995	buffer->next_line = p - `1`;
996	leading_ws_done = false;
997	}
998	else if (c == `'?'` && s[`1`] == `'?'` && _cpp_trigraph_map[s[`2`]])
999	{
1000	if (CPP_OPTION (pfile, cpp_warn_leading_whitespace)
1001	&& !leading_ws_done)
1002	{
1003	find_leading_whitespace_issues (pfile, s: buffer->next_line);
1004	leading_ws_done = true;
1005	}
1006
1007	/ Add a note regardless, for the benefit of -Wtrigraphs. /
1008	add_line_note (buffer, pos: d, type: s[`2`]);
1009	if (CPP_OPTION (pfile, trigraphs))
1010	{
1011	*d = _cpp_trigraph_map[s[`2`]];
1012	s += `2`;
1013	}
1014	}
1015	}
1016	done:
1017	if (d > buffer->next_line
1018	&& CPP_OPTION (pfile, cpp_warn_trailing_whitespace))
1019	switch (CPP_OPTION (pfile, cpp_warn_trailing_whitespace))
1020	{
1021	case `1`:
1022	if (ISBLANK (d[-`1`]))
1023	add_line_note (buffer, pos: d - `1`, type: `'W'`);
1024	break;
1025	case `2`:
1026	if (IS_NVSPACE (d[-`1`]) && d[-`1`])
1027	add_line_note (buffer, pos: d - `1`, type: `'W'`);
1028	break;
1029	}
1030	}
1031	else
1032	{
1033	while (s != `'\n'` && s != `'\r'`)
1034	s++;
1035	d = (uchar *) s;
1036
1037	/ Handle DOS line endings. /
1038	if (*s == `'\r'` && s + `1` != buffer->rlimit && s[`1`] == `'\n'`)
1039	s++;
1040	}
1041
1042	*d = `'\n'`;
1043	/ A sentinel note that should never be processed. /
1044	add_line_note (buffer, pos: d + `1`, type: `'\n'`);
1045	buffer->next_line = s + `1`;
1046	}
1047
/* Forward declaration only; the definition is not adjacent to this
   declaration.
   NOTE(review): the LEXING_RAW_STRING template parameter presumably
   selects the variant used while lexing a raw string literal -- confirm
   against the definition.  */
template <bool lexing_raw_string>
static bool get_fresh_line_impl (cpp_reader *pfile);
1050
1051	/ Return true if the trigraph indicated by NOTE should be warned*
1052	about in a comment. /*
1053	static bool
1054	warn_in_comment (cpp_reader pfile, _cpp_line_note note)
1055	{
1056	const uchar *p;
1057
1058	/ Within comments we don't warn about trigraphs, unless the*
1059	trigraph forms an escaped newline, as that may change
1060	behavior. /*
1061	if (note->type != `'/'`)
1062	return false;
1063
1064	/ If -trigraphs, then this was an escaped newline iff the next note*
1065	is coincident. /*
1066	if (CPP_OPTION (pfile, trigraphs))
1067	return note[`1`].pos == note->pos;
1068
1069	/ Otherwise, see if this forms an escaped newline. /
1070	p = note->pos + `3`;
1071	while (is_nvspace (*p))
1072	p++;
1073
1074	/ There might have been escaped newlines between the trigraph and the*
1075	newline we found. Hence the position test. /*
1076	return (*p == `'\n'` && p < note[`1`].pos);
1077	}
1078
1079	/ Process the notes created by add_line_note as far as the current*
1080	location. /*
1081	void
1082	_cpp_process_line_notes (cpp_reader pfile, int* in_comment)
1083	{
1084	cpp_buffer *buffer = pfile->buffer;
1085
1086	for (;;)
1087	{
1088	_cpp_line_note *note = &buffer->notes[buffer->cur_note];
1089	unsigned int col;
1090
1091	if (note->pos > buffer->cur)
1092	break;
1093
1094	buffer->cur_note++;
1095	col = CPP_BUF_COLUMN (buffer, note->pos + `1`);
1096
1097	if (note->type == `'\\'` \|\| note->type == `' '`)
1098	{
1099	if (note->type == `' '`)
1100	{
1101	if (!in_comment)
1102	cpp_error_with_line (pfile, CPP_DL_WARNING,
1103	pfile->line_table->highest_line, col,
1104	msgid: "backslash and newline separated by "
1105	"space");
1106	else if (CPP_OPTION (pfile, cpp_warn_trailing_whitespace))
1107	cpp_warning_with_line (pfile, CPP_W_TRAILING_WHITESPACE,
1108	pfile->line_table->highest_line, col,
1109	msgid: "trailing whitespace");
1110	}
1111
1112	if (buffer->next_line > buffer->rlimit)
1113	{
1114	cpp_error_with_line (pfile, CPP_DL_PEDWARN,
1115	pfile->line_table->highest_line, col,
1116	msgid: "backslash-newline at end of file");
1117	/ Prevent "no newline at end of file" warning. /
1118	buffer->next_line = buffer->rlimit;
1119	}
1120
1121	buffer->line_base = note->pos;
1122	CPP_INCREMENT_LINE (pfile, `0`);
1123	}
1124	else if (_cpp_trigraph_map[note->type])
1125	{
1126	if (CPP_OPTION (pfile, warn_trigraphs)
1127	&& (!in_comment \|\| warn_in_comment (pfile, note)))
1128	{
1129	if (CPP_OPTION (pfile, trigraphs))
1130	cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1131	pfile->line_table->highest_line, col,
1132	msgid: "trigraph %<??%c%> converted to %<%c%>",
1133	note->type,
1134	(int) _cpp_trigraph_map[note->type]);
1135	else
1136	cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1137	pfile->line_table->highest_line, col,
1138	msgid: "trigraph %<??%c%> ignored, use "
1139	"%<-trigraphs%> to enable", note->type);
1140	}
1141	}
1142	else if (note->type == `'W'`)
1143	cpp_warning_with_line (pfile, CPP_W_TRAILING_WHITESPACE,
1144	pfile->line_table->highest_line, col,
1145	msgid: "trailing whitespace");
1146	else if (note->type == `'S'`)
1147	cpp_warning_with_line (pfile, CPP_W_LEADING_WHITESPACE,
1148	pfile->line_table->highest_line, col,
1149	msgid: "too many consecutive spaces in leading "
1150	"whitespace");
1151	else if (note->type == `'T'`)
1152	cpp_warning_with_line (pfile, CPP_W_LEADING_WHITESPACE,
1153	pfile->line_table->highest_line, col,
1154	msgid: "tab after space in leading whitespace");
1155	else if (note->type == `'L'`)
1156	switch (CPP_OPTION (pfile, cpp_warn_leading_whitespace))
1157	{
1158	case `1`:
1159	cpp_warning_with_line (pfile, CPP_W_LEADING_WHITESPACE,
1160	pfile->line_table->highest_line, col,
1161	msgid: "whitespace other than spaces in leading "
1162	"whitespace");
1163	break;
1164	case `2`:
1165	cpp_warning_with_line (pfile, CPP_W_LEADING_WHITESPACE,
1166	pfile->line_table->highest_line, col,
1167	msgid: "whitespace other than tabs in leading "
1168	"whitespace");
1169	break;
1170	case `3`:
1171	cpp_warning_with_line (pfile, CPP_W_LEADING_WHITESPACE,
1172	pfile->line_table->highest_line, col,
1173	msgid: "whitespace other than spaces and tabs in "
1174	"leading whitespace");
1175	break;
1176	default:
1177	abort ();
1178	}
1179	else if (note->type == `0`)
1180	/ Already processed in lex_raw_string. /;
1181	else
1182	abort ();
1183	}
1184	}
1185
1186	namespace bidi {
1187	enum class kind {
1188	NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL
1189	};
1190
1191	/ All the UTF-8 encodings of bidi characters start with E2. /
1192	constexpr uchar utf8_start = `0xe2`;
1193
1194	struct context
1195	{
1196	context () {}
1197	context (location_t loc, kind k, bool pdf, bool ucn)
1198	: m_loc (loc), m_kind (k), m_pdf (pdf), m_ucn (ucn)
1199	{
1200	}
1201
1202	kind get_pop_kind () const
1203	{
1204	return m_pdf ? kind::PDF : kind::PDI;
1205	}
1206	bool ucn_p () const
1207	{
1208	return m_ucn;
1209	}
1210
1211	location_t m_loc;
1212	kind m_kind;
1213	unsigned m_pdf : `1`;
1214	unsigned m_ucn : `1`;
1215	};
1216
1217	/ A vector holding currently open bidi contexts. We use a char for*
1218	each context, its LSB is 1 if it represents a PDF context, 0 if it
1219	represents a PDI context. The next bit is 1 if this context was open
1220	by a bidi character written as a UCN, and 0 when it was UTF-8. /*
1221	semi_embedded_vec <context, `16`> vec;
1222
1223	/ Close the whole comment/identifier/string literal/character constant*
1224	context. /*
1225	void on_close ()
1226	{
1227	vec.truncate (len: `0`);
1228	}
1229
1230	/ Pop the last element in the vector. /
1231	void pop ()
1232	{
1233	unsigned int len = vec.count ();
1234	gcc_checking_assert (len > `0`);
1235	vec.truncate (len: len - `1`);
1236	}
1237
1238	/ Return the pop kind of the context of the Ith element. /
1239	kind pop_kind_at (unsigned int i)
1240	{
1241	return vec [i].get_pop_kind ();
1242	}
1243
1244	/ Return the pop kind of the context that is currently opened. /
1245	kind current_ctx ()
1246	{
1247	unsigned int len = vec.count ();
1248	if (len == `0`)
1249	return kind::NONE;
1250	return vec [len - `1`].get_pop_kind ();
1251	}
1252
1253	/ Return true if the current context comes from a UCN origin, that is,*
1254	the bidi char which started this bidi context was written as a UCN. /*
1255	bool current_ctx_ucn_p ()
1256	{
1257	unsigned int len = vec.count ();
1258	gcc_checking_assert (len > `0`);
1259	return vec [len - `1`].m_ucn;
1260	}
1261
1262	location_t current_ctx_loc ()
1263	{
1264	unsigned int len = vec.count ();
1265	gcc_checking_assert (len > `0`);
1266	return vec [len - `1`].m_loc;
1267	}
1268
1269	/ We've read a bidi char, update the current vector as necessary.*
1270	LOC is only valid when K is not kind::NONE. /*
1271	void on_char (kind k, bool ucn_p, location_t loc)
1272	{
1273	switch (k)
1274	{
1275	case kind::LRE:
1276	case kind::RLE:
1277	case kind::LRO:
1278	case kind::RLO:
1279	vec.push (value: context (loc, k, true, ucn_p));
1280	break;
1281	case kind::LRI:
1282	case kind::RLI:
1283	case kind::FSI:
1284	vec.push (value: context (loc, k, false, ucn_p));
1285	break;
1286	/ PDF terminates the scope of the last LRE, RLE, LRO, or RLO*
1287	whose scope has not yet been terminated. /*
1288	case kind::PDF:
1289	if (current_ctx () == kind::PDF)
1290	pop ();
1291	break;
1292	/ PDI terminates the scope of the last LRI, RLI, or FSI whose*
1293	scope has not yet been terminated, as well as the scopes of
1294	any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not
1295	yet been terminated. /*
1296	case kind::PDI:
1297	for (int i = vec.count () - `1`; i >= `0`; --i)
1298	if (pop_kind_at (i) == kind::PDI)
1299	{
1300	vec.truncate (len: i);
1301	break;
1302	}
1303	break;
1304	case kind::LTR:
1305	case kind::RTL:
1306	/ These aren't popped by a PDF/PDI. /
1307	break;
1308	ATTR_LIKELY case kind::NONE:
1309	break;
1310	default:
1311	abort ();
1312	}
1313	}
1314
1315	/ Return a descriptive string for K. /
1316	const char *to_str (kind k)
1317	{
1318	switch (k)
1319	{
1320	case kind::LRE:
1321	return "U+202A (LEFT-TO-RIGHT EMBEDDING)";
1322	case kind::RLE:
1323	return "U+202B (RIGHT-TO-LEFT EMBEDDING)";
1324	case kind::LRO:
1325	return "U+202D (LEFT-TO-RIGHT OVERRIDE)";
1326	case kind::RLO:
1327	return "U+202E (RIGHT-TO-LEFT OVERRIDE)";
1328	case kind::LRI:
1329	return "U+2066 (LEFT-TO-RIGHT ISOLATE)";
1330	case kind::RLI:
1331	return "U+2067 (RIGHT-TO-LEFT ISOLATE)";
1332	case kind::FSI:
1333	return "U+2068 (FIRST STRONG ISOLATE)";
1334	case kind::PDF:
1335	return "U+202C (POP DIRECTIONAL FORMATTING)";
1336	case kind::PDI:
1337	return "U+2069 (POP DIRECTIONAL ISOLATE)";
1338	case kind::LTR:
1339	return "U+200E (LEFT-TO-RIGHT MARK)";
1340	case kind::RTL:
1341	return "U+200F (RIGHT-TO-LEFT MARK)";
1342	default:
1343	abort ();
1344	}
1345	}
1346	}
1347
1348	/ Get location_t for the range of bytes [START, START + NUM_BYTES)*
1349	within the current line in FILE, with the caret at START. /*
1350
1351	static location_t
1352	get_location_for_byte_range_in_cur_line (cpp_reader *pfile,
1353	const unsigned char *const start,
1354	size_t num_bytes)
1355	{
1356	gcc_checking_assert (num_bytes > `0`);
1357
1358	/ CPP_BUF_COLUMN and linemap_position_for_column both refer*
1359	to offsets in bytes, but CPP_BUF_COLUMN is 0-based,
1360	whereas linemap_position_for_column is 1-based. /*
1361
1362	/ Get 0-based offsets within the line. /
1363	size_t start_offset = CPP_BUF_COLUMN (pfile->buffer, start);
1364	size_t end_offset = start_offset + num_bytes - `1`;
1365
1366	/ Now convert to location_t, where "columns" are 1-based byte offsets. /
1367	location_t start_loc = linemap_position_for_column (pfile->line_table,
1368	start_offset + `1`);
1369	location_t end_loc = linemap_position_for_column (pfile->line_table,
1370	end_offset + `1`);
1371
1372	if (start_loc == end_loc)
1373	return start_loc;
1374
1375	source_range src_range;
1376	src_range.m_start = start_loc;
1377	src_range.m_finish = end_loc;
1378	location_t combined_loc
1379	= pfile->line_table->get_or_create_combined_loc (locus: start_loc,
1380	src_range,
1381	data: nullptr,
1382	discriminator: `0`);
1383	return combined_loc;
1384	}
1385
1386	/ Parse a sequence of 3 bytes starting with P and return its bidi code. /
1387
1388	static bidi::kind
1389	get_bidi_utf8_1 (const unsigned char *const p)
1390	{
1391	gcc_checking_assert (p[`0`] == bidi::utf8_start);
1392
1393	if (p[`1`] == `0x80`)
1394	switch (p[`2`])
1395	{
1396	case `0xaa`:
1397	return bidi::kind::LRE;
1398	case `0xab`:
1399	return bidi::kind::RLE;
1400	case `0xac`:
1401	return bidi::kind::PDF;
1402	case `0xad`:
1403	return bidi::kind::LRO;
1404	case `0xae`:
1405	return bidi::kind::RLO;
1406	case `0x8e`:
1407	return bidi::kind::LTR;
1408	case `0x8f`:
1409	return bidi::kind::RTL;
1410	default:
1411	break;
1412	}
1413	else if (p[`1`] == `0x81`)
1414	switch (p[`2`])
1415	{
1416	case `0xa6`:
1417	return bidi::kind::LRI;
1418	case `0xa7`:
1419	return bidi::kind::RLI;
1420	case `0xa8`:
1421	return bidi::kind::FSI;
1422	case `0xa9`:
1423	return bidi::kind::PDI;
1424	default:
1425	break;
1426	}
1427
1428	return bidi::kind::NONE;
1429	}
1430
1431	/ Parse a sequence of 3 bytes starting with P and return its bidi code.*
1432	If the kind is not NONE, write the location to OUT./
1433
1434	static bidi::kind
1435	get_bidi_utf8 (cpp_reader pfile, const* unsigned char *const p, location_t *out)
1436	{
1437	bidi::kind result = get_bidi_utf8_1 (p);
1438	if (result != bidi::kind::NONE)
1439	{
1440	/ We have a sequence of 3 bytes starting at P. /
1441	*out = get_location_for_byte_range_in_cur_line (pfile, start: p, num_bytes: `3`);
1442	}
1443	return result;
1444	}
1445
1446	/ Parse a UCN where P points just past \u or \U and return its bidi code. /
1447
1448	static bidi::kind
1449	get_bidi_ucn_1 (const unsigned char p, bool* is_U, const unsigned char **end)
1450	{
1451	/ 6.4.3 Universal Character Names*
1452	\u hex-quad
1453	\U hex-quad hex-quad
1454	\u { simple-hexadecimal-digit-sequence }
1455	where \unnnn means \U0000nnnn. /*
1456
1457	*end = p + `4`;
1458	if (is_U)
1459	{
1460	if (p[`0`] != `'0'` \|\| p[`1`] != `'0'` \|\| p[`2`] != `'0'` \|\| p[`3`] != `'0'`)
1461	return bidi::kind::NONE;
1462	/ Skip 4B so we can treat \u and \U the same below. /
1463	p += `4`;
1464	*end += `4`;
1465	}
1466	else if (p[`0`] == `'{'`)
1467	{
1468	p++;
1469	while (*p == `'0'`)
1470	p++;
1471	if (p[`0`] != `'2'`
1472	\|\| p[`1`] != `'0'`
1473	\|\| !ISXDIGIT (p[`2`])
1474	\|\| !ISXDIGIT (p[`3`])
1475	\|\| p[`4`] != `'}'`)
1476	return bidi::kind::NONE;
1477	*end = p + `5`;
1478	}
1479
1480	/ All code points we are looking for start with 20xx. /
1481	if (p[`0`] != `'2'` \|\| p[`1`] != `'0'`)
1482	return bidi::kind::NONE;
1483	else if (p[`2`] == `'2'`)
1484	switch (p[`3`])
1485	{
1486	case `'a'`:
1487	case `'A'`:
1488	return bidi::kind::LRE;
1489	case `'b'`:
1490	case `'B'`:
1491	return bidi::kind::RLE;
1492	case `'c'`:
1493	case `'C'`:
1494	return bidi::kind::PDF;
1495	case `'d'`:
1496	case `'D'`:
1497	return bidi::kind::LRO;
1498	case `'e'`:
1499	case `'E'`:
1500	return bidi::kind::RLO;
1501	default:
1502	break;
1503	}
1504	else if (p[`2`] == `'6'`)
1505	switch (p[`3`])
1506	{
1507	case `'6'`:
1508	return bidi::kind::LRI;
1509	case `'7'`:
1510	return bidi::kind::RLI;
1511	case `'8'`:
1512	return bidi::kind::FSI;
1513	case `'9'`:
1514	return bidi::kind::PDI;
1515	default:
1516	break;
1517	}
1518	else if (p[`2`] == `'0'`)
1519	switch (p[`3`])
1520	{
1521	case `'e'`:
1522	case `'E'`:
1523	return bidi::kind::LTR;
1524	case `'f'`:
1525	case `'F'`:
1526	return bidi::kind::RTL;
1527	default:
1528	break;
1529	}
1530
1531	return bidi::kind::NONE;
1532	}
1533
1534	/ Parse a UCN where P points just past \u or \U and return its bidi code.*
1535	If the kind is not NONE, write the location to OUT. /
1536
1537	static bidi::kind
1538	get_bidi_ucn (cpp_reader pfile, const* unsigned char p, bool* is_U,
1539	location_t *out)
1540	{
1541	const unsigned char *end;
1542	bidi::kind result = get_bidi_ucn_1 (p, is_U, end: &end);
1543	if (result != bidi::kind::NONE)
1544	{
1545	const unsigned char *start = p - `2`;
1546	size_t num_bytes = end - start;
1547	*out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
1548	}
1549	return result;
1550	}
1551
1552	/ Parse a named universal character escape where P points just past \N and*
1553	return its bidi code. If the kind is not NONE, write the location to
1554	OUT. /
1555
1556	static bidi::kind
1557	get_bidi_named (cpp_reader pfile, const* unsigned char p, location_t out)
1558	{
1559	bidi::kind result = bidi::kind::NONE;
1560	if (*p != `'{'`)
1561	return bidi::kind::NONE;
1562	if (strncmp (s1: (const char *) (p + `1`), s2: "LEFT-TO-RIGHT ", n: `14`) == `0`)
1563	{
1564	if (strncmp (s1: (const char *) (p + `15`), s2: "MARK}", n: `5`) == `0`)
1565	result = bidi::kind::LTR;
1566	else if (strncmp (s1: (const char *) (p + `15`), s2: "EMBEDDING}", n: `10`) == `0`)
1567	result = bidi::kind::LRE;
1568	else if (strncmp (s1: (const char *) (p + `15`), s2: "OVERRIDE}", n: `9`) == `0`)
1569	result = bidi::kind::LRO;
1570	else if (strncmp (s1: (const char *) (p + `15`), s2: "ISOLATE}", n: `8`) == `0`)
1571	result = bidi::kind::LRI;
1572	}
1573	else if (strncmp (s1: (const char *) (p + `1`), s2: "RIGHT-TO-LEFT ", n: `14`) == `0`)
1574	{
1575	if (strncmp (s1: (const char *) (p + `15`), s2: "MARK}", n: `5`) == `0`)
1576	result = bidi::kind::RTL;
1577	else if (strncmp (s1: (const char *) (p + `15`), s2: "EMBEDDING}", n: `10`) == `0`)
1578	result = bidi::kind::RLE;
1579	else if (strncmp (s1: (const char *) (p + `15`), s2: "OVERRIDE}", n: `9`) == `0`)
1580	result = bidi::kind::RLO;
1581	else if (strncmp (s1: (const char *) (p + `15`), s2: "ISOLATE}", n: `8`) == `0`)
1582	result = bidi::kind::RLI;
1583	}
1584	else if (strncmp (s1: (const char *) (p + `1`), s2: "POP DIRECTIONAL ", n: `16`) == `0`)
1585	{
1586	if (strncmp (s1: (const char *) (p + `16`), s2: "FORMATTING}", n: `11`) == `0`)
1587	result = bidi::kind::PDF;
1588	else if (strncmp (s1: (const char *) (p + `16`), s2: "ISOLATE}", n: `8`) == `0`)
1589	result = bidi::kind::PDI;
1590	}
1591	else if (strncmp (s1: (const char *) (p + `1`), s2: "FIRST STRONG ISOLATE}", n: `21`) == `0`)
1592	result = bidi::kind::FSI;
1593	if (result != bidi::kind::NONE)
1594	*out = get_location_for_byte_range_in_cur_line (pfile, start: p - `2`,
1595	num_bytes: (strchr (s: (const char *)
1596	(p + `1`), c: `'}'`)
1597	- (const char *) p)
1598	+ `3`);
1599	return result;
1600	}
1601
1602	/ Subclass of rich_location for reporting on unpaired UTF-8*
1603	bidirectional control character(s).
1604	Escape the source lines on output, and show all unclosed
1605	bidi context, labelling everything. /*
1606
1607	class unpaired_bidi_rich_location : public rich_location
1608	{
1609	public:
1610	class custom_range_label : public range_label
1611	{
1612	public:
1613	label_text get_text (unsigned range_idx) const final override
1614	{
1615	/ range 0 is the primary location; each subsequent range i + 1*
1616	is for bidi::vec[i]. /*
1617	if (range_idx > `0`)
1618	{
1619	const bidi::context &ctxt (bidi::vec [range_idx - `1`]);
1620	return label_text::borrow (buffer: bidi::to_str (k: ctxt.m_kind));
1621	}
1622	else
1623	return label_text::borrow (_("end of bidirectional context"));
1624	}
1625	};
1626
1627	unpaired_bidi_rich_location (cpp_reader *pfile, location_t loc)
1628	: rich_location (pfile->line_table, loc, &m_custom_label)
1629	{
1630	set_escape_on_output (true);
1631	for (unsigned i = `0`; i < bidi::vec.count (); i++)
1632	add_range (loc: bidi::vec [i].m_loc,
1633	range_display_kind: SHOW_RANGE_WITHOUT_CARET,
1634	label: &m_custom_label);
1635	}
1636
1637	private:
1638	custom_range_label m_custom_label;
1639	};
1640
1641	/ We're closing a bidi context, that is, we've encountered a newline,*
1642	are closing a C-style comment, or are at the end of a string literal,
1643	character constant, or identifier. Warn if this context was not
1644	properly terminated by a PDI or PDF. P points to the last character
1645	in this context. /*
1646
1647	static void
1648	maybe_warn_bidi_on_close (cpp_reader pfile, const* uchar *p)
1649	{
1650	const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
1651	if (bidi::vec.count () > `0`
1652	&& (warn_bidi & bidirectional_unpaired
1653	&& (!bidi::current_ctx_ucn_p ()
1654	\|\| (warn_bidi & bidirectional_ucn))))
1655	{
1656	const location_t loc
1657	= linemap_position_for_column (pfile->line_table,
1658	CPP_BUF_COLUMN (pfile->buffer, p));
1659	unpaired_bidi_rich_location rich_loc (pfile, loc);
1660	/ cpp_callbacks doesn't yet have a way to handle singular vs plural*
1661	forms of a diagnostic, so fake it for now. /*
1662	if (bidi::vec.count () > `1`)
1663	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, richloc: &rich_loc,
1664	msgid: "unpaired UTF-8 bidirectional control characters "
1665	"detected");
1666	else
1667	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, richloc: &rich_loc,
1668	msgid: "unpaired UTF-8 bidirectional control character "
1669	"detected");
1670	}
1671	/ We're done with this context. /
1672	bidi::on_close ();
1673	}
1674
1675	/ We're at the beginning or in the middle of an identifier/comment/string*
1676	literal/character constant. Warn if we've encountered a bidi character.
1677	KIND says which bidi control character it was; UCN_P is true iff this bidi
1678	control character was written as a UCN. LOC is the location of the
1679	character, but is only valid if KIND != bidi::kind::NONE. /*
1680
1681	static void
1682	maybe_warn_bidi_on_char (cpp_reader *pfile, bidi::kind kind,
1683	bool ucn_p, location_t loc)
1684	{
1685	if (__builtin_expect (kind == bidi::kind::NONE, `1`))
1686	return;
1687
1688	const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
1689
1690	if (warn_bidi & (bidirectional_unpaired\|bidirectional_any))
1691	{
1692	rich_location rich_loc (pfile->line_table, loc);
1693	rich_loc.set_escape_on_output (true);
1694
1695	/ It seems excessive to warn about a PDI/PDF that is closing*
1696	an opened context because we've already warned about the
1697	opening character. Except warn when we have a UCN x UTF-8
1698	mismatch, if UCN checking is enabled. /*
1699	if (kind == bidi::current_ctx ())
1700	{
1701	if (warn_bidi == (bidirectional_unpaired\|bidirectional_ucn)
1702	&& bidi::current_ctx_ucn_p () != ucn_p)
1703	{
1704	rich_loc.add_range (loc: bidi::current_ctx_loc ());
1705	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, richloc: &rich_loc,
1706	msgid: "UTF-8 vs UCN mismatch when closing "
1707	"a context by %qs", bidi::to_str (k: kind));
1708	}
1709	}
1710	else if (warn_bidi & bidirectional_any
1711	&& (!ucn_p \|\| (warn_bidi & bidirectional_ucn)))
1712	{
1713	if (kind == bidi::kind::PDF \|\| kind == bidi::kind::PDI)
1714	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, richloc: &rich_loc,
1715	msgid: "%qs is closing an unopened context",
1716	bidi::to_str (k: kind));
1717	else
1718	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, richloc: &rich_loc,
1719	msgid: "found problematic Unicode character %qs",
1720	bidi::to_str (k: kind));
1721	}
1722	}
1723	/ We're done with this context. /
1724	bidi::on_char (k: kind, ucn_p, loc);
1725	}
1726
/* Byte thresholds used by the UTF-8 checks below: a byte B is accepted
   as a continuation byte when utf8_continuation <= B < utf8_signifier,
   and bytes >= utf8_signifier are treated as possible lead bytes of a
   multibyte sequence.  */
static const cppchar_t utf8_continuation = 0x80;
static const cppchar_t utf8_signifier = 0xC0;
1729
1730	/ Emit -Winvalid-utf8 warning on invalid UTF-8 character starting*
1731	at PFILE->buffer->cur. Return a pointer after the diagnosed
1732	invalid character. /*
1733
1734	static const uchar *
1735	_cpp_warn_invalid_utf8 (cpp_reader *pfile)
1736	{
1737	cpp_buffer *buffer = pfile->buffer;
1738	const uchar *cur = buffer->cur;
1739	bool pedantic = (CPP_PEDANTIC (pfile)
1740	&& CPP_OPTION (pfile, cpp_warn_invalid_utf8) == `2`);
1741
1742	if (cur[`0`] < utf8_signifier
1743	\|\| cur[`1`] < utf8_continuation \|\| cur[`1`] >= utf8_signifier)
1744	{
1745	if (pedantic)
1746	cpp_error_with_line (pfile, CPP_DL_PEDWARN,
1747	pfile->line_table->highest_line,
1748	CPP_BUF_COL (buffer),
1749	msgid: "invalid UTF-8 character %<<%x>%>",
1750	cur[`0`]);
1751	else
1752	cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
1753	pfile->line_table->highest_line,
1754	CPP_BUF_COL (buffer),
1755	msgid: "invalid UTF-8 character %<<%x>%>",
1756	cur[`0`]);
1757	return cur + `1`;
1758	}
1759	else if (cur[`2`] < utf8_continuation \|\| cur[`2`] >= utf8_signifier)
1760	{
1761	if (pedantic)
1762	cpp_error_with_line (pfile, CPP_DL_PEDWARN,
1763	pfile->line_table->highest_line,
1764	CPP_BUF_COL (buffer),
1765	msgid: "invalid UTF-8 character %<<%x><%x>%>",
1766	cur[`0`], cur[`1`]);
1767	else
1768	cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
1769	pfile->line_table->highest_line,
1770	CPP_BUF_COL (buffer),
1771	msgid: "invalid UTF-8 character %<<%x><%x>%>",
1772	cur[`0`], cur[`1`]);
1773	return cur + `2`;
1774	}
1775	else if (cur[`3`] < utf8_continuation \|\| cur[`3`] >= utf8_signifier)
1776	{
1777	if (pedantic)
1778	cpp_error_with_line (pfile, CPP_DL_PEDWARN,
1779	pfile->line_table->highest_line,
1780	CPP_BUF_COL (buffer),
1781	msgid: "invalid UTF-8 character %<<%x><%x><%x>%>",
1782	cur[`0`], cur[`1`], cur[`2`]);
1783	else
1784	cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
1785	pfile->line_table->highest_line,
1786	CPP_BUF_COL (buffer),
1787	msgid: "invalid UTF-8 character %<<%x><%x><%x>%>",
1788	cur[`0`], cur[`1`], cur[`2`]);
1789	return cur + `3`;
1790	}
1791	else
1792	{
1793	if (pedantic)
1794	cpp_error_with_line (pfile, CPP_DL_PEDWARN,
1795	pfile->line_table->highest_line,
1796	CPP_BUF_COL (buffer),
1797	msgid: "invalid UTF-8 character %<<%x><%x><%x><%x>%>",
1798	cur[`0`], cur[`1`], cur[`2`], cur[`3`]);
1799	else
1800	cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
1801	pfile->line_table->highest_line,
1802	CPP_BUF_COL (buffer),
1803	msgid: "invalid UTF-8 character %<<%x><%x><%x><%x>%>",
1804	cur[`0`], cur[`1`], cur[`2`], cur[`3`]);
1805	return cur + `4`;
1806	}
1807	}
1808
1809	/ Helper function of skip__comment and lex_string. For C,
1810	character at CUR[-1] with MSB set handle -Wbidi-chars and*
1811	-Winvalid-utf8 diagnostics and return pointer to first character
1812	that should be processed next. /*
1813
1814	static inline const uchar *
1815	_cpp_handle_multibyte_utf8 (cpp_reader *pfile, uchar c,
1816	const uchar cur, bool* warn_bidi_p,
1817	bool warn_invalid_utf8_p)
1818	{
1819	/ If this is a beginning of a UTF-8 encoding, it might be*
1820	a bidirectional control character. /*
1821	if (c == bidi::utf8_start && warn_bidi_p)
1822	{
1823	location_t loc;
1824	bidi::kind kind = get_bidi_utf8 (pfile, p: cur - `1`, out: &loc);
1825	maybe_warn_bidi_on_char (pfile, kind, /ucn_p=/false, loc);
1826	}
1827	if (!warn_invalid_utf8_p)
1828	return cur;
1829	if (c >= utf8_signifier)
1830	{
1831	cppchar_t s;
1832	const uchar *pstr = cur - `1`;
1833	if (_cpp_valid_utf8 (pfile, pstr: &pstr, limit: pfile->buffer->rlimit, identifier_pos: `0`, NULL, cp: &s)
1834	&& s <= UCS_LIMIT)
1835	return pstr;
1836	}
1837	pfile->buffer->cur = cur - `1`;
1838	return _cpp_warn_invalid_utf8 (pfile);
1839	}
1840
/* Skip a C-style block comment.  We find the end of the comment by
   seeing if an asterisk is before every '/' we encounter.  Returns
   nonzero if comment terminated by EOF, zero otherwise.

   Buffer->cur points to the initial asterisk of the comment.  On a
   normal return buffer->cur points just past the closing '/'.  */
bool
_cpp_skip_block_comment (cpp_reader *pfile)
{
  cpp_buffer *buffer = pfile->buffer;
  const uchar *cur = buffer->cur;
  uchar c;
  const bool warn_bidi_p = pfile->warn_bidi_p ();
  const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
  const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;

  /* Step over the opening asterisk.  If a '/' follows immediately,
     step over it too, so that the '*' of the opener is not taken for
     the '*' of a closing delimiter.  */
  cur++;
  if (*cur == '/')
    cur++;

  for (;;)
    {
      /* People like decorating comments with '*', so check for '/'
         instead for efficiency.  */
      c = *cur++;

      if (c == '/')
        {
          /* CUR is already past the '/', so CUR[-2] is the character
             before it.  */
          if (cur[-2] == '*')
            {
              if (warn_bidi_p)
                maybe_warn_bidi_on_close (pfile, cur);
              break;
            }

          /* Warn about potential nested comments, but not if the '/'
             comes immediately before the true comment delimiter.
             Don't bother to get it right across escaped newlines.  */
          if (CPP_OPTION (pfile, warn_comments)
              && cur[0] == '*' && cur[1] != '/')
            {
              buffer->cur = cur;
              cpp_warning_with_line (pfile, CPP_W_COMMENTS,
                                     pfile->line_table->highest_line,
                                     CPP_BUF_COL (buffer),
                                     "%</*%> within comment");
            }
        }
      else if (c == '\n')
        {
          unsigned int cols;
          /* Leave buffer->cur on the newline while the pending line
             notes are processed.  */
          buffer->cur = cur - 1;
          if (warn_bidi_p)
            maybe_warn_bidi_on_close (pfile, cur);
          _cpp_process_line_notes (pfile, true);
          /* No further line to read: the comment is unterminated.  */
          if (buffer->next_line >= buffer->rlimit)
            return true;
          _cpp_clean_line (pfile);

          cols = buffer->next_line - buffer->line_base;
          CPP_INCREMENT_LINE (pfile, cols);

          /* Continue scanning from wherever _cpp_clean_line left
             buffer->cur.  */
          cur = buffer->cur;
        }
      else if (__builtin_expect (c >= utf8_continuation, 0)
               && warn_bidi_or_invalid_utf8_p)
        cur = _cpp_handle_multibyte_utf8 (pfile, c, cur, warn_bidi_p,
                                          warn_invalid_utf8_p);
    }

  buffer->cur = cur;
  _cpp_process_line_notes (pfile, true);
  return false;
}
1914
1915	/ Skip a C++ line comment, leaving buffer->cur pointing to the*
1916	terminating newline. Handles escaped newlines. Returns nonzero
1917	if a multiline comment. /*
1918	static int
1919	skip_line_comment (cpp_reader *pfile)
1920	{
1921	cpp_buffer *buffer = pfile->buffer;
1922	location_t orig_line = pfile->line_table->highest_line;
1923	const bool warn_bidi_p = pfile->warn_bidi_p ();
1924	const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
1925	const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p \| warn_invalid_utf8_p;
1926
1927	if (!warn_bidi_or_invalid_utf8_p)
1928	while (*buffer->cur != `'\n'`)
1929	buffer->cur++;
1930	else if (!warn_invalid_utf8_p)
1931	{
1932	while (*buffer->cur != `'\n'`
1933	&& *buffer->cur != bidi::utf8_start)
1934	buffer->cur++;
1935	if (__builtin_expect (*buffer->cur == bidi::utf8_start, `0`))
1936	{
1937	while (*buffer->cur != `'\n'`)
1938	{
1939	if (__builtin_expect (*buffer->cur == bidi::utf8_start, `0`))
1940	{
1941	location_t loc;
1942	bidi::kind kind = get_bidi_utf8 (pfile, p: buffer->cur, out: &loc);
1943	maybe_warn_bidi_on_char (pfile, kind, /ucn_p=/false, loc);
1944	}
1945	buffer->cur++;
1946	}
1947	maybe_warn_bidi_on_close (pfile, p: buffer->cur);
1948	}
1949	}
1950	else
1951	{
1952	while (*buffer->cur != `'\n'`)
1953	{
1954	if (*buffer->cur < utf8_continuation)
1955	{
1956	buffer->cur++;
1957	continue;
1958	}
1959	buffer->cur
1960	= _cpp_handle_multibyte_utf8 (pfile, c: *buffer->cur, cur: buffer->cur + `1`,
1961	warn_bidi_p, warn_invalid_utf8_p);
1962	}
1963	if (warn_bidi_p)
1964	maybe_warn_bidi_on_close (pfile, p: buffer->cur);
1965	}
1966
1967	_cpp_process_line_notes (pfile, in_comment: true);
1968	return orig_line != pfile->line_table->highest_line;
1969	}
1970
1971	/ Skips whitespace, saving the next non-whitespace character. /
1972	static void
1973	skip_whitespace (cpp_reader *pfile, cppchar_t c)
1974	{
1975	cpp_buffer *buffer = pfile->buffer;
1976	bool saw_NUL = false;
1977
1978	do
1979	{
1980	/ Horizontal space always OK. /
1981	if (c == `' '` \|\| c == `'\t'`)
1982	;
1983	/ Just \f \v or \0 left. /
1984	else if (c == `'\0'`)
1985	saw_NUL = true;
1986	else if (pfile->state.in_directive)
1987	cpp_pedwarning_with_line (pfile, CPP_W_PEDANTIC,
1988	pfile->line_table->highest_line,
1989	CPP_BUF_COL (buffer),
1990	msgid: "%s in preprocessing directive",
1991	c == `'\f'` ? "form feed" : "vertical tab");
1992
1993	c = *buffer->cur++;
1994	}
1995	/ We only want non-vertical space, i.e. ' ' \t \f \v \0. /
1996	while (is_nvspace (c));
1997
1998	if (saw_NUL)
1999	{
2000	encoding_rich_location rich_loc (pfile);
2001	cpp_error_at (pfile, CPP_DL_WARNING, richloc: &rich_loc,
2002	msgid: "null character(s) ignored");
2003	}
2004
2005	buffer->cur--;
2006	}
2007
2008	/ See if the characters of a number token are valid in a name (no*
2009	'.', '+' or '-'). /*
2010	static int
2011	name_p (cpp_reader pfile, const* cpp_string *string)
2012	{
2013	unsigned int i;
2014
2015	for (i = `0`; i < string->len; i++)
2016	if (!is_idchar (string->text[i]))
2017	return `0`;
2018
2019	return `1`;
2020	}
2021
2022	/ After parsing an identifier or other sequence, produce a warning about*
2023	sequences not in NFC/NFKC. /*
2024	static void
2025	warn_about_normalization (cpp_reader *pfile,
2026	const cpp_token *token,
2027	const struct normalize_state *s,
2028	bool identifier)
2029	{
2030	if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
2031	&& !pfile->state.skipping)
2032	{
2033	location_t loc = token->src_loc;
2034
2035	/ If possible, create a location range for the token. /
2036	if (loc >= RESERVED_LOCATION_COUNT
2037	&& token->type != CPP_EOF
2038	/ There must be no line notes to process. /
2039	&& (!(pfile->buffer->cur
2040	>= pfile->buffer->notes[pfile->buffer->cur_note].pos
2041	&& !pfile->overlaid_buffer)))
2042	{
2043	source_range tok_range;
2044	tok_range.m_start = loc;
2045	tok_range.m_finish
2046	= linemap_position_for_column (pfile->line_table,
2047	CPP_BUF_COLUMN (pfile->buffer,
2048	pfile->buffer->cur));
2049	loc = pfile->line_table->get_or_create_combined_loc (locus: loc, src_range: tok_range,
2050	data: nullptr, discriminator: `0`);
2051	}
2052
2053	encoding_rich_location rich_loc (pfile, loc);
2054
2055	/ Make sure that the token is printed using UCNs, even*
2056	if we'd otherwise happily print UTF-8. /*
2057	unsigned char buf = XNEWVEC (unsigned* char, cpp_token_len (token));
2058	size_t sz;
2059
2060	sz = cpp_spell_token (pfile, token, buf, false) - buf;
2061	if (NORMALIZE_STATE_RESULT (s) == normalized_C)
2062	cpp_warning_at (pfile, CPP_W_NORMALIZE, richloc: &rich_loc,
2063	msgid: "%<%.s%> is not in NFKC", (int*) sz, buf);
2064	else if (identifier && CPP_OPTION (pfile, xid_identifiers))
2065	cpp_pedwarning_at (pfile, CPP_W_NORMALIZE, richloc: &rich_loc,
2066	msgid: "%<%.s%> is not in NFC", (int*) sz, buf);
2067	else
2068	cpp_warning_at (pfile, CPP_W_NORMALIZE, richloc: &rich_loc,
2069	msgid: "%<%.s%> is not in NFC", (int*) sz, buf);
2070	free (ptr: buf);
2071	}
2072	}
2073
2074	/ Returns TRUE if the byte sequence starting at buffer->cur is a valid*
2075	extended character in an identifier. If FIRST is TRUE, then the character
2076	must be valid at the beginning of an identifier as well. If the return
2077	value is TRUE, then pfile->buffer->cur has been moved to point to the next
2078	byte after the extended character. /*
2079
2080	static bool
2081	forms_identifier_p (cpp_reader pfile, int* first,
2082	struct normalize_state *state)
2083	{
2084	cpp_buffer *buffer = pfile->buffer;
2085	const bool warn_bidi_p = pfile->warn_bidi_p ();
2086
2087	if (*buffer->cur == `'$'`)
2088	{
2089	if (!CPP_OPTION (pfile, dollars_in_ident))
2090	return false;
2091
2092	buffer->cur++;
2093	if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
2094	{
2095	CPP_OPTION (pfile, warn_dollars) = `0`;
2096	cpp_error (pfile, CPP_DL_PEDWARN, msgid: "%<$%> in identifier or number");
2097	}
2098
2099	return true;
2100	}
2101
2102	/ Is this a syntactically valid UCN or a valid UTF-8 char? /
2103	if (CPP_OPTION (pfile, extended_identifiers))
2104	{
2105	cppchar_t s;
2106	if (*buffer->cur >= utf8_signifier)
2107	{
2108	if (__builtin_expect (*buffer->cur == bidi::utf8_start, `0`)
2109	&& warn_bidi_p)
2110	{
2111	location_t loc;
2112	bidi::kind kind = get_bidi_utf8 (pfile, p: buffer->cur, out: &loc);
2113	maybe_warn_bidi_on_char (pfile, kind, /ucn_p=/false, loc);
2114	}
2115	if (_cpp_valid_utf8 (pfile, pstr: &buffer->cur, limit: buffer->rlimit, identifier_pos: `1` + !first,
2116	nst: state, cp: &s))
2117	return true;
2118	}
2119	else if (*buffer->cur == `'\\'`
2120	&& (buffer->cur[`1`] == `'u'`
2121	\|\| buffer->cur[`1`] == `'U'`
2122	\|\| buffer->cur[`1`] == `'N'`))
2123	{
2124	buffer->cur += `2`;
2125	if (warn_bidi_p)
2126	{
2127	location_t loc;
2128	bidi::kind kind;
2129	if (buffer->cur[-`1`] == `'N'`)
2130	kind = get_bidi_named (pfile, p: buffer->cur, out: &loc);
2131	else
2132	kind = get_bidi_ucn (pfile, p: buffer->cur,
2133	is_U: buffer->cur[-`1`] == `'U'`, out: &loc);
2134	maybe_warn_bidi_on_char (pfile, kind, /ucn_p=/true, loc);
2135	}
2136	if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, `1` + !first,
2137	state, &s, NULL, NULL))
2138	return true;
2139	buffer->cur -= `2`;
2140	}
2141	}
2142
2143	return false;
2144	}
2145
2146	/ Helper function to issue error about improper __VA_OPT__ use. /
2147	static void
2148	maybe_va_opt_error (cpp_reader *pfile)
2149	{
2150	if (CPP_PEDANTIC (pfile) && !CPP_OPTION (pfile, va_opt))
2151	{
2152	/ __VA_OPT__ should not be accepted at all, but allow it in*
2153	system headers. /*
2154	if (!_cpp_in_system_header (pfile))
2155	{
2156	if (CPP_OPTION (pfile, cplusplus))
2157	cpp_pedwarning (pfile, CPP_W_CXX20_EXTENSIONS,
2158	msgid: "%<__VA_OPT__%> is not available until C++20");
2159	else
2160	cpp_pedwarning (pfile, CPP_W_PEDANTIC,
2161	msgid: "%<__VA_OPT__%> is not available until C23");
2162	}
2163	}
2164	else if (!pfile->state.va_args_ok)
2165	{
2166	/ __VA_OPT__ should only appear in the replacement list of a*
2167	variadic macro. /*
2168	cpp_error (pfile, CPP_DL_PEDWARN,
2169	msgid: "%<__VA_OPT__%> can only appear in the expansion"
2170	" of a C++20 variadic macro");
2171	}
2172	}
2173
2174	/ Helper function to perform diagnostics that are needed (rarely)*
2175	when an identifier is lexed. /*
2176	static void
2177	identifier_diagnostics_on_lex (cpp_reader pfile, cpp_hashnode node)
2178	{
2179	if (__builtin_expect (!(node->flags & NODE_DIAGNOSTIC)
2180	\|\| pfile->state.skipping, `1`))
2181	return;
2182
2183	/ It is allowed to poison the same identifier twice. /
2184	if ((node->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
2185	{
2186	cpp_error (pfile, CPP_DL_ERROR, msgid: "attempt to use poisoned %qs",
2187	NODE_NAME (node));
2188	const auto data = (cpp_hashnode_extra *)
2189	ht_lookup (ht: pfile->extra_hash_table, id: node->ident, opt: HT_NO_INSERT);
2190	if (data && data->poisoned_loc)
2191	cpp_error_at (pfile, CPP_DL_NOTE, src_loc: data->poisoned_loc, msgid: "poisoned here");
2192	}
2193
2194	/ Constraint 6.10.3.5: __VA_ARGS__ should only appear in the*
2195	replacement list of a variadic macro. /*
2196	if (node == pfile->spec_nodes.n__VA_ARGS__
2197	&& !pfile->state.va_args_ok)
2198	{
2199	if (CPP_OPTION (pfile, cplusplus))
2200	cpp_error (pfile, CPP_DL_PEDWARN,
2201	msgid: "%<__VA_ARGS__%> can only appear in the expansion"
2202	" of a C++11 variadic macro");
2203	else
2204	cpp_error (pfile, CPP_DL_PEDWARN,
2205	msgid: "%<__VA_ARGS__%> can only appear in the expansion"
2206	" of a C99 variadic macro");
2207	}
2208
2209	/ __VA_OPT__ should only appear in the replacement list of a*
2210	variadic macro. /*
2211	if (node == pfile->spec_nodes.n__VA_OPT__)
2212	maybe_va_opt_error (pfile);
2213
2214	/ For -Wc++-compat, warn about use of C++ named operators. /
2215	if (node->flags & NODE_WARN_OPERATOR)
2216	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
2217	msgid: "identifier %qs is a special operator name in C++",
2218	NODE_NAME (node));
2219	}
2220
2221	/ Lex an identifier starting at BASE. BUFFER->CUR is expected to point*
2222	one past the first character at BASE, which may be a (possibly multi-byte)
2223	character if STARTS_UCN is true. /*
2224	static cpp_hashnode *
2225	lex_identifier (cpp_reader pfile, const* uchar base, bool* starts_ucn,
2226	struct normalize_state nst, cpp_hashnode *spelling)
2227	{
2228	cpp_hashnode *result;
2229	const uchar *cur;
2230	unsigned int len;
2231	unsigned int hash = HT_HASHSTEP (`0`, *base);
2232	const bool warn_bidi_p = pfile->warn_bidi_p ();
2233
2234	cur = pfile->buffer->cur;
2235	if (! starts_ucn)
2236	{
2237	while (ISIDNUM (*cur))
2238	{
2239	hash = HT_HASHSTEP (hash, *cur);
2240	cur++;
2241	}
2242	NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - `1`));
2243	}
2244	pfile->buffer->cur = cur;
2245	if (starts_ucn \|\| forms_identifier_p (pfile, first: false, state: nst))
2246	{
2247	/ Slower version for identifiers containing UCNs*
2248	or extended chars (including $). /*
2249	do {
2250	while (ISIDNUM (*pfile->buffer->cur))
2251	{
2252	NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
2253	pfile->buffer->cur++;
2254	}
2255	} while (forms_identifier_p (pfile, first: false, state: nst));
2256	if (warn_bidi_p)
2257	maybe_warn_bidi_on_close (pfile, p: pfile->buffer->cur);
2258	result = _cpp_interpret_identifier (pfile, id: base,
2259	len: pfile->buffer->cur - base);
2260	*spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
2261	}
2262	else
2263	{
2264	len = cur - base;
2265	hash = HT_HASHFINISH (hash, len);
2266
2267	result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
2268	base, len, hash, HT_ALLOC));
2269	*spelling = result;
2270	}
2271
2272	return result;
2273	}
2274
2275	/ Struct to hold the return value of the scan_cur_identifier () helper*
2276	function below. /*
2277
2278	struct scan_id_result
2279	{
2280	cpp_hashnode *node;
2281	normalize_state nst;
2282
2283	scan_id_result ()
2284	: node (nullptr)
2285	{
2286	nst = INITIAL_NORMALIZE_STATE;
2287	}
2288
2289	explicit operator bool () const { return node; }
2290	};
2291
2292	/ Helper function to scan an entire identifier beginning at*
2293	pfile->buffer->cur, and possibly containing extended characters (UCNs
2294	and/or UTF-8). Returns the cpp_hashnode for the identifier on success, or
2295	else nullptr, as well as a normalize_state so that normalization warnings
2296	may be issued once the token lexing is complete. /*
2297
2298	static scan_id_result
2299	scan_cur_identifier (cpp_reader *pfile)
2300	{
2301	const auto buffer = pfile->buffer;
2302	const auto begin = buffer->cur;
2303	scan_id_result result;
2304	if (ISIDST (*buffer->cur))
2305	{
2306	++buffer->cur;
2307	cpp_hashnode *ignore;
2308	result.node = lex_identifier (pfile, base: begin, starts_ucn: false, nst: &result.nst, spelling: &ignore);
2309	}
2310	else if (forms_identifier_p (pfile, first: true, state: &result.nst))
2311	{
2312	/ buffer->cur has been moved already by the call*
2313	to forms_identifier_p. /*
2314	cpp_hashnode *ignore;
2315	result.node = lex_identifier (pfile, base: begin, starts_ucn: true, nst: &result.nst, spelling: &ignore);
2316	}
2317	return result;
2318	}
2319
2320	/ Lex a number to NUMBER starting at BUFFER->CUR - 1. /
2321	static void
2322	lex_number (cpp_reader pfile, cpp_string number,
2323	struct normalize_state *nst)
2324	{
2325	const uchar *cur;
2326	const uchar *base;
2327	uchar *dest;
2328
2329	base = pfile->buffer->cur - `1`;
2330	do
2331	{
2332	const uchar *adj_digit_sep = NULL;
2333	cur = pfile->buffer->cur;
2334
2335	/ N.B. ISIDNUM does not include $. /
2336	while (ISIDNUM (*cur)
2337	\|\| (*cur == `'.'` && !DIGIT_SEP (cur[-`1`]))
2338	\|\| DIGIT_SEP (*cur)
2339	\|\| (VALID_SIGN (*cur, cur[-`1`]) && !DIGIT_SEP (cur[-`2`])))
2340	{
2341	NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
2342	/ Adjacent digit separators do not form part of the pp-number syntax.*
2343	However, they can safely be diagnosed here as an error, since '' is
2344	not a valid preprocessing token. /*
2345	if (DIGIT_SEP (*cur) && DIGIT_SEP (cur[-`1`]) && !adj_digit_sep)
2346	adj_digit_sep = cur;
2347	cur++;
2348	}
2349	/ A number can't end with a digit separator. /
2350	while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-`1`]))
2351	--cur;
2352	if (adj_digit_sep && adj_digit_sep < cur)
2353	cpp_error (pfile, CPP_DL_ERROR, msgid: "adjacent digit separators");
2354
2355	pfile->buffer->cur = cur;
2356	}
2357	while (forms_identifier_p (pfile, first: false, state: nst));
2358
2359	number->len = cur - base;
2360	dest = _cpp_unaligned_alloc (pfile, number->len + `1`);
2361	memcpy (dest: dest, src: base, n: number->len);
2362	dest[number->len] = `'\0'`;
2363	number->text = dest;
2364	}
2365
2366	/ Create a token of type TYPE with a literal spelling. /
2367	static void
2368	create_literal (cpp_reader pfile, cpp_token token, const uchar *base,
2369	unsigned int len, enum cpp_ttype type)
2370	{
2371	token->type = type;
2372	token->val.str.len = len;
2373	token->val.str.text = cpp_alloc_token_string (pfile, base, len);
2374	}
2375
2376	/ Like create_literal(), but construct it from two separate strings*
2377	which are concatenated. LEN2 may be 0 if no second string is
2378	required. /*
2379	static void
2380	create_literal2 (cpp_reader pfile, cpp_token token, const uchar *base1,
2381	unsigned int len1, const uchar base2, unsigned* int len2,
2382	enum cpp_ttype type)
2383	{
2384	token->type = type;
2385	token->val.str.len = len1 + len2;
2386	uchar *const dest = _cpp_unaligned_alloc (pfile, len1 + len2 + `1`);
2387	memcpy (dest: dest, src: base1, n: len1);
2388	if (len2)
2389	memcpy (dest: dest+len1, src: base2, n: len2);
2390	dest[len1 + len2] = `0`;
2391	token->val.str.text = dest;
2392	}
2393
2394	const uchar *
2395	cpp_alloc_token_string (cpp_reader *pfile,
2396	const unsigned char ptr, unsigned* len)
2397	{
2398	uchar *dest = _cpp_unaligned_alloc (pfile, len + `1`);
2399
2400	dest[len] = `0`;
2401	memcpy (dest: dest, src: ptr, n: len);
2402	return dest;
2403	}
2404
2405	/ A pair of raw buffer pointers. The currently open one is [1], the*
2406	first one is [0]. Used for string literal lexing. /*
2407	struct lit_accum {
2408	_cpp_buff *first;
2409	_cpp_buff *last;
2410	const uchar *rpos;
2411	size_t accum;
2412
2413	lit_accum ()
2414	: first (NULL), last (NULL), rpos (`0`), accum (`0`)
2415	{
2416	}
2417
2418	void append (cpp_reader , const* uchar *, size_t);
2419
2420	void read_begin (cpp_reader *);
2421	bool reading_p () const
2422	{
2423	return rpos != NULL;
2424	}
2425	char read_char ()
2426	{
2427	char c = *rpos++;
2428	if (rpos == BUFF_FRONT (last))
2429	rpos = NULL;
2430	return c;
2431	}
2432
2433	void create_literal2 (cpp_reader pfile, cpp_token token,
2434	const uchar base1, unsigned* int len1,
2435	const uchar base2, unsigned* int len2,
2436	enum cpp_ttype type);
2437	};
2438
2439	/ Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer*
2440	sequence from FIRST_BUFF_P to LAST_BUFF_P. /
2441
2442	void
2443	lit_accum::append (cpp_reader pfile, const* uchar *base, size_t len)
2444	{
2445	if (!last)
2446	/ Starting. /
2447	first = last = _cpp_get_buff (pfile, len);
2448	else if (len > BUFF_ROOM (last))
2449	{
2450	/ There is insufficient room in the buffer. Copy what we can,*
2451	and then either extend or create a new one. /*
2452	size_t room = BUFF_ROOM (last);
2453	memcpy (BUFF_FRONT (last), src: base, n: room);
2454	BUFF_FRONT (last) += room;
2455	base += room;
2456	len -= room;
2457	accum += room;
2458
2459	gcc_checking_assert (!rpos);
2460
2461	last = _cpp_append_extend_buff (pfile, last, len);
2462	}
2463
2464	memcpy (BUFF_FRONT (last), src: base, n: len);
2465	BUFF_FRONT (last) += len;
2466	accum += len;
2467	}
2468
2469	void
2470	lit_accum::read_begin (cpp_reader *pfile)
2471	{
2472	/ We never accumulate more than 4 chars to read. /
2473	if (BUFF_ROOM (last) < `4`)
2474
2475	last = _cpp_append_extend_buff (pfile, last, `4`);
2476	rpos = BUFF_FRONT (last);
2477	}
2478
2479	/ Helper function to check if a string format macro, say from inttypes.h, is*
2480	placed touching a string literal, in which case it could be parsed as a C++11
2481	user-defined string literal thus breaking the program. Return TRUE if the
2482	UDL should be ignored for now and preserved for potential macro
2483	expansion. /*
2484
2485	static bool
2486	maybe_ignore_udl_macro_suffix (cpp_reader *pfile, location_t src_loc,
2487	const uchar suffix_begin, cpp_hashnode node)
2488	{
2489	/ User-defined literals outside of namespace std must start with a single*
2490	underscore, so assume anything of that form really is a UDL suffix.
2491	We don't need to worry about UDLs defined inside namespace std because
2492	their names are reserved, so cannot be used as macro names in valid
2493	programs. /*
2494	if ((suffix_begin[`0`] == `'_'` && suffix_begin[`1`] != `'_'`)
2495	\|\| !cpp_macro_p (node))
2496	return false;
2497
2498	/ Maybe raise a warning here; caller should arrange not to consume*
2499	the tokens. /*
2500	if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
2501	cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX, src_loc, `0`,
2502	msgid: "invalid suffix on literal; C++11 requires a space "
2503	"between literal and string macro");
2504	return true;
2505	}
2506
2507	/ Like create_literal2(), but also prepend all the accumulated data from*
2508	the lit_accum struct. /*
2509	void
2510	lit_accum::create_literal2 (cpp_reader pfile, cpp_token token,
2511	const uchar base1, unsigned* int len1,
2512	const uchar base2, unsigned* int len2,
2513	enum cpp_ttype type)
2514	{
2515	const unsigned int tot_len = accum + len1 + len2;
2516	uchar *dest = _cpp_unaligned_alloc (pfile, tot_len + `1`);
2517	token->type = type;
2518	token->val.str.len = tot_len;
2519	token->val.str.text = dest;
2520	for (_cpp_buff *buf = first; buf; buf = buf->next)
2521	{
2522	size_t len = BUFF_FRONT (buf) - buf->base;
2523	memcpy (dest: dest, src: buf->base, n: len);
2524	dest += len;
2525	}
2526	memcpy (dest: dest, src: base1, n: len1);
2527	dest += len1;
2528	if (len2)
2529	memcpy (dest: dest, src: base2, n: len2);
2530	dest += len2;
2531	*dest = `'\0'`;
2532	}
2533
2534	/ Lexes a raw string. The stored string contains the spelling,*
2535	including double quotes, delimiter string, '(' and ')', any leading
2536	'L', 'u', 'U' or 'u8' and 'R' modifier. The created token contains
2537	the type of the literal, or CPP_OTHER if it was not properly
2538	terminated.
2539
2540	BASE is the start of the token. Updates pfile->buffer->cur to just
2541	after the lexed string.
2542
2543	The spelling is NUL-terminated, but it is not guaranteed that this
2544	is the first NUL since embedded NULs are preserved. /*
2545
2546	static void
2547	lex_raw_string (cpp_reader pfile, cpp_token token, const uchar *base)
2548	{
2549	const uchar *pos = base;
2550	const bool warn_bidi_p = pfile->warn_bidi_p ();
2551	const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
2552	const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p \| warn_invalid_utf8_p;
2553
2554	/ 'tis a pity this information isn't passed down from the lexer's*
2555	initial categorization of the token. /*
2556	enum cpp_ttype type = CPP_STRING;
2557
2558	if (*pos == `'L'`)
2559	{
2560	type = CPP_WSTRING;
2561	pos++;
2562	}
2563	else if (*pos == `'U'`)
2564	{
2565	type = CPP_STRING32;
2566	pos++;
2567	}
2568	else if (*pos == `'u'`)
2569	{
2570	if (pos[`1`] == `'8'`)
2571	{
2572	type = CPP_UTF8STRING;
2573	pos++;
2574	}
2575	else
2576	type = CPP_STRING16;
2577	pos++;
2578	}
2579
2580	gcc_checking_assert (pos[`0`] == `'R'` && pos[`1`] == `'"'`);
2581	pos += `2`;
2582
2583	_cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
2584
2585	/ Skip notes before the ". /
2586	while (note->pos < pos)
2587	++note;
2588
2589	lit_accum accum;
2590
2591	uchar prefix[`17`];
2592	unsigned prefix_len = `0`;
2593	enum Phase
2594	{
2595	PHASE_PREFIX = -`2`,
2596	PHASE_NONE = -`1`,
2597	PHASE_SUFFIX = `0`
2598	} phase = PHASE_PREFIX;
2599
2600	for (;;)
2601	{
2602	gcc_checking_assert (note->pos >= pos);
2603
2604	/ Undo any escaped newlines and trigraphs. /
2605	if (!accum.reading_p () && note->pos == pos)
2606	switch (note->type)
2607	{
2608	case `'\\'`:
2609	case `' '`:
2610	/ Restore backslash followed by newline. /
2611	accum.append (pfile, base, len: pos - base);
2612	base = pos;
2613	accum.read_begin (pfile);
2614	accum.append (pfile, UC"\\", len: `1`);
2615
2616	after_backslash:
2617	if (note->type == `' '`)
2618	/ GNU backslash whitespace newline extension. FIXME*
2619	could be any sequence of non-vertical space. When we
2620	can properly restore any such sequence, we should
2621	mark this note as handled so _cpp_process_line_notes
2622	doesn't warn. /*
2623	accum.append (pfile, UC" ", len: `1`);
2624
2625	accum.append (pfile, UC"\n", len: `1`);
2626	note++;
2627	break;
2628
2629	case `'\n'`:
2630	/ This can happen for ??/<NEWLINE> when trigraphs are not*
2631	being interpretted. /*
2632	gcc_checking_assert (!CPP_OPTION (pfile, trigraphs));
2633	note->type = `0`;
2634	note++;
2635	break;
2636
2637	case `'W'`:
2638	case `'L'`:
2639	case `'S'`:
2640	case `'T'`:
2641	/ Don't warn about leading or trailing whitespace in raw string*
2642	literals. /*
2643	note->type = `0`;
2644	note++;
2645	break;
2646
2647	default:
2648	gcc_checking_assert (_cpp_trigraph_map[note->type]);
2649
2650	/ Don't warn about this trigraph in*
2651	_cpp_process_line_notes, since trigraphs show up as
2652	trigraphs in raw strings. /*
2653	uchar type = note->type;
2654	note->type = `0`;
2655
2656	if (CPP_OPTION (pfile, trigraphs))
2657	{
2658	accum.append (pfile, base, len: pos - base);
2659	base = pos;
2660	accum.read_begin (pfile);
2661	accum.append (pfile, UC"??", len: `2`);
2662	accum.append (pfile, base: &type, len: `1`);
2663
2664	/ ??/ followed by newline gets two line notes, one for*
2665	the trigraph and one for the backslash/newline. /*
2666	if (type == `'/'` && note[`1`].pos == pos)
2667	{
2668	note++;
2669	gcc_assert (note->type == `'\\'` \|\| note->type == `' '`);
2670	goto after_backslash;
2671	}
2672	/ Skip the replacement character. /
2673	base = ++pos;
2674	}
2675
2676	note++;
2677	break;
2678	}
2679
2680	/ Now get a char to process. Either from an expanded note, or*
2681	from the line buffer. /*
2682	bool read_note = accum.reading_p ();
2683	char c = read_note ? accum.read_char () : *pos++;
2684
2685	if (phase == PHASE_PREFIX)
2686	{
2687	if (c == `'('`)
2688	{
2689	/ Done. /
2690	phase = PHASE_NONE;
2691	prefix[prefix_len++] = `'"'`;
2692	}
2693	else if (prefix_len < `16`
2694	/ Prefix chars are any of the basic character set,*
2695	[lex.charset] except for '
2696	()\\\t\v\f\n'. Optimized for a contiguous
2697	alphabet. /*
2698	/ Unlike a switch, this collapses down to one or*
2699	two shift and bitmask operations on an ASCII
2700	system, with an outlier or two. /*
2701	&& ((`'Z'` - `'A'` == `25`
2702	? ((c >= `'a'` && c <= `'z'`) \|\| (c >= `'A'` && c <= `'Z'`))
2703	: ISIDST (c))
2704	\|\| (c >= `'0'` && c <= `'9'`)
2705	\|\| c == `'_'` \|\| c == `'{'` \|\| c == `'}'`
2706	\|\| c == `'['` \|\| c == `']'` \|\| c == `'#'`
2707	\|\| c == `'<'` \|\| c == `'>'` \|\| c == `'%'`
2708	\|\| c == `':'` \|\| c == `';'` \|\| c == `'.'` \|\| c == `'?'`
2709	\|\| c == `'*'` \|\| c == `'+'` \|\| c == `'-'` \|\| c == `'/'`
2710	\|\| c == `'^'` \|\| c == `'&'` \|\| c == `'\|'` \|\| c == `'~'`
2711	\|\| c == `'!'` \|\| c == `'='` \|\| c == `','`
2712	\|\| c == `'"'` \|\| c == `'\''`
2713	\|\| ((c == `'$'` \|\| c == `'@'` \|\| c == '`')
2714	&& (CPP_OPTION (pfile, cplusplus)
2715	? CPP_OPTION (pfile, lang) > CLK_CXX23
2716	: CPP_OPTION (pfile, low_ucns)))))
2717	prefix[prefix_len++] = c;
2718	else
2719	{
2720	/ Something is wrong. /
2721	int col = CPP_BUF_COLUMN (pfile->buffer, pos) + read_note;
2722	if (prefix_len == `16`)
2723	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
2724	col, msgid: "raw string delimiter longer "
2725	"than 16 characters");
2726	else if (c == `'\n'`)
2727	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
2728	col, msgid: "invalid new-line in raw "
2729	"string delimiter");
2730	else
2731	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
2732	col, msgid: "invalid character '%c' in "
2733	"raw string delimiter", c);
2734	type = CPP_OTHER;
2735	phase = PHASE_NONE;
2736	/ Continue until we get a close quote, that's probably*
2737	the best failure mode. /*
2738	prefix_len = `0`;
2739	}
2740	if (c != `'\n'`)
2741	continue;
2742	}
2743
2744	if (phase != PHASE_NONE)
2745	{
2746	if (prefix[phase] != c)
2747	phase = PHASE_NONE;
2748	else if (unsigned (phase + `1`) == prefix_len)
2749	break;
2750	else
2751	{
2752	phase = Phase (phase + `1`);
2753	continue;
2754	}
2755	}
2756
2757	if (!prefix_len && c == `'"'`)
2758	/ Failure mode lexing. /
2759	goto out;
2760	else if (prefix_len && c == `')'`)
2761	phase = PHASE_SUFFIX;
2762	else if (!read_note && c == `'\n'`)
2763	{
2764	pos--;
2765	pfile->buffer->cur = pos;
2766	if ((pfile->state.in_directive \|\| pfile->state.parsing_args
2767	\|\| pfile->state.in_deferred_pragma)
2768	&& pfile->buffer->next_line >= pfile->buffer->rlimit)
2769	{
2770	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, `0`,
2771	msgid: "unterminated raw string");
2772	type = CPP_OTHER;
2773	goto out;
2774	}
2775
2776	accum.append (pfile, base, len: pos - base + `1`);
2777	_cpp_process_line_notes (pfile, in_comment: false);
2778
2779	if (pfile->buffer->next_line < pfile->buffer->rlimit)
2780	CPP_INCREMENT_LINE (pfile, `0`);
2781	pfile->buffer->need_line = true;
2782
2783	if (!get_fresh_line_impl<true> (pfile))
2784	{
2785	/ We ran out of file and failed to get a line. /
2786	location_t src_loc = token->src_loc;
2787	token->type = CPP_EOF;
2788	/ Tell the compiler the line number of the EOF token. /
2789	token->src_loc = pfile->line_table->highest_line;
2790	token->flags = BOL;
2791	if (accum.first)
2792	_cpp_release_buff (pfile, accum.first);
2793	cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, `0`,
2794	msgid: "unterminated raw string");
2795
2796	/ Now pop the buffer that get_fresh_line_impl() did not. Popping*
2797	is not safe if processing a directive, however this cannot
2798	happen as we already checked above that a line would be
2799	available, and get_fresh_line_impl() can't fail in this
2800	case. /*
2801	gcc_assert (!pfile->state.in_directive);
2802	_cpp_pop_buffer (pfile);
2803
2804	return;
2805	}
2806
2807	pos = base = pfile->buffer->cur;
2808	note = &pfile->buffer->notes[pfile->buffer->cur_note];
2809	}
2810	else if (__builtin_expect ((unsigned char) c >= utf8_continuation, `0`)
2811	&& warn_bidi_or_invalid_utf8_p)
2812	pos = _cpp_handle_multibyte_utf8 (pfile, c, cur: pos, warn_bidi_p,
2813	warn_invalid_utf8_p);
2814	}
2815
2816	if (warn_bidi_p)
2817	maybe_warn_bidi_on_close (pfile, p: pos);
2818
2819	if (CPP_OPTION (pfile, user_literals))
2820	{
2821	const uchar *const suffix_begin = pos;
2822	pfile->buffer->cur = pos;
2823
2824	if (const auto sr = scan_cur_identifier (pfile))
2825	{
2826	if (maybe_ignore_udl_macro_suffix (pfile, src_loc: token->src_loc,
2827	suffix_begin, node: sr.node))
2828	pfile->buffer->cur = suffix_begin;
2829	else
2830	{
2831	type = cpp_userdef_string_add_type (type);
2832	accum.create_literal2 (pfile, token, base1: base, len1: suffix_begin - base,
2833	NODE_NAME (sr.node), NODE_LEN (sr.node),
2834	type);
2835	if (accum.first)
2836	_cpp_release_buff (pfile, accum.first);
2837	warn_about_normalization (pfile, token, s: &sr.nst, identifier: true);
2838	return;
2839	}
2840	}
2841	}
2842
2843	out:
2844	pfile->buffer->cur = pos;
2845	if (!accum.accum)
2846	create_literal (pfile, token, base, len: pos - base, type);
2847	else
2848	{
2849	accum.create_literal2 (pfile, token, base1: base, len1: pos - base, base2: nullptr, len2: `0`, type);
2850	_cpp_release_buff (pfile, accum.first);
2851	}
2852	}
2853
2854	/ Lexes a string, character constant, or angle-bracketed header file*
2855	name. The stored string contains the spelling, including opening
2856	quote and any leading 'L', 'u', 'U' or 'u8' and optional
2857	'R' modifier. It returns the type of the literal, or CPP_OTHER
2858	if it was not properly terminated, or CPP_LESS for an unterminated
2859	header name which must be relexed as normal tokens.
2860
2861	The spelling is NUL-terminated, but it is not guaranteed that this
2862	is the first NUL since embedded NULs are preserved. /*
2863	static void
2864	lex_string (cpp_reader pfile, cpp_token token, const uchar *base)
2865	{
2866	bool saw_NUL = false;
2867	const uchar *cur;
2868	cppchar_t terminator;
2869	enum cpp_ttype type;
2870
2871	cur = base;
2872	terminator = *cur++;
2873	if (terminator == `'L'` \|\| terminator == `'U'`)
2874	terminator = *cur++;
2875	else if (terminator == `'u'`)
2876	{
2877	terminator = *cur++;
2878	if (terminator == `'8'`)
2879	terminator = *cur++;
2880	}
2881	if (terminator == `'R'`)
2882	{
2883	lex_raw_string (pfile, token, base);
2884	return;
2885	}
2886	if (terminator == `'"'`)
2887	type = (*base == `'L'` ? CPP_WSTRING :
2888	*base == `'U'` ? CPP_STRING32 :
2889	*base == `'u'` ? (base[`1`] == `'8'` ? CPP_UTF8STRING : CPP_STRING16)
2890	: CPP_STRING);
2891	else if (terminator == `'\''`)
2892	type = (*base == `'L'` ? CPP_WCHAR :
2893	*base == `'U'` ? CPP_CHAR32 :
2894	*base == `'u'` ? (base[`1`] == `'8'` ? CPP_UTF8CHAR : CPP_CHAR16)
2895	: CPP_CHAR);
2896	else
2897	terminator = `'>'`, type = CPP_HEADER_NAME;
2898
2899	const bool warn_bidi_p = pfile->warn_bidi_p ();
2900	const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
2901	const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p \| warn_invalid_utf8_p;
2902	for (;;)
2903	{
2904	cppchar_t c = *cur++;
2905
2906	/ In #include-style directives, terminators are not escapable. /
2907	if (c == `'\\'` && !pfile->state.angled_headers && *cur != `'\n'`)
2908	{
2909	if ((cur[`0`] == `'u'` \|\| cur[`0`] == `'U'` \|\| cur[`0`] == `'N'`) && warn_bidi_p)
2910	{
2911	location_t loc;
2912	bidi::kind kind;
2913	if (cur[`0`] == `'N'`)
2914	kind = get_bidi_named (pfile, p: cur + `1`, out: &loc);
2915	else
2916	kind = get_bidi_ucn (pfile, p: cur + `1`, is_U: cur[`0`] == `'U'`, out: &loc);
2917	maybe_warn_bidi_on_char (pfile, kind, /ucn_p=/true, loc);
2918	}
2919	cur++;
2920	}
2921	else if (c == terminator)
2922	{
2923	if (warn_bidi_p)
2924	maybe_warn_bidi_on_close (pfile, p: cur - `1`);
2925	break;
2926	}
2927	else if (c == `'\n'`)
2928	{
2929	cur--;
2930	/ Unmatched quotes always yield undefined behavior, but*
2931	greedy lexing means that what appears to be an unterminated
2932	header name may actually be a legitimate sequence of tokens. /*
2933	if (terminator == `'>'`)
2934	{
2935	token->type = CPP_LESS;
2936	return;
2937	}
2938	type = CPP_OTHER;
2939	break;
2940	}
2941	else if (c == `'\0'`)
2942	saw_NUL = true;
2943	else if (__builtin_expect (c >= utf8_continuation, `0`)
2944	&& warn_bidi_or_invalid_utf8_p)
2945	cur = _cpp_handle_multibyte_utf8 (pfile, c, cur, warn_bidi_p,
2946	warn_invalid_utf8_p);
2947	}
2948
2949	if (saw_NUL && !pfile->state.skipping)
2950	cpp_error (pfile, CPP_DL_WARNING,
2951	msgid: "null character(s) preserved in literal");
2952
2953	if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
2954	cpp_error (pfile, CPP_DL_PEDWARN, msgid: "missing terminating %c character",
2955	(int) terminator);
2956
2957	pfile->buffer->cur = cur;
2958	const uchar *const suffix_begin = cur;
2959
2960	if (CPP_OPTION (pfile, user_literals))
2961	{
2962	if (const auto sr = scan_cur_identifier (pfile))
2963	{
2964	if (maybe_ignore_udl_macro_suffix (pfile, src_loc: token->src_loc,
2965	suffix_begin, node: sr.node))
2966	pfile->buffer->cur = suffix_begin;
2967	else
2968	{
2969	/ Grab user defined literal suffix. /
2970	type = cpp_userdef_char_add_type (type);
2971	type = cpp_userdef_string_add_type (type);
2972	create_literal2 (pfile, token, base1: base, len1: suffix_begin - base,
2973	NODE_NAME (sr.node), NODE_LEN (sr.node), type);
2974	warn_about_normalization (pfile, token, s: &sr.nst, identifier: true);
2975	return;
2976	}
2977	}
2978	}
2979	else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
2980	&& !pfile->state.skipping)
2981	{
2982	const auto sr = scan_cur_identifier (pfile);
2983	/ Maybe raise a warning, but do not consume the tokens. /
2984	pfile->buffer->cur = suffix_begin;
2985	if (sr && cpp_macro_p (node: sr.node))
2986	cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
2987	token->src_loc, `0`, msgid: "C++11 requires a space "
2988	"between string literal and macro");
2989	}
2990
2991	create_literal (pfile, token, base, len: cur - base, type);
2992	}
2993
2994	/ Return the comment table. The client may not make any assumption*
2995	about the ordering of the table. /*
2996	cpp_comment_table *
2997	cpp_get_comments (cpp_reader *pfile)
2998	{
2999	return &pfile->comments;
3000	}
3001
3002	/ Append a comment to the end of the comment table. /
3003	static void
3004	store_comment (cpp_reader pfile, cpp_token token)
3005	{
3006	int len;
3007
3008	if (pfile->comments.allocated == `0`)
3009	{
3010	pfile->comments.allocated = `256`;
3011	pfile->comments.entries = (cpp_comment *) xmalloc
3012	(pfile->comments.allocated * sizeof (cpp_comment));
3013	}
3014
3015	if (pfile->comments.count == pfile->comments.allocated)
3016	{
3017	pfile->comments.allocated *= `2`;
3018	pfile->comments.entries = (cpp_comment *) xrealloc
3019	(pfile->comments.entries,
3020	pfile->comments.allocated * sizeof (cpp_comment));
3021	}
3022
3023	len = token->val.str.len;
3024
3025	/ Copy comment. Note, token may not be NULL terminated. /
3026	pfile->comments.entries[pfile->comments.count].comment =
3027	(char ) xmalloc (sizeof* (char) * (len + `1`));
3028	memcpy (dest: pfile->comments.entries[pfile->comments.count].comment,
3029	src: token->val.str.text, n: len);
3030	pfile->comments.entries[pfile->comments.count].comment[len] = `'\0'`;
3031
3032	/ Set source location. /
3033	pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
3034
3035	/ Increment the count of entries in the comment table. /
3036	pfile->comments.count++;
3037	}
3038
3039	/ The stored comment includes the comment start and any terminator. /
3040	static void
3041	save_comment (cpp_reader pfile, cpp_token token, const unsigned char *from,
3042	cppchar_t type)
3043	{
3044	unsigned char *buffer;
3045	unsigned int len, clen, i;
3046
3047	len = pfile->buffer->cur - from + `1`; / + 1 for the initial '/'. /
3048
3049	/ C++ comments probably (not definitely) have moved past a new*
3050	line, which we don't want to save in the comment. /*
3051	if (is_vspace (pfile->buffer->cur[-`1`]))
3052	len--;
3053
3054	/ If we are currently in a directive or in argument parsing, then*
3055	we need to store all C++ comments as C comments internally, and
3056	so we need to allocate a little extra space in that case.
3057
3058	Note that the only time we encounter a directive here is
3059	when we are saving comments in a "#define". /*
3060	clen = ((pfile->state.in_directive \|\| pfile->state.parsing_args)
3061	&& type == `'/'`) ? len + `2` : len;
3062
3063	buffer = _cpp_unaligned_alloc (pfile, clen);
3064
3065	token->type = CPP_COMMENT;
3066	token->val.str.len = clen;
3067	token->val.str.text = buffer;
3068
3069	buffer[`0`] = `'/'`;
3070	memcpy (dest: buffer + `1`, src: from, n: len - `1`);
3071
3072	/ Finish conversion to a C comment, if necessary. /
3073	if ((pfile->state.in_directive \|\| pfile->state.parsing_args) && type == `'/'`)
3074	{
3075	buffer[`1`] = `'*'`;
3076	buffer[clen - `2`] = `'*'`;
3077	buffer[clen - `1`] = `'/'`;
3078	/ As there can be in a C++ comments illegal sequences for C comments*
3079	we need to filter them out. /*
3080	for (i = `2`; i < (clen - `2`); i++)
3081	if (buffer[i] == `'/'` && (buffer[i - `1`] == `''` \|\| buffer[i + `1`] == `''`))
3082	buffer[i] = `'\|'`;
3083	}
3084
3085	/ Finally store this comment for use by clients of libcpp. /
3086	store_comment (pfile, token);
3087	}
3088
3089	/ Returns true if comment at COMMENT_START is a recognized FALLTHROUGH*
3090	comment. /*
3091
3092	static bool
3093	fallthrough_comment_p (cpp_reader pfile, const* unsigned char *comment_start)
3094	{
3095	const unsigned char *from = comment_start + `1`;
3096
3097	switch (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough))
3098	{
3099	/ For both -Wimplicit-fallthrough=0 and -Wimplicit-fallthrough=5 we*
3100	don't recognize any comments. The latter only checks attributes,
3101	the former doesn't warn. /*
3102	case `0`:
3103	default:
3104	return false;
3105	/ -Wimplicit-fallthrough=1 considers any comment, no matter what*
3106	content it has. /*
3107	case `1`:
3108	return true;
3109	case `2`:
3110	/ -Wimplicit-fallthrough=2 looks for (case insensitive)*
3111	.falls?[ \t-]thr(u\|ough). regex. /
3112	for (; (size_t) (pfile->buffer->cur - from) >= sizeof "fallthru" - `1`;
3113	from++)
3114	{
3115	/ Is there anything like strpbrk with upper boundary, or*
3116	memchr looking for 2 characters rather than just one? /*
3117	if (from[`0`] != `'f'` && from[`0`] != `'F'`)
3118	continue;
3119	if (from[`1`] != `'a'` && from[`1`] != `'A'`)
3120	continue;
3121	if (from[`2`] != `'l'` && from[`2`] != `'L'`)
3122	continue;
3123	if (from[`3`] != `'l'` && from[`3`] != `'L'`)
3124	continue;
3125	from += sizeof "fall" - `1`;
3126	if (from[`0`] == `'s'` \|\| from[`0`] == `'S'`)
3127	from++;
3128	while (from == `' '` \|\| from == `'\t'` \|\| *from == `'-'`)
3129	from++;
3130	if (from[`0`] != `'t'` && from[`0`] != `'T'`)
3131	continue;
3132	if (from[`1`] != `'h'` && from[`1`] != `'H'`)
3133	continue;
3134	if (from[`2`] != `'r'` && from[`2`] != `'R'`)
3135	continue;
3136	if (from[`3`] == `'u'` \|\| from[`3`] == `'U'`)
3137	return true;
3138	if (from[`3`] != `'o'` && from[`3`] != `'O'`)
3139	continue;
3140	if (from[`4`] != `'u'` && from[`4`] != `'U'`)
3141	continue;
3142	if (from[`5`] != `'g'` && from[`5`] != `'G'`)
3143	continue;
3144	if (from[`6`] != `'h'` && from[`6`] != `'H'`)
3145	continue;
3146	return true;
3147	}
3148	return false;
3149	case `3`:
3150	case `4`:
3151	break;
3152	}
3153
3154	/ Whole comment contents:*
3155	-fallthrough
3156	@fallthrough@
3157	*/
3158	if (from == `'-'` \|\| from == `'@'`)
3159	{
3160	size_t len = sizeof "fallthrough" - `1`;
3161	if ((size_t) (pfile->buffer->cur - from - `1`) < len)
3162	return false;
3163	if (memcmp (s1: from + `1`, s2: "fallthrough", n: len))
3164	return false;
3165	if (*from == `'@'`)
3166	{
3167	if (from[len + `1`] != `'@'`)
3168	return false;
3169	len++;
3170	}
3171	from += `1` + len;
3172	}
3173	/ Whole comment contents (regex):*
3174	lint -fallthrough[ \t]*
3175	*/
3176	else if (*from == `'l'`)
3177	{
3178	size_t len = sizeof "int -fallthrough" - `1`;
3179	if ((size_t) (pfile->buffer->cur - from - `1`) < len)
3180	return false;
3181	if (memcmp (s1: from + `1`, s2: "int -fallthrough", n: len))
3182	return false;
3183	from += `1` + len;
3184	while (from == `' '` \|\| from == `'\t'`)
3185	from++;
3186	}
3187	/ Whole comment contents (regex):*
3188	[ \t]FALLTHR(U\|OUGH)[ \t]
3189	*/
3190	else if (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough) == `4`)
3191	{
3192	while (from == `' '` \|\| from == `'\t'`)
3193	from++;
3194	if ((size_t) (pfile->buffer->cur - from) < sizeof "FALLTHRU" - `1`)
3195	return false;
3196	if (memcmp (s1: from, s2: "FALLTHR", n: sizeof "FALLTHR" - `1`))
3197	return false;
3198	from += sizeof "FALLTHR" - `1`;
3199	if (*from == `'U'`)
3200	from++;
3201	else if ((size_t) (pfile->buffer->cur - from) < sizeof "OUGH" - `1`)
3202	return false;
3203	else if (memcmp (s1: from, s2: "OUGH", n: sizeof "OUGH" - `1`))
3204	return false;
3205	else
3206	from += sizeof "OUGH" - `1`;
3207	while (from == `' '` \|\| from == `'\t'`)
3208	from++;
3209	}
3210	/ Whole comment contents (regex):*
3211	[ \t.!](ELSE,? \|INTENTIONAL(LY)? )?FALL(S \| \|-)?THR(OUGH\|U)[ \t.!](-[^\n\r])?*
3212	[ \t.!](Else,? \|Intentional(ly)? )?Fall((s \| \|-)[Tt]\|t)hr(ough\|u)[ \t.!](-[^\n\r])?*
3213	[ \t.!]([Ee]lse,? \|[Ii]ntentional(ly)? )?fall(s \| \|-)?thr(ough\|u)[ \t.!](-[^\n\r])?*
3214	*/
3215	else
3216	{
3217	while (from == `' '` \|\| from == `'\t'` \|\| from == `'.'` \|\| from == `'!'`)
3218	from++;
3219	unsigned char f = *from;
3220	bool all_upper = false;
3221	if (f == `'E'` \|\| f == `'e'`)
3222	{
3223	if ((size_t) (pfile->buffer->cur - from)
3224	< sizeof "else fallthru" - `1`)
3225	return false;
3226	if (f == `'E'` && memcmp (s1: from + `1`, s2: "LSE", n: sizeof "LSE" - `1`) == `0`)
3227	all_upper = true;
3228	else if (memcmp (s1: from + `1`, s2: "lse", n: sizeof "lse" - `1`))
3229	return false;
3230	from += sizeof "else" - `1`;
3231	if (*from == `','`)
3232	from++;
3233	if (*from != `' '`)
3234	return false;
3235	from++;
3236	if (all_upper && *from == `'f'`)
3237	return false;
3238	if (f == `'e'` && *from == `'F'`)
3239	return false;
3240	f = *from;
3241	}
3242	else if (f == `'I'` \|\| f == `'i'`)
3243	{
3244	if ((size_t) (pfile->buffer->cur - from)
3245	< sizeof "intentional fallthru" - `1`)
3246	return false;
3247	if (f == `'I'` && memcmp (s1: from + `1`, s2: "NTENTIONAL",
3248	n: sizeof "NTENTIONAL" - `1`) == `0`)
3249	all_upper = true;
3250	else if (memcmp (s1: from + `1`, s2: "ntentional",
3251	n: sizeof "ntentional" - `1`))
3252	return false;
3253	from += sizeof "intentional" - `1`;
3254	if (*from == `' '`)
3255	{
3256	from++;
3257	if (all_upper && *from == `'f'`)
3258	return false;
3259	}
3260	else if (all_upper)
3261	{
3262	if (memcmp (s1: from, s2: "LY F", n: sizeof "LY F" - `1`))
3263	return false;
3264	from += sizeof "LY " - `1`;
3265	}
3266	else
3267	{
3268	if (memcmp (s1: from, s2: "ly ", n: sizeof "ly " - `1`))
3269	return false;
3270	from += sizeof "ly " - `1`;
3271	}
3272	if (f == `'i'` && *from == `'F'`)
3273	return false;
3274	f = *from;
3275	}
3276	if (f != `'F'` && f != `'f'`)
3277	return false;
3278	if ((size_t) (pfile->buffer->cur - from) < sizeof "fallthru" - `1`)
3279	return false;
3280	if (f == `'F'` && memcmp (s1: from + `1`, s2: "ALL", n: sizeof "ALL" - `1`) == `0`)
3281	all_upper = true;
3282	else if (all_upper)
3283	return false;
3284	else if (memcmp (s1: from + `1`, s2: "all", n: sizeof "all" - `1`))
3285	return false;
3286	from += sizeof "fall" - `1`;
3287	if (*from == (all_upper ? `'S'` : `'s'`) && from[`1`] == `' '`)
3288	from += `2`;
3289	else if (from == `' '` \|\| from == `'-'`)
3290	from++;
3291	else if (*from != (all_upper ? `'T'` : `'t'`))
3292	return false;
3293	if ((f == `'f'` \|\| from != `'T'`) && (all_upper \|\| from != `'t'`))
3294	return false;
3295	if ((size_t) (pfile->buffer->cur - from) < sizeof "thru" - `1`)
3296	return false;
3297	if (memcmp (s1: from + `1`, s2: all_upper ? "HRU" : "hru", n: sizeof "hru" - `1`))
3298	{
3299	if ((size_t) (pfile->buffer->cur - from) < sizeof "through" - `1`)
3300	return false;
3301	if (memcmp (s1: from + `1`, s2: all_upper ? "HROUGH" : "hrough",
3302	n: sizeof "hrough" - `1`))
3303	return false;
3304	from += sizeof "through" - `1`;
3305	}
3306	else
3307	from += sizeof "thru" - `1`;
3308	while (from == `' '` \|\| from == `'\t'` \|\| from == `'.'` \|\| from == `'!'`)
3309	from++;
3310	if (*from == `'-'`)
3311	{
3312	from++;
3313	if (comment_start == `''`)
3314	{
3315	do
3316	{
3317	while (from && from != `'*'`
3318	&& from != `'\n'` && from != `'\r'`)
3319	from++;
3320	if (from != `''` \|\| from[`1`] == `'/'`)
3321	break;
3322	from++;
3323	}
3324	while (`1`);
3325	}
3326	else
3327	while (from && from != `'\n'` && *from != `'\r'`)
3328	from++;
3329	}
3330	}
3331	/ C block comment. /
3332	if (comment_start == `''`)
3333	{
3334	if (from != `''` \|\| from[`1`] != `'/'`)
3335	return false;
3336	}
3337	/ C++ line comment. /
3338	else if (*from != `'\n'`)
3339	return false;
3340
3341	return true;
3342	}
3343
3344	/ Allocate COUNT tokens for RUN. /
3345	void
3346	_cpp_init_tokenrun (tokenrun run, unsigned* int count)
3347	{
3348	run->base = XNEWVEC (cpp_token, count);
3349	run->limit = run->base + count;
3350	run->next = NULL;
3351	}
3352
3353	/ Returns the next tokenrun, or creates one if there is none. /
3354	static tokenrun *
3355	next_tokenrun (tokenrun *run)
3356	{
3357	if (run->next == NULL)
3358	{
3359	run->next = XNEW (tokenrun);
3360	run->next->prev = run;
3361	_cpp_init_tokenrun (run: run->next, count: `250`);
3362	}
3363
3364	return run->next;
3365	}
3366
3367	/ Return the number of not yet processed token in a given*
3368	context. /*
3369	int
3370	_cpp_remaining_tokens_num_in_context (cpp_context *context)
3371	{
3372	if (context->tokens_kind == TOKENS_KIND_DIRECT)
3373	return (LAST (context).token - FIRST (context).token);
3374	else if (context->tokens_kind == TOKENS_KIND_INDIRECT
3375	\|\| context->tokens_kind == TOKENS_KIND_EXTENDED)
3376	return (LAST (context).ptoken - FIRST (context).ptoken);
3377	else
3378	abort ();
3379	}
3380
3381	/ Returns the token present at index INDEX in a given context. If*
3382	INDEX is zero, the next token to be processed is returned. /*
3383	static const cpp_token*
3384	_cpp_token_from_context_at (cpp_context context, int* index)
3385	{
3386	if (context->tokens_kind == TOKENS_KIND_DIRECT)
3387	return &(FIRST (context).token[index]);
3388	else if (context->tokens_kind == TOKENS_KIND_INDIRECT
3389	\|\| context->tokens_kind == TOKENS_KIND_EXTENDED)
3390	return FIRST (context).ptoken[index];
3391	else
3392	abort ();
3393	}
3394
3395	/ Look ahead in the input stream. /
3396	const cpp_token *
3397	cpp_peek_token (cpp_reader pfile, int* index)
3398	{
3399	cpp_context *context = pfile->context;
3400	const cpp_token *peektok;
3401	int count;
3402
3403	/ First, scan through any pending cpp_context objects. /
3404	while (context->prev)
3405	{
3406	ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
3407
3408	if (index < (int) sz)
3409	return _cpp_token_from_context_at (context, index);
3410	index -= (int) sz;
3411	context = context->prev;
3412	}
3413
3414	/ We will have to read some new tokens after all (and do so*
3415	without invalidating preceding tokens). /*
3416	count = index;
3417	pfile->keep_tokens++;
3418
3419	/ For peeked tokens temporarily disable line_change reporting,*
3420	until the tokens are parsed for real. /*
3421	void (line_change) (cpp_reader , const cpp_token , int*)
3422	= pfile->cb.line_change;
3423	pfile->cb.line_change = NULL;
3424
3425	do
3426	{
3427	peektok = _cpp_lex_token (pfile);
3428	if (peektok->type == CPP_EOF)
3429	{
3430	index--;
3431	break;
3432	}
3433	else if (peektok->type == CPP_PRAGMA)
3434	{
3435	/ Don't peek past a pragma. /
3436	if (peektok == &pfile->directive_result)
3437	/ Save the pragma in the buffer. /
3438	pfile->cur_token++ = peektok;
3439	index--;
3440	break;
3441	}
3442	}
3443	while (index--);
3444
3445	_cpp_backup_tokens_direct (pfile, count - index);
3446	pfile->keep_tokens--;
3447	pfile->cb.line_change = line_change;
3448
3449	return peektok;
3450	}
3451
3452	/ Allocate a single token that is invalidated at the same time as the*
3453	rest of the tokens on the line. Has its line and col set to the
3454	same as the last lexed token, so that diagnostics appear in the
3455	right place. /*
3456	cpp_token *
3457	_cpp_temp_token (cpp_reader *pfile)
3458	{
3459	cpp_token old, result;
3460	ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
3461	ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
3462
3463	old = pfile->cur_token - `1`;
3464	/ Any pre-existing lookaheads must not be clobbered. /
3465	if (la)
3466	{
3467	if (sz <= la)
3468	{
3469	tokenrun *next = next_tokenrun (run: pfile->cur_run);
3470
3471	if (sz < la)
3472	memmove (dest: next->base + `1`, src: next->base,
3473	n: (la - sz) * sizeof (cpp_token));
3474
3475	next->base[`0`] = pfile->cur_run->limit[-`1`];
3476	}
3477
3478	if (sz > `1`)
3479	memmove (dest: pfile->cur_token + `1`, src: pfile->cur_token,
3480	MIN (la, sz - `1`) * sizeof (cpp_token));
3481	}
3482
3483	if (!sz && pfile->cur_token == pfile->cur_run->limit)
3484	{
3485	pfile->cur_run = next_tokenrun (run: pfile->cur_run);
3486	pfile->cur_token = pfile->cur_run->base;
3487	}
3488
3489	result = pfile->cur_token++;
3490	result->src_loc = old->src_loc;
3491	return result;
3492	}
3493
3494	/ We're at the beginning of a logical line (so not in*
3495	directives-mode) and RESULT is a CPP_NAME with NODE_MODULE set. See
3496	if we should enter deferred_pragma mode to tokenize the rest of the
3497	line as a module control-line. /*
3498
3499	static void
3500	cpp_maybe_module_directive (cpp_reader pfile, cpp_token result)
3501	{
3502	unsigned backup = `0`; / Tokens we peeked. /
3503	cpp_hashnode *node = result->val.node.node;
3504	cpp_token *peek = result;
3505	cpp_token *keyword = peek;
3506	cpp_hashnode *(&n_modules)[spec_nodes::M_HWM][`2`] = pfile->spec_nodes.n_modules;
3507	int header_count = `0`;
3508
3509	/ Make sure the incoming state is as we expect it. This way we*
3510	can restore it using constants. /*
3511	gcc_checking_assert (!pfile->state.in_deferred_pragma
3512	&& !pfile->state.skipping
3513	&& !pfile->state.parsing_args
3514	&& !pfile->state.angled_headers
3515	&& (pfile->state.save_comments
3516	== !CPP_OPTION (pfile, discard_comments)));
3517
3518	/ Enter directives mode sufficiently for peeking. We don't have*
3519	to actually set in_directive. /*
3520	pfile->state.in_deferred_pragma = true;
3521
3522	/ These two fields are needed to process tokenization in deferred*
3523	pragma mode. They are not used outside deferred pragma mode or
3524	directives mode. /*
3525	pfile->state.pragma_allow_expansion = true;
3526	pfile->directive_line = result->src_loc;
3527
3528	/ Saving comments is incompatible with directives mode. /
3529	pfile->state.save_comments = `0`;
3530
3531	if (node == n_modules[spec_nodes::M_EXPORT][`0`])
3532	{
3533	peek = _cpp_lex_direct (pfile);
3534	keyword = peek;
3535	backup++;
3536	if (keyword->type != CPP_NAME)
3537	goto not_module;
3538	node = keyword->val.node.node;
3539	if (!(node->flags & NODE_MODULE))
3540	goto not_module;
3541	}
3542
3543	if (node == n_modules[spec_nodes::M__IMPORT][`0`])
3544	/ __import /
3545	header_count = backup + `2` + `16`;
3546	else if (node == n_modules[spec_nodes::M_IMPORT][`0`])
3547	/ import /
3548	header_count = backup + `2` + (CPP_OPTION (pfile, preprocessed) ? `16` : `0`);
3549	else if (node == n_modules[spec_nodes::M_MODULE][`0`])
3550	; / module /
3551	else
3552	goto not_module;
3553
3554	/ We've seen [export] {module\|import\|__import}. Check the next token. /
3555	if (header_count)
3556	/ After '{,__}import' a header name may appear. /
3557	pfile->state.angled_headers = true;
3558	peek = _cpp_lex_direct (pfile);
3559	backup++;
3560
3561	/ ... import followed by identifier, ':', '<' or*
3562	header-name preprocessing tokens, or module
3563	followed by cpp-identifier, ':' or ';' preprocessing
3564	tokens. C++ keywords are not yet relevant. /*
3565	if (peek->type == CPP_NAME
3566	\|\| peek->type == CPP_COLON
3567	\|\| (header_count
3568	? (peek->type == CPP_LESS
3569	\|\| (peek->type == CPP_STRING && peek->val.str.text[`0`] != `'R'`)
3570	\|\| peek->type == CPP_HEADER_NAME)
3571	: peek->type == CPP_SEMICOLON))
3572	{
3573	pfile->state.pragma_allow_expansion = !CPP_OPTION (pfile, preprocessed);
3574	if (!pfile->state.pragma_allow_expansion)
3575	pfile->state.prevent_expansion++;
3576
3577	if (!header_count && linemap_included_from
3578	(ord_map: LINEMAPS_LAST_ORDINARY_MAP (set: pfile->line_table)))
3579	cpp_error_with_line (pfile, CPP_DL_ERROR, keyword->src_loc, `0`,
3580	msgid: "module control-line cannot be in included file");
3581
3582	/ The first one or two tokens cannot be macro names. /
3583	for (int ix = backup; ix--;)
3584	{
3585	cpp_token *tok = ix ? keyword : result;
3586	cpp_hashnode *node = tok->val.node.node;
3587
3588	/ Don't attempt to expand the token. /
3589	tok->flags \|= NO_EXPAND;
3590	if (_cpp_defined_macro_p (node)
3591	&& _cpp_maybe_notify_macro_use (pfile, node, loc: tok->src_loc)
3592	&& !cpp_fun_like_macro_p (node))
3593	cpp_error_with_line (pfile, CPP_DL_ERROR, tok->src_loc, `0`,
3594	msgid: "module control-line %qs cannot be"
3595	" an object-like macro",
3596	NODE_NAME (node));
3597	}
3598
3599	/ Map to underbar variants. /
3600	keyword->val.node.node = n_modules[header_count
3601	? spec_nodes::M_IMPORT
3602	: spec_nodes::M_MODULE][`1`];
3603	if (backup != `1`)
3604	result->val.node.node = n_modules[spec_nodes::M_EXPORT][`1`];
3605
3606	/ Maybe tell the tokenizer we expect a header-name down the*
3607	road. /*
3608	pfile->state.directive_file_token = header_count;
3609
3610	/ According to P3034R1, pp-module-name and pp-module-partition tokens*
3611	if any shouldn't be macro expanded and identifiers shouldn't be
3612	defined as object-like macro. /*
3613	if (!header_count && peek->type == CPP_NAME)
3614	{
3615	int state = `0`;
3616	do
3617	{
3618	cpp_token *tok = peek;
3619	if (tok->type == CPP_NAME)
3620	{
3621	cpp_hashnode *node = tok->val.node.node;
3622	/ Don't attempt to expand the token. /
3623	tok->flags \|= NO_EXPAND;
3624	if (_cpp_defined_macro_p (node)
3625	&& _cpp_maybe_notify_macro_use (pfile, node,
3626	loc: tok->src_loc)
3627	&& !cpp_fun_like_macro_p (node))
3628	{
3629	if (state == `0`)
3630	cpp_error_with_line (pfile, CPP_DL_ERROR,
3631	tok->src_loc, `0`,
3632	msgid: "module name %qs cannot "
3633	"be an object-like macro",
3634	NODE_NAME (node));
3635	else
3636	cpp_error_with_line (pfile, CPP_DL_ERROR,
3637	tok->src_loc, `0`,
3638	msgid: "module partition %qs cannot "
3639	"be an object-like macro",
3640	NODE_NAME (node));
3641	}
3642	}
3643	peek = _cpp_lex_direct (pfile);
3644	backup++;
3645	if (tok->type == CPP_NAME)
3646	{
3647	if (peek->type == CPP_DOT)
3648	continue;
3649	else if (peek->type == CPP_COLON && state == `0`)
3650	{
3651	++state;
3652	continue;
3653	}
3654	else if (peek->type == CPP_OPEN_PAREN)
3655	{
3656	if (state == `0`)
3657	cpp_error_with_line (pfile, CPP_DL_ERROR,
3658	peek->src_loc, `0`,
3659	msgid: "module name followed by %<(%>");
3660	else
3661	cpp_error_with_line (pfile, CPP_DL_ERROR,
3662	peek->src_loc, `0`,
3663	msgid: "module partition followed by "
3664	"%<(%>");
3665	break;
3666	}
3667	else if (peek->type == CPP_NAME
3668	&& _cpp_defined_macro_p (node: peek->val.node.node))
3669	{
3670	peek->flags \|= NO_DOT_COLON;
3671	break;
3672	}
3673	else
3674	break;
3675	}
3676	else if (peek->type != CPP_NAME)
3677	break;
3678	}
3679	while (true);
3680	}
3681	}
3682	else
3683	{
3684	not_module:
3685	/ Drop out of directive mode. /
3686	/ We aaserted save_comments had this value upon entry. /
3687	pfile->state.save_comments
3688	= !CPP_OPTION (pfile, discard_comments);
3689	pfile->state.in_deferred_pragma = false;
3690	/ Do not let this remain on. /
3691	pfile->state.angled_headers = false;
3692	}
3693
3694	/ In either case we want to backup the peeked tokens. /
3695	if (backup)
3696	{
3697	/ If we saw EOL, we should drop it, because this isn't a module*
3698	control-line after all. /*
3699	bool eol = peek->type == CPP_PRAGMA_EOL;
3700	if (!eol \|\| backup > `1`)
3701	{
3702	/ Put put the peeked tokens back /
3703	_cpp_backup_tokens_direct (pfile, backup);
3704	/ But if the last one was an EOL, forget it. /
3705	if (eol)
3706	pfile->lookaheads--;
3707	}
3708	}
3709	}
3710
3711	/ Lex a token into RESULT (external interface). Takes care of issues*
3712	like directive handling, token lookahead, multiple include
3713	optimization and skipping. /*
3714	const cpp_token *
3715	_cpp_lex_token (cpp_reader *pfile)
3716	{
3717	cpp_token *result;
3718
3719	for (;;)
3720	{
3721	if (pfile->cur_token == pfile->cur_run->limit)
3722	{
3723	pfile->cur_run = next_tokenrun (run: pfile->cur_run);
3724	pfile->cur_token = pfile->cur_run->base;
3725	}
3726	/ We assume that the current token is somewhere in the current*
3727	run. /*
3728	if (pfile->cur_token < pfile->cur_run->base
3729	\|\| pfile->cur_token >= pfile->cur_run->limit)
3730	abort ();
3731
3732	if (pfile->lookaheads)
3733	{
3734	pfile->lookaheads--;
3735	result = pfile->cur_token++;
3736	}
3737	else
3738	result = _cpp_lex_direct (pfile);
3739
3740	if (result->flags & BOL)
3741	{
3742	/ Is this a directive. If _cpp_handle_directive returns*
3743	false, it is an assembler #. /*
3744	if (result->type == CPP_HASH
3745	/ 6.10.3 p 11: Directives in a list of macro arguments*
3746	gives undefined behavior. This implementation
3747	handles the directive as normal. /*
3748	&& pfile->state.parsing_args != `1`)
3749	{
3750	if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
3751	{
3752	if (pfile->directive_result.type == CPP_PADDING)
3753	continue;
3754	result = &pfile->directive_result;
3755	}
3756	}
3757	else if (pfile->state.in_deferred_pragma)
3758	result = &pfile->directive_result;
3759	else if (result->type == CPP_NAME
3760	&& (result->val.node.node->flags & NODE_MODULE)
3761	&& !pfile->state.skipping
3762	/ Unlike regular directives, we do not deal with*
3763	tokenizing module directives as macro arguments.
3764	That's not permitted. /*
3765	&& !pfile->state.parsing_args)
3766	{
3767	/ P1857. Before macro expansion, At start of logical*
3768	line ... /*
3769	/ We don't have to consider lookaheads at this point. /
3770	gcc_checking_assert (!pfile->lookaheads);
3771
3772	cpp_maybe_module_directive (pfile, result);
3773	}
3774
3775	if (pfile->cb.line_change && !pfile->state.skipping)
3776	pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
3777	}
3778
3779	/ We don't skip tokens in directives. /
3780	if (pfile->state.in_directive \|\| pfile->state.in_deferred_pragma)
3781	break;
3782
3783	/ Outside a directive, invalidate controlling macros. At file*
3784	EOF, _cpp_lex_direct takes care of popping the buffer, so we never
3785	get here and MI optimization works. /*
3786	pfile->mi_valid = false;
3787
3788	if (!pfile->state.skipping \|\| result->type == CPP_EOF)
3789	break;
3790	}
3791
3792	return result;
3793	}
3794
3795	/ Returns true if a fresh line has been loaded. /
3796	template <bool lexing_raw_string>
3797	static bool
3798	get_fresh_line_impl (cpp_reader *pfile)
3799	{
3800	/ We can't get a new line until we leave the current directive, unless we*
3801	are lexing a raw string, in which case it will be OK as long as we don't
3802	pop the current buffer. /*
3803	if (!lexing_raw_string && pfile->state.in_directive)
3804	return false;
3805
3806	for (;;)
3807	{
3808	cpp_buffer *buffer = pfile->buffer;
3809
3810	if (!buffer->need_line)
3811	return true;
3812
3813	if (buffer->next_line < buffer->rlimit)
3814	{
3815	_cpp_clean_line (pfile);
3816	return true;
3817	}
3818
3819	/ We can't change buffers until we leave the current directive. /
3820	if (lexing_raw_string && pfile->state.in_directive)
3821	return false;
3822
3823	/ First, get out of parsing arguments state. /
3824	if (pfile->state.parsing_args)
3825	return false;
3826
3827	/ End of buffer. Non-empty files should end in a newline. /
3828	if (buffer->buf != buffer->rlimit
3829	&& buffer->next_line > buffer->rlimit
3830	&& !buffer->from_stage3)
3831	{
3832	/ Clip to buffer size. /
3833	buffer->next_line = buffer->rlimit;
3834	}
3835
3836	if (buffer->prev && !buffer->return_at_eof)
3837	_cpp_pop_buffer (pfile);
3838	else
3839	{
3840	/ End of translation. Do not pop the buffer yet. Increment*
3841	line number so that the EOF token is on a line of its own
3842	(_cpp_lex_direct doesn't increment in that case, because
3843	it's hard for it to distinguish this special case). /*
3844	CPP_INCREMENT_LINE (pfile, `0`);
3845	return false;
3846	}
3847	}
3848	}
3849
3850	bool
3851	_cpp_get_fresh_line (cpp_reader *pfile)
3852	{
3853	return get_fresh_line_impl<false> (pfile);
3854	}
3855
3856
/* If the next character in BUFFER is CHAR, consume it and give RESULT
   the type THEN_TYPE; otherwise give it ELSE_TYPE.  Expects `buffer'
   and `result' to be in scope at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      if (*buffer->cur == CHAR)                 \
        {                                       \
          buffer->cur++;                        \
          result->type = THEN_TYPE;             \
        }                                       \
      else                                      \
        result->type = ELSE_TYPE;               \
    }                                           \
  while (0)
3865
/* Lex a token into pfile->cur_token, which is also incremented, to
   get diagnostics pointing to the correct location.

   Does not handle issues such as token lookahead, multiple-include
   optimization, directives, skipping etc.  This function is only
   suitable for use by _cpp_lex_token, and in special cases like
   lex_expansion_token which doesn't care for any of these issues.

   When meeting a newline, returns CPP_EOF if parsing a directive,
   otherwise returns to the start of the token buffer if permissible.
   Returns a pointer to the lexed token.  */
3877	cpp_token *
3878	_cpp_lex_direct (cpp_reader *pfile)
3879	{
3880	cppchar_t c = `0`;
3881	cpp_buffer *buffer;
3882	const unsigned char *comment_start;
3883	bool fallthrough_comment = false;
3884	cpp_token *result = pfile->cur_token++;
3885
3886	fresh_line:
3887	result->flags = `0`;
3888	buffer = pfile->buffer;
3889	if (buffer->need_line)
3890	{
3891	if (pfile->state.in_deferred_pragma)
3892	{
3893	/ This can happen in cases like:*
3894	#define loop(x) whatever
3895	#pragma omp loop
3896	where when trying to expand loop we need to peek
3897	next token after loop, but aren't still in_deferred_pragma
3898	mode but are in in_directive mode, so buffer->need_line
3899	is set, a CPP_EOF is peeked. /*
3900	result->type = CPP_PRAGMA_EOL;
3901	pfile->state.in_deferred_pragma = false;
3902	if (!pfile->state.pragma_allow_expansion)
3903	pfile->state.prevent_expansion--;
3904	result->src_loc = pfile->line_table->highest_line;
3905	return result;
3906	}
3907	if (!_cpp_get_fresh_line (pfile))
3908	{
3909	result->type = CPP_EOF;
3910	/ Not a real EOF in a directive or arg parsing -- we refuse*
3911	to advance to the next file now, and will once we're out
3912	of those modes. /*
3913	if (!pfile->state.in_directive && !pfile->state.parsing_args)
3914	{
3915	/ Tell the compiler the line number of the EOF token. /
3916	result->src_loc = pfile->line_table->highest_line;
3917	result->flags = BOL;
3918	/ Now pop the buffer that _cpp_get_fresh_line did not. /
3919	_cpp_pop_buffer (pfile);
3920	}
3921	else if (c == `0`)
3922	result->src_loc = pfile->line_table->highest_line;
3923	return result;
3924	}
3925	if (buffer != pfile->buffer)
3926	fallthrough_comment = false;
3927	if (!pfile->keep_tokens)
3928	{
3929	pfile->cur_run = &pfile->base_run;
3930	result = pfile->base_run.base;
3931	pfile->cur_token = result + `1`;
3932	}
3933	result->flags = BOL;
3934	if (pfile->state.parsing_args == `2`)
3935	result->flags \|= PREV_WHITE;
3936	}
3937	buffer = pfile->buffer;
3938	update_tokens_line:
3939	result->src_loc = pfile->line_table->highest_line;
3940
3941	skipped_white:
3942	if (buffer->cur >= buffer->notes[buffer->cur_note].pos
3943	&& !pfile->overlaid_buffer)
3944	{
3945	_cpp_process_line_notes (pfile, in_comment: false);
3946	result->src_loc = pfile->line_table->highest_line;
3947	}
3948	c = *buffer->cur++;
3949
3950	if (pfile->forced_token_location)
3951	result->src_loc = pfile->forced_token_location;
3952	else
3953	result->src_loc = linemap_position_for_column (pfile->line_table,
3954	CPP_BUF_COLUMN (buffer, buffer->cur));
3955
3956	switch (c)
3957	{
3958	case `' '`: case `'\t'`: case `'\f'`: case `'\v'`: case `'\0'`:
3959	result->flags \|= PREV_WHITE;
3960	skip_whitespace (pfile, c);
3961	goto skipped_white;
3962
3963	case `'\n'`:
3964	/ Increment the line, unless this is the last line ... /
3965	if (buffer->cur < buffer->rlimit
3966	/ ... or this is a #include, (where _cpp_stack_file needs to*
3967	unwind by one line) ... /*
3968	\|\| (pfile->state.in_directive > `1`
3969	/ ... except traditional-cpp increments this elsewhere. /
3970	&& !CPP_OPTION (pfile, traditional)))
3971	CPP_INCREMENT_LINE (pfile, `0`);
3972	buffer->need_line = true;
3973	if (pfile->state.in_deferred_pragma)
3974	{
3975	/ Produce the PRAGMA_EOL on this line. File reading*
3976	ensures there is always a \n at end of the buffer, thus
3977	in a deferred pragma we always see CPP_PRAGMA_EOL before
3978	any CPP_EOF. /*
3979	result->type = CPP_PRAGMA_EOL;
3980	result->flags &= ~PREV_WHITE;
3981	pfile->state.in_deferred_pragma = false;
3982	if (!pfile->state.pragma_allow_expansion)
3983	pfile->state.prevent_expansion--;
3984	return result;
3985	}
3986	goto fresh_line;
3987
3988	case `'0'`: case `'1'`: case `'2'`: case `'3'`: case `'4'`:
3989	case `'5'`: case `'6'`: case `'7'`: case `'8'`: case `'9'`:
3990	{
3991	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
3992	result->type = CPP_NUMBER;
3993	lex_number (pfile, number: &result->val.str, nst: &nst);
3994	warn_about_normalization (pfile, token: result, s: &nst, identifier: false);
3995	break;
3996	}
3997
3998	case `'L'`:
3999	case `'u'`:
4000	case `'U'`:
4001	case `'R'`:
4002	/ 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,*
4003	wide strings or raw strings. /*
4004	if (c == `'L'` \|\| CPP_OPTION (pfile, rliterals)
4005	\|\| (c != `'R'` && CPP_OPTION (pfile, uliterals)))
4006	{
4007	if ((*buffer->cur == `'\''` && c != `'R'`)
4008	\|\| *buffer->cur == `'"'`
4009	\|\| (*buffer->cur == `'R'`
4010	&& c != `'R'`
4011	&& buffer->cur[`1`] == `'"'`
4012	&& CPP_OPTION (pfile, rliterals))
4013	\|\| (*buffer->cur == `'8'`
4014	&& c == `'u'`
4015	&& ((buffer->cur[`1`] == `'"'` \|\| (buffer->cur[`1`] == `'\''`
4016	&& CPP_OPTION (pfile, utf8_char_literals)))
4017	\|\| (buffer->cur[`1`] == `'R'` && buffer->cur[`2`] == `'"'`
4018	&& CPP_OPTION (pfile, rliterals)))))
4019	{
4020	lex_string (pfile, token: result, base: buffer->cur - `1`);
4021	break;
4022	}
4023	}
4024	/ Fall through. /
4025
4026	case `'_'`:
4027	case `'a'`: case `'b'`: case `'c'`: case `'d'`: case `'e'`: case `'f'`:
4028	case `'g'`: case `'h'`: case `'i'`: case `'j'`: case `'k'`: case `'l'`:
4029	case `'m'`: case `'n'`: case `'o'`: case `'p'`: case `'q'`: case `'r'`:
4030	case `'s'`: case `'t'`: case `'v'`: case `'w'`: case `'x'`:
4031	case `'y'`: case `'z'`:
4032	case `'A'`: case `'B'`: case `'C'`: case `'D'`: case `'E'`: case `'F'`:
4033	case `'G'`: case `'H'`: case `'I'`: case `'J'`: case `'K'`:
4034	case `'M'`: case `'N'`: case `'O'`: case `'P'`: case `'Q'`:
4035	case `'S'`: case `'T'`: case `'V'`: case `'W'`: case `'X'`:
4036	case `'Y'`: case `'Z'`:
4037	result->type = CPP_NAME;
4038	{
4039	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
4040	const auto node = lex_identifier (pfile, base: buffer->cur - `1`, starts_ucn: false, nst: &nst,
4041	spelling: &result->val.node.spelling);
4042	result->val.node.node = node;
4043	identifier_diagnostics_on_lex (pfile, node);
4044	warn_about_normalization (pfile, token: result, s: &nst, identifier: true);
4045	}
4046
4047	/ Convert named operators to their proper types. /
4048	if (result->val.node.node->flags & NODE_OPERATOR)
4049	{
4050	result->flags \|= NAMED_OP;
4051	result->type = (enum cpp_ttype) result->val.node.node->directive_index;
4052	}
4053
4054	/ Signal FALLTHROUGH comment followed by another token. /
4055	if (fallthrough_comment)
4056	result->flags \|= PREV_FALLTHROUGH;
4057	break;
4058
4059	case `'\''`:
4060	case `'"'`:
4061	lex_string (pfile, token: result, base: buffer->cur - `1`);
4062	break;
4063
4064	case `'/'`:
4065	/ A potential block or line comment. /
4066	comment_start = buffer->cur;
4067	c = *buffer->cur;
4068
4069	if (c == `'*'`)
4070	{
4071	if (_cpp_skip_block_comment (pfile))
4072	cpp_error (pfile, CPP_DL_ERROR, msgid: "unterminated comment");
4073	}
4074	else if (c == `'/'` && ! CPP_OPTION (pfile, traditional))
4075	{
4076	/ Don't warn for system headers. /
4077	if (_cpp_in_system_header (pfile))
4078	;
4079	/ Warn about comments if pedantically GNUC89, and not*
4080	in system headers. /*
4081	else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
4082	&& CPP_PEDANTIC (pfile)
4083	&& ! buffer->warned_cplusplus_comments)
4084	{
4085	if (cpp_pedwarning (pfile, CPP_W_PEDANTIC,
4086	msgid: "C++ style comments are not allowed "
4087	"in ISO C90"))
4088	cpp_error (pfile, CPP_DL_NOTE,
4089	msgid: "(this will be reported only once per input file)");
4090	buffer->warned_cplusplus_comments = `1`;
4091	}
4092	/ Or if specifically desired via -Wc90-c99-compat. /
4093	else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > `0`
4094	&& ! CPP_OPTION (pfile, cplusplus)
4095	&& ! buffer->warned_cplusplus_comments)
4096	{
4097	if (cpp_error (pfile, CPP_DL_WARNING,
4098	msgid: "C++ style comments are incompatible with C90"))
4099	cpp_error (pfile, CPP_DL_NOTE,
4100	msgid: "(this will be reported only once per input file)");
4101	buffer->warned_cplusplus_comments = `1`;
4102	}
4103	/ In C89/C94, C++ style comments are forbidden. /
4104	else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
4105	\|\| CPP_OPTION (pfile, lang) == CLK_STDC94))
4106	{
4107	/ But don't be confused about valid code such as*
4108	- // immediately followed by ,*
4109	- // in a preprocessing directive,
4110	- // in an #if 0 block. /*
4111	if (buffer->cur[`1`] == `'*'`
4112	\|\| pfile->state.in_directive
4113	\|\| pfile->state.skipping)
4114	{
4115	result->type = CPP_DIV;
4116	break;
4117	}
4118	else if (! buffer->warned_cplusplus_comments)
4119	{
4120	if (cpp_error (pfile, CPP_DL_ERROR,
4121	msgid: "C++ style comments are not allowed in "
4122	"ISO C90"))
4123	cpp_error (pfile, CPP_DL_NOTE,
4124	msgid: "(this will be reported only once per input "
4125	"file)");
4126	buffer->warned_cplusplus_comments = `1`;
4127	}
4128	}
4129	if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
4130	cpp_warning (pfile, CPP_W_COMMENTS, msgid: "multi-line comment");
4131	}
4132	else if (c == `'='`)
4133	{
4134	buffer->cur++;
4135	result->type = CPP_DIV_EQ;
4136	break;
4137	}
4138	else
4139	{
4140	result->type = CPP_DIV;
4141	break;
4142	}
4143
4144	if (fallthrough_comment_p (pfile, comment_start))
4145	fallthrough_comment = true;
4146
4147	if (pfile->cb.comment)
4148	{
4149	size_t len = pfile->buffer->cur - comment_start;
4150	pfile->cb.comment (pfile, result->src_loc, comment_start - `1`,
4151	len + `1`);
4152	}
4153
4154	if (!pfile->state.save_comments)
4155	{
4156	result->flags \|= PREV_WHITE;
4157	goto update_tokens_line;
4158	}
4159
4160	if (fallthrough_comment)
4161	result->flags \|= PREV_FALLTHROUGH;
4162
4163	/ Save the comment as a token in its own right. /
4164	save_comment (pfile, token: result, from: comment_start, type: c);
4165	break;
4166
4167	case `'<'`:
4168	if (pfile->state.angled_headers)
4169	{
4170	lex_string (pfile, token: result, base: buffer->cur - `1`);
4171	if (result->type != CPP_LESS)
4172	break;
4173	}
4174
4175	result->type = CPP_LESS;
4176	if (*buffer->cur == `'='`)
4177	{
4178	buffer->cur++, result->type = CPP_LESS_EQ;
4179	if (*buffer->cur == `'>'`
4180	&& CPP_OPTION (pfile, cplusplus)
4181	&& CPP_OPTION (pfile, lang) >= CLK_GNUCXX20)
4182	buffer->cur++, result->type = CPP_SPACESHIP;
4183	}
4184	else if (*buffer->cur == `'<'`)
4185	{
4186	buffer->cur++;
4187	IF_NEXT_IS (`'='`, CPP_LSHIFT_EQ, CPP_LSHIFT);
4188	}
4189	else if (CPP_OPTION (pfile, digraphs))
4190	{
4191	if (*buffer->cur == `':'`)
4192	{
4193	/ C++11 [2.5/3 lex.pptoken], "Otherwise, if the next*
4194	three characters are <:: and the subsequent character
4195	is neither : nor >, the < is treated as a preprocessor
4196	token by itself". /*
4197	if (CPP_OPTION (pfile, cplusplus)
4198	&& CPP_OPTION (pfile, lang) != CLK_CXX98
4199	&& CPP_OPTION (pfile, lang) != CLK_GNUCXX
4200	&& buffer->cur[`1`] == `':'`
4201	&& buffer->cur[`2`] != `':'` && buffer->cur[`2`] != `'>'`)
4202	break;
4203
4204	buffer->cur++;
4205	result->flags \|= DIGRAPH;
4206	result->type = CPP_OPEN_SQUARE;
4207	}
4208	else if (*buffer->cur == `'%'`)
4209	{
4210	buffer->cur++;
4211	result->flags \|= DIGRAPH;
4212	result->type = CPP_OPEN_BRACE;
4213	}
4214	}
4215	break;
4216
4217	case `'>'`:
4218	result->type = CPP_GREATER;
4219	if (*buffer->cur == `'='`)
4220	buffer->cur++, result->type = CPP_GREATER_EQ;
4221	else if (*buffer->cur == `'>'`)
4222	{
4223	buffer->cur++;
4224	IF_NEXT_IS (`'='`, CPP_RSHIFT_EQ, CPP_RSHIFT);
4225	}
4226	break;
4227
4228	case `'%'`:
4229	result->type = CPP_MOD;
4230	if (*buffer->cur == `'='`)
4231	buffer->cur++, result->type = CPP_MOD_EQ;
4232	else if (CPP_OPTION (pfile, digraphs))
4233	{
4234	if (*buffer->cur == `':'`)
4235	{
4236	buffer->cur++;
4237	result->flags \|= DIGRAPH;
4238	result->type = CPP_HASH;
4239	if (*buffer->cur == `'%'` && buffer->cur[`1`] == `':'`)
4240	buffer->cur += `2`, result->type = CPP_PASTE, result->val.token_no = `0`;
4241	}
4242	else if (*buffer->cur == `'>'`)
4243	{
4244	buffer->cur++;
4245	result->flags \|= DIGRAPH;
4246	result->type = CPP_CLOSE_BRACE;
4247	}
4248	}
4249	break;
4250
4251	case `'.'`:
4252	result->type = CPP_DOT;
4253	if (ISDIGIT (*buffer->cur))
4254	{
4255	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
4256	result->type = CPP_NUMBER;
4257	lex_number (pfile, number: &result->val.str, nst: &nst);
4258	warn_about_normalization (pfile, token: result, s: &nst, identifier: false);
4259	}
4260	else if (*buffer->cur == `'.'` && buffer->cur[`1`] == `'.'`)
4261	buffer->cur += `2`, result->type = CPP_ELLIPSIS;
4262	else if (buffer->cur == `''` && CPP_OPTION (pfile, cplusplus))
4263	buffer->cur++, result->type = CPP_DOT_STAR;
4264	break;
4265
4266	case `'+'`:
4267	result->type = CPP_PLUS;
4268	if (*buffer->cur == `'+'`)
4269	buffer->cur++, result->type = CPP_PLUS_PLUS;
4270	else if (*buffer->cur == `'='`)
4271	buffer->cur++, result->type = CPP_PLUS_EQ;
4272	break;
4273
4274	case `'-'`:
4275	result->type = CPP_MINUS;
4276	if (*buffer->cur == `'>'`)
4277	{
4278	buffer->cur++;
4279	result->type = CPP_DEREF;
4280	if (buffer->cur == `''` && CPP_OPTION (pfile, cplusplus))
4281	buffer->cur++, result->type = CPP_DEREF_STAR;
4282	}
4283	else if (*buffer->cur == `'-'`)
4284	buffer->cur++, result->type = CPP_MINUS_MINUS;
4285	else if (*buffer->cur == `'='`)
4286	buffer->cur++, result->type = CPP_MINUS_EQ;
4287	break;
4288
4289	case `'&'`:
4290	result->type = CPP_AND;
4291	if (*buffer->cur == `'&'`)
4292	buffer->cur++, result->type = CPP_AND_AND;
4293	else if (*buffer->cur == `'='`)
4294	buffer->cur++, result->type = CPP_AND_EQ;
4295	break;
4296
4297	case `'\|'`:
4298	result->type = CPP_OR;
4299	if (*buffer->cur == `'\|'`)
4300	buffer->cur++, result->type = CPP_OR_OR;
4301	else if (*buffer->cur == `'='`)
4302	buffer->cur++, result->type = CPP_OR_EQ;
4303	break;
4304
4305	case `':'`:
4306	result->type = CPP_COLON;
4307	if (*buffer->cur == `':'`)
4308	{
4309	if (CPP_OPTION (pfile, scope))
4310	buffer->cur++, result->type = CPP_SCOPE;
4311	else
4312	result->flags \|= COLON_SCOPE;
4313	}
4314	else if (*buffer->cur == `'>'` && CPP_OPTION (pfile, digraphs))
4315	{
4316	buffer->cur++;
4317	result->flags \|= DIGRAPH;
4318	result->type = CPP_CLOSE_SQUARE;
4319	}
4320	break;
4321
4322	case `''`: IF_NEXT_IS (`'='`, CPP_MULT_EQ, CPP_MULT); break*;
4323	case `'='`: IF_NEXT_IS (`'='`, CPP_EQ_EQ, CPP_EQ); break;
4324	case `'!'`: IF_NEXT_IS (`'='`, CPP_NOT_EQ, CPP_NOT); break;
4325	case `'^'`: IF_NEXT_IS (`'='`, CPP_XOR_EQ, CPP_XOR); break;
4326	case `'#'`: IF_NEXT_IS (`'#'`, CPP_PASTE, CPP_HASH); result->val.token_no = `0`; break;
4327
4328	case `'?'`: result->type = CPP_QUERY; break;
4329	case `'~'`: result->type = CPP_COMPL; break;
4330	case `','`: result->type = CPP_COMMA; break;
4331	case `'('`: result->type = CPP_OPEN_PAREN; break;
4332	case `')'`: result->type = CPP_CLOSE_PAREN; break;
4333	case `'['`: result->type = CPP_OPEN_SQUARE; break;
4334	case `']'`: result->type = CPP_CLOSE_SQUARE; break;
4335	case `'{'`: result->type = CPP_OPEN_BRACE; break;
4336	case `'}'`: result->type = CPP_CLOSE_BRACE; break;
4337	case `';'`: result->type = CPP_SEMICOLON; break;
4338
4339	/ @ is a punctuator in Objective-C. /
4340	case `'@'`: result->type = CPP_ATSIGN; break;
4341
4342	default:
4343	{
4344	const uchar *base = --buffer->cur;
4345	static int no_warn_cnt;
4346
4347	/ Check for an extended identifier ($ or UCN or UTF-8). /
4348	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
4349	if (forms_identifier_p (pfile, first: true, state: &nst))
4350	{
4351	result->type = CPP_NAME;
4352	const auto node = lex_identifier (pfile, base, starts_ucn: true, nst: &nst,
4353	spelling: &result->val.node.spelling);
4354	result->val.node.node = node;
4355	identifier_diagnostics_on_lex (pfile, node);
4356	warn_about_normalization (pfile, token: result, s: &nst, identifier: true);
4357	break;
4358	}
4359
4360	/ Otherwise this will form a CPP_OTHER token. Parse valid UTF-8 as a*
4361	single token. /*
4362	buffer->cur++;
4363	if (c >= utf8_signifier)
4364	{
4365	const uchar *pstr = base;
4366	cppchar_t s;
4367	if (_cpp_valid_utf8 (pfile, pstr: &pstr, limit: buffer->rlimit, identifier_pos: `0`, NULL, cp: &s))
4368	{
4369	if (s > UCS_LIMIT && CPP_OPTION (pfile, cpp_warn_invalid_utf8))
4370	{
4371	buffer->cur = base;
4372	_cpp_warn_invalid_utf8 (pfile);
4373	}
4374	buffer->cur = pstr;
4375	}
4376	else if (CPP_OPTION (pfile, cpp_warn_invalid_utf8))
4377	{
4378	buffer->cur = base;
4379	const uchar *end = _cpp_warn_invalid_utf8 (pfile);
4380	buffer->cur = base + `1`;
4381	no_warn_cnt = end - buffer->cur;
4382	}
4383	}
4384	else if (c >= utf8_continuation
4385	&& CPP_OPTION (pfile, cpp_warn_invalid_utf8))
4386	{
4387	if (no_warn_cnt)
4388	--no_warn_cnt;
4389	else
4390	{
4391	buffer->cur = base;
4392	_cpp_warn_invalid_utf8 (pfile);
4393	buffer->cur = base + `1`;
4394	}
4395	}
4396	create_literal (pfile, token: result, base, len: buffer->cur - base, type: CPP_OTHER);
4397	break;
4398	}
4399
4400	}
4401
4402	/ Potentially convert the location of the token to a range. /
4403	if (result->src_loc >= RESERVED_LOCATION_COUNT
4404	&& result->type != CPP_EOF)
4405	{
4406	/ Ensure that any line notes are processed, so that we have the*
4407	correct physical line/column for the end-point of the token even
4408	when a logical line is split via one or more backslashes. /*
4409	if (buffer->cur >= buffer->notes[buffer->cur_note].pos
4410	&& !pfile->overlaid_buffer)
4411	_cpp_process_line_notes (pfile, in_comment: false);
4412
4413	source_range tok_range;
4414	tok_range.m_start = result->src_loc;
4415	tok_range.m_finish
4416	= linemap_position_for_column (pfile->line_table,
4417	CPP_BUF_COLUMN (buffer, buffer->cur));
4418
4419	result->src_loc
4420	= pfile->line_table->get_or_create_combined_loc (locus: result->src_loc,
4421	src_range: tok_range, data: nullptr, discriminator: `0`);
4422	}
4423
4424	return result;
4425	}
4426
4427	/ An upper bound on the number of bytes needed to spell TOKEN.*
4428	Does not include preceding whitespace. /*
4429	unsigned int
4430	cpp_token_len (const cpp_token *token)
4431	{
4432	unsigned int len;
4433
4434	switch (TOKEN_SPELL (token))
4435	{
4436	default: len = `6`; break;
4437	case SPELL_LITERAL: len = token->val.str.len; break;
4438	case SPELL_IDENT: len = NODE_LEN (token->val.node.node) * `10`; break;
4439	}
4440
4441	return len;
4442	}
4443
/* Parse one UTF-8 sequence starting at NAME and store the equivalent
   \U escape in BUFFER.

   BUFFER must have room for 10 bytes; exactly 10 bytes are always
   written ('\\', 'U' and eight lower-case hexadecimal digits) and no
   terminating NUL is added.

   Returns the number of bytes consumed from NAME, i.e. the length of
   the UTF-8 sequence as announced by its lead byte.  Aborts if any
   trailing byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  int j;
  int ucn_len = 0;
  int ucn_len_c;
  unsigned t;
  unsigned long utf32;

  /* Compute the length of the UTF-8 sequence: it equals the number of
     leading one bits in the first byte.  */
  for (t = *name; t & 0x80; t <<= 1)
    ucn_len++;

  /* The payload bits of the lead byte are those below the length
     marker.  */
  utf32 = *name & (0x7F >> ucn_len);
  for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
    {
      /* Each continuation byte contributes six more bits.  */
      utf32 = (utf32 << 6) | (*++name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();
    }

  *buffer++ = '\\';
  *buffer++ = 'U';
  /* Emit the eight nibbles, most significant first.  */
  for (j = 7; j >= 0; j--)
    *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
  return ucn_len;
}
4477
4478	/ Given a token TYPE corresponding to a digraph, return a pointer to*
4479	the spelling of the digraph. /*
4480	static const unsigned char *
4481	cpp_digraph2name (enum cpp_ttype type)
4482	{
4483	return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
4484	}
4485
4486	/ Write the spelling of an identifier IDENT, using UCNs, to BUFFER.*
4487	The buffer must already contain enough space to hold the
4488	token's spelling. Returns a pointer to the character after the
4489	last character written. /*
4490	unsigned char *
4491	_cpp_spell_ident_ucns (unsigned char buffer, cpp_hashnode ident)
4492	{
4493	size_t i;
4494	const unsigned char *name = NODE_NAME (ident);
4495
4496	for (i = `0`; i < NODE_LEN (ident); i++)
4497	if (name[i] & ~`0x7F`)
4498	{
4499	i += utf8_to_ucn (buffer, name: name + i) - `1`;
4500	buffer += `10`;
4501	}
4502	else
4503	*buffer++ = name[i];
4504
4505	return buffer;
4506	}
4507
4508	/ Write the spelling of a token TOKEN to BUFFER. The buffer must*
4509	already contain enough space to hold the token's spelling.
4510	Returns a pointer to the character after the last character written.
4511	FORSTRING is true if this is to be the spelling after translation
4512	phase 1 (with the original spelling of extended identifiers), false
4513	if extended identifiers should always be written using UCNs (there is
4514	no option for always writing them in the internal UTF-8 form).
4515	FIXME: Would be nice if we didn't need the PFILE argument. /*
4516	unsigned char *
4517	cpp_spell_token (cpp_reader pfile, const* cpp_token *token,
4518	unsigned char buffer, bool* forstring)
4519	{
4520	switch (TOKEN_SPELL (token))
4521	{
4522	case SPELL_OPERATOR:
4523	{
4524	const unsigned char *spelling;
4525	unsigned char c;
4526
4527	if (token->flags & DIGRAPH)
4528	spelling = cpp_digraph2name (type: token->type);
4529	else if (token->flags & NAMED_OP)
4530	goto spell_ident;
4531	else
4532	spelling = TOKEN_NAME (token);
4533
4534	while ((c = *spelling++) != `'\0'`)
4535	*buffer++ = c;
4536	}
4537	break;
4538
4539	spell_ident:
4540	case SPELL_IDENT:
4541	if (forstring)
4542	{
4543	memcpy (dest: buffer, NODE_NAME (token->val.node.spelling),
4544	NODE_LEN (token->val.node.spelling));
4545	buffer += NODE_LEN (token->val.node.spelling);
4546	}
4547	else
4548	buffer = _cpp_spell_ident_ucns (buffer, ident: token->val.node.node);
4549	break;
4550
4551	case SPELL_LITERAL:
4552	memcpy (dest: buffer, src: token->val.str.text, n: token->val.str.len);
4553	buffer += token->val.str.len;
4554	break;
4555
4556	case SPELL_NONE:
4557	cpp_error (pfile, CPP_DL_ICE,
4558	msgid: "unspellable token %s", TOKEN_NAME (token));
4559	break;
4560	}
4561
4562	return buffer;
4563	}
4564
4565	/ Returns TOKEN spelt as a null-terminated string. The string is*
4566	freed when the reader is destroyed. Useful for diagnostics. /*
4567	unsigned char *
4568	cpp_token_as_text (cpp_reader pfile, const* cpp_token *token)
4569	{
4570	unsigned int len = cpp_token_len (token) + `1`;
4571	unsigned char start = _cpp_unaligned_alloc (pfile, len), end;
4572
4573	end = cpp_spell_token (pfile, token, buffer: start, forstring: false);
4574	end[`0`] = `'\0'`;
4575
4576	return start;
4577	}
4578
4579	/ Returns a pointer to a string which spells the token defined by*
4580	TYPE and FLAGS. Used by C front ends, which really should move to
4581	using cpp_token_as_text. /*
4582	const char *
4583	cpp_type2name (enum cpp_ttype type, unsigned char flags)
4584	{
4585	if (flags & DIGRAPH)
4586	return (const char *) cpp_digraph2name (type);
4587	else if (flags & NAMED_OP)
4588	return cpp_named_operator2name (type);
4589
4590	return (const char *) token_spellings[type].name;
4591	}
4592
4593	/ Writes the spelling of token to FP, without any preceding space.*
4594	Separated from cpp_spell_token for efficiency - to avoid stdio
4595	double-buffering. /*
4596	void
4597	cpp_output_token (const cpp_token token, FILE fp)
4598	{
4599	switch (TOKEN_SPELL (token))
4600	{
4601	case SPELL_OPERATOR:
4602	{
4603	const unsigned char *spelling;
4604	int c;
4605
4606	if (token->flags & DIGRAPH)
4607	spelling = cpp_digraph2name (type: token->type);
4608	else if (token->flags & NAMED_OP)
4609	goto spell_ident;
4610	else
4611	spelling = TOKEN_NAME (token);
4612
4613	c = *spelling;
4614	do
4615	putc (c, fp);
4616	while ((c = *++spelling) != `'\0'`);
4617	}
4618	break;
4619
4620	spell_ident:
4621	case SPELL_IDENT:
4622	{
4623	size_t i;
4624	const unsigned char * name = NODE_NAME (token->val.node.node);
4625	unsigned len = NODE_LEN (token->val.node.node);
4626
4627	for (i = `0`; i < len; i++)
4628	if (name[i] & ~`0x7F`)
4629	{
4630	unsigned char buffer[`10`];
4631	i += utf8_to_ucn (buffer, name: name + i) - `1`;
4632	fwrite (buffer, `1`, `10`, fp);
4633	}
4634	else if (name[i] == `' '` && i == len - `1`)
4635	/ Omit terminal space in "export ". /;
4636	else
4637	fputc (NODE_NAME (token->val.node.node)[i], fp);
4638	}
4639	break;
4640
4641	case SPELL_LITERAL:
4642	if (token->type == CPP_HEADER_NAME)
4643	fputc (`'"'`, fp);
4644	fwrite (token->val.str.text, `1`, token->val.str.len, fp);
4645	if (token->type == CPP_HEADER_NAME)
4646	fputc (`'"'`, fp);
4647	break;
4648
4649	case SPELL_NONE:
4650	/ An error, most probably. /
4651	break;
4652	}
4653	}
4654
4655	/ Compare two tokens. /
4656	int
4657	_cpp_equiv_tokens (const cpp_token a, const* cpp_token *b)
4658	{
4659	if (a->type == b->type && a->flags == b->flags)
4660	switch (TOKEN_SPELL (a))
4661	{
4662	default: / Keep compiler happy. /
4663	case SPELL_OPERATOR:
4664	/ token_no is used to track where multiple consecutive ##*
4665	tokens were originally located. /*
4666	return (a->type != CPP_PASTE \|\| a->val.token_no == b->val.token_no);
4667	case SPELL_NONE:
4668	return (a->type != CPP_MACRO_ARG
4669	\|\| (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
4670	&& a->val.macro_arg.spelling == b->val.macro_arg.spelling));
4671	case SPELL_IDENT:
4672	return (a->val.node.node == b->val.node.node
4673	&& a->val.node.spelling == b->val.node.spelling);
4674	case SPELL_LITERAL:
4675	return (a->val.str.len == b->val.str.len
4676	&& !memcmp (s1: a->val.str.text, s2: b->val.str.text,
4677	n: a->val.str.len));
4678	}
4679
4680	return `0`;
4681	}
4682
4683	/ Returns nonzero if a space should be inserted to avoid an*
4684	accidental token paste for output. For simplicity, it is
4685	conservative, and occasionally advises a space where one is not
4686	needed, e.g. "." and ".2". /*
4687	int
4688	cpp_avoid_paste (cpp_reader pfile, const* cpp_token *token1,
4689	const cpp_token *token2)
4690	{
4691	enum cpp_ttype a = token1->type, b = token2->type;
4692	cppchar_t c;
4693
4694	if (token1->flags & NAMED_OP)
4695	a = CPP_NAME;
4696	if (token2->flags & NAMED_OP)
4697	b = CPP_NAME;
4698
4699	c = EOF;
4700	if (token2->flags & DIGRAPH)
4701	c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][`0`];
4702	else if (token_spellings[b].category == SPELL_OPERATOR)
4703	c = token_spellings[b].name[`0`];
4704
4705	/ Quickly get everything that can paste with an '='. /
4706	if ((int) a <= (int) CPP_LAST_EQ && c == `'='`)
4707	return `1`;
4708
4709	switch (a)
4710	{
4711	case CPP_GREATER: return c == `'>'`;
4712	case CPP_LESS: return c == `'<'` \|\| c == `'%'` \|\| c == `':'`;
4713	case CPP_PLUS: return c == `'+'`;
4714	case CPP_MINUS: return c == `'-'` \|\| c == `'>'`;
4715	case CPP_DIV: return c == `'/'` \|\| c == `''`; /* Comments. /
4716	case CPP_MOD: return c == `':'` \|\| c == `'>'`;
4717	case CPP_AND: return c == `'&'`;
4718	case CPP_OR: return c == `'\|'`;
4719	case CPP_COLON: return c == `':'` \|\| c == `'>'`;
4720	case CPP_DEREF: return c == `'*'`;
4721	case CPP_DOT: return c == `'.'` \|\| c == `'%'` \|\| b == CPP_NUMBER;
4722	case CPP_HASH: return c == `'#'` \|\| c == `'%'`; / Digraph form. /
4723	case CPP_PRAGMA:
4724	case CPP_NAME: return ((b == CPP_NUMBER
4725	&& name_p (pfile, string: &token2->val.str))
4726	\|\| b == CPP_NAME
4727	\|\| b == CPP_CHAR \|\| b == CPP_STRING); / L /
4728	case CPP_NUMBER: return (b == CPP_NUMBER \|\| b == CPP_NAME
4729	\|\| b == CPP_CHAR
4730	\|\| c == `'.'` \|\| c == `'+'` \|\| c == `'-'`);
4731	/ UCNs /
4732	case CPP_OTHER: return ((token1->val.str.text[`0`] == `'\\'`
4733	&& b == CPP_NAME)
4734	\|\| (CPP_OPTION (pfile, objc)
4735	&& token1->val.str.text[`0`] == `'@'`
4736	&& (b == CPP_NAME \|\| b == CPP_STRING)));
4737	case CPP_LESS_EQ: return c == `'>'`;
4738	case CPP_STRING:
4739	case CPP_WSTRING:
4740	case CPP_UTF8STRING:
4741	case CPP_STRING16:
4742	case CPP_STRING32: return (CPP_OPTION (pfile, user_literals)
4743	&& (b == CPP_NAME
4744	\|\| (TOKEN_SPELL (token2) == SPELL_LITERAL
4745	&& ISIDST (token2->val.str.text[`0`]))));
4746
4747	default: break;
4748	}
4749
4750	return `0`;
4751	}
4752
4753	/ Output all the remaining tokens on the current line, and a newline*
4754	character, to FP. Leading whitespace is removed. If there are
4755	macros, special token padding is not performed. /*
4756	void
4757	cpp_output_line (cpp_reader pfile, FILE fp)
4758	{
4759	const cpp_token *token;
4760
4761	token = cpp_get_token (pfile);
4762	while (token->type != CPP_EOF)
4763	{
4764	cpp_output_token (token, fp);
4765	token = cpp_get_token (pfile);
4766	if (token->flags & PREV_WHITE)
4767	putc (`' '`, fp);
4768	}
4769
4770	putc (`'\n'`, fp);
4771	}
4772
4773	/ Return a string representation of all the remaining tokens on the*
4774	current line. The result is allocated using xmalloc and must be
4775	freed by the caller. /*
4776	unsigned char *
4777	cpp_output_line_to_string (cpp_reader pfile, const* unsigned char *dir_name)
4778	{
4779	const cpp_token *token;
4780	unsigned int out = dir_name ? ustrlen (s1: dir_name) : `0`;
4781	unsigned int alloced = `120` + out;
4782	unsigned char result = (unsigned* char *) xmalloc (alloced);
4783
4784	/ If DIR_NAME is empty, there are no initial contents. /
4785	if (dir_name)
4786	{
4787	sprintf (s: (char *) result, format: "#%s ", dir_name);
4788	out += `2`;
4789	}
4790
4791	token = cpp_get_token (pfile);
4792	while (token->type != CPP_EOF)
4793	{
4794	unsigned char *last;
4795	/ Include room for a possible space and the terminating nul. /
4796	unsigned int len = cpp_token_len (token) + `2`;
4797
4798	if (out + len > alloced)
4799	{
4800	alloced *= `2`;
4801	if (out + len > alloced)
4802	alloced = out + len;
4803	result = (unsigned char *) xrealloc (result, alloced);
4804	}
4805
4806	last = cpp_spell_token (pfile, token, buffer: &result[out], forstring: `0`);
4807	out = last - result;
4808
4809	token = cpp_get_token (pfile);
4810	if (token->flags & PREV_WHITE)
4811	result[out++] = `' '`;
4812	}
4813
4814	result[out] = `'\0'`;
4815	return result;
4816	}
4817
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer ever allocated.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that is
   still considered a reasonable match for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the span from BUFF's cur to its
   limit.  Every macro argument is parenthesized so that expression
   arguments expand safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
4832
4833	/ Create a new allocation buffer. Place the control block at the end*
4834	of the buffer, so that buffer overflows will cause immediate chaos. /*
4835	static _cpp_buff *
4836	new_buff (size_t len)
4837	{
4838	_cpp_buff *result;
4839	unsigned char *base;
4840
4841	if (len < MIN_BUFF_SIZE)
4842	len = MIN_BUFF_SIZE;
4843	len = CPP_ALIGN (len);
4844
4845	#ifdef ENABLE_VALGRIND_WORKAROUNDS
4846	/ Valgrind warns about uses of interior pointers, so put _cpp_buff*
4847	struct first. /*
4848	size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), `2` * DEFAULT_ALIGNMENT);
4849	base = XNEWVEC (unsigned char, len + slen);
4850	result = (_cpp_buff *) base;
4851	base += slen;
4852	#else
4853	base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
4854	result = (_cpp_buff *) (base + len);
4855	#endif
4856	result->base = base;
4857	result->cur = base;
4858	result->limit = base + len;
4859	result->next = NULL;
4860	return result;
4861	}
4862
4863	/ Place a chain of unwanted allocation buffers on the free list. /
4864	void
4865	_cpp_release_buff (cpp_reader pfile, _cpp_buff buff)
4866	{
4867	_cpp_buff *end = buff;
4868
4869	while (end->next)
4870	end = end->next;
4871	end->next = pfile->free_buffs;
4872	pfile->free_buffs = buff;
4873	}
4874
4875	/ Return a free buffer of size at least MIN_SIZE. /
4876	_cpp_buff *
4877	_cpp_get_buff (cpp_reader *pfile, size_t min_size)
4878	{
4879	_cpp_buff result, *p;
4880
4881	for (p = &pfile->free_buffs;; p = &(*p)->next)
4882	{
4883	size_t size;
4884
4885	if (*p == NULL)
4886	return new_buff (len: min_size);
4887	result = *p;
4888	size = result->limit - result->base;
4889	/ Return a buffer that's big enough, but don't waste one that's*
4890	way too big. /*
4891	if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
4892	break;
4893	}
4894
4895	*p = result->next;
4896	result->next = NULL;
4897	result->cur = result->base;
4898	return result;
4899	}
4900
4901	/ Creates a new buffer with enough space to hold the uncommitted*
4902	remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
4903	the excess bytes to the new buffer. Chains the new buffer after
4904	BUFF, and returns the new buffer. /*
4905	_cpp_buff *
4906	_cpp_append_extend_buff (cpp_reader pfile, _cpp_buff buff, size_t min_extra)
4907	{
4908	size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
4909	_cpp_buff *new_buff = _cpp_get_buff (pfile, min_size: size);
4910
4911	buff->next = new_buff;
4912	memcpy (dest: new_buff->base, src: buff->cur, BUFF_ROOM (buff));
4913	return new_buff;
4914	}
4915
4916	/ Creates a new buffer with enough space to hold the uncommitted*
4917	remaining bytes of the buffer pointed to by BUFF, and at least
4918	MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
4919	Chains the new buffer before the buffer pointed to by BUFF, and
4920	updates the pointer to point to the new buffer. /*
4921	void
4922	_cpp_extend_buff (cpp_reader pfile, _cpp_buff *pbuff, size_t min_extra)
4923	{
4924	_cpp_buff new_buff, old_buff = *pbuff;
4925	size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
4926
4927	new_buff = _cpp_get_buff (pfile, min_size: size);
4928	memcpy (dest: new_buff->base, src: old_buff->cur, BUFF_ROOM (old_buff));
4929	new_buff->next = old_buff;
4930	*pbuff = new_buff;
4931	}
4932
4933	/ Free a chain of buffers starting at BUFF. /
4934	void
4935	_cpp_free_buff (_cpp_buff *buff)
4936	{
4937	_cpp_buff *next;
4938
4939	for (; buff; buff = next)
4940	{
4941	next = buff->next;
4942	#ifdef ENABLE_VALGRIND_WORKAROUNDS
4943	free (buff);
4944	#else
4945	free (ptr: buff->base);
4946	#endif
4947	}
4948	}
4949
4950	/ Allocate permanent, unaligned storage of length LEN. /
4951	unsigned char *
4952	_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
4953	{
4954	_cpp_buff *buff = pfile->u_buff;
4955	unsigned char *result = buff->cur;
4956
4957	if (len > (size_t) (buff->limit - result))
4958	{
4959	buff = _cpp_get_buff (pfile, min_size: len);
4960	buff->next = pfile->u_buff;
4961	pfile->u_buff = buff;
4962	result = buff->cur;
4963	}
4964
4965	buff->cur = result + len;
4966	return result;
4967	}
4968
4969	/ Allocate permanent, unaligned storage of length LEN from a_buff.*
4970	That buffer is used for growing allocations when saving macro
4971	replacement lists in a #define, and when parsing an answer to an
4972	assertion in #assert, #unassert or #if (and therefore possibly
4973	whilst expanding macros). It therefore must not be used by any
4974	code that they might call: specifically the lexer and the guts of
4975	the macro expander.
4976
4977	All existing other uses clearly fit this restriction: storing
4978	registered pragmas during initialization. /*
4979	unsigned char *
4980	_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
4981	{
4982	_cpp_buff *buff = pfile->a_buff;
4983	unsigned char *result = buff->cur;
4984
4985	if (len > (size_t) (buff->limit - result))
4986	{
4987	buff = _cpp_get_buff (pfile, min_size: len);
4988	buff->next = pfile->a_buff;
4989	pfile->a_buff = buff;
4990	result = buff->cur;
4991	}
4992
4993	buff->cur = result + len;
4994	return result;
4995	}
4996
4997	/ Commit or allocate storage from a buffer. /
4998
4999	void *
5000	_cpp_commit_buff (cpp_reader *pfile, size_t size)
5001	{
5002	const auto buff = pfile->a_buff;
5003	void *ptr = BUFF_FRONT (buff);
5004
5005	if (pfile->hash_table->alloc_subobject)
5006	{
5007	void *copy = pfile->hash_table->alloc_subobject (size);
5008	memcpy (dest: copy, src: ptr, n: size);
5009	ptr = copy;
5010	}
5011	else
5012	{
5013	BUFF_FRONT (buff) += size;
5014	/ Make sure the remaining space is maximally aligned for whatever this*
5015	buffer holds next. /*
5016	BUFF_FRONT (buff) += BUFF_ROOM (buff) % DEFAULT_ALIGNMENT;
5017	}
5018
5019	return ptr;
5020	}
5021
5022	/ Say which field of TOK is in use. /
5023
5024	enum cpp_token_fld_kind
5025	cpp_token_val_index (const cpp_token *tok)
5026	{
5027	switch (TOKEN_SPELL (tok))
5028	{
5029	case SPELL_IDENT:
5030	return CPP_TOKEN_FLD_NODE;
5031	case SPELL_LITERAL:
5032	return CPP_TOKEN_FLD_STR;
5033	case SPELL_OPERATOR:
5034	/ Operands which were originally spelled as ident keep around*
5035	the node for the exact spelling. /*
5036	if (tok->flags & NAMED_OP)
5037	return CPP_TOKEN_FLD_NODE;
5038	else if (tok->type == CPP_PASTE)
5039	return CPP_TOKEN_FLD_TOKEN_NO;
5040	else
5041	return CPP_TOKEN_FLD_NONE;
5042	case SPELL_NONE:
5043	if (tok->type == CPP_MACRO_ARG)
5044	return CPP_TOKEN_FLD_ARG_NO;
5045	else if (tok->type == CPP_PADDING)
5046	return CPP_TOKEN_FLD_SOURCE;
5047	else if (tok->type == CPP_PRAGMA)
5048	return CPP_TOKEN_FLD_PRAGMA;
5049	/ fall through /
5050	default:
5051	return CPP_TOKEN_FLD_NONE;
5052	}
5053	}
5054
5055	/ All tokens lexed in R after calling this function will be forced to*
5056	have their location_t to be P, until
5057	cpp_stop_forcing_token_locations is called for R. /*
5058
5059	void
5060	cpp_force_token_locations (cpp_reader *r, location_t loc)
5061	{
5062	r->forced_token_location = loc;
5063	}
5064
5065	/ Go back to assigning locations naturally for lexed tokens. /
5066
5067	void
5068	cpp_stop_forcing_token_locations (cpp_reader *r)
5069	{
5070	r->forced_token_location = `0`;
5071	}
5072
/* PEEK points at a backslash.  If it is escaping an end of line
   ("\n", "\r" or "\r\n"), look past it, and past any further escaped
   line ends that follow directly.  If skipping would reach LIMIT,
   don't advance.  Returns the resulting position, which is PEEK itself
   when the backslash does not escape a line end.  */

static const unsigned char *
do_peek_backslash (const unsigned char *peek, const unsigned char *limit)
{
  const unsigned char *probe;

  if (__builtin_expect (peek[1] == '\n', true))
    /* Backslash, LF.  */
    probe = peek + 2;
  else if (__builtin_expect (peek[1] == '\r', false))
    /* Backslash, CR, optionally followed by LF.  */
    probe = peek + (peek[2] == '\n' ? 3 : 2);
  else
    return peek;

  if (__builtin_expect (probe < limit, true))
    {
      peek = probe;
      if (*peek == '\\')
        /* The user might be perverse.  */
        return do_peek_backslash (peek, limit);
    }

  return peek;
}
5102
5103	static const unsigned char *
5104	do_peek_next (const unsigned char peek, const* unsigned char *limit)
5105	{
5106	if (__builtin_expect (peek == `'\\'`, false*))
5107	peek = do_peek_backslash (peek, limit);
5108	return peek;
5109	}
5110
/* Step backwards from PEEK to the previous character, skipping over
   an escaped line ending (backslash followed by "\n", "\r" or
   "\r\n").  BOUND is the lowest address that may be examined.

   Returns NULL if PEEK is already at BOUND, otherwise a pointer to
   the previous character.  An unescaped line-ending character is
   returned like any other character.  */

static const unsigned char *
do_peek_prev (const unsigned char *peek, const unsigned char *bound)
{
  if (peek == bound)
    return NULL;

  unsigned char c = *--peek;
  if (__builtin_expect (c == '\n', false)
      || __builtin_expect (c == '\r', false))
    {
      /* Nothing before the line ending may be inspected.  */
      if (peek == bound)
        return peek;

      /* IX indexes the character preceding the line ending; it moves
         back one more for a "\r\n" pair.  */
      int ix = -1;
      if (c == '\n' && peek[ix] == '\r')
        {
          if (peek + ix == bound)
            return peek;
          ix--;
        }

      /* An escaped line ending: continue from before the backslash.  */
      if (peek[ix] == '\\')
        return do_peek_prev (peek + ix, bound);

      return peek;
    }
  else
    return peek;
}
5139
5140	/ If PEEK[-1] is identifier MATCH, scan past it and trailing white*
5141	space. Otherwise return NULL. /*
5142
5143	static const unsigned char *
5144	do_peek_ident (const char match, const* unsigned char *peek,
5145	const unsigned char *limit)
5146	{
5147	for (; *++match; peek++)
5148	if (peek != match)
5149	{
5150	peek = do_peek_next (peek, limit);
5151	if (peek != match)
5152	return NULL;
5153	}
5154
5155	/ Must now not be looking at an identifier char. /
5156	peek = do_peek_next (peek, limit);
5157	if (ISIDNUM (*peek))
5158	return NULL;
5159
5160	/ Skip control-line whitespace. /
5161	ws:
5162	while (peek == `' '` \|\| peek == `'\t'`)
5163	peek++;
5164	if (__builtin_expect (peek == `'\\'`, false*))
5165	{
5166	peek = do_peek_backslash (peek, limit);
5167	if (*peek != `'\\'`)
5168	goto ws;
5169	}
5170
5171	return peek;
5172	}
5173
5174	/ Are we looking at a module control line starting as PEEK - 1? /
5175
5176	static bool
5177	do_peek_module (cpp_reader pfile, unsigned* char c,
5178	const unsigned char peek, const* unsigned char *limit)
5179	{
5180	bool import = false;
5181
5182	if (__builtin_expect (c == `'e'`, false))
5183	{
5184	if (!((peek[`0`] == `'x'` \|\| peek[`0`] == `'\\'`)
5185	&& (peek = do_peek_ident (match: "export", peek, limit))))
5186	return false;
5187
5188	/ export, peek for import or module. No need to peek __import*
5189	here. /*
5190	if (peek[`0`] == `'i'`)
5191	{
5192	if (!((peek[`1`] == `'m'` \|\| peek[`1`] == `'\\'`)
5193	&& (peek = do_peek_ident (match: "import", peek: peek + `1`, limit))))
5194	return false;
5195	import = true;
5196	}
5197	else if (peek[`0`] == `'m'`)
5198	{
5199	if (!((peek[`1`] == `'o'` \|\| peek[`1`] == `'\\'`)
5200	&& (peek = do_peek_ident (match: "module", peek: peek + `1`, limit))))
5201	return false;
5202	}
5203	else
5204	return false;
5205	}
5206	else if (__builtin_expect (c == `'i'`, false))
5207	{
5208	if (!((peek[`0`] == `'m'` \|\| peek[`0`] == `'\\'`)
5209	&& (peek = do_peek_ident (match: "import", peek, limit))))
5210	return false;
5211	import = true;
5212	}
5213	else if (__builtin_expect (c == `'_'`, false))
5214	{
5215	/ Needed for translated includes. /
5216	if (!((peek[`0`] == `'_'` \|\| peek[`0`] == `'\\'`)
5217	&& (peek = do_peek_ident (match: "__import", peek, limit))))
5218	return false;
5219	import = true;
5220	}
5221	else if (__builtin_expect (c == `'m'`, false))
5222	{
5223	if (!((peek[`0`] == `'o'` \|\| peek[`0`] == `'\\'`)
5224	&& (peek = do_peek_ident (match: "module", peek, limit))))
5225	return false;
5226	}
5227	else
5228	return false;
5229
5230	/ Peek the next character to see if it's good enough. We'll be at*
5231	the first non-whitespace char, including skipping an escaped
5232	newline. /*
5233	/ ... import followed by identifier, ':', '<' or header-name*
5234	preprocessing tokens, or module followed by identifier, ':' or
5235	';' preprocessing tokens. /*
5236	unsigned char p = *peek++;
5237
5238	/ A character literal is ... single quotes, ... optionally preceded*
5239	by u8, u, U, or L /*
5240	/ A string-literal is a ... double quotes, optionally prefixed by*
5241	R, u8, u8R, u, uR, U, UR, L, or LR /*
5242	if (p == `'u'`)
5243	{
5244	peek = do_peek_next (peek, limit);
5245	if (*peek == `'8'`)
5246	{
5247	peek++;
5248	goto peek_u8;
5249	}
5250	goto peek_u;
5251	}
5252	else if (p == `'U'` \|\| p == `'L'`)
5253	{
5254	peek_u8:
5255	peek = do_peek_next (peek, limit);
5256	peek_u:
5257	if (peek == `'\"'` \|\| peek == `'\''`)
5258	return false;
5259
5260	if (*peek == `'R'`)
5261	goto peek_R;
5262	/ Identifier. Ok. /
5263	}
5264	else if (p == `'R'`)
5265	{
5266	peek_R:
5267	if (CPP_OPTION (pfile, rliterals))
5268	{
5269	peek = do_peek_next (peek, limit);
5270	if (*peek == `'\"'`)
5271	return false;
5272	}
5273	/ Identifier. Ok. /
5274	}
5275	else if (`'Z'` - `'A'` == `25`
5276	? ((p >= `'A'` && p <= `'Z'`) \|\| (p >= `'a'` && p <= `'z'`) \|\| p == `'_'`)
5277	: ISIDST (p))
5278	{
5279	/ Identifier. Ok. /
5280	}
5281	else if (p == `'<'`)
5282	{
5283	/ Maybe angle header, ok for import. Reject*
5284	'<=', '<<' digraph:'<:'. /*
5285	if (!import)
5286	return false;
5287	peek = do_peek_next (peek, limit);
5288	if (peek == `'='` \|\| peek == `'<'`
5289	\|\| (*peek == `':'` && CPP_OPTION (pfile, digraphs)))
5290	return false;
5291	}
5292	else if (p == `';'`)
5293	{
5294	/ SEMICOLON, ok for module. /
5295	if (import)
5296	return false;
5297	}
5298	else if (p == `'"'`)
5299	{
5300	/ STRING, ok for import. /
5301	if (!import)
5302	return false;
5303	}
5304	else if (p == `':'`)
5305	{
5306	/ Maybe COLON, ok. Reject '::', digraph:':>'. /
5307	peek = do_peek_next (peek, limit);
5308	if (peek == `':'` \|\| (peek == `'>'` && CPP_OPTION (pfile, digraphs)))
5309	return false;
5310	}
5311	else
5312	/ FIXME: Detect a unicode character, excluding those not*
5313	permitted as the initial character. [lex.name]/1. I presume
5314	we need to check the \[uU] spellings, and directly using
5315	Unicode in say UTF8 form? Or perhaps we do the phase-1
5316	conversion of UTF8 to universal-character-names? /*
5317	return false;
5318
5319	return true;
5320	}
5321
5322	/ Directives-only scanning. Somewhat more relaxed than correct*
5323	parsing -- some ill-formed programs will not be rejected. /*
5324
5325	void
5326	cpp_directive_only_process (cpp_reader *pfile,
5327	void *data,
5328	void (cb) (cpp_reader , CPP_DO_task, void *, ...))
5329	{
5330	bool module_p = CPP_OPTION (pfile, module_directives);
5331
5332	do
5333	{
5334	restart:
5335	/ Buffer initialization, but no line cleaning. /
5336	cpp_buffer *buffer = pfile->buffer;
5337	buffer->cur_note = buffer->notes_used = `0`;
5338	buffer->cur = buffer->line_base = buffer->next_line;
5339	buffer->need_line = false;
5340	/ Files always end in a newline or carriage return. We rely on this for*
5341	character peeking safety. /*
5342	gcc_assert (buffer->rlimit[`0`] == `'\n'` \|\| buffer->rlimit[`0`] == `'\r'`);
5343
5344	const unsigned char *base = buffer->cur;
5345	unsigned line_count = `0`;
5346	const unsigned char *line_start = base;
5347
5348	bool bol = true;
5349	bool raw = false;
5350
5351	const unsigned char *lwm = base;
5352	for (const unsigned char pos = base, limit = buffer->rlimit;
5353	pos < limit;)
5354	{
5355	unsigned char c = *pos++;
5356	/ This matches the switch in _cpp_lex_direct. /
5357	switch (c)
5358	{
5359	case `' '`: case `'\t'`: case `'\f'`: case `'\v'`:
5360	/ Whitespace, do nothing. /
5361	break;
5362
5363	case `'\r'`: / MAC line ending, or Windows \r\n /
5364	if (*pos == `'\n'`)
5365	pos++;
5366	/ FALLTHROUGH /
5367
5368	case `'\n'`:
5369	bol = true;
5370
5371	next_line:
5372	CPP_INCREMENT_LINE (pfile, `0`);
5373	line_count++;
5374	line_start = pos;
5375	break;
5376
5377	case `'\\'`:
5378	/ <backslash><newline> is removed, and doesn't undo any*
5379	preceeding escape or whatnot. /*
5380	if (*pos == `'\n'`)
5381	{
5382	pos++;
5383	goto next_line;
5384	}
5385	else if (*pos == `'\r'`)
5386	{
5387	if (pos[`1`] == `'\n'`)
5388	pos++;
5389	pos++;
5390	goto next_line;
5391	}
5392	goto dflt;
5393
5394	case `'#'`:
5395	if (bol)
5396	{
5397	/ Line directive. /
5398	if (pos - `1` > base && !pfile->state.skipping)
5399	cb (pfile, CPP_DO_print, data,
5400	line_count, base, pos - `1` - base);
5401
5402	/ Prep things for directive handling. /
5403	buffer->next_line = pos;
5404	buffer->need_line = true;
5405	bool ok = _cpp_get_fresh_line (pfile);
5406	gcc_checking_assert (ok);
5407
5408	/ Ensure proper column numbering for generated*
5409	error messages. /*
5410	buffer->line_base -= pos - line_start;
5411
5412	if (_cpp_handle_directive (pfile, line_start + `1` != pos) == `2`)
5413	{
5414	if (pfile->directive_result.type != CPP_PADDING)
5415	cb (pfile, CPP_DO_token, data,
5416	&pfile->directive_result, pfile->directive_result.src_loc);
5417	if (pfile->context->prev)
5418	{
5419	gcc_assert (pfile->context->tokens_kind == TOKENS_KIND_DIRECT);
5420	for (const cpp_token *tok = FIRST (pfile->context).token;
5421	tok != LAST (pfile->context).token; ++tok)
5422	cb (pfile, CPP_DO_token, data, tok, tok->src_loc);
5423	_cpp_pop_context (pfile);
5424	}
5425	}
5426
5427	/ Sanitize the line settings. Duplicate #include's can*
5428	mess things up. /*
5429	// FIXME: Necessary?
5430	pfile->line_table->highest_location
5431	= pfile->line_table->highest_line;
5432
5433	if (!pfile->state.skipping
5434	&& pfile->buffer->next_line < pfile->buffer->rlimit)
5435	cb (pfile, CPP_DO_location, data,
5436	pfile->line_table->highest_line);
5437
5438	goto restart;
5439	}
5440	goto dflt;
5441
5442	case `'/'`:
5443	{
5444	const unsigned char *peek = do_peek_next (peek: pos, limit);
5445	if (!(peek == `'/'` \|\| peek == `'*'`))
5446	goto dflt;
5447
5448	/ Line or block comment /
5449	bool is_block = peek == `''`;
5450	bool star = false;
5451	bool esc = false;
5452	location_t sloc
5453	= linemap_position_for_column (pfile->line_table,
5454	pos - line_start);
5455
5456	while (pos < limit)
5457	{
5458	char c = *pos++;
5459	switch (c)
5460	{
5461	case `'\\'`:
5462	esc = true;
5463	break;
5464
5465	case `'\r'`:
5466	if (*pos == `'\n'`)
5467	pos++;
5468	/ FALLTHROUGH /
5469
5470	case `'\n'`:
5471	{
5472	CPP_INCREMENT_LINE (pfile, `0`);
5473	line_count++;
5474	line_start = pos;
5475	if (!esc && !is_block)
5476	{
5477	bol = true;
5478	goto done_comment;
5479	}
5480	}
5481	if (!esc)
5482	star = false;
5483	esc = false;
5484	break;
5485
5486	case `'*'`:
5487	if (pos > peek)
5488	star = is_block;
5489	esc = false;
5490	break;
5491
5492	case `'/'`:
5493	if (star)
5494	goto done_comment;
5495	/ FALLTHROUGH /
5496
5497	default:
5498	star = false;
5499	esc = false;
5500	break;
5501	}
5502	}
5503	if (pos < limit \|\| is_block)
5504	cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, `0`,
5505	msgid: "unterminated comment");
5506	done_comment:
5507	lwm = pos;
5508	break;
5509	}
5510
5511	case `'\''`:
5512	if (!CPP_OPTION (pfile, digit_separators))
5513	goto delimited_string;
5514
5515	/ Possibly a number punctuator. /
5516	if (!ISIDNUM (*do_peek_next (pos, limit)))
5517	goto delimited_string;
5518
5519	goto quote_peek;
5520
5521	case `'\"'`:
5522	if (!CPP_OPTION (pfile, rliterals))
5523	goto delimited_string;
5524
5525	quote_peek:
5526	{
5527	/ For ' see if it's a number punctuator*
5528	\.?<digit>(<digit>\|<identifier-nondigit>
5529	\|'<digit>\|'<nondigit>\|[eEpP]<sign>\|\.) /
5530	/ For " see if it's a raw string*
5531	{U,L,u,u8}R. This includes CPP_NUMBER detection,
5532	because that could be 0e+R. /*
5533	const unsigned char *peek = pos - `1`;
5534	bool quote_first = c == `'"'`;
5535	bool quote_eight = false;
5536	bool maybe_number_start = false;
5537	bool want_number = false;
5538
5539	while ((peek = do_peek_prev (peek, bound: lwm)))
5540	{
5541	unsigned char p = *peek;
5542	if (quote_first)
5543	{
5544	if (!raw)
5545	{
5546	if (p != `'R'`)
5547	break;
5548	raw = true;
5549	continue;
5550	}
5551
5552	quote_first = false;
5553	if (p == `'L'` \|\| p == `'U'` \|\| p == `'u'`)
5554	;
5555	else if (p == `'8'`)
5556	quote_eight = true;
5557	else
5558	goto second_raw;
5559	}
5560	else if (quote_eight)
5561	{
5562	if (p != `'u'`)
5563	{
5564	raw = false;
5565	break;
5566	}
5567	quote_eight = false;
5568	}
5569	else if (c == `'"'`)
5570	{
5571	second_raw:;
5572	if (!want_number && ISIDNUM (p))
5573	{
5574	raw = false;
5575	break;
5576	}
5577	}
5578
5579	if (ISDIGIT (p))
5580	maybe_number_start = true;
5581	else if (p == `'.'`)
5582	want_number = true;
5583	else if (ISIDNUM (p))
5584	maybe_number_start = false;
5585	else if (p == `'+'` \|\| p == `'-'`)
5586	{
5587	if (const unsigned char *peek_prev
5588	= do_peek_prev (peek, bound: lwm))
5589	{
5590	p = *peek_prev;
5591	if (p == `'e'` \|\| p == `'E'`
5592	\|\| p == `'p'` \|\| p == `'P'`)
5593	{
5594	want_number = true;
5595	maybe_number_start = false;
5596	}
5597	else
5598	break;
5599	}
5600	else
5601	break;
5602	}
5603	else if (p == `'\''` \|\| p == `'\"'`)
5604	{
5605	/ If this is lwm, this must be the end of a*
5606	previous string. So this is a trailing
5607	literal type, (a) if those are allowed,
5608	and (b) maybe_start is false. Otherwise
5609	this must be a CPP_NUMBER because we've
5610	met another ', and we'd have checked that
5611	in its own right. /*
5612	if (peek == lwm && CPP_OPTION (pfile, uliterals))
5613	{
5614	if (!maybe_number_start && !want_number)
5615	/ Must be a literal type. /
5616	raw = false;
5617	}
5618	else if (p == `'\''`
5619	&& CPP_OPTION (pfile, digit_separators))
5620	maybe_number_start = true;
5621	break;
5622	}
5623	else if (c == `'\''`)
5624	break;
5625	else if (!quote_first && !quote_eight)
5626	break;
5627	}
5628
5629	if (maybe_number_start)
5630	{
5631	if (c == `'\''`)
5632	/ A CPP NUMBER. /
5633	goto dflt;
5634	raw = false;
5635	}
5636
5637	goto delimited_string;
5638	}
5639
5640	delimited_string:
5641	{
5642	/ (Possibly raw) string or char literal. /
5643	unsigned char end = c;
5644	int delim_len = -`1`;
5645	const unsigned char *delim = NULL;
5646	location_t sloc = linemap_position_for_column (pfile->line_table,
5647	pos - line_start);
5648	int esc = `0`;
5649
5650	if (raw)
5651	{
5652	/ There can be no line breaks in the delimiter. /
5653	delim = pos;
5654	for (delim_len = `0`; (c = *pos++) != `'('`; delim_len++)
5655	{
5656	if (delim_len == `16`)
5657	{
5658	cpp_error_with_line (pfile, CPP_DL_ERROR,
5659	sloc, `0`,
5660	msgid: "raw string delimiter"
5661	" longer than %d"
5662	" characters",
5663	delim_len);
5664	raw = false;
5665	pos = delim;
5666	break;
5667	}
5668	if (strchr (s: ") \\\t\v\f\n", c: c))
5669	{
5670	cpp_error_with_line (pfile, CPP_DL_ERROR,
5671	sloc, `0`,
5672	msgid: "invalid character '%c'"
5673	" in raw string"
5674	" delimiter", c);
5675	raw = false;
5676	pos = delim;
5677	break;
5678	}
5679	if (pos >= limit)
5680	goto bad_string;
5681	}
5682	}
5683
5684	while (pos < limit)
5685	{
5686	char c = *pos++;
5687	switch (c)
5688	{
5689	case `'\\'`:
5690	if (!raw)
5691	esc++;
5692	break;
5693
5694	case `'\r'`:
5695	if (*pos == `'\n'`)
5696	pos++;
5697	/ FALLTHROUGH /
5698
5699	case `'\n'`:
5700	{
5701	CPP_INCREMENT_LINE (pfile, `0`);
5702	line_count++;
5703	line_start = pos;
5704	}
5705	if (esc)
5706	esc--;
5707	break;
5708
5709	case `')'`:
5710	if (raw
5711	&& pos + delim_len + `1` < limit
5712	&& pos[delim_len] == end
5713	&& !memcmp (s1: delim, s2: pos, n: delim_len))
5714	{
5715	pos += delim_len + `1`;
5716	raw = false;
5717	goto done_string;
5718	}
5719	break;
5720
5721	default:
5722	if (!raw && !(esc & `1`) && c == end)
5723	goto done_string;
5724	esc = `0`;
5725	break;
5726	}
5727	}
5728	bad_string:
5729	cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, `0`,
5730	msgid: "unterminated literal");
5731
5732	done_string:
5733	raw = false;
5734	lwm = pos - `1`;
5735	}
5736	goto dflt;
5737
5738	case `'_'`:
5739	case `'e'`:
5740	case `'i'`:
5741	case `'m'`:
5742	if (bol && module_p && !pfile->state.skipping
5743	&& do_peek_module (pfile, c, peek: pos, limit))
5744	{
5745	/ We've seen the start of a module control line.*
5746	Start up the tokenizer. /*
5747	pos--; / Backup over the first character. /
5748
5749	/ Backup over whitespace to start of line. /
5750	while (pos > line_start
5751	&& (pos[-`1`] == `' '` \|\| pos[-`1`] == `'\t'`))
5752	pos--;
5753
5754	if (pos > base)
5755	cb (pfile, CPP_DO_print, data, line_count, base, pos - base);
5756
5757	/ Prep things for directive handling. /
5758	buffer->next_line = pos;
5759	buffer->need_line = true;
5760
5761	/ Now get tokens until the PRAGMA_EOL. /
5762	do
5763	{
5764	location_t spelling;
5765	const cpp_token *tok
5766	= cpp_get_token_with_location (pfile, &spelling);
5767
5768	gcc_assert (pfile->state.in_deferred_pragma
5769	\|\| tok->type == CPP_PRAGMA_EOL);
5770	cb (pfile, CPP_DO_token, data, tok, spelling);
5771	}
5772	while (pfile->state.in_deferred_pragma);
5773
5774	if (pfile->buffer->next_line < pfile->buffer->rlimit)
5775	cb (pfile, CPP_DO_location, data,
5776	pfile->line_table->highest_line);
5777
5778	pfile->mi_valid = false;
5779	goto restart;
5780	}
5781	goto dflt;
5782
5783	default:
5784	dflt:
5785	bol = false;
5786	pfile->mi_valid = false;
5787	break;
5788	}
5789	}
5790
5791	if (buffer->rlimit > base && !pfile->state.skipping)
5792	{
5793	const unsigned char *limit = buffer->rlimit;
5794	/ If the file was not newline terminated, add rlimit, which is*
5795	guaranteed to point to a newline, to the end of our range. /*
5796	if (limit[-`1`] != `'\n'`)
5797	{
5798	limit++;
5799	CPP_INCREMENT_LINE (pfile, `0`);
5800	line_count++;
5801	}
5802	cb (pfile, CPP_DO_print, data, line_count, base, limit - base);
5803	}
5804
5805	_cpp_pop_buffer (pfile);
5806	}
5807	while (pfile->buffer);
5808	}
5809

source code of libcpp/lex.cc