input.cc source code [gcc/input.cc]

/* Data and functions related to line maps and input files.
   Copyright (C) 2004-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
19
20	#include "config.h"
21	#include "system.h"
22	#include "coretypes.h"
23	#include "intl.h"
24	#include "diagnostic.h"
25	#include "selftest.h"
26	#include "cpplib.h"
27
28	#ifndef HAVE_ICONV
29	#define HAVE_ICONV 0
30	#endif
31
32	const char *
33	special_fname_builtin ()
34	{
35	return _("<built-in>");
36	}
37
/* Input charset configuration.  */

/* Default input-charset callback.  It ignores the file name it is
   given and always returns nullptr; file_cache_slot::create only
   performs a charset conversion when the callback returns non-null.  */

static const char *
default_charset_callback (const char *)
{
  return nullptr;
}
43
44	void
45	file_cache::initialize_input_context (diagnostic_input_charset_callback ccb,
46	bool should_skip_bom)
47	{
48	in_context.ccb = (ccb ? ccb : default_charset_callback);
49	in_context.should_skip_bom = should_skip_bom;
50	}
51
52	/ This is a cache used by get_next_line to store the content of a*
53	file to be searched for file lines. /*
54	class file_cache_slot
55	{
56	public:
57	file_cache_slot ();
58	~file_cache_slot ();
59
60	bool read_line_num (size_t line_num,
61	char ** line, ssize_t *line_len);
62
63	/ Accessors. /
64	const char get_file_path () const* { return m_file_path; }
65	unsigned get_use_count () const { return m_use_count; }
66	bool missing_trailing_newline_p () const
67	{
68	return m_missing_trailing_newline;
69	}
70	char_span get_full_file_content ();
71
72	void inc_use_count () { m_use_count++; }
73
74	bool create (const file_cache::input_context &in_context,
75	const char file_path, FILE fp, unsigned highest_use_count);
76	void evict ();
77
78	private:
79	/ These are information used to store a line boundary. /
80	class line_info
81	{
82	public:
83	/ The line number. It starts from 1. /
84	size_t line_num;
85
86	/ The position (byte count) of the beginning of the line,*
87	relative to the file data pointer. This starts at zero. /*
88	size_t start_pos;
89
90	/ The position (byte count) of the last byte of the line. This*
91	normally points to the '\n' character, or to one byte after the
92	last byte of the file, if the file doesn't contain a '\n'
93	character. /*
94	size_t end_pos;
95
96	line_info (size_t l, size_t s, size_t e)
97	: line_num (l), start_pos (s), end_pos (e)
98	{}
99
100	line_info ()
101	:line_num (`0`), start_pos (`0`), end_pos (`0`)
102	{}
103	};
104
105	bool needs_read_p () const;
106	bool needs_grow_p () const;
107	void maybe_grow ();
108	bool read_data ();
109	bool maybe_read_data ();
110	bool get_next_line (char *line, ssize_t line_len);
111	bool read_next_line (char ** line, ssize_t *line_len);
112	bool goto_next_line ();
113
114	static const size_t buffer_size = `4` * `1024`;
115	static const size_t line_record_size = `100`;
116
117	/ The number of time this file has been accessed. This is used*
118	to designate which file cache to evict from the cache
119	array. /*
120	unsigned m_use_count;
121
122	/ The file_path is the key for identifying a particular file in*
123	the cache.
124	For libcpp-using code, the underlying buffer for this field is
125	owned by the corresponding _cpp_file within the cpp_reader. /*
126	const char *m_file_path;
127
128	FILE *m_fp;
129
130	/ This points to the content of the file that we've read so*
131	far. /*
132	char *m_data;
133
134	/ The allocated buffer to be freed may start a little earlier than DATA,*
135	e.g. if a UTF8 BOM was skipped at the beginning. /*
136	int m_alloc_offset;
137
138	/ The size of the DATA array above./
139	size_t m_size;
140
141	/ The number of bytes read from the underlying file so far. This*
142	must be less (or equal) than SIZE above. /*
143	size_t m_nb_read;
144
145	/ The index of the beginning of the current line. /
146	size_t m_line_start_idx;
147
148	/ The number of the previous line read. This starts at 1. Zero*
149	means we've read no line so far. /*
150	size_t m_line_num;
151
152	/ This is the total number of lines of the current file. At the*
153	moment, we try to get this information from the line map
154	subsystem. Note that this is just a hint. When using the C++
155	front-end, this hint is correct because the input file is then
156	completely tokenized before parsing starts; so the line map knows
157	the number of lines before compilation really starts. For e.g,
158	the C front-end, it can happen that we start emitting diagnostics
159	before the line map has seen the end of the file. /*
160	size_t m_total_lines;
161
162	/ Could this file be missing a trailing newline on its final line?*
163	Initially true (to cope with empty files), set to true/false
164	as each line is read. /*
165	bool m_missing_trailing_newline;
166
167	/ This is a record of the beginning and end of the lines we've seen*
168	while reading the file. This is useful to avoid walking the data
169	from the beginning when we are asked to read a line that is
170	before LINE_START_IDX above. Note that the maximum size of this
171	record is line_record_size, so that the memory consumption
172	doesn't explode. We thus scale total_lines down to
173	line_record_size. /*
174	vec<line_info, va_heap> m_line_record;
175
176	void offset_buffer (int offset)
177	{
178	gcc_assert (offset < `0` ? m_alloc_offset + offset >= `0`
179	: (size_t) offset <= m_size);
180	gcc_assert (m_data);
181	m_alloc_offset += offset;
182	m_data += offset;
183	m_size -= offset;
184	}
185
186	};
187
188	/ Current position in real source file. /
189
190	location_t input_location = UNKNOWN_LOCATION;
191
192	class line_maps *line_table;
193
194	/ A stashed copy of "line_table" for use by selftest::line_table_test.*
195	This needs to be a global so that it can be a GC root, and thus
196	prevent the stashed copy from being garbage-collected if the GC runs
197	during a line_table_test. /*
198
199	class line_maps *saved_line_table;
200
201	/ Expand the source location LOC into a human readable location. If*
202	LOC resolves to a builtin location, the file name of the readable
203	location is set to the string "<built-in>". If EXPANSION_POINT_P is
204	TRUE and LOC is virtual, then it is resolved to the expansion
205	point of the involved macro. Otherwise, it is resolved to the
206	spelling location of the token.
207
208	When resolving to the spelling location of the token, if the
209	resulting location is for a built-in location (that is, it has no
210	associated line/column) in the context of a macro expansion, the
211	returned location is the first one (while unwinding the macro
212	location towards its expansion point) that is in real source
213	code.
214
215	ASPECT controls which part of the location to use. /*
216
217	static expanded_location
218	expand_location_1 (const line_maps *set,
219	location_t loc,
220	bool expansion_point_p,
221	enum location_aspect aspect)
222	{
223	expanded_location xloc;
224	const line_map_ordinary *map;
225	enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
226	tree block = NULL;
227
228	if (IS_ADHOC_LOC (loc))
229	{
230	block = LOCATION_BLOCK (loc);
231	loc = LOCATION_LOCUS (loc);
232	}
233
234	memset (s: &xloc, c: `0`, n: sizeof (xloc));
235
236	if (loc >= RESERVED_LOCATION_COUNT)
237	{
238	if (!expansion_point_p)
239	{
240	/ We want to resolve LOC to its spelling location.*
241
242	But if that spelling location is a reserved location that
243	appears in the context of a macro expansion (like for a
244	location for a built-in token), let's consider the first
245	location (toward the expansion point) that is not reserved;
246	that is, the first location that is in real source code. /*
247	loc = linemap_unwind_to_first_non_reserved_loc (set,
248	loc, NULL);
249	lrk = LRK_SPELLING_LOCATION;
250	}
251	loc = linemap_resolve_location (set, loc, lrk, loc_map: &map);
252
253	/ loc is now either in an ordinary map, or is a reserved location.*
254	If it is a compound location, the caret is in a spelling location,
255	but the start/finish might still be a virtual location.
256	Depending of what the caller asked for, we may need to recurse
257	one level in order to resolve any virtual locations in the
258	end-points. /*
259	switch (aspect)
260	{
261	default:
262	gcc_unreachable ();
263	/ Fall through. /
264	case LOCATION_ASPECT_CARET:
265	break;
266	case LOCATION_ASPECT_START:
267	{
268	location_t start = get_start (loc);
269	if (start != loc)
270	return expand_location_1 (set, loc: start, expansion_point_p, aspect);
271	}
272	break;
273	case LOCATION_ASPECT_FINISH:
274	{
275	location_t finish = get_finish (loc);
276	if (finish != loc)
277	return expand_location_1 (set, loc: finish, expansion_point_p, aspect);
278	}
279	break;
280	}
281	xloc = linemap_expand_location (set, map, loc);
282	}
283
284	xloc.data = block;
285	if (loc <= BUILTINS_LOCATION)
286	xloc.file = loc == UNKNOWN_LOCATION ? NULL : special_fname_builtin ();
287
288	return xloc;
289	}
290
291	/ Initialize the set of cache used for files accessed by caret*
292	diagnostic. /*
293
294	static void
295	diagnostic_file_cache_init (void)
296	{
297	gcc_assert (global_dc);
298	global_dc->file_cache_init ();
299	}
300
301	void
302	diagnostic_context::file_cache_init ()
303	{
304	if (m_file_cache == nullptr)
305	m_file_cache = new file_cache ();
306	}
307
308	/ Return the total lines number that have been read so far by the*
309	line map (in the preprocessor) so far. For languages like C++ that
310	entirely preprocess the input file before starting to parse, this
311	equals the actual number of lines of the file. /*
312
313	static size_t
314	total_lines_num (const char *file_path)
315	{
316	size_t r = `0`;
317	location_t l = `0`;
318	if (linemap_get_file_highest_location (set: line_table, file_name: file_path, loc: &l))
319	{
320	gcc_assert (l >= RESERVED_LOCATION_COUNT);
321	expanded_location xloc = expand_location (l);
322	r = xloc.line;
323	}
324	return r;
325	}
326
327	/ Lookup the cache used for the content of a given file accessed by*
328	caret diagnostic. Return the found cached file, or NULL if no
329	cached file was found. /*
330
331	file_cache_slot *
332	file_cache::lookup_file (const char *file_path)
333	{
334	gcc_assert (file_path);
335
336	/ This will contain the found cached file. /
337	file_cache_slot *r = NULL;
338	for (unsigned i = `0`; i < num_file_slots; ++i)
339	{
340	file_cache_slot *c = &m_file_slots[i];
341	if (c->get_file_path () && !strcmp (s1: c->get_file_path (), s2: file_path))
342	{
343	c->inc_use_count ();
344	r = c;
345	}
346	}
347
348	if (r)
349	r->inc_use_count ();
350
351	return r;
352	}
353
354	/ Purge any mention of FILENAME from the cache of files used for*
355	printing source code. For use in selftests when working
356	with tempfiles. /*
357
358	void
359	diagnostics_file_cache_forcibly_evict_file (const char *file_path)
360	{
361	gcc_assert (file_path);
362
363	auto file_cache = global_dc->get_file_cache ();
364	if (!file_cache)
365	return;
366	file_cache->forcibly_evict_file (file_path);
367	}
368
369	void
370	file_cache::forcibly_evict_file (const char *file_path)
371	{
372	gcc_assert (file_path);
373
374	file_cache_slot *r = lookup_file (file_path);
375	if (!r)
376	/ Not found. /
377	return;
378
379	r->evict ();
380	}
381
382	void
383	file_cache_slot::evict ()
384	{
385	m_file_path = NULL;
386	if (m_fp)
387	fclose (stream: m_fp);
388	m_fp = NULL;
389	m_nb_read = `0`;
390	m_line_start_idx = `0`;
391	m_line_num = `0`;
392	m_line_record.truncate (size: `0`);
393	m_use_count = `0`;
394	m_total_lines = `0`;
395	m_missing_trailing_newline = true;
396	}
397
398	/ Return the file cache that has been less used, recently, or the*
399	first empty one. If HIGHEST_USE_COUNT is non-null,
400	*HIGHEST_USE_COUNT is set to the highest use count of the entries
401	in the cache table. /*
402
403	file_cache_slot*
404	file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
405	{
406	diagnostic_file_cache_init ();
407
408	file_cache_slot *to_evict = &m_file_slots[`0`];
409	unsigned huc = to_evict->get_use_count ();
410	for (unsigned i = `1`; i < num_file_slots; ++i)
411	{
412	file_cache_slot *c = &m_file_slots[i];
413	bool c_is_empty = (c->get_file_path () == NULL);
414
415	if (c->get_use_count () < to_evict->get_use_count ()
416	\|\| (to_evict->get_file_path () && c_is_empty))
417	/ We evict C because it's either an entry with a lower use*
418	count or one that is empty. /*
419	to_evict = c;
420
421	if (huc < c->get_use_count ())
422	huc = c->get_use_count ();
423
424	if (c_is_empty)
425	/ We've reached the end of the cache; subsequent elements are*
426	all empty. /*
427	break;
428	}
429
430	if (highest_use_count)
431	*highest_use_count = huc;
432
433	return to_evict;
434	}
435
436	/ Create the cache used for the content of a given file to be*
437	accessed by caret diagnostic. This cache is added to an array of
438	cache and can be retrieved by lookup_file_in_cache_tab. This
439	function returns the created cache. Note that only the last
440	num_file_slots files are cached.
441
442	This can return nullptr if the FILE_PATH can't be opened for
443	reading, or if the content can't be converted to the input_charset. /*
444
445	file_cache_slot*
446	file_cache::add_file (const char *file_path)
447	{
448
449	FILE *fp = fopen (filename: file_path, modes: "r");
450	if (fp == NULL)
451	return NULL;
452
453	unsigned highest_use_count = `0`;
454	file_cache_slot *r = evicted_cache_tab_entry (highest_use_count: &highest_use_count);
455	if (!r->create (in_context, file_path, fp, highest_use_count))
456	return NULL;
457	return r;
458	}
459
460	/ Get a borrowed char_span to the full content of this file*
461	as decoded according to the input charset, encoded as UTF-8. /*
462
463	char_span
464	file_cache_slot::get_full_file_content ()
465	{
466	char *line;
467	ssize_t line_len;
468	while (get_next_line (line: &line, line_len: &line_len))
469	{
470	}
471	return char_span (m_data, m_nb_read);
472	}
473
474	/ Populate this slot for use on FILE_PATH and FP, dropping any*
475	existing cached content within it. /*
476
477	bool
478	file_cache_slot::create (const file_cache::input_context &in_context,
479	const char file_path, FILE fp,
480	unsigned highest_use_count)
481	{
482	m_file_path = file_path;
483	if (m_fp)
484	fclose (stream: m_fp);
485	m_fp = fp;
486	if (m_alloc_offset)
487	offset_buffer (offset: -m_alloc_offset);
488	m_nb_read = `0`;
489	m_line_start_idx = `0`;
490	m_line_num = `0`;
491	m_line_record.truncate (size: `0`);
492	/ Ensure that this cache entry doesn't get evicted next time*
493	add_file_to_cache_tab is called. /*
494	m_use_count = ++highest_use_count;
495	m_total_lines = total_lines_num (file_path);
496	m_missing_trailing_newline = true;
497
498
499	/ Check the input configuration to determine if we need to do any*
500	transformations, such as charset conversion or BOM skipping. /*
501	if (const char *input_charset = in_context.ccb (file_path))
502	{
503	/ Need a full-blown conversion of the input charset. /
504	fclose (stream: m_fp);
505	m_fp = NULL;
506	const cpp_converted_source cs
507	= cpp_get_converted_source (fname: file_path, input_charset);
508	if (!cs.data)
509	return false;
510	if (m_data)
511	XDELETEVEC (m_data);
512	m_data = cs.data;
513	m_nb_read = m_size = cs.len;
514	m_alloc_offset = cs.data - cs.to_free;
515	}
516	else if (in_context.should_skip_bom)
517	{
518	if (read_data ())
519	{
520	const int offset = cpp_check_utf8_bom (data: m_data, data_length: m_nb_read);
521	offset_buffer (offset);
522	m_nb_read -= offset;
523	}
524	}
525
526	return true;
527	}
528
529	/ file_cache's ctor. /
530
531	file_cache::file_cache ()
532	: m_file_slots (new file_cache_slot[num_file_slots])
533	{
534	initialize_input_context (ccb: nullptr, should_skip_bom: false);
535	}
536
537	/ file_cache's dtor. /
538
539	file_cache::~file_cache ()
540	{
541	delete[] m_file_slots;
542	}
543
544	/ Lookup the cache used for the content of a given file accessed by*
545	caret diagnostic. If no cached file was found, create a new cache
546	for this file, add it to the array of cached file and return
547	it.
548
549	This can return nullptr on a cache miss if FILE_PATH can't be opened for
550	reading, or if the content can't be converted to the input_charset. /*
551
552	file_cache_slot*
553	file_cache::lookup_or_add_file (const char *file_path)
554	{
555	file_cache_slot *r = lookup_file (file_path);
556	if (r == NULL)
557	r = add_file (file_path);
558	return r;
559	}
560
561	/ Default constructor for a cache of file used by caret*
562	diagnostic. /*
563
564	file_cache_slot::file_cache_slot ()
565	: m_use_count (`0`), m_file_path (NULL), m_fp (NULL), m_data (`0`),
566	m_alloc_offset (`0`), m_size (`0`), m_nb_read (`0`), m_line_start_idx (`0`),
567	m_line_num (`0`), m_total_lines (`0`), m_missing_trailing_newline (true)
568	{
569	m_line_record.create (nelems: `0`);
570	}
571
572	/ Destructor for a cache of file used by caret diagnostic. /
573
574	file_cache_slot::~file_cache_slot ()
575	{
576	if (m_fp)
577	{
578	fclose (stream: m_fp);
579	m_fp = NULL;
580	}
581	if (m_data)
582	{
583	offset_buffer (offset: -m_alloc_offset);
584	XDELETEVEC (m_data);
585	m_data = `0`;
586	}
587	m_line_record.release ();
588	}
589
590	/ Returns TRUE iff the cache would need to be filled with data coming*
591	from the file. That is, either the cache is empty or full or the
592	current line is empty. Note that if the cache is full, it would
593	need to be extended and filled again. /*
594
595	bool
596	file_cache_slot::needs_read_p () const
597	{
598	return m_fp && (m_nb_read == `0`
599	\|\| m_nb_read == m_size
600	\|\| (m_line_start_idx >= m_nb_read - `1`));
601	}
602
603	/ Return TRUE iff the cache is full and thus needs to be*
604	extended. /*
605
606	bool
607	file_cache_slot::needs_grow_p () const
608	{
609	return m_nb_read == m_size;
610	}
611
612	/ Grow the cache if it needs to be extended. /
613
614	void
615	file_cache_slot::maybe_grow ()
616	{
617	if (!needs_grow_p ())
618	return;
619
620	if (!m_data)
621	{
622	gcc_assert (m_size == `0` && m_alloc_offset == `0`);
623	m_size = buffer_size;
624	m_data = XNEWVEC (char, m_size);
625	}
626	else
627	{
628	const int offset = m_alloc_offset;
629	offset_buffer (offset: -offset);
630	m_size *= `2`;
631	m_data = XRESIZEVEC (char, m_data, m_size);
632	offset_buffer (offset);
633	}
634	}
635
636	/ Read more data into the cache. Extends the cache if need be.*
637	Returns TRUE iff new data could be read. /*
638
639	bool
640	file_cache_slot::read_data ()
641	{
642	if (feof (stream: m_fp) \|\| ferror (stream: m_fp))
643	return false;
644
645	maybe_grow ();
646
647	char * from = m_data + m_nb_read;
648	size_t to_read = m_size - m_nb_read;
649	size_t nb_read = fread (ptr: from, size: `1`, n: to_read, stream: m_fp);
650
651	if (ferror (stream: m_fp))
652	return false;
653
654	m_nb_read += nb_read;
655	return !!nb_read;
656	}
657
658	/ Read new data iff the cache needs to be filled with more data*
659	coming from the file FP. Return TRUE iff the cache was filled with
660	mode data. /*
661
662	bool
663	file_cache_slot::maybe_read_data ()
664	{
665	if (!needs_read_p ())
666	return false;
667	return read_data ();
668	}
669
/* Helper function for file_cache_slot::get_next_line (), to find the end of
   the next line.  Returns with the memchr convention, i.e. nullptr if a line
   terminator was not found.  We need to determine line endings in the same
   manner that libcpp does: any of \n, \r\n, or \r is a line ending.

   S points to LEN bytes to scan.  For \r\n the returned pointer is to
   the \n; otherwise it is to the single terminator character.  */

static char *
find_end_of_line (char *s, size_t len)
{
  for (const auto end = s + len; s != end; ++s)
    {
      if (*s == '\n')
        return s;
      if (*s == '\r')
        {
          const auto next = s + 1;
          if (next == end)
            {
              /* Don't find the line ending if \r is the very last character
                 in the buffer; we do not know if it's the end of the file or
                 just the end of what has been read so far, and we wouldn't
                 want to break in the middle of what's actually a \r\n
                 sequence.  Instead, we will handle the case of a file ending
                 in a \r later.  */
              break;
            }
          return (*next == '\n' ? next : s);
        }
    }
  return nullptr;
}
700
701	/ Read a new line from file FP, using C as a cache for the data*
702	coming from the file. Upon successful completion, LINE is set to*
703	the beginning of the line found. LINE points directly in the*
704	line cache and is only valid until the next call of get_next_line.
705	*LINE_LEN is set to the length of the line. Note that the line
706	does not contain any terminal delimiter. This function returns
707	true if some data was read or process from the cache, false
708	otherwise. Note that subsequent calls to get_next_line might
709	make the content of LINE invalid. /
710
711	bool
712	file_cache_slot::get_next_line (char *line, ssize_t line_len)
713	{
714	/ Fill the cache with data to process. /
715	maybe_read_data ();
716
717	size_t remaining_size = m_nb_read - m_line_start_idx;
718	if (remaining_size == `0`)
719	/ There is no more data to process. /
720	return false;
721
722	char *line_start = m_data + m_line_start_idx;
723
724	char *next_line_start = NULL;
725	size_t len = `0`;
726	char *line_end = find_end_of_line (s: line_start, len: remaining_size);
727	if (line_end == NULL)
728	{
729	/ We haven't found an end-of-line delimiter in the cache.*
730	Fill the cache with more data from the file and look again. /*
731	while (maybe_read_data ())
732	{
733	line_start = m_data + m_line_start_idx;
734	remaining_size = m_nb_read - m_line_start_idx;
735	line_end = find_end_of_line (s: line_start, len: remaining_size);
736	if (line_end != NULL)
737	{
738	next_line_start = line_end + `1`;
739	break;
740	}
741	}
742	if (line_end == NULL)
743	{
744	/ We've loaded all the file into the cache and still no*
745	terminator. Let's say the line ends up at one byte past the
746	end of the file. This is to stay consistent with the case
747	of when the line ends up with a terminator and line_end points to
748	that. That consistency is useful below in the len calculation.
749
750	If the file ends in a \r, we didn't identify it as a line
751	terminator above, so do that now instead. /*
752	line_end = m_data + m_nb_read;
753	if (m_nb_read && line_end[-`1`] == `'\r'`)
754	{
755	--line_end;
756	m_missing_trailing_newline = false;
757	}
758	else
759	m_missing_trailing_newline = true;
760	}
761	else
762	m_missing_trailing_newline = false;
763	}
764	else
765	{
766	next_line_start = line_end + `1`;
767	m_missing_trailing_newline = false;
768	}
769
770	if (m_fp && ferror (stream: m_fp))
771	return false;
772
773	/ At this point, we've found the end of the of line. It either points to*
774	the line terminator or to one byte after the last byte of the file. /*
775	gcc_assert (line_end != NULL);
776
777	len = line_end - line_start;
778
779	if (m_line_start_idx < m_nb_read)
780	*line = line_start;
781
782	++m_line_num;
783
784	/ Before we update our line record, make sure the hint about the*
785	total number of lines of the file is correct. If it's not, then
786	we give up recording line boundaries from now on. /*
787	bool update_line_record = true;
788	if (m_line_num > m_total_lines)
789	update_line_record = false;
790
791	/ Now update our line record so that re-reading lines from the*
792	before m_line_start_idx is faster. /*
793	if (update_line_record
794	&& m_line_record.length () < line_record_size)
795	{
796	/ If the file lines fits in the line record, we just record all*
797	its lines .../*
798	if (m_total_lines <= line_record_size
799	&& m_line_num > m_line_record.length ())
800	m_line_record.safe_push
801	(obj: file_cache_slot::line_info (m_line_num,
802	m_line_start_idx,
803	line_end - m_data));
804	else if (m_total_lines > line_record_size)
805	{
806	/ ... otherwise, we just scale total_lines down to*
807	(line_record_size lines. /*
808	size_t n = (m_line_num * line_record_size) / m_total_lines;
809	if (m_line_record.length () == `0`
810	\|\| n >= m_line_record.length ())
811	m_line_record.safe_push
812	(obj: file_cache_slot::line_info (m_line_num,
813	m_line_start_idx,
814	line_end - m_data));
815	}
816	}
817
818	/ Update m_line_start_idx so that it points to the next line to be*
819	read. /*
820	if (next_line_start)
821	m_line_start_idx = next_line_start - m_data;
822	else
823	/ We didn't find any terminal '\n'. Let's consider that the end*
824	of line is the end of the data in the cache. The next
825	invocation of get_next_line will either read more data from the
826	underlying file or return false early because we've reached the
827	end of the file. /*
828	m_line_start_idx = m_nb_read;
829
830	*line_len = len;
831
832	return true;
833	}
834
835	/ Consume the next bytes coming from the cache (or from its*
836	underlying file if there are remaining unread bytes in the file)
837	until we reach the next end-of-line (or end-of-file). There is no
838	copying from the cache involved. Return TRUE upon successful
839	completion. /*
840
841	bool
842	file_cache_slot::goto_next_line ()
843	{
844	char *l;
845	ssize_t len;
846
847	return get_next_line (line: &l, line_len: &len);
848	}
849
850	/ Read an arbitrary line number LINE_NUM from the file cached in C.*
851	If the line was read successfully, LINE points to the beginning*
852	of the line in the file cache and LINE_LEN is the length of the*
853	line. LINE is not nul-terminated, but may contain zero bytes.*
854	*LINE is only valid until the next call of read_line_num.
855	This function returns bool if a line was read. /*
856
857	bool
858	file_cache_slot::read_line_num (size_t line_num,
859	char ** line, ssize_t *line_len)
860	{
861	gcc_assert (line_num > `0`);
862
863	if (line_num <= m_line_num)
864	{
865	/ We've been asked to read lines that are before m_line_num.*
866	So lets use our line record (if it's not empty) to try to
867	avoid re-reading the file from the beginning again. /*
868
869	if (m_line_record.is_empty ())
870	{
871	m_line_start_idx = `0`;
872	m_line_num = `0`;
873	}
874	else
875	{
876	file_cache_slot::line_info *i = NULL;
877	if (m_total_lines <= line_record_size)
878	{
879	/ In languages where the input file is not totally*
880	preprocessed up front, the m_total_lines hint
881	can be smaller than the number of lines of the
882	file. In that case, only the first
883	m_total_lines have been recorded.
884
885	Otherwise, the first m_total_lines we've read have
886	their start/end recorded here. /*
887	i = (line_num <= m_total_lines)
888	? &m_line_record [line_num - `1`]
889	: &m_line_record [m_total_lines - `1`];
890	gcc_assert (i->line_num <= line_num);
891	}
892	else
893	{
894	/ So the file had more lines than our line record*
895	size. Thus the number of lines we've recorded has
896	been scaled down to line_record_size. Let's
897	pick the start/end of the recorded line that is
898	closest to line_num. /*
899	size_t n = (line_num <= m_total_lines)
900	? line_num * line_record_size / m_total_lines
901	: m_line_record.length () - `1`;
902	if (n < m_line_record.length ())
903	{
904	i = &m_line_record [n];
905	gcc_assert (i->line_num <= line_num);
906	}
907	}
908
909	if (i && i->line_num == line_num)
910	{
911	/ We have the start/end of the line. /
912	*line = m_data + i->start_pos;
913	*line_len = i->end_pos - i->start_pos;
914	return true;
915	}
916
917	if (i)
918	{
919	m_line_start_idx = i->start_pos;
920	m_line_num = i->line_num - `1`;
921	}
922	else
923	{
924	m_line_start_idx = `0`;
925	m_line_num = `0`;
926	}
927	}
928	}
929
930	/ Let's walk from line m_line_num up to line_num - 1, without*
931	copying any line. /*
932	while (m_line_num < line_num - `1`)
933	if (!goto_next_line ())
934	return false;
935
936	/ The line we want is the next one. Let's read and copy it back to*
937	the caller. /*
938	return get_next_line (line, line_len);
939	}
940
941	/ Return the physical source line that corresponds to FILE_PATH/LINE.*
942	The line is not nul-terminated. The returned pointer is only
943	valid until the next call of location_get_source_line.
944	Note that the line can contain several null characters,
945	so the returned value's length has the actual length of the line.
946	If the function fails, a NULL char_span is returned. /*
947
948	char_span
949	file_cache::get_source_line (const char file_path, int* line)
950	{
951	char *buffer = NULL;
952	ssize_t len;
953
954	if (line == `0`)
955	return char_span (NULL, `0`);
956
957	if (file_path == NULL)
958	return char_span (NULL, `0`);
959
960	file_cache_slot *c = lookup_or_add_file (file_path);
961	if (c == NULL)
962	return char_span (NULL, `0`);
963
964	bool read = c->read_line_num (line_num: line, line: &buffer, line_len: &len);
965	if (!read)
966	return char_span (NULL, `0`);
967
968	return char_span (buffer, len);
969	}
970
971	char_span
972	location_get_source_line (const char file_path, int* line)
973	{
974	diagnostic_file_cache_init ();
975	return global_dc->get_file_cache ()->get_source_line (file_path, line);
976	}
977
978	/ Return a NUL-terminated copy of the source text between two locations, or*
979	NULL if the arguments are invalid. The caller is responsible for freeing
980	the return value. /*
981
982	char *
983	get_source_text_between (location_t start, location_t end)
984	{
985	expanded_location expstart =
986	expand_location_to_spelling_point (start, aspect: LOCATION_ASPECT_START);
987	expanded_location expend =
988	expand_location_to_spelling_point (end, aspect: LOCATION_ASPECT_FINISH);
989
990	/ If the locations are in different files or the end comes before the*
991	start, give up and return nothing. /*
992	if (!expstart.file \|\| !expend.file)
993	return NULL;
994	if (strcmp (s1: expstart.file, s2: expend.file) != `0`)
995	return NULL;
996	if (expstart.line > expend.line)
997	return NULL;
998	if (expstart.line == expend.line
999	&& expstart.column > expend.column)
1000	return NULL;
1001	/ These aren't real column numbers, give up. /
1002	if (expstart.column == `0` \|\| expend.column == `0`)
1003	return NULL;
1004
1005	/ For a single line we need to trim both edges. /
1006	if (expstart.line == expend.line)
1007	{
1008	char_span line = location_get_source_line (file_path: expstart.file, line: expstart.line);
1009	if (line.length () < `1`)
1010	return NULL;
1011	int s = expstart.column - `1`;
1012	int len = expend.column - s;
1013	if (line.length () < (size_t)expend.column)
1014	return NULL;
1015	return line.subspan (offset: s, n_elts: len).xstrdup ();
1016	}
1017
1018	struct obstack buf_obstack;
1019	obstack_init (&buf_obstack);
1020
1021	/ Loop through all lines in the range and append each to buf; may trim*
1022	parts of the start and end lines off depending on column values. /*
1023	for (int lnum = expstart.line; lnum <= expend.line; ++lnum)
1024	{
1025	char_span line = location_get_source_line (file_path: expstart.file, line: lnum);
1026	if (line.length () < `1` && (lnum != expstart.line && lnum != expend.line))
1027	continue;
1028
1029	/ For the first line in the range, only start at expstart.column /
1030	if (lnum == expstart.line)
1031	{
1032	unsigned off = expstart.column - `1`;
1033	if (line.length () < off)
1034	return NULL;
1035	line = line.subspan (offset: off, n_elts: line.length() - off);
1036	}
1037	/ For the last line, don't go past expend.column /
1038	else if (lnum == expend.line)
1039	{
1040	if (line.length () < (size_t)expend.column)
1041	return NULL;
1042	line = line.subspan (offset: `0`, n_elts: expend.column);
1043	}
1044
1045	/ Combine spaces at the beginning of later lines. /
1046	if (lnum > expstart.line)
1047	{
1048	unsigned off;
1049	for (off = `0`; off < line.length(); ++off)
1050	if (line [off] != `' '` && line [off] != `'\t'`)
1051	break;
1052	if (off > `0`)
1053	{
1054	obstack_1grow (&buf_obstack, `' '`);
1055	line = line.subspan (offset: off, n_elts: line.length() - off);
1056	}
1057	}
1058
1059	/ This does not include any trailing newlines. /
1060	obstack_grow (&buf_obstack, line.get_buffer (), line.length ());
1061	}
1062
1063	/ NUL-terminate and finish the buf obstack. /
1064	obstack_1grow (&buf_obstack, `0`);
1065	const char buf = (const* char *) obstack_finish (&buf_obstack);
1066
1067	return xstrdup (buf);
1068	}
1069
1070
1071	char_span
1072	file_cache::get_source_file_content (const char *file_path)
1073	{
1074	file_cache_slot *c = lookup_or_add_file (file_path);
1075	if (c == nullptr)
1076	return char_span (nullptr, `0`);
1077	return c->get_full_file_content ();
1078	}
1079
1080
1081	/ Get a borrowed char_span to the full content of FILE_PATH*
1082	as decoded according to the input charset, encoded as UTF-8. /*
1083
1084	char_span
1085	get_source_file_content (const char *file_path)
1086	{
1087	diagnostic_file_cache_init ();
1088	return global_dc->get_file_cache ()->get_source_file_content (file_path);
1089	}
1090
1091	/ Determine if FILE_PATH missing a trailing newline on its final line.*
1092	Only valid to call once all of the file has been loaded, by
1093	requesting a line number beyond the end of the file. /*
1094
1095	bool
1096	location_missing_trailing_newline (const char *file_path)
1097	{
1098	diagnostic_file_cache_init ();
1099
1100	file_cache_slot *c = global_dc->get_file_cache ()->lookup_or_add_file (file_path);
1101	if (c == NULL)
1102	return false;
1103
1104	return c->missing_trailing_newline_p ();
1105	}
1106
1107	/ Test if the location originates from the spelling location of a*
1108	builtin-tokens. That is, return TRUE if LOC is a (possibly
1109	virtual) location of a built-in token that appears in the expansion
1110	list of a macro. Please note that this function also works on
1111	tokens that result from built-in tokens. For instance, the
1112	function would return true if passed a token "4" that is the result
1113	of the expansion of the built-in __LINE__ macro. /*
1114	bool
1115	is_location_from_builtin_token (location_t loc)
1116	{
1117	const line_map_ordinary *map = NULL;
1118	loc = linemap_resolve_location (line_table, loc,
1119	lrk: LRK_SPELLING_LOCATION, loc_map: &map);
1120	return loc == BUILTINS_LOCATION;
1121	}
1122
1123	/ Expand the source location LOC into a human readable location. If*
1124	LOC is virtual, it resolves to the expansion point of the involved
1125	macro. If LOC resolves to a builtin location, the file name of the
1126	readable location is set to the string "<built-in>". /*
1127
1128	expanded_location
1129	expand_location (location_t loc)
1130	{
1131	return expand_location_1 (set: line_table, loc, /expansion_point_p=/true,
1132	aspect: LOCATION_ASPECT_CARET);
1133	}
1134
1135	/ Expand the source location LOC into a human readable location. If*
1136	LOC is virtual, it resolves to the expansion location of the
1137	relevant macro. If LOC resolves to a builtin location, the file
1138	name of the readable location is set to the string
1139	"<built-in>". /*
1140
1141	expanded_location
1142	expand_location_to_spelling_point (location_t loc,
1143	enum location_aspect aspect)
1144	{
1145	return expand_location_1 (set: line_table, loc, /expansion_point_p=/false,
1146	aspect);
1147	}
1148
1149	/ The rich_location class within libcpp requires a way to expand*
1150	location_t instances, and relies on the client code
1151	providing a symbol named
1152	linemap_client_expand_location_to_spelling_point
1153	to do this.
1154
1155	This is the implementation for libcommon.a (all host binaries),
1156	which simply calls into expand_location_1. /*
1157
1158	expanded_location
1159	linemap_client_expand_location_to_spelling_point (const line_maps *set,
1160	location_t loc,
1161	enum location_aspect aspect)
1162	{
1163	return expand_location_1 (set, loc, /expansion_point_p=/false, aspect);
1164	}
1165
1166
1167	/ If LOCATION is in a system header and if it is a virtual location*
1168	for a token coming from the expansion of a macro, unwind it to
1169	the location of the expansion point of the macro. If the expansion
1170	point is also in a system header return the original LOCATION.
1171	Otherwise, return the location of the expansion point.
1172
1173	This is used for instance when we want to emit diagnostics about a
1174	token that may be located in a macro that is itself defined in a
1175	system header, for example, for the NULL macro. In such a case, if
1176	LOCATION were passed directly to diagnostic functions such as
1177	warning_at, the diagnostic would be suppressed (unless
1178	-Wsystem-headers). /*
1179
1180	location_t
1181	expansion_point_location_if_in_system_header (location_t location)
1182	{
1183	if (!in_system_header_at (loc: location))
1184	return location;
1185
1186	location_t xloc = linemap_resolve_location (line_table, loc: location,
1187	lrk: LRK_MACRO_EXPANSION_POINT,
1188	NULL);
1189	return in_system_header_at (loc: xloc) ? location : xloc;
1190	}
1191
1192	/ If LOCATION is a virtual location for a token coming from the expansion*
1193	of a macro, unwind to the location of the expansion point of the macro. /*
1194
1195	location_t
1196	expansion_point_location (location_t location)
1197	{
1198	return linemap_resolve_location (line_table, loc: location,
1199	lrk: LRK_MACRO_EXPANSION_POINT, NULL);
1200	}
1201
1202	/ Construct a location with caret at CARET, ranging from START to*
1203	FINISH.
1204
1205	For example, consider:
1206
1207	11111111112
1208	12345678901234567890
1209	522
1210	523 return foo + bar;
1211	~~~~^~~~~
1212	524
1213
1214	The location's caret is at the "+", line 523 column 15, but starts
1215	earlier, at the "f" of "foo" at column 11. The finish is at the "r"
1216	of "bar" at column 19. /*
1217
1218	location_t
1219	make_location (location_t caret, location_t start, location_t finish)
1220	{
1221	return line_table->make_location (caret, start, finish);
1222	}
1223
1224	/ Same as above, but taking a source range rather than two locations. /
1225
1226	location_t
1227	make_location (location_t caret, source_range src_range)
1228	{
1229	location_t pure_loc = get_pure_location (loc: caret);
1230	return line_table->get_or_create_combined_loc (locus: pure_loc, src_range,
1231	data: nullptr, discriminator: `0`);
1232	}
1233
1234	/ An expanded_location stores the column in byte units. This function*
1235	converts that column to display units. That requires reading the associated
1236	source line in order to calculate the display width. If that cannot be done
1237	for any reason, then returns the byte column as a fallback. /*
1238	int
1239	location_compute_display_column (expanded_location exploc,
1240	const cpp_char_column_policy &policy)
1241	{
1242	if (!(exploc.file && *exploc.file && exploc.line && exploc.column))
1243	return exploc.column;
1244	char_span line = location_get_source_line (file_path: exploc.file, line: exploc.line);
1245	/ If line is NULL, this function returns exploc.column which is the*
1246	desired fallback. /*
1247	return cpp_byte_column_to_display_column (data: line.get_buffer (), data_length: line.length (),
1248	column: exploc.column, policy);
1249	}
1250
1251	/ Dump statistics to stderr about the memory usage of the line_table*
1252	set of line maps. This also displays some statistics about macro
1253	expansion. /*
1254
1255	void
1256	dump_line_table_statistics (void)
1257	{
1258	struct linemap_stats s;
1259	long total_used_map_size,
1260	macro_maps_size,
1261	total_allocated_map_size;
1262
1263	memset (s: &s, c: `0`, n: sizeof (s));
1264
1265	linemap_get_statistics (line_table, &s);
1266
1267	macro_maps_size = s.macro_maps_used_size
1268	+ s.macro_maps_locations_size;
1269
1270	total_allocated_map_size = s.ordinary_maps_allocated_size
1271	+ s.macro_maps_allocated_size
1272	+ s.macro_maps_locations_size;
1273
1274	total_used_map_size = s.ordinary_maps_used_size
1275	+ s.macro_maps_used_size
1276	+ s.macro_maps_locations_size;
1277
1278	fprintf (stderr, format: "Number of expanded macros: %5ld\n",
1279	s.num_expanded_macros);
1280	if (s.num_expanded_macros != `0`)
1281	fprintf (stderr, format: "Average number of tokens per macro expansion: %5ld\n",
1282	s.num_macro_tokens / s.num_expanded_macros);
1283	fprintf (stderr,
1284	format: "\nLine Table allocations during the "
1285	"compilation process\n");
1286	fprintf (stderr, format: "Number of ordinary maps used: " PRsa (`5`) "\n",
1287	SIZE_AMOUNT (s.num_ordinary_maps_used));
1288	fprintf (stderr, format: "Ordinary map used size: " PRsa (`5`) "\n",
1289	SIZE_AMOUNT (s.ordinary_maps_used_size));
1290	fprintf (stderr, format: "Number of ordinary maps allocated: " PRsa (`5`) "\n",
1291	SIZE_AMOUNT (s.num_ordinary_maps_allocated));
1292	fprintf (stderr, format: "Ordinary maps allocated size: " PRsa (`5`) "\n",
1293	SIZE_AMOUNT (s.ordinary_maps_allocated_size));
1294	fprintf (stderr, format: "Number of macro maps used: " PRsa (`5`) "\n",
1295	SIZE_AMOUNT (s.num_macro_maps_used));
1296	fprintf (stderr, format: "Macro maps used size: " PRsa (`5`) "\n",
1297	SIZE_AMOUNT (s.macro_maps_used_size));
1298	fprintf (stderr, format: "Macro maps locations size: " PRsa (`5`) "\n",
1299	SIZE_AMOUNT (s.macro_maps_locations_size));
1300	fprintf (stderr, format: "Macro maps size: " PRsa (`5`) "\n",
1301	SIZE_AMOUNT (macro_maps_size));
1302	fprintf (stderr, format: "Duplicated maps locations size: " PRsa (`5`) "\n",
1303	SIZE_AMOUNT (s.duplicated_macro_maps_locations_size));
1304	fprintf (stderr, format: "Total allocated maps size: " PRsa (`5`) "\n",
1305	SIZE_AMOUNT (total_allocated_map_size));
1306	fprintf (stderr, format: "Total used maps size: " PRsa (`5`) "\n",
1307	SIZE_AMOUNT (total_used_map_size));
1308	fprintf (stderr, format: "Ad-hoc table size: " PRsa (`5`) "\n",
1309	SIZE_AMOUNT (s.adhoc_table_size));
1310	fprintf (stderr, format: "Ad-hoc table entries used: " PRsa (`5`) "\n",
1311	SIZE_AMOUNT (s.adhoc_table_entries_used));
1312	fprintf (stderr, format: "optimized_ranges: " PRsa (`5`) "\n",
1313	SIZE_AMOUNT (line_table->m_num_optimized_ranges));
1314	fprintf (stderr, format: "unoptimized_ranges: " PRsa (`5`) "\n",
1315	SIZE_AMOUNT (line_table->m_num_unoptimized_ranges));
1316
1317	fprintf (stderr, format: "\n");
1318	}
1319
1320	/ Get location one beyond the final location in ordinary map IDX. /
1321
1322	static location_t
1323	get_end_location (class line_maps set, unsigned* int idx)
1324	{
1325	if (idx == LINEMAPS_ORDINARY_USED (set) - `1`)
1326	return set->highest_location;
1327
1328	struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, index: idx + `1`);
1329	return MAP_START_LOCATION (map: next_map);
1330	}
1331
/* Helper function for write_digit_row.
   Write the least significant decimal digit of DIGIT (i.e. DIGIT
   modulo 10) to STREAM as a single ASCII character.  */

static void
write_digit (FILE *stream, int digit)
{
  fputc ('0' + (digit % 10), stream);
}
1339
1340	/ Helper function for dump_location_info.*
1341	Write a row of numbers to STREAM, numbering a source line,
1342	giving the units, tens, hundreds etc of the column number. /*
1343
1344	static void
1345	write_digit_row (FILE stream, int* indent,
1346	const line_map_ordinary *map,
1347	location_t loc, int max_col, int divisor)
1348	{
1349	fprintf (stream: stream, format: "%*c", indent, `' '`);
1350	fprintf (stream: stream, format: "\|");
1351	for (int column = `1`; column < max_col; column++)
1352	{
1353	location_t column_loc = loc + (column << map->m_range_bits);
1354	write_digit (stream, digit: column_loc / divisor);
1355	}
1356	fprintf (stream: stream, format: "\n");
1357	}
1358
1359	/ Write a half-closed (START) / half-open (END) interval of*
1360	location_t to STREAM. /*
1361
1362	static void
1363	dump_location_range (FILE *stream,
1364	location_t start, location_t end)
1365	{
1366	fprintf (stream: stream,
1367	format: " location_t interval: %u <= loc < %u\n",
1368	start, end);
1369	}
1370
1371	/ Write a labelled description of a half-closed (START) / half-open (END)*
1372	interval of location_t to STREAM. /*
1373
1374	static void
1375	dump_labelled_location_range (FILE *stream,
1376	const char *name,
1377	location_t start, location_t end)
1378	{
1379	fprintf (stream: stream, format: "%s\n", name);
1380	dump_location_range (stream, start, end);
1381	fprintf (stream: stream, format: "\n");
1382	}
1383
1384	/ Write a visualization of the locations in the line_table to STREAM. /
1385
1386	void
1387	dump_location_info (FILE *stream)
1388	{
1389	/ Visualize the reserved locations. /
1390	dump_labelled_location_range (stream, name: "RESERVED LOCATIONS",
1391	start: `0`, end: RESERVED_LOCATION_COUNT);
1392
1393	/ Visualize the ordinary line_map instances, rendering the sources. /
1394	for (unsigned int idx = `0`; idx < LINEMAPS_ORDINARY_USED (set: line_table); idx++)
1395	{
1396	location_t end_location = get_end_location (set: line_table, idx);
1397	/ half-closed: doesn't include this one. /
1398
1399	const line_map_ordinary *map
1400	= LINEMAPS_ORDINARY_MAP_AT (set: line_table, index: idx);
1401	fprintf (stream: stream, format: "ORDINARY MAP: %i\n", idx);
1402	dump_location_range (stream,
1403	start: MAP_START_LOCATION (map), end: end_location);
1404	fprintf (stream: stream, format: " file: %s\n", ORDINARY_MAP_FILE_NAME (ord_map: map));
1405	fprintf (stream: stream, format: " starting at line: %i\n",
1406	ORDINARY_MAP_STARTING_LINE_NUMBER (ord_map: map));
1407	fprintf (stream: stream, format: " column and range bits: %i\n",
1408	map->m_column_and_range_bits);
1409	fprintf (stream: stream, format: " column bits: %i\n",
1410	map->m_column_and_range_bits - map->m_range_bits);
1411	fprintf (stream: stream, format: " range bits: %i\n",
1412	map->m_range_bits);
1413	const char * reason;
1414	switch (map->reason) {
1415	case LC_ENTER:
1416	reason = "LC_ENTER";
1417	break;
1418	case LC_LEAVE:
1419	reason = "LC_LEAVE";
1420	break;
1421	case LC_RENAME:
1422	reason = "LC_RENAME";
1423	break;
1424	case LC_RENAME_VERBATIM:
1425	reason = "LC_RENAME_VERBATIM";
1426	break;
1427	case LC_ENTER_MACRO:
1428	reason = "LC_RENAME_MACRO";
1429	break;
1430	default:
1431	reason = "Unknown";
1432	}
1433	fprintf (stream: stream, format: " reason: %d (%s)\n", map->reason, reason);
1434
1435	const line_map_ordinary *includer_map
1436	= linemap_included_from_linemap (set: line_table, map);
1437	fprintf (stream: stream, format: " included from location: %d",
1438	linemap_included_from (ord_map: map));
1439	if (includer_map) {
1440	fprintf (stream: stream, format: " (in ordinary map %d)",
1441	int (includer_map - line_table->info_ordinary.maps));
1442	}
1443	fprintf (stream: stream, format: "\n");
1444
1445	/ Render the span of source lines that this "map" covers. /
1446	for (location_t loc = MAP_START_LOCATION (map);
1447	loc < end_location;
1448	loc += (`1` << map->m_range_bits) )
1449	{
1450	gcc_assert (pure_location_p (line_table, loc) );
1451
1452	expanded_location exploc
1453	= linemap_expand_location (line_table, map, loc);
1454
1455	if (exploc.column == `0`)
1456	{
1457	/ Beginning of a new source line: draw the line. /
1458
1459	char_span line_text = location_get_source_line (file_path: exploc.file,
1460	line: exploc.line);
1461	if (!line_text)
1462	break;
1463	fprintf (stream: stream,
1464	format: "%s:%3i\|loc:%5i\|%.*s\n",
1465	exploc.file, exploc.line,
1466	loc,
1467	(int)line_text.length (), line_text.get_buffer ());
1468
1469	/ "loc" is at column 0, which means "the whole line".*
1470	Render the locations within* the line, by underlining*
1471	it, showing the location_t numeric values
1472	at each column. /*
1473	size_t max_col = (`1` << map->m_column_and_range_bits) - `1`;
1474	if (max_col > line_text.length ())
1475	max_col = line_text.length () + `1`;
1476
1477	int len_lnum = num_digits (exploc.line);
1478	if (len_lnum < `3`)
1479	len_lnum = `3`;
1480	int len_loc = num_digits (loc);
1481	if (len_loc < `5`)
1482	len_loc = `5`;
1483
1484	int indent = `6` + strlen (s: exploc.file) + len_lnum + len_loc;
1485
1486	/ Thousands. /
1487	if (end_location > `999`)
1488	write_digit_row (stream, indent, map, loc, max_col, divisor: `1000`);
1489
1490	/ Hundreds. /
1491	if (end_location > `99`)
1492	write_digit_row (stream, indent, map, loc, max_col, divisor: `100`);
1493
1494	/ Tens. /
1495	write_digit_row (stream, indent, map, loc, max_col, divisor: `10`);
1496
1497	/ Units. /
1498	write_digit_row (stream, indent, map, loc, max_col, divisor: `1`);
1499	}
1500	}
1501	fprintf (stream: stream, format: "\n");
1502	}
1503
1504	/ Visualize unallocated values. /
1505	dump_labelled_location_range (stream, name: "UNALLOCATED LOCATIONS",
1506	start: line_table->highest_location,
1507	end: LINEMAPS_MACRO_LOWEST_LOCATION (set: line_table));
1508
1509	/ Visualize the macro line_map instances, rendering the sources. /
1510	for (unsigned int i = `0`; i < LINEMAPS_MACRO_USED (set: line_table); i++)
1511	{
1512	/ Each macro map that is allocated owns location_t values*
1513	that are lower* that the one before them.*
1514	Hence it's meaningful to view them either in order of ascending
1515	source locations, or in order of ascending macro map index. /*
1516	const bool ascending_location_ts = true;
1517	unsigned int idx = (ascending_location_ts
1518	? (LINEMAPS_MACRO_USED (set: line_table) - (i + `1`))
1519	: i);
1520	const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (set: line_table, index: idx);
1521	fprintf (stream: stream, format: "MACRO %i: %s (%u tokens)\n",
1522	idx,
1523	linemap_map_get_macro_name (map),
1524	MACRO_MAP_NUM_MACRO_TOKENS (macro_map: map));
1525	dump_location_range (stream,
1526	start: map->start_location,
1527	end: (map->start_location
1528	+ MACRO_MAP_NUM_MACRO_TOKENS (macro_map: map)));
1529	inform (map->get_expansion_point_location (),
1530	"expansion point is location %i",
1531	map->get_expansion_point_location ());
1532	fprintf (stream: stream, format: " map->start_location: %u\n",
1533	map->start_location);
1534
1535	fprintf (stream: stream, format: " macro_locations:\n");
1536	for (unsigned int i = `0`; i < MACRO_MAP_NUM_MACRO_TOKENS (macro_map: map); i++)
1537	{
1538	location_t x = MACRO_MAP_LOCATIONS (macro_map: map)[`2` * i];
1539	location_t y = MACRO_MAP_LOCATIONS (macro_map: map)[(`2` * i) + `1`];
1540
1541	/ linemap_add_macro_token encodes token numbers in an expansion*
1542	by putting them after MAP_START_LOCATION. /*
1543
1544	/ I'm typically seeing 4 uninitialized entries at the end of*
1545	0xafafafaf.
1546	This appears to be due to macro.cc:replace_args
1547	adding 2 extra args for padding tokens; presumably there may
1548	be a leading and/or trailing padding token injected,
1549	each for 2 more location slots.
1550	This would explain there being up to 4 location_ts slots
1551	that may be uninitialized. /*
1552
1553	fprintf (stream: stream, format: " %u: %u, %u\n",
1554	i,
1555	x,
1556	y);
1557	if (x == y)
1558	{
1559	if (x < MAP_START_LOCATION (map))
1560	inform (x, "token %u has %<x-location == y-location == %u%>",
1561	i, x);
1562	else
1563	fprintf (stream: stream,
1564	format: "x-location == y-location == %u encodes token # %u\n",
1565	x, x - MAP_START_LOCATION (map));
1566	}
1567	else
1568	{
1569	inform (x, "token %u has %<x-location == %u%>", i, x);
1570	inform (x, "token %u has %<y-location == %u%>", i, y);
1571	}
1572	}
1573	fprintf (stream: stream, format: "\n");
1574	}
1575
1576	/ It appears that MAX_LOCATION_T itself is never assigned to a*
1577	macro map, presumably due to an off-by-one error somewhere
1578	between the logic in linemap_enter_macro and
1579	LINEMAPS_MACRO_LOWEST_LOCATION. /*
1580	dump_labelled_location_range (stream, name: "MAX_LOCATION_T",
1581	start: MAX_LOCATION_T,
1582	end: MAX_LOCATION_T + `1`);
1583
1584	/ Visualize ad-hoc values. /
1585	dump_labelled_location_range (stream, name: "AD-HOC LOCATIONS",
1586	start: MAX_LOCATION_T + `1`, UINT_MAX);
1587	}
1588
1589	/ string_concat's constructor. /
1590
1591	string_concat::string_concat (int num, location_t *locs)
1592	: m_num (num)
1593	{
1594	m_locs = ggc_vec_alloc <location_t> (c: num);
1595	for (int i = `0`; i < num; i++)
1596	m_locs[i] = locs[i];
1597	}
1598
1599	/ string_concat_db's constructor. /
1600
1601	string_concat_db::string_concat_db ()
1602	{
1603	m_table = hash_map <location_hash, string_concat *>::create_ggc (size: `64`);
1604	}
1605
1606	/ Record that a string concatenation occurred, covering NUM*
1607	string literal tokens. LOCS is an array of size NUM, containing the
1608	locations of the tokens. A copy of LOCS is taken. /*
1609
1610	void
1611	string_concat_db::record_string_concatenation (int num, location_t *locs)
1612	{
1613	gcc_assert (num > `1`);
1614	gcc_assert (locs);
1615
1616	location_t key_loc = get_key_loc (loc: locs[`0`]);
1617	/ We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values:*
1618	any data now recorded under key 'key_loc' would be overwritten by a
1619	subsequent call with the same key 'key_loc'. /*
1620	if (RESERVED_LOCATION_P (key_loc))
1621	return;
1622
1623	string_concat *concat
1624	= new (ggc_alloc <string_concat> ()) string_concat (num, locs);
1625	m_table->put (k: key_loc, v: concat);
1626	}
1627
1628	/ Determine if LOC was the location of the initial token of a*
1629	concatenation of string literal tokens.
1630	If so, OUT_NUM is written to with the number of tokens, and*
1631	*OUT_LOCS with the location of an array of locations of the
1632	tokens, and return true. OUT_LOCS is a borrowed pointer to*
1633	storage owned by the string_concat_db.
1634	Otherwise, return false. /*
1635
1636	bool
1637	string_concat_db::get_string_concatenation (location_t loc,
1638	int *out_num,
1639	location_t **out_locs)
1640	{
1641	gcc_assert (out_num);
1642	gcc_assert (out_locs);
1643
1644	location_t key_loc = get_key_loc (loc);
1645	/ We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see*
1646	discussion in 'string_concat_db::record_string_concatenation'. /*
1647	if (RESERVED_LOCATION_P (key_loc))
1648	return false;
1649
1650	string_concat **concat = m_table->get (k: key_loc);
1651	if (!concat)
1652	return false;
1653
1654	out_num = (concat)->m_num;
1655	out_locs =(concat)->m_locs;
1656	return true;
1657	}
1658
1659	/ Internal function. Canonicalize LOC into a form suitable for*
1660	use as a key within the database, stripping away macro expansion,
1661	ad-hoc information, and range information, using the location of
1662	the start of LOC within an ordinary linemap. /*
1663
1664	location_t
1665	string_concat_db::get_key_loc (location_t loc)
1666	{
1667	loc = linemap_resolve_location (line_table, loc, lrk: LRK_SPELLING_LOCATION,
1668	NULL);
1669
1670	loc = get_range_from_loc (set: line_table, loc).m_start;
1671
1672	return loc;
1673	}
1674
1675	/ Helper class for use within get_substring_ranges_for_loc.*
1676	An vec of cpp_string with responsibility for releasing all of the
1677	str->text for each str in the vector. /*
1678
1679	class auto_cpp_string_vec : public auto_vec <cpp_string>
1680	{
1681	public:
1682	auto_cpp_string_vec (int alloc)
1683	: auto_vec <cpp_string> (alloc) {}
1684
1685	~auto_cpp_string_vec ()
1686	{
1687	/ Clean up the copies within this vec. /
1688	int i;
1689	cpp_string *str;
1690	FOR_EACH_VEC_ELT (*this, i, str)
1691	free (ptr: const_cast <unsigned char *> (str->text));
1692	}
1693	};
1694
1695	/ Attempt to populate RANGES with source location information on the*
1696	individual characters within the string literal found at STRLOC.
1697	If CONCATS is non-NULL, then any string literals that the token at
1698	STRLOC was concatenated with are also added to RANGES.
1699
1700	Return NULL if successful, or an error message if any errors occurred (in
1701	which case RANGES may be only partially populated and should not
1702	be used).
1703
1704	This is implemented by re-parsing the relevant source line(s). /*
1705
1706	static const char *
1707	get_substring_ranges_for_loc (cpp_reader *pfile,
1708	string_concat_db *concats,
1709	location_t strloc,
1710	enum cpp_ttype type,
1711	cpp_substring_ranges &ranges)
1712	{
1713	gcc_assert (pfile);
1714
1715	if (strloc == UNKNOWN_LOCATION)
1716	return "unknown location";
1717
1718	/ Reparsing the strings requires accurate location information.*
1719	If -ftrack-macro-expansion has been overridden from its default
1720	of 2, then we might have a location of a macro expansion point,
1721	rather than the location of the literal itself.
1722	Avoid this by requiring that we have full macro expansion tracking
1723	for substring locations to be available. /*
1724	if (cpp_get_options (pfile)->track_macro_expansion != `2`)
1725	return "track_macro_expansion != 2";
1726
1727	/ If #line or # 44 "file"-style directives are present, then there's*
1728	no guarantee that the line numbers we have can be used to locate
1729	the strings. For example, we might have a .i file with # directives
1730	pointing back to lines within a .c file, but the .c file might
1731	have been edited since the .i file was created.
1732	In such a case, the safest course is to disable on-demand substring
1733	locations. /*
1734	if (line_table->seen_line_directive)
1735	return "seen line directive";
1736
1737	/ If string concatenation has occurred at STRLOC, get the locations*
1738	of all of the literal tokens making up the compound string.
1739	Otherwise, just use STRLOC. /*
1740	int num_locs = `1`;
1741	location_t *strlocs = &strloc;
1742	if (concats)
1743	concats->get_string_concatenation (loc: strloc, out_num: &num_locs, out_locs: &strlocs);
1744
1745	auto_cpp_string_vec strs (num_locs);
1746	auto_vec <cpp_string_location_reader> loc_readers (num_locs);
1747	for (int i = `0`; i < num_locs; i++)
1748	{
1749	/ Get range of strloc. We will use it to locate the start and finish*
1750	of the literal token within the line. /*
1751	source_range src_range = get_range_from_loc (set: line_table, loc: strlocs[i]);
1752
1753	if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (set: line_table))
1754	{
1755	/ If the string token was within a macro expansion, then we can*
1756	cope with it for the simple case where we have a single token.
1757	Otherwise, bail out. /*
1758	if (src_range.m_start != src_range.m_finish)
1759	return "macro expansion";
1760	}
1761	else
1762	{
1763	if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1764	/ If so, we can't reliably determine where the token started within*
1765	its line. /*
1766	return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS";
1767
1768	if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
1769	/ If so, we can't reliably determine where the token finished*
1770	within its line. /*
1771	return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS";
1772	}
1773
1774	expanded_location start
1775	= expand_location_to_spelling_point (loc: src_range.m_start,
1776	aspect: LOCATION_ASPECT_START);
1777	expanded_location finish
1778	= expand_location_to_spelling_point (loc: src_range.m_finish,
1779	aspect: LOCATION_ASPECT_FINISH);
1780	if (start.file != finish.file)
1781	return "range endpoints are in different files";
1782	if (start.line != finish.line)
1783	return "range endpoints are on different lines";
1784	if (start.column > finish.column)
1785	return "range endpoints are reversed";
1786
1787	char_span line = location_get_source_line (file_path: start.file, line: start.line);
1788	if (!line)
1789	return "unable to read source line";
1790
1791	/ Determine the location of the literal (including quotes*
1792	and leading prefix chars, such as the 'u' in a u""
1793	token). /*
1794	size_t literal_length = finish.column - start.column + `1`;
1795
1796	/ Ensure that we don't crash if we got the wrong location. /
1797	if (start.column < `1`)
1798	return "zero start column";
1799	if (line.length () < (start.column - `1` + literal_length))
1800	return "line is not wide enough";
1801
1802	char_span literal = line.subspan (offset: start.column - `1`, n_elts: literal_length);
1803
1804	cpp_string from;
1805	from.len = literal_length;
1806	/ Make a copy of the literal, to avoid having to rely on*
1807	the lifetime of the copy of the line within the cache.
1808	This will be released by the auto_cpp_string_vec dtor. /*
1809	from.text = (unsigned char *)literal.xstrdup ();
1810	strs.safe_push (obj: from);
1811
1812	/ For very long lines, a new linemap could have started*
1813	halfway through the token.
1814	Ensure that the loc_reader uses the linemap of the
1815	end of the token for its start location. */
1816	const line_map_ordinary *start_ord_map;
1817	linemap_resolve_location (line_table, loc: src_range.m_start,
1818	lrk: LRK_SPELLING_LOCATION, loc_map: &start_ord_map);
1819	const line_map_ordinary *final_ord_map;
1820	linemap_resolve_location (line_table, loc: src_range.m_finish,
1821	lrk: LRK_SPELLING_LOCATION, loc_map: &final_ord_map);
1822	if (start_ord_map == NULL \|\| final_ord_map == NULL)
1823	return "failed to get ordinary maps";
1824	/ Bulletproofing. We ought to only have different ordinary maps*
1825	for start vs finish due to line-length jumps. /*
1826	if (start_ord_map != final_ord_map
1827	&& start_ord_map->to_file != final_ord_map->to_file)
1828	return "start and finish are spelled in different ordinary maps";
1829	/ The file from linemap_resolve_location ought to match that from*
1830	expand_location_to_spelling_point. /*
1831	if (start_ord_map->to_file != start.file)
1832	return "mismatching file after resolving linemap";
1833
1834	location_t start_loc
1835	= linemap_position_for_line_and_column (set: line_table, final_ord_map,
1836	start.line, start.column);
1837
1838	cpp_string_location_reader loc_reader (start_loc, line_table);
1839	loc_readers.safe_push (obj: loc_reader);
1840	}
1841
1842	/ Rerun cpp_interpret_string, or rather, a modified version of it. /
1843	const char *err = cpp_interpret_string_ranges (pfile, from: strs.address (),
1844	loc_readers.address (),
1845	count: num_locs, out: &ranges, type);
1846	if (err)
1847	return err;
1848
1849	/ Success: "ranges" should now contain information on the string. /
1850	return NULL;
1851	}
1852
1853	/ Attempt to populate OUT_LOC with source location information on the
1854	given characters within the string literal found at STRLOC.
1855	CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution
1856	character set.
1857
1858	For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7
1859	and string literal "012345\n789"
1860	*OUT_LOC is written to with:
1861	"012345\n789"
1862	~^~~~~
1863
1864	If CONCATS is non-NULL, then any string literals that the token at
1865	STRLOC was concatenated with are also considered.
1866
1867	This is implemented by re-parsing the relevant source line(s).
1868
1869	Return NULL if successful, or an error message if any errors occurred.
1870	Error messages are intended for GCC developers (to help debugging) rather
1871	than for end-users. /*
1872
1873	const char *
1874	get_location_within_string (cpp_reader *pfile,
1875	string_concat_db *concats,
1876	location_t strloc,
1877	enum cpp_ttype type,
1878	int caret_idx, int start_idx, int end_idx,
1879	location_t *out_loc)
1880	{
1881	gcc_checking_assert (caret_idx >= `0`);
1882	gcc_checking_assert (start_idx >= `0`);
1883	gcc_checking_assert (end_idx >= `0`);
1884	gcc_assert (out_loc);
1885
1886	cpp_substring_ranges ranges;
1887	const char *err
1888	= get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1889	if (err)
1890	return err;
1891
1892	if (caret_idx >= ranges.get_num_ranges ())
1893	return "caret_idx out of range";
1894	if (start_idx >= ranges.get_num_ranges ())
1895	return "start_idx out of range";
1896	if (end_idx >= ranges.get_num_ranges ())
1897	return "end_idx out of range";
1898
1899	*out_loc = make_location (caret: ranges.get_range (idx: caret_idx).m_start,
1900	start: ranges.get_range (idx: start_idx).m_start,
1901	finish: ranges.get_range (idx: end_idx).m_finish);
1902	return NULL;
1903	}
1904
1905	/ Associate the DISCRIMINATOR with LOCUS, and return a new locus. /
1906
1907	location_t
1908	location_with_discriminator (location_t locus, int discriminator)
1909	{
1910	tree block = LOCATION_BLOCK (locus);
1911	source_range src_range = get_range_from_loc (set: line_table, loc: locus);
1912	locus = get_pure_location (loc: locus);
1913
1914	if (locus == UNKNOWN_LOCATION)
1915	return locus;
1916
1917	return line_table->get_or_create_combined_loc (locus, src_range, data: block,
1918	discriminator);
1919	}
1920
1921	/ Return TRUE if LOCUS represents a location with a discriminator. /
1922
1923	bool
1924	has_discriminator (location_t locus)
1925	{
1926	return get_discriminator_from_loc (locus) != `0`;
1927	}
1928
1929	/ Return the discriminator for LOCUS. /
1930
1931	int
1932	get_discriminator_from_loc (location_t locus)
1933	{
1934	return get_discriminator_from_loc (set: line_table, loc: locus);
1935	}
1936
1937	#if CHECKING_P
1938
1939	namespace selftest {
1940
/* Selftests of location handling.  */
1942
1943	/ Attempt to populate OUT_RANGE with source location information on the
1944	given character within the string literal found at STRLOC.
1945	CHAR_IDX refers to an offset within the execution character set.
1946	If CONCATS is non-NULL, then any string literals that the token at
1947	STRLOC was concatenated with are also considered.
1948
1949	This is implemented by re-parsing the relevant source line(s).
1950
1951	Return NULL if successful, or an error message if any errors occurred.
1952	Error messages are intended for GCC developers (to help debugging) rather
1953	than for end-users. /*
1954
1955	static const char *
1956	get_source_range_for_char (cpp_reader *pfile,
1957	string_concat_db *concats,
1958	location_t strloc,
1959	enum cpp_ttype type,
1960	int char_idx,
1961	source_range *out_range)
1962	{
1963	gcc_checking_assert (char_idx >= `0`);
1964	gcc_assert (out_range);
1965
1966	cpp_substring_ranges ranges;
1967	const char *err
1968	= get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1969	if (err)
1970	return err;
1971
1972	if (char_idx >= ranges.get_num_ranges ())
1973	return "char_idx out of range";
1974
1975	*out_range = ranges.get_range (idx: char_idx);
1976	return NULL;
1977	}
1978
1979	/ As get_source_range_for_char, but write to OUT the number
1980	of ranges that are available. /*
1981
1982	static const char *
1983	get_num_source_ranges_for_substring (cpp_reader *pfile,
1984	string_concat_db *concats,
1985	location_t strloc,
1986	enum cpp_ttype type,
1987	int *out)
1988	{
1989	gcc_assert (out);
1990
1991	cpp_substring_ranges ranges;
1992	const char *err
1993	= get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges);
1994
1995	if (err)
1996	return err;
1997
1998	*out = ranges.get_num_ranges ();
1999	return NULL;
2000	}
2001
/* Selftests of location handling.  */
2003
2004	/ Verify that compare() on linenum_type handles comparisons over the full*
2005	range of the type. /*
2006
2007	static void
2008	test_linenum_comparisons ()
2009	{
2010	linenum_type min_line (`0`);
2011	linenum_type max_line (`0xffffffff`);
2012	ASSERT_EQ (`0`, compare (min_line, min_line));
2013	ASSERT_EQ (`0`, compare (max_line, max_line));
2014
2015	ASSERT_GT (compare (max_line, min_line), `0`);
2016	ASSERT_LT (compare (min_line, max_line), `0`);
2017	}
2018
2019	/ Helper function for verifying location data: when location_t*
2020	values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated
2021	as having column 0. /*
2022
2023	static bool
2024	should_have_column_data_p (location_t loc)
2025	{
2026	if (IS_ADHOC_LOC (loc))
2027	loc = get_location_from_adhoc_loc (line_table, loc);
2028	if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS)
2029	return false;
2030	return true;
2031	}
2032
2033	/ Selftest for should_have_column_data_p. /
2034
2035	static void
2036	test_should_have_column_data_p ()
2037	{
2038	ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
2039	ASSERT_TRUE
2040	(should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
2041	ASSERT_FALSE
2042	(should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + `1`));
2043	}
2044
2045	/ Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN*
2046	on LOC. /*
2047
2048	static void
2049	assert_loceq (const char exp_filename, int* exp_linenum, int exp_colnum,
2050	location_t loc)
2051	{
2052	ASSERT_STREQ (exp_filename, LOCATION_FILE (loc));
2053	ASSERT_EQ (exp_linenum, LOCATION_LINE (loc));
2054	/ If location_t values are sufficiently high, then column numbers*
2055	will be unavailable and LOCATION_COLUMN (loc) will be 0.
2056	When close to the threshold, column numbers may* be present: if*
2057	the final linemap before the threshold contains a line that straddles
2058	the threshold, locations in that line have column information. /*
2059	if (should_have_column_data_p (loc))
2060	ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc));
2061	}
2062
/* Various selftests involve constructing a line table and one or more
   line maps within it.

   For maximum test coverage we want to run these tests with a variety
   of situations:
   - line_table->default_range_bits: some frontends use a non-zero value
   and others use zero
   - the fallback modes within line-map.cc: there are various threshold
   values for location_t beyond which line-map.cc changes
   behavior (disabling of the range-packing optimization, disabling
   of column-tracking).  We can exercise these by starting the line_table
   at interesting values at or near these thresholds.

   The following class describes a particular case within our test
   matrix: a value for default_range_bits plus a base location.  */

class line_table_case
{
public:
  line_table_case (int default_range_bits, int base_location)
    : m_default_range_bits (default_range_bits),
      m_base_location (base_location)
  {
  }

  /* Value to store into line_table->default_range_bits.  */
  int m_default_range_bits;

  /* Location at which to start the line_table.  */
  int m_base_location;
};
2090
2091	/ Constructor. Store the old value of line_table, and create a new*
2092	one, using sane defaults. /*
2093
2094	line_table_test::line_table_test ()
2095	{
2096	gcc_assert (saved_line_table == NULL);
2097	saved_line_table = line_table;
2098	line_table = ggc_alloc<line_maps> ();
2099	linemap_init (set: line_table, BUILTINS_LOCATION);
2100	gcc_assert (saved_line_table->m_reallocator);
2101	line_table->m_reallocator = saved_line_table->m_reallocator;
2102	gcc_assert (saved_line_table->m_round_alloc_size);
2103	line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
2104	line_table->default_range_bits = `0`;
2105	}
2106
2107	/ Constructor. Store the old value of line_table, and create a new*
2108	one, using the sitation described in CASE_. /*
2109
2110	line_table_test::line_table_test (const line_table_case &case_)
2111	{
2112	gcc_assert (saved_line_table == NULL);
2113	saved_line_table = line_table;
2114	line_table = ggc_alloc<line_maps> ();
2115	linemap_init (set: line_table, BUILTINS_LOCATION);
2116	gcc_assert (saved_line_table->m_reallocator);
2117	line_table->m_reallocator = saved_line_table->m_reallocator;
2118	gcc_assert (saved_line_table->m_round_alloc_size);
2119	line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
2120	line_table->default_range_bits = case_.m_default_range_bits;
2121	if (case_.m_base_location)
2122	{
2123	line_table->highest_location = case_.m_base_location;
2124	line_table->highest_line = case_.m_base_location;
2125	}
2126	}
2127
2128	/ Destructor. Restore the old value of line_table. /
2129
2130	line_table_test::~line_table_test ()
2131	{
2132	gcc_assert (saved_line_table != NULL);
2133	line_table = saved_line_table;
2134	saved_line_table = NULL;
2135	}
2136
2137	/ Verify basic operation of ordinary linemaps. /
2138
2139	static void
2140	test_accessing_ordinary_linemaps (const line_table_case &case_)
2141	{
2142	line_table_test ltt (case_);
2143
2144	/ Build a simple linemap describing some locations. /
2145	linemap_add (line_table, LC_ENTER, sysp: false, to_file: "foo.c", to_line: `0`);
2146
2147	linemap_line_start (set: line_table, to_line: `1`, max_column_hint: `100`);
2148	location_t loc_a = linemap_position_for_column (line_table, `1`);
2149	location_t loc_b = linemap_position_for_column (line_table, `23`);
2150
2151	linemap_line_start (set: line_table, to_line: `2`, max_column_hint: `100`);
2152	location_t loc_c = linemap_position_for_column (line_table, `1`);
2153	location_t loc_d = linemap_position_for_column (line_table, `17`);
2154
2155	/ Example of a very long line. /
2156	linemap_line_start (set: line_table, to_line: `3`, max_column_hint: `2000`);
2157	location_t loc_e = linemap_position_for_column (line_table, `700`);
2158
2159	/ Transitioning back to a short line. /
2160	linemap_line_start (set: line_table, to_line: `4`, max_column_hint: `0`);
2161	location_t loc_back_to_short = linemap_position_for_column (line_table, `100`);
2162
2163	if (should_have_column_data_p (loc: loc_back_to_short))
2164	{
2165	/ Verify that we switched to short lines in the linemap. /
2166	line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (set: line_table);
2167	ASSERT_EQ (`7`, map->m_column_and_range_bits - map->m_range_bits);
2168	}
2169
2170	/ Example of a line that will eventually be seen to be longer*
2171	than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
2172	below that. /*
2173	linemap_line_start (set: line_table, to_line: `5`, max_column_hint: `2000`);
2174
2175	location_t loc_start_of_very_long_line
2176	= linemap_position_for_column (line_table, `2000`);
2177	location_t loc_too_wide
2178	= linemap_position_for_column (line_table, `4097`);
2179	location_t loc_too_wide_2
2180	= linemap_position_for_column (line_table, `4098`);
2181
2182	/ ...and back to a sane line length. /
2183	linemap_line_start (set: line_table, to_line: `6`, max_column_hint: `100`);
2184	location_t loc_sane_again = linemap_position_for_column (line_table, `10`);
2185
2186	linemap_add (line_table, LC_LEAVE, sysp: false, NULL, to_line: `0`);
2187
2188	/ Multiple files. /
2189	linemap_add (line_table, LC_ENTER, sysp: false, to_file: "bar.c", to_line: `0`);
2190	linemap_line_start (set: line_table, to_line: `1`, max_column_hint: `200`);
2191	location_t loc_f = linemap_position_for_column (line_table, `150`);
2192	linemap_add (line_table, LC_LEAVE, sysp: false, NULL, to_line: `0`);
2193
2194	/ Verify that we can recover the location info. /
2195	assert_loceq (exp_filename: "foo.c", exp_linenum: `1`, exp_colnum: `1`, loc: loc_a);
2196	assert_loceq (exp_filename: "foo.c", exp_linenum: `1`, exp_colnum: `23`, loc: loc_b);
2197	assert_loceq (exp_filename: "foo.c", exp_linenum: `2`, exp_colnum: `1`, loc: loc_c);
2198	assert_loceq (exp_filename: "foo.c", exp_linenum: `2`, exp_colnum: `17`, loc: loc_d);
2199	assert_loceq (exp_filename: "foo.c", exp_linenum: `3`, exp_colnum: `700`, loc: loc_e);
2200	assert_loceq (exp_filename: "foo.c", exp_linenum: `4`, exp_colnum: `100`, loc: loc_back_to_short);
2201
2202	/ In the very wide line, the initial location should be fully tracked. /
2203	assert_loceq (exp_filename: "foo.c", exp_linenum: `5`, exp_colnum: `2000`, loc: loc_start_of_very_long_line);
2204	/ ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should*
2205	be disabled. /*
2206	assert_loceq (exp_filename: "foo.c", exp_linenum: `5`, exp_colnum: `0`, loc: loc_too_wide);
2207	assert_loceq (exp_filename: "foo.c", exp_linenum: `5`, exp_colnum: `0`, loc: loc_too_wide_2);
2208	/...and column-tracking should be re-enabled for subsequent lines. /
2209	assert_loceq (exp_filename: "foo.c", exp_linenum: `6`, exp_colnum: `10`, loc: loc_sane_again);
2210
2211	assert_loceq (exp_filename: "bar.c", exp_linenum: `1`, exp_colnum: `150`, loc: loc_f);
2212
2213	ASSERT_FALSE (is_location_from_builtin_token (loc_a));
2214	ASSERT_TRUE (pure_location_p (line_table, loc_a));
2215
2216	/ Verify using make_location to build a range, and extracting data*
2217	back from it. /*
2218	location_t range_c_b_d = make_location (caret: loc_c, start: loc_b, finish: loc_d);
2219	ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
2220	ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
2221	source_range src_range = get_range_from_loc (set: line_table, loc: range_c_b_d);
2222	ASSERT_EQ (loc_b, src_range.m_start);
2223	ASSERT_EQ (loc_d, src_range.m_finish);
2224	}
2225
2226	/ Verify various properties of UNKNOWN_LOCATION. /
2227
2228	static void
2229	test_unknown_location ()
2230	{
2231	ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
2232	ASSERT_EQ (`0`, LOCATION_LINE (UNKNOWN_LOCATION));
2233	ASSERT_EQ (`0`, LOCATION_COLUMN (UNKNOWN_LOCATION));
2234	}
2235
2236	/ Verify various properties of BUILTINS_LOCATION. /
2237
2238	static void
2239	test_builtins ()
2240	{
2241	assert_loceq (exp_filename: special_fname_builtin (), exp_linenum: `0`, exp_colnum: `0`, BUILTINS_LOCATION);
2242	ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
2243	}
2244
2245	/ Regression test for make_location.*
2246	Ensure that we use pure locations for the start/finish of the range,
2247	rather than storing a packed or ad-hoc range as the start/finish. /*
2248
2249	static void
2250	test_make_location_nonpure_range_endpoints (const line_table_case &case_)
2251	{
2252	/ Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c*
2253	with C++ frontend.
2254	....................0000000001111111111222.
2255	....................1234567890123456789012. /*
2256	const char *content = " r += !aaa == bbb;\n";
2257	temp_source_file tmp (SELFTEST_LOCATION, ".C", content);
2258	line_table_test ltt (case_);
2259	linemap_add (line_table, LC_ENTER, sysp: false, to_file: tmp.get_filename (), to_line: `1`);
2260
2261	const location_t c11 = linemap_position_for_column (line_table, `11`);
2262	const location_t c12 = linemap_position_for_column (line_table, `12`);
2263	const location_t c13 = linemap_position_for_column (line_table, `13`);
2264	const location_t c14 = linemap_position_for_column (line_table, `14`);
2265	const location_t c21 = linemap_position_for_column (line_table, `21`);
2266
2267	if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
2268	return;
2269
2270	/ Use column 13 for the caret location, arbitrarily, to verify that we*
2271	handle start != caret. /*
2272	const location_t aaa = make_location (caret: c13, start: c12, finish: c14);
2273	ASSERT_EQ (c13, get_pure_location (aaa));
2274	ASSERT_EQ (c12, get_start (aaa));
2275	ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
2276	ASSERT_EQ (c14, get_finish (aaa));
2277	ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));
2278
2279	/ Make a location using a location with a range as the start-point. /
2280	const location_t not_aaa = make_location (caret: c11, start: aaa, finish: c14);
2281	ASSERT_EQ (c11, get_pure_location (not_aaa));
2282	/ It should use the start location of the range, not store the range*
2283	itself. /*
2284	ASSERT_EQ (c12, get_start (not_aaa));
2285	ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
2286	ASSERT_EQ (c14, get_finish (not_aaa));
2287	ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));
2288
2289	/ Similarly, make a location with a range as the end-point. /
2290	const location_t aaa_eq_bbb = make_location (caret: c12, start: c12, finish: c21);
2291	ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
2292	ASSERT_EQ (c12, get_start (aaa_eq_bbb));
2293	ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
2294	ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
2295	ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
2296	const location_t not_aaa_eq_bbb = make_location (caret: c11, start: c12, finish: aaa_eq_bbb);
2297	/ It should use the finish location of the range, not store the range*
2298	itself. /*
2299	ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
2300	ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
2301	ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
2302	ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
2303	ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
2304	}
2305
2306	/ Verify reading of input files (e.g. for caret-based diagnostics). /
2307
2308	static void
2309	test_reading_source_line ()
2310	{
2311	/ Create a tempfile and write some text to it. /
2312	temp_source_file tmp (SELFTEST_LOCATION, ".txt",
2313	"01234567890123456789\n"
2314	"This is the test text\n"
2315	"This is the 3rd line");
2316
2317	/ Read back a specific line from the tempfile. /
2318	char_span source_line = location_get_source_line (file_path: tmp.get_filename (), line: `3`);
2319	ASSERT_TRUE (source_line);
2320	ASSERT_TRUE (source_line.get_buffer () != NULL);
2321	ASSERT_EQ (`20`, source_line.length ());
2322	ASSERT_TRUE (!strncmp ("This is the 3rd line",
2323	source_line.get_buffer (), source_line.length ()));
2324
2325	source_line = location_get_source_line (file_path: tmp.get_filename (), line: `2`);
2326	ASSERT_TRUE (source_line);
2327	ASSERT_TRUE (source_line.get_buffer () != NULL);
2328	ASSERT_EQ (`21`, source_line.length ());
2329	ASSERT_TRUE (!strncmp ("This is the test text",
2330	source_line.get_buffer (), source_line.length ()));
2331
2332	source_line = location_get_source_line (file_path: tmp.get_filename (), line: `4`);
2333	ASSERT_FALSE (source_line);
2334	ASSERT_TRUE (source_line.get_buffer () == NULL);
2335	}
2336
/* Tests of lexing.  */
2338
/* Verify that token TOK from PARSER has cpp_token_as_text
   equal to EXPECTED_TEXT.  The comparison is done with ASSERT_STREQ,
   after casting the text to const char *.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT) \
SELFTEST_BEGIN_STMT \
unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK)); \
ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt); \
SELFTEST_END_STMT
2347
2348	/ Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM,*
2349	and ranges from EXP_START_COL to EXP_FINISH_COL.
2350	Use LOC as the effective location of the selftest. /*
2351
2352	static void
2353	assert_token_loc_eq (const location &loc,
2354	const cpp_token *tok,
2355	const char exp_filename, int* exp_linenum,
2356	int exp_start_col, int exp_finish_col)
2357	{
2358	location_t tok_loc = tok->src_loc;
2359	ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
2360	ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));
2361
2362	/ If location_t values are sufficiently high, then column numbers*
2363	will be unavailable. /*
2364	if (!should_have_column_data_p (loc: tok_loc))
2365	return;
2366
2367	ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
2368	source_range tok_range = get_range_from_loc (set: line_table, loc: tok_loc);
2369	ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
2370	ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
2371	}
2372
/* Use assert_token_loc_eq to verify the TOK->src_loc, using
   SELFTEST_LOCATION as the effective location of the selftest.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM, \
EXP_START_COL, EXP_FINISH_COL) \
assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
(EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
2380
2381	/ Test of lexing a file using libcpp, verifying tokens and their*
2382	location information. /*
2383
2384	static void
2385	test_lexer (const line_table_case &case_)
2386	{
2387	/ Create a tempfile and write some text to it. /
2388	const char *content =
2389	/00000000011111111112222222222333333.3333444444444.455555555556*
2390	12345678901234567890123456789012345.6789012345678.901234567890. /*
2391	("test_name /* c-style comment */\n"
2392	" \"test literal\"\n"
2393	" // test c++-style comment\n"
2394	" 42\n");
2395	temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);
2396
2397	line_table_test ltt (case_);
2398
2399	cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);
2400
2401	const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
2402	ASSERT_NE (fname, NULL);
2403
2404	/ Verify that we get the expected tokens back, with the correct*
2405	location information. /*
2406
2407	location_t loc;
2408	const cpp_token *tok;
2409	tok = cpp_get_token_with_location (parser, &loc);
2410	ASSERT_NE (tok, NULL);
2411	ASSERT_EQ (tok->type, CPP_NAME);
2412	ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name");
2413	ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), `1`, `1`, `9`);
2414
2415	tok = cpp_get_token_with_location (parser, &loc);
2416	ASSERT_NE (tok, NULL);
2417	ASSERT_EQ (tok->type, CPP_STRING);
2418	ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"");
2419	ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), `2`, `35`, `48`);
2420
2421	tok = cpp_get_token_with_location (parser, &loc);
2422	ASSERT_NE (tok, NULL);
2423	ASSERT_EQ (tok->type, CPP_NUMBER);
2424	ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42");
2425	ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), `4`, `4`, `5`);
2426
2427	tok = cpp_get_token_with_location (parser, &loc);
2428	ASSERT_NE (tok, NULL);
2429	ASSERT_EQ (tok->type, CPP_EOF);
2430
2431	cpp_finish (parser, NULL);
2432	cpp_destroy (parser);
2433	}
2434
/* Forward decls.  */

class lexer_test;
class lexer_test_options;

/* A class for specifying options of a lexer_test.
   The "apply" vfunc is called during the lexer_test constructor,
   with the lexer_test under construction as its argument.  */

class lexer_test_options
{
 public:
  virtual void apply (lexer_test &) = 0;
};
2448
2449	/ Wrapper around an cpp_reader , which calls cpp_finish and cpp_destroy
2450	in its dtor.
2451
2452	This is needed by struct lexer_test to ensure that the cleanup of the
2453	cpp_reader happens after* the cleanup of the temp_source_file. /
2454
2455	class cpp_reader_ptr
2456	{
2457	public:
2458	cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {}
2459
2460	~cpp_reader_ptr ()
2461	{
2462	cpp_finish (m_ptr, NULL);
2463	cpp_destroy (m_ptr);
2464	}
2465
2466	operator cpp_reader * () const { return m_ptr; }
2467
2468	private:
2469	cpp_reader *m_ptr;
2470	};
2471
/* A class for writing lexer tests.  The constructor sets up a line
   table, a cpp_reader and a tempfile holding the content to lex;
   get_token fetches tokens from the reader one at a time.  */

class lexer_test
{
public:
lexer_test (const line_table_case &case_, const char *content,
lexer_test_options *options);
~lexer_test ();

const cpp_token *get_token ();

/* The ordering of these fields matters.
   The line_table_test must be first, since the cpp_reader_ptr
   uses it.
   The cpp_reader must be cleaned up *after* the temp_source_file
   since the filenames in input.cc's input cache are owned by the
   cpp_reader; in particular, when ~temp_source_file evicts the
   filename the filenames must still be alive.
   (Members are destroyed in reverse order of declaration, so
   declaring m_parser before m_tempfile gives that order.)  */
line_table_test m_ltt;
cpp_reader_ptr m_parser;
temp_source_file m_tempfile;
string_concat_db m_concats;
/* When true, the destructor asserts that the next token is CPP_EOF.  */
bool m_implicitly_expect_EOF;
};
2496
2497	/ Use an EBCDIC encoding for the execution charset, specifically*
2498	IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2499
2500	This exercises iconv integration within libcpp.
2501	Not every build of iconv supports the given charset,
2502	so we need to flag this error and handle it gracefully. /*
2503
2504	class ebcdic_execution_charset : public lexer_test_options
2505	{
2506	public:
2507	ebcdic_execution_charset () : m_num_iconv_errors (`0`)
2508	{
2509	gcc_assert (s_singleton == NULL);
2510	s_singleton = this;
2511	}
2512	~ebcdic_execution_charset ()
2513	{
2514	gcc_assert (s_singleton == this);
2515	s_singleton = NULL;
2516	}
2517
2518	void apply (lexer_test &test) final override
2519	{
2520	cpp_options *cpp_opts = cpp_get_options (test.m_parser);
2521	cpp_opts->narrow_charset = "IBM1047";
2522
2523	cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2524	callbacks->diagnostic = on_diagnostic;
2525	}
2526
2527	static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2528	enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2529	enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2530	rich_location *richloc ATTRIBUTE_UNUSED,
2531	const char msgid, va_list ap ATTRIBUTE_UNUSED)
2532	ATTRIBUTE_FPTR_PRINTF(`5`,`0`)
2533	{
2534	gcc_assert (s_singleton);
2535	/ Avoid exgettext from picking this up, it is translated in libcpp. /
2536	const char *msg = "conversion from %s to %s not supported by iconv";
2537	#ifdef ENABLE_NLS
2538	msg = dgettext (domainname: "cpplib", msgid: msg);
2539	#endif
2540	/ Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc*
2541	when the local iconv build doesn't support the conversion. /*
2542	if (strcmp (s1: msgid, s2: msg) == `0`)
2543	{
2544	s_singleton->m_num_iconv_errors++;
2545	return true;
2546	}
2547
2548	/ Otherwise, we have an unexpected error. /
2549	abort ();
2550	}
2551
2552	bool iconv_errors_occurred_p () const { return m_num_iconv_errors > `0`; }
2553
2554	private:
2555	static ebcdic_execution_charset *s_singleton;
2556	int m_num_iconv_errors;
2557	};
2558
2559	ebcdic_execution_charset *ebcdic_execution_charset::s_singleton;
2560
2561	/ A lexer_test_options subclass that records a list of diagnostic*
2562	messages emitted by the lexer. /*
2563
2564	class lexer_diagnostic_sink : public lexer_test_options
2565	{
2566	public:
2567	lexer_diagnostic_sink ()
2568	{
2569	gcc_assert (s_singleton == NULL);
2570	s_singleton = this;
2571	}
2572	~lexer_diagnostic_sink ()
2573	{
2574	gcc_assert (s_singleton == this);
2575	s_singleton = NULL;
2576
2577	int i;
2578	char *str;
2579	FOR_EACH_VEC_ELT (m_diagnostics, i, str)
2580	free (ptr: str);
2581	}
2582
2583	void apply (lexer_test &test) final override
2584	{
2585	cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser);
2586	callbacks->diagnostic = on_diagnostic;
2587	}
2588
2589	static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED,
2590	enum cpp_diagnostic_level level ATTRIBUTE_UNUSED,
2591	enum cpp_warning_reason reason ATTRIBUTE_UNUSED,
2592	rich_location *richloc ATTRIBUTE_UNUSED,
2593	const char msgid, va_list ap)
2594	ATTRIBUTE_FPTR_PRINTF(`5`,`0`)
2595	{
2596	char msg = xvasprintf (msgid, ap);
2597	s_singleton->m_diagnostics.safe_push (obj: msg);
2598	return true;
2599	}
2600
2601	auto_vec<char *> m_diagnostics;
2602
2603	private:
2604	static lexer_diagnostic_sink *s_singleton;
2605	};
2606
2607	lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton;
2608
2609	/ Constructor. Override line_table with a new instance based on CASE_,*
2610	and write CONTENT to a tempfile. Create a cpp_reader, and use it to
2611	start parsing the tempfile. /*
2612
2613	lexer_test::lexer_test (const line_table_case &case_, const char *content,
2614	lexer_test_options *options)
2615	: m_ltt (case_),
2616	m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
2617	/ Create a tempfile and write the text to it. /
2618	m_tempfile (SELFTEST_LOCATION, ".c", content),
2619	m_concats (),
2620	m_implicitly_expect_EOF (true)
2621	{
2622	if (options)
2623	options->apply (*this);
2624
2625	cpp_init_iconv (m_parser);
2626
2627	/ Parse the file. /
2628	const char *fname = cpp_read_main_file (m_parser,
2629	m_tempfile.get_filename ());
2630	ASSERT_NE (fname, NULL);
2631	}
2632
2633	/ Destructor. By default, verify that the next token in m_parser is EOF. /
2634
2635	lexer_test::~lexer_test ()
2636	{
2637	location_t loc;
2638	const cpp_token *tok;
2639
2640	if (m_implicitly_expect_EOF)
2641	{
2642	tok = cpp_get_token_with_location (m_parser, &loc);
2643	ASSERT_NE (tok, NULL);
2644	ASSERT_EQ (tok->type, CPP_EOF);
2645	}
2646	}
2647
2648	/ Get the next token from m_parser. /
2649
2650	const cpp_token *
2651	lexer_test::get_token ()
2652	{
2653	location_t loc;
2654	const cpp_token *tok;
2655
2656	tok = cpp_get_token_with_location (m_parser, &loc);
2657	ASSERT_NE (tok, NULL);
2658	return tok;
2659	}
2660
/* Verify that locations within string literals are correctly handled.  */
2662
2663	/ Verify get_source_range_for_substring for token(s) at STRLOC,*
2664	using the string concatenation database for TEST.
2665
2666	Assert that the character at index IDX is on EXPECTED_LINE,
2667	and that it begins at column EXPECTED_START_COL and ends at
2668	EXPECTED_FINISH_COL (unless the locations are beyond
2669	LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their
2670	columns). /*
2671
2672	static void
2673	assert_char_at_range (const location &loc,
2674	lexer_test& test,
2675	location_t strloc, enum cpp_ttype type, int idx,
2676	int expected_line, int expected_start_col,
2677	int expected_finish_col)
2678	{
2679	cpp_reader *pfile = test.m_parser;
2680	string_concat_db *concats = &test.m_concats;
2681
2682	source_range actual_range = source_range ();
2683	const char *err
2684	= get_source_range_for_char (pfile, concats, strloc, type, char_idx: idx,
2685	out_range: &actual_range);
2686	if (should_have_column_data_p (loc: strloc))
2687	ASSERT_EQ_AT (loc, NULL, err);
2688	else
2689	{
2690	ASSERT_STREQ_AT (loc,
2691	"range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2692	err);
2693	return;
2694	}
2695
2696	int actual_start_line = LOCATION_LINE (actual_range.m_start);
2697	ASSERT_EQ_AT (loc, expected_line, actual_start_line);
2698	int actual_finish_line = LOCATION_LINE (actual_range.m_finish);
2699	ASSERT_EQ_AT (loc, expected_line, actual_finish_line);
2700
2701	if (should_have_column_data_p (loc: actual_range.m_start))
2702	{
2703	int actual_start_col = LOCATION_COLUMN (actual_range.m_start);
2704	ASSERT_EQ_AT (loc, expected_start_col, actual_start_col);
2705	}
2706	if (should_have_column_data_p (loc: actual_range.m_finish))
2707	{
2708	int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish);
2709	ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col);
2710	}
2711	}
2712
/* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
   the effective location of any errors.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
EXPECTED_START_COL, EXPECTED_FINISH_COL) \
assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
(IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
(EXPECTED_FINISH_COL))
2721
2722	/ Verify get_num_source_ranges_for_substring for token(s) at STRLOC,*
2723	using the string concatenation database for TEST.
2724
2725	Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. /*
2726
2727	static void
2728	assert_num_substring_ranges (const location &loc,
2729	lexer_test& test,
2730	location_t strloc,
2731	enum cpp_ttype type,
2732	int expected_num_ranges)
2733	{
2734	cpp_reader *pfile = test.m_parser;
2735	string_concat_db *concats = &test.m_concats;
2736
2737	int actual_num_ranges = -`1`;
2738	const char *err
2739	= get_num_source_ranges_for_substring (pfile, concats, strloc, type,
2740	out: &actual_num_ranges);
2741	if (should_have_column_data_p (loc: strloc))
2742	ASSERT_EQ_AT (loc, NULL, err);
2743	else
2744	{
2745	ASSERT_STREQ_AT (loc,
2746	"range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2747	err);
2748	return;
2749	}
2750	ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges);
2751	}
2752
/* Macro for calling assert_num_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
EXPECTED_NUM_RANGES) \
assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
(TYPE), (EXPECTED_NUM_RANGES))
2760
2761
2762	/ Verify that get_num_source_ranges_for_substring for token(s) at STRLOC*
2763	returns an error (using the string concatenation database for TEST). /*
2764
2765	static void
2766	assert_has_no_substring_ranges (const location &loc,
2767	lexer_test& test,
2768	location_t strloc,
2769	enum cpp_ttype type,
2770	const char *expected_err)
2771	{
2772	cpp_reader *pfile = test.m_parser;
2773	string_concat_db *concats = &test.m_concats;
2774	cpp_substring_ranges ranges;
2775	const char *actual_err
2776	= get_substring_ranges_for_loc (pfile, concats, strloc,
2777	type, ranges);
2778	if (should_have_column_data_p (loc: strloc))
2779	ASSERT_STREQ_AT (loc, expected_err, actual_err);
2780	else
2781	ASSERT_STREQ_AT (loc,
2782	"range starts after LINE_MAP_MAX_LOCATION_WITH_COLS",
2783	actual_err);
2784	}
2785
/* Convenience wrapper for assert_has_no_substring_ranges that supplies
   SELFTEST_LOCATION as the location against which any failure is
   reported.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
  assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
                                  (STRLOC), (TYPE), (ERR))
2789
2790	/ Lex a simple string literal. Verify the substring location data, before*
2791	and after running cpp_interpret_string on it. /*
2792
2793	static void
2794	test_lexer_string_locations_simple (const line_table_case &case_)
2795	{
2796	/ Digits 0-9 (with 0 at column 10), the simple way.*
2797	....................000000000.11111111112.2222222223333333333
2798	....................123456789.01234567890.1234567890123456789
2799	We add a trailing comment to ensure that we correctly locate
2800	the end of the string literal token. /*
2801	const char content = " \"0123456789\" / not a string */\n";
2802	lexer_test test (case_, content, NULL);
2803
2804	/ Verify that we get the expected token back, with the correct*
2805	location information. /*
2806	const cpp_token *tok = test.get_token ();
2807	ASSERT_EQ (tok->type, CPP_STRING);
2808	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2809	ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), `1`, `9`, `20`);
2810
2811	/ At this point in lexing, the quote characters are treated as part of*
2812	the string (they are stripped off by cpp_interpret_string). /*
2813
2814	ASSERT_EQ (tok->val.str.len, `12`);
2815
2816	/ Verify that cpp_interpret_string works. /
2817	cpp_string dst_string;
2818	const enum cpp_ttype type = CPP_STRING;
2819	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
2820	&dst_string, type);
2821	ASSERT_TRUE (result);
2822	ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
2823	free (ptr: const_cast <unsigned char *> (dst_string.text));
2824
2825	/ Verify ranges of individual characters. This no longer includes the*
2826	opening quote, but does include the closing quote. /*
2827	for (int i = `0`; i <= `10`; i++)
2828	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`,
2829	`10` + i, `10` + i);
2830
2831	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, `11`);
2832	}
2833
2834	/ As test_lexer_string_locations_simple, but use an EBCDIC execution*
2835	encoding. /*
2836
2837	static void
2838	test_lexer_string_locations_ebcdic (const line_table_case &case_)
2839	{
2840	/ EBCDIC support requires iconv. /
2841	if (!HAVE_ICONV)
2842	return;
2843
2844	/ Digits 0-9 (with 0 at column 10), the simple way.*
2845	....................000000000.11111111112.2222222223333333333
2846	....................123456789.01234567890.1234567890123456789
2847	We add a trailing comment to ensure that we correctly locate
2848	the end of the string literal token. /*
2849	const char content = " \"0123456789\" / not a string */\n";
2850	ebcdic_execution_charset use_ebcdic;
2851	lexer_test test (case_, content, &use_ebcdic);
2852
2853	/ Verify that we get the expected token back, with the correct*
2854	location information. /*
2855	const cpp_token *tok = test.get_token ();
2856	ASSERT_EQ (tok->type, CPP_STRING);
2857	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
2858	ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), `1`, `9`, `20`);
2859
2860	/ At this point in lexing, the quote characters are treated as part of*
2861	the string (they are stripped off by cpp_interpret_string). /*
2862
2863	ASSERT_EQ (tok->val.str.len, `12`);
2864
2865	/ The remainder of the test requires an iconv implementation that*
2866	can convert from UTF-8 to the EBCDIC encoding requested above. /*
2867	if (use_ebcdic.iconv_errors_occurred_p ())
2868	return;
2869
2870	/ Verify that cpp_interpret_string works. /
2871	cpp_string dst_string;
2872	const enum cpp_ttype type = CPP_STRING;
2873	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
2874	&dst_string, type);
2875	ASSERT_TRUE (result);
2876	/ We should now have EBCDIC-encoded text, specifically*
2877	IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047").
2878	The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. /*
2879	ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9",
2880	(const char *)dst_string.text);
2881	free (ptr: const_cast <unsigned char *> (dst_string.text));
2882
2883	/ Verify that we don't attempt to record substring location information*
2884	for such cases. /*
2885	ASSERT_HAS_NO_SUBSTRING_RANGES
2886	(test, tok->src_loc, type,
2887	"execution character set != source character set");
2888	}
2889
2890	/ Lex a string literal containing a hex-escaped character.*
2891	Verify the substring location data, before and after running
2892	cpp_interpret_string on it. /*
2893
2894	static void
2895	test_lexer_string_locations_hex (const line_table_case &case_)
2896	{
2897	/ Digits 0-9, expressing digit 5 in ASCII as "\x35"*
2898	and with a space in place of digit 6, to terminate the escaped
2899	hex code.
2900	....................000000000.111111.11112222.
2901	....................123456789.012345.67890123. /*
2902	const char *content = " \"01234\\x35 789\"\n";
2903	lexer_test test (case_, content, NULL);
2904
2905	/ Verify that we get the expected token back, with the correct*
2906	location information. /*
2907	const cpp_token *tok = test.get_token ();
2908	ASSERT_EQ (tok->type, CPP_STRING);
2909	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"");
2910	ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), `1`, `9`, `23`);
2911
2912	/ At this point in lexing, the quote characters are treated as part of*
2913	the string (they are stripped off by cpp_interpret_string). /*
2914	ASSERT_EQ (tok->val.str.len, `15`);
2915
2916	/ Verify that cpp_interpret_string works. /
2917	cpp_string dst_string;
2918	const enum cpp_ttype type = CPP_STRING;
2919	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
2920	&dst_string, type);
2921	ASSERT_TRUE (result);
2922	ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2923	free (ptr: const_cast <unsigned char *> (dst_string.text));
2924
2925	/ Verify ranges of individual characters. This no longer includes the*
2926	opening quote, but does include the closing quote. /*
2927	for (int i = `0`; i <= `4`; i++)
2928	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `10` + i, `10` + i);
2929	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, `5`, `1`, `15`, `18`);
2930	for (int i = `6`; i <= `10`; i++)
2931	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `13` + i, `13` + i);
2932
2933	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, `11`);
2934	}
2935
2936	/ Lex a string literal containing an octal-escaped character.*
2937	Verify the substring location data after running cpp_interpret_string
2938	on it. /*
2939
2940	static void
2941	test_lexer_string_locations_oct (const line_table_case &case_)
2942	{
2943	/ Digits 0-9, expressing digit 5 in ASCII as "\065"*
2944	and with a space in place of digit 6, to terminate the escaped
2945	octal code.
2946	....................000000000.111111.11112222.2222223333333333444
2947	....................123456789.012345.67890123.4567890123456789012 /*
2948	const char content = " \"01234\\065 789\" / not a string */\n";
2949	lexer_test test (case_, content, NULL);
2950
2951	/ Verify that we get the expected token back, with the correct*
2952	location information. /*
2953	const cpp_token *tok = test.get_token ();
2954	ASSERT_EQ (tok->type, CPP_STRING);
2955	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"");
2956
2957	/ Verify that cpp_interpret_string works. /
2958	cpp_string dst_string;
2959	const enum cpp_ttype type = CPP_STRING;
2960	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
2961	&dst_string, type);
2962	ASSERT_TRUE (result);
2963	ASSERT_STREQ ("012345 789", (const char *)dst_string.text);
2964	free (ptr: const_cast <unsigned char *> (dst_string.text));
2965
2966	/ Verify ranges of individual characters. This no longer includes the*
2967	opening quote, but does include the closing quote. /*
2968	for (int i = `0`; i < `5`; i++)
2969	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `10` + i, `10` + i);
2970	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, `5`, `1`, `15`, `18`);
2971	for (int i = `6`; i <= `10`; i++)
2972	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `13` + i, `13` + i);
2973
2974	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, `11`);
2975	}
2976
2977	/ Test of string literal containing letter escapes. /
2978
2979	static void
2980	test_lexer_string_locations_letter_escape_1 (const line_table_case &case_)
2981	{
2982	/ The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar.*
2983	.....................000000000.1.11111.1.1.11222.22222223333333
2984	.....................123456789.0.12345.6.7.89012.34567890123456. /*
2985	const char content = (" \"\\tfoo\\\\\\nbar\" / non-str */\n");
2986	lexer_test test (case_, content, NULL);
2987
2988	/ Verify that we get the expected tokens back. /
2989	const cpp_token *tok = test.get_token ();
2990	ASSERT_EQ (tok->type, CPP_STRING);
2991	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"");
2992
2993	/ Verify ranges of individual characters. /
2994	/ "\t". /
2995	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
2996	`0`, `1`, `10`, `11`);
2997	/ "foo". /
2998	for (int i = `1`; i <= `3`; i++)
2999	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3000	i, `1`, `11` + i, `11` + i);
3001	/ "\\" and "\n". /
3002	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3003	`4`, `1`, `15`, `16`);
3004	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3005	`5`, `1`, `17`, `18`);
3006
3007	/ "bar" and closing quote for nul-terminator. /
3008	for (int i = `6`; i <= `9`; i++)
3009	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3010	i, `1`, `13` + i, `13` + i);
3011
3012	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, `10`);
3013	}
3014
3015	/ Another test of a string literal containing a letter escape.*
3016	Based on string seen in
3017	printf ("%-%\n");
3018	in gcc.dg/format/c90-printf-1.c. /*
3019
3020	static void
3021	test_lexer_string_locations_letter_escape_2 (const line_table_case &case_)
3022	{
3023	/ .....................000000000.1111.11.1111.22222222223.*
3024	.....................123456789.0123.45.6789.01234567890. /*
3025	const char content = (" \"%-%\\n\" / non-str */\n");
3026	lexer_test test (case_, content, NULL);
3027
3028	/ Verify that we get the expected tokens back. /
3029	const cpp_token *tok = test.get_token ();
3030	ASSERT_EQ (tok->type, CPP_STRING);
3031	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"");
3032
3033	/ Verify ranges of individual characters. /
3034	/ "%-%". /
3035	for (int i = `0`; i < `3`; i++)
3036	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3037	i, `1`, `10` + i, `10` + i);
3038	/ "\n". /
3039	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3040	`3`, `1`, `13`, `14`);
3041
3042	/ Closing quote for nul-terminator. /
3043	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3044	`4`, `1`, `15`, `15`);
3045
3046	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, `5`);
3047	}
3048
3049	/ Lex a string literal containing UCN 4 characters.*
3050	Verify the substring location data after running cpp_interpret_string
3051	on it. /*
3052
3053	static void
3054	test_lexer_string_locations_ucn4 (const line_table_case &case_)
3055	{
3056	/ Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed*
3057	as UCN 4.
3058	....................000000000.111111.111122.222222223.33333333344444
3059	....................123456789.012345.678901.234567890.12345678901234 /*
3060	const char content = " \"01234\\u2174\\u2175789\" / non-str */\n";
3061	lexer_test test (case_, content, NULL);
3062
3063	/ Verify that we get the expected token back, with the correct*
3064	location information. /*
3065	const cpp_token *tok = test.get_token ();
3066	ASSERT_EQ (tok->type, CPP_STRING);
3067	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"");
3068
3069	/ Verify that cpp_interpret_string works.*
3070	The string should be encoded in the execution character
3071	set. Assuming that is UTF-8, we should have the following:
3072	----------- ---- ----- ------- ----------------
3073	Byte offset Byte Octal Unicode Source Column(s)
3074	----------- ---- ----- ------- ----------------
3075	0 0x30 '0' 10
3076	1 0x31 '1' 11
3077	2 0x32 '2' 12
3078	3 0x33 '3' 13
3079	4 0x34 '4' 14
3080	5 0xE2 \342 U+2174 15-20
3081	6 0x85 \205 (cont) 15-20
3082	7 0xB4 \264 (cont) 15-20
3083	8 0xE2 \342 U+2175 21-26
3084	9 0x85 \205 (cont) 21-26
3085	10 0xB5 \265 (cont) 21-26
3086	11 0x37 '7' 27
3087	12 0x38 '8' 28
3088	13 0x39 '9' 29
3089	14 0x00 30 (closing quote)
3090	----------- ---- ----- ------- ---------------. /*
3091
3092	cpp_string dst_string;
3093	const enum cpp_ttype type = CPP_STRING;
3094	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3095	&dst_string, type);
3096	ASSERT_TRUE (result);
3097	ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3098	(const char *)dst_string.text);
3099	free (ptr: const_cast <unsigned char *> (dst_string.text));
3100
3101	/ Verify ranges of individual characters. This no longer includes the*
3102	opening quote, but does include the closing quote.
3103	'01234'. /*
3104	for (int i = `0`; i <= `4`; i++)
3105	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `10` + i, `10` + i);
3106	/ U+2174. /
3107	for (int i = `5`; i <= `7`; i++)
3108	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `15`, `20`);
3109	/ U+2175. /
3110	for (int i = `8`; i <= `10`; i++)
3111	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `21`, `26`);
3112	/ '789' and nul terminator /
3113	for (int i = `11`; i <= `14`; i++)
3114	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `16` + i, `16` + i);
3115
3116	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, `15`);
3117	}
3118
3119	/ Lex a string literal containing UCN 8 characters.*
3120	Verify the substring location data after running cpp_interpret_string
3121	on it. /*
3122
3123	static void
3124	test_lexer_string_locations_ucn8 (const line_table_case &case_)
3125	{
3126	/ Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.*
3127	....................000000000.111111.1111222222.2222333333333.344444
3128	....................123456789.012345.6789012345.6789012345678.901234 /*
3129	const char content = " \"01234\\U00002174\\U00002175789\" / */\n";
3130	lexer_test test (case_, content, NULL);
3131
3132	/ Verify that we get the expected token back, with the correct*
3133	location information. /*
3134	const cpp_token *tok = test.get_token ();
3135	ASSERT_EQ (tok->type, CPP_STRING);
3136	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok,
3137	"\"01234\\U00002174\\U00002175789\"");
3138
3139	/ Verify that cpp_interpret_string works.*
3140	The UTF-8 encoding of the string is identical to that from
3141	the ucn4 testcase above; the only difference is the column
3142	locations. /*
3143	cpp_string dst_string;
3144	const enum cpp_ttype type = CPP_STRING;
3145	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3146	&dst_string, type);
3147	ASSERT_TRUE (result);
3148	ASSERT_STREQ ("01234\342\205\264\342\205\265789",
3149	(const char *)dst_string.text);
3150	free (ptr: const_cast <unsigned char *> (dst_string.text));
3151
3152	/ Verify ranges of individual characters. This no longer includes the*
3153	opening quote, but does include the closing quote.
3154	'01234'. /*
3155	for (int i = `0`; i <= `4`; i++)
3156	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `10` + i, `10` + i);
3157	/ U+2174. /
3158	for (int i = `5`; i <= `7`; i++)
3159	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `15`, `24`);
3160	/ U+2175. /
3161	for (int i = `8`; i <= `10`; i++)
3162	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `25`, `34`);
3163	/ '789' at columns 35-37 /
3164	for (int i = `11`; i <= `13`; i++)
3165	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `24` + i, `24` + i);
3166	/ Closing quote/nul-terminator at column 38. /
3167	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, `14`, `1`, `38`, `38`);
3168
3169	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, `15`);
3170	}
3171
/* Fetch the big-endian 32-bit value stored at PTR_BE_VALUE and return it
   in host byte order.  The value is assembled one byte at a time, most
   significant byte first, so the result does not depend on the
   endianness of the host.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  const unsigned char *buf = (const unsigned char *)ptr_be_value;
  return (((uint32_t) buf[0] << 24)
          | ((uint32_t) buf[1] << 16)
          | ((uint32_t) buf[2] << 8)
          | (uint32_t) buf[3]);
}
3183
3184	/ Lex a wide string literal and verify that attempts to read substring*
3185	location data from it fail gracefully. /*
3186
3187	static void
3188	test_lexer_string_locations_wide_string (const line_table_case &case_)
3189	{
3190	/ Digits 0-9.*
3191	....................000000000.11111111112.22222222233333
3192	....................123456789.01234567890.12345678901234 /*
3193	const char content = " L\"0123456789\" / non-str */\n";
3194	lexer_test test (case_, content, NULL);
3195
3196	/ Verify that we get the expected token back, with the correct*
3197	location information. /*
3198	const cpp_token *tok = test.get_token ();
3199	ASSERT_EQ (tok->type, CPP_WSTRING);
3200	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"");
3201
3202	/ Verify that cpp_interpret_string works, using CPP_WSTRING. /
3203	cpp_string dst_string;
3204	const enum cpp_ttype type = CPP_WSTRING;
3205	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3206	&dst_string, type);
3207	ASSERT_TRUE (result);
3208	/ The cpp_reader defaults to big-endian with*
3209	CHAR_BIT sizeof (int) for the wchar_precision, so dst_string should*
3210	now be encoded as UTF-32BE. /*
3211	const uint32_t be32_chars = (const* uint32_t *)dst_string.text;
3212	ASSERT_EQ (`'0'`, uint32_from_big_endian (&be32_chars[`0`]));
3213	ASSERT_EQ (`'5'`, uint32_from_big_endian (&be32_chars[`5`]));
3214	ASSERT_EQ (`'9'`, uint32_from_big_endian (&be32_chars[`9`]));
3215	ASSERT_EQ (`0`, uint32_from_big_endian (&be32_chars[`10`]));
3216	free (ptr: const_cast <unsigned char *> (dst_string.text));
3217
3218	/ We don't yet support generating substring location information*
3219	for L"" strings. /*
3220	ASSERT_HAS_NO_SUBSTRING_RANGES
3221	(test, tok->src_loc, type,
3222	"execution character set != source character set");
3223	}
3224
/* Fetch the big-endian 16-bit value stored at PTR_BE_VALUE and return it
   in host byte order.  The value is assembled one byte at a time, most
   significant byte first, so the result does not depend on the
   endianness of the host.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  const unsigned char *buf = (const unsigned char *)ptr_be_value;
  return ((uint16_t) buf[0] << 8) | (uint16_t) buf[1];
}
3233
3234	/ Lex a u"" string literal and verify that attempts to read substring*
3235	location data from it fail gracefully. /*
3236
3237	static void
3238	test_lexer_string_locations_string16 (const line_table_case &case_)
3239	{
3240	/ Digits 0-9.*
3241	....................000000000.11111111112.22222222233333
3242	....................123456789.01234567890.12345678901234 /*
3243	const char content = " u\"0123456789\" / non-str */\n";
3244	lexer_test test (case_, content, NULL);
3245
3246	/ Verify that we get the expected token back, with the correct*
3247	location information. /*
3248	const cpp_token *tok = test.get_token ();
3249	ASSERT_EQ (tok->type, CPP_STRING16);
3250	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"");
3251
3252	/ Verify that cpp_interpret_string works, using CPP_STRING16. /
3253	cpp_string dst_string;
3254	const enum cpp_ttype type = CPP_STRING16;
3255	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3256	&dst_string, type);
3257	ASSERT_TRUE (result);
3258
3259	/ The cpp_reader defaults to big-endian, so dst_string should*
3260	now be encoded as UTF-16BE. /*
3261	const uint16_t be16_chars = (const* uint16_t *)dst_string.text;
3262	ASSERT_EQ (`'0'`, uint16_from_big_endian (&be16_chars[`0`]));
3263	ASSERT_EQ (`'5'`, uint16_from_big_endian (&be16_chars[`5`]));
3264	ASSERT_EQ (`'9'`, uint16_from_big_endian (&be16_chars[`9`]));
3265	ASSERT_EQ (`0`, uint16_from_big_endian (&be16_chars[`10`]));
3266	free (ptr: const_cast <unsigned char *> (dst_string.text));
3267
3268	/ We don't yet support generating substring location information*
3269	for L"" strings. /*
3270	ASSERT_HAS_NO_SUBSTRING_RANGES
3271	(test, tok->src_loc, type,
3272	"execution character set != source character set");
3273	}
3274
3275	/ Lex a U"" string literal and verify that attempts to read substring*
3276	location data from it fail gracefully. /*
3277
3278	static void
3279	test_lexer_string_locations_string32 (const line_table_case &case_)
3280	{
3281	/ Digits 0-9.*
3282	....................000000000.11111111112.22222222233333
3283	....................123456789.01234567890.12345678901234 /*
3284	const char content = " U\"0123456789\" / non-str */\n";
3285	lexer_test test (case_, content, NULL);
3286
3287	/ Verify that we get the expected token back, with the correct*
3288	location information. /*
3289	const cpp_token *tok = test.get_token ();
3290	ASSERT_EQ (tok->type, CPP_STRING32);
3291	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"");
3292
3293	/ Verify that cpp_interpret_string works, using CPP_STRING32. /
3294	cpp_string dst_string;
3295	const enum cpp_ttype type = CPP_STRING32;
3296	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3297	&dst_string, type);
3298	ASSERT_TRUE (result);
3299
3300	/ The cpp_reader defaults to big-endian, so dst_string should*
3301	now be encoded as UTF-32BE. /*
3302	const uint32_t be32_chars = (const* uint32_t *)dst_string.text;
3303	ASSERT_EQ (`'0'`, uint32_from_big_endian (&be32_chars[`0`]));
3304	ASSERT_EQ (`'5'`, uint32_from_big_endian (&be32_chars[`5`]));
3305	ASSERT_EQ (`'9'`, uint32_from_big_endian (&be32_chars[`9`]));
3306	ASSERT_EQ (`0`, uint32_from_big_endian (&be32_chars[`10`]));
3307	free (ptr: const_cast <unsigned char *> (dst_string.text));
3308
3309	/ We don't yet support generating substring location information*
3310	for L"" strings. /*
3311	ASSERT_HAS_NO_SUBSTRING_RANGES
3312	(test, tok->src_loc, type,
3313	"execution character set != source character set");
3314	}
3315
3316	/ Lex a u8-string literal.*
3317	Verify the substring location data after running cpp_interpret_string
3318	on it. /*
3319
3320	static void
3321	test_lexer_string_locations_u8 (const line_table_case &case_)
3322	{
3323	/ Digits 0-9.*
3324	....................000000000.11111111112.22222222233333
3325	....................123456789.01234567890.12345678901234 /*
3326	const char content = " u8\"0123456789\" / non-str */\n";
3327	lexer_test test (case_, content, NULL);
3328
3329	/ Verify that we get the expected token back, with the correct*
3330	location information. /*
3331	const cpp_token *tok = test.get_token ();
3332	ASSERT_EQ (tok->type, CPP_UTF8STRING);
3333	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"");
3334
3335	/ Verify that cpp_interpret_string works. /
3336	cpp_string dst_string;
3337	const enum cpp_ttype type = CPP_STRING;
3338	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3339	&dst_string, type);
3340	ASSERT_TRUE (result);
3341	ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3342	free (ptr: const_cast <unsigned char *> (dst_string.text));
3343
3344	/ Verify ranges of individual characters. This no longer includes the*
3345	opening quote, but does include the closing quote. /*
3346	for (int i = `0`; i <= `10`; i++)
3347	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `10` + i, `10` + i);
3348	}
3349
3350	/ Lex a string literal containing UTF-8 source characters.*
3351	Verify the substring location data after running cpp_interpret_string
3352	on it. /*
3353
3354	static void
3355	test_lexer_string_locations_utf8_source (const line_table_case &case_)
3356	{
3357	/ This string literal is written out to the source file as UTF-8,*
3358	and is of the form "before mojibake after", where "mojibake"
3359	is written as the following four unicode code points:
3360	U+6587 CJK UNIFIED IDEOGRAPH-6587
3361	U+5B57 CJK UNIFIED IDEOGRAPH-5B57
3362	U+5316 CJK UNIFIED IDEOGRAPH-5316
3363	U+3051 HIRAGANA LETTER KE.
3364	Each of these is 3 bytes wide when encoded in UTF-8, whereas the
3365	"before" and "after" are 1 byte per unicode character.
3366
3367	The numbering shown are "columns", which are byte* numbers within*
3368	the line, rather than unicode character numbers.
3369
3370	.................... 000000000.1111111.
3371	.................... 123456789.0123456. /*
3372	const char *content = (" \"before "
3373	/ U+6587 CJK UNIFIED IDEOGRAPH-6587*
3374	UTF-8: 0xE6 0x96 0x87
3375	C octal escaped UTF-8: \346\226\207
3376	"column" numbers: 17-19. /*
3377	"\346\226\207"
3378
3379	/ U+5B57 CJK UNIFIED IDEOGRAPH-5B57*
3380	UTF-8: 0xE5 0xAD 0x97
3381	C octal escaped UTF-8: \345\255\227
3382	"column" numbers: 20-22. /*
3383	"\345\255\227"
3384
3385	/ U+5316 CJK UNIFIED IDEOGRAPH-5316*
3386	UTF-8: 0xE5 0x8C 0x96
3387	C octal escaped UTF-8: \345\214\226
3388	"column" numbers: 23-25. /*
3389	"\345\214\226"
3390
3391	/ U+3051 HIRAGANA LETTER KE*
3392	UTF-8: 0xE3 0x81 0x91
3393	C octal escaped UTF-8: \343\201\221
3394	"column" numbers: 26-28. /*
3395	"\343\201\221"
3396
3397	/ column numbers 29 onwards*
3398	2333333.33334444444444
3399	9012345.67890123456789. /*
3400	" after\" /* non-str */\n");
3401	lexer_test test (case_, content, NULL);
3402
3403	/ Verify that we get the expected token back, with the correct*
3404	location information. /*
3405	const cpp_token *tok = test.get_token ();
3406	ASSERT_EQ (tok->type, CPP_STRING);
3407	ASSERT_TOKEN_AS_TEXT_EQ
3408	(test.m_parser, tok,
3409	"\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"");
3410
3411	/ Verify that cpp_interpret_string works. /
3412	cpp_string dst_string;
3413	const enum cpp_ttype type = CPP_STRING;
3414	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3415	&dst_string, type);
3416	ASSERT_TRUE (result);
3417	ASSERT_STREQ
3418	("before \346\226\207\345\255\227\345\214\226\343\201\221 after",
3419	(const char *)dst_string.text);
3420	free (ptr: const_cast <unsigned char *> (dst_string.text));
3421
3422	/ Verify ranges of individual characters. This no longer includes the*
3423	opening quote, but does include the closing quote.
3424	Assuming that both source and execution encodings are UTF-8, we have
3425	a run of 25 octets in each, plus the NUL terminator. /*
3426	for (int i = `0`; i < `25`; i++)
3427	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, `1`, `10` + i, `10` + i);
3428	/ NUL-terminator should use the closing quote at column 35. /
3429	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, `25`, `1`, `35`, `35`);
3430
3431	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, `26`);
3432	}
3433
3434	/ Test of string literal concatenation. /
3435
3436	static void
3437	test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
3438	{
3439	/ Digits 0-9.*
3440	.....................000000000.111111.11112222222222
3441	.....................123456789.012345.67890123456789. /*
3442	const char content = (" \"01234\" / non-str */\n"
3443	" \"56789\" /* non-str */\n");
3444	lexer_test test (case_, content, NULL);
3445
3446	location_t input_locs[`2`];
3447
3448	/ Verify that we get the expected tokens back. /
3449	auto_vec <cpp_string> input_strings;
3450	const cpp_token *tok_a = test.get_token ();
3451	ASSERT_EQ (tok_a->type, CPP_STRING);
3452	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"");
3453	input_strings.safe_push (obj: tok_a->val.str);
3454	input_locs[`0`] = tok_a->src_loc;
3455
3456	const cpp_token *tok_b = test.get_token ();
3457	ASSERT_EQ (tok_b->type, CPP_STRING);
3458	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"");
3459	input_strings.safe_push (obj: tok_b->val.str);
3460	input_locs[`1`] = tok_b->src_loc;
3461
3462	/ Verify that cpp_interpret_string works. /
3463	cpp_string dst_string;
3464	const enum cpp_ttype type = CPP_STRING;
3465	bool result = cpp_interpret_string (test.m_parser,
3466	input_strings.address (), `2`,
3467	&dst_string, type);
3468	ASSERT_TRUE (result);
3469	ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3470	free (ptr: const_cast <unsigned char *> (dst_string.text));
3471
3472	/ Simulate c-lex.cc's lex_string in order to record concatenation. /
3473	test.m_concats.record_string_concatenation (num: `2`, locs: input_locs);
3474
3475	location_t initial_loc = input_locs[`0`];
3476
3477	/ "01234" on line 1. /
3478	for (int i = `0`; i <= `4`; i++)
3479	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, `1`, `10` + i, `10` + i);
3480	/ "56789" in line 2, plus its closing quote for the nul terminator. /
3481	for (int i = `5`; i <= `10`; i++)
3482	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, `2`, `5` + i, `5` + i);
3483
3484	ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, `11`);
3485	}
3486
3487	/ Another test of string literal concatenation. /
3488
3489	static void
3490	test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
3491	{
3492	/ Digits 0-9.*
3493	.....................000000000.111.11111112222222
3494	.....................123456789.012.34567890123456. /*
3495	const char content = (" \"01\" / non-str */\n"
3496	" \"23\" /* non-str */\n"
3497	" \"45\" /* non-str */\n"
3498	" \"67\" /* non-str */\n"
3499	" \"89\" /* non-str */\n");
3500	lexer_test test (case_, content, NULL);
3501
3502	auto_vec <cpp_string> input_strings;
3503	location_t input_locs[`5`];
3504
3505	/ Verify that we get the expected tokens back. /
3506	for (int i = `0`; i < `5`; i++)
3507	{
3508	const cpp_token *tok = test.get_token ();
3509	ASSERT_EQ (tok->type, CPP_STRING);
3510	input_strings.safe_push (obj: tok->val.str);
3511	input_locs[i] = tok->src_loc;
3512	}
3513
3514	/ Verify that cpp_interpret_string works. /
3515	cpp_string dst_string;
3516	const enum cpp_ttype type = CPP_STRING;
3517	bool result = cpp_interpret_string (test.m_parser,
3518	input_strings.address (), `5`,
3519	&dst_string, type);
3520	ASSERT_TRUE (result);
3521	ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3522	free (ptr: const_cast <unsigned char *> (dst_string.text));
3523
3524	/ Simulate c-lex.cc's lex_string in order to record concatenation. /
3525	test.m_concats.record_string_concatenation (num: `5`, locs: input_locs);
3526
3527	location_t initial_loc = input_locs[`0`];
3528
3529	/ Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can*
3530	detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
3531	and expect get_source_range_for_substring to fail.
3532	However, for a string concatenation test, we can have a case
3533	where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
3534	but subsequent strings can be after it.
3535	Attempting to detect this within assert_char_at_range
3536	would overcomplicate the logic for the common test cases, so
3537	we detect it here. /*
3538	if (should_have_column_data_p (loc: input_locs[`0`])
3539	&& !should_have_column_data_p (loc: input_locs[`4`]))
3540	{
3541	/ Verify that get_source_range_for_substring gracefully rejects*
3542	this case. /*
3543	source_range actual_range;
3544	const char *err
3545	= get_source_range_for_char (pfile: test.m_parser, concats: &test.m_concats,
3546	strloc: initial_loc, type, char_idx: `0`, out_range: &actual_range);
3547	ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS", err);
3548	return;
3549	}
3550
3551	for (int i = `0`; i < `5`; i++)
3552	for (int j = `0`; j < `2`; j++)
3553	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * `2`) + j,
3554	i + `1`, `10` + j, `10` + j);
3555
3556	/ NUL-terminator should use the final closing quote at line 5 column 12. /
3557	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, `10`, `5`, `12`, `12`);
3558
3559	ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, `11`);
3560	}
3561
3562	/ Another test of string literal concatenation, this time combined with*
3563	various kinds of escaped characters. /*
3564
3565	static void
3566	test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
3567	{
3568	/ Digits 0-9, expressing digit 5 in ASCII as hex "\x35"*
3569	digit 6 in ASCII as octal "\066", concatenating multiple strings. /*
3570	const char *content
3571	/ .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555*
3572	.123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012. /*
3573	= (" \"01234\" \"\\x35\" \"\\066\" \"789\" /* non-str */\n");
3574	lexer_test test (case_, content, NULL);
3575
3576	auto_vec <cpp_string> input_strings;
3577	location_t input_locs[`4`];
3578
3579	/ Verify that we get the expected tokens back. /
3580	for (int i = `0`; i < `4`; i++)
3581	{
3582	const cpp_token *tok = test.get_token ();
3583	ASSERT_EQ (tok->type, CPP_STRING);
3584	input_strings.safe_push (obj: tok->val.str);
3585	input_locs[i] = tok->src_loc;
3586	}
3587
3588	/ Verify that cpp_interpret_string works. /
3589	cpp_string dst_string;
3590	const enum cpp_ttype type = CPP_STRING;
3591	bool result = cpp_interpret_string (test.m_parser,
3592	input_strings.address (), `4`,
3593	&dst_string, type);
3594	ASSERT_TRUE (result);
3595	ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3596	free (ptr: const_cast <unsigned char *> (dst_string.text));
3597
3598	/ Simulate c-lex.cc's lex_string in order to record concatenation. /
3599	test.m_concats.record_string_concatenation (num: `4`, locs: input_locs);
3600
3601	location_t initial_loc = input_locs[`0`];
3602
3603	for (int i = `0`; i <= `4`; i++)
3604	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, `1`, `10` + i, `10` + i);
3605	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, `5`, `1`, `19`, `22`);
3606	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, `6`, `1`, `27`, `30`);
3607	for (int i = `7`; i <= `9`; i++)
3608	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, `1`, `28` + i, `28` + i);
3609
3610	/ NUL-terminator should use the location of the final closing quote. /
3611	ASSERT_CHAR_AT_RANGE (test, initial_loc, type, `10`, `1`, `38`, `38`);
3612
3613	ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, `11`);
3614	}
3615
3616	/ Test of string literal in a macro. /
3617
3618	static void
3619	test_lexer_string_locations_macro (const line_table_case &case_)
3620	{
3621	/ Digits 0-9.*
3622	.....................0000000001111111111.22222222223.
3623	.....................1234567890123456789.01234567890. /*
3624	const char content = ("#define MACRO \"0123456789\" / non-str */\n"
3625	" MACRO");
3626	lexer_test test (case_, content, NULL);
3627
3628	/ Verify that we get the expected tokens back. /
3629	const cpp_token *tok = test.get_token ();
3630	ASSERT_EQ (tok->type, CPP_PADDING);
3631
3632	tok = test.get_token ();
3633	ASSERT_EQ (tok->type, CPP_STRING);
3634	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"");
3635
3636	/ Verify ranges of individual characters. We ought to*
3637	see columns within the macro definition. /*
3638	for (int i = `0`; i <= `10`; i++)
3639	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3640	i, `1`, `20` + i, `20` + i);
3641
3642	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, `11`);
3643
3644	tok = test.get_token ();
3645	ASSERT_EQ (tok->type, CPP_PADDING);
3646	}
3647
3648	/ Test of stringification of a macro argument. /
3649
3650	static void
3651	test_lexer_string_locations_stringified_macro_argument
3652	(const line_table_case &case_)
3653	{
3654	/ .....................000000000111111111122222222223.*
3655	.....................123456789012345678901234567890. /*
3656	const char content = ("#define MACRO(X) #X / non-str */\n"
3657	"MACRO(foo)\n");
3658	lexer_test test (case_, content, NULL);
3659
3660	/ Verify that we get the expected token back. /
3661	const cpp_token *tok = test.get_token ();
3662	ASSERT_EQ (tok->type, CPP_PADDING);
3663
3664	tok = test.get_token ();
3665	ASSERT_EQ (tok->type, CPP_STRING);
3666	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"");
3667
3668	/ We don't support getting the location of a stringified macro*
3669	argument. Verify that it fails gracefully. /*
3670	ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3671	"cpp_interpret_string_1 failed");
3672
3673	tok = test.get_token ();
3674	ASSERT_EQ (tok->type, CPP_PADDING);
3675
3676	tok = test.get_token ();
3677	ASSERT_EQ (tok->type, CPP_PADDING);
3678	}
3679
3680	/ Ensure that we are fail gracefully if something attempts to pass*
3681	in a location that isn't a string literal token. Seen on this code:
3682
3683	const char a[] = " %d ";
3684	__builtin_printf (a, 0.5);
3685	^
3686
3687	when c-format.cc erroneously used the indicated one-character
3688	location as the format string location, leading to a read past the
3689	end of a string buffer in cpp_interpret_string_1. /*
3690
3691	static void
3692	test_lexer_string_locations_non_string (const line_table_case &case_)
3693	{
3694	/ .....................000000000111111111122222222223.*
3695	.....................123456789012345678901234567890. /*
3696	const char *content = (" a\n");
3697	lexer_test test (case_, content, NULL);
3698
3699	/ Verify that we get the expected token back. /
3700	const cpp_token *tok = test.get_token ();
3701	ASSERT_EQ (tok->type, CPP_NAME);
3702	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a");
3703
3704	/ At this point, libcpp is attempting to interpret the name as a*
3705	string literal, despite it not starting with a quote. We don't detect
3706	that, but we should at least fail gracefully. /*
3707	ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING,
3708	"cpp_interpret_string_1 failed");
3709	}
3710
3711	/ Ensure that we can read substring information for a token which*
3712	starts in one linemap and ends in another . Adapted from
3713	gcc.dg/cpp/pr69985.c. /*
3714
3715	static void
3716	test_lexer_string_locations_long_line (const line_table_case &case_)
3717	{
3718	/ .....................000000.000111111111*
3719	.....................123456.789012346789. /*
3720	const char content = ("/ A very long line, so that we start a new line map. */\n"
3721	" \"0123456789012345678901234567890123456789"
3722	"0123456789012345678901234567890123456789"
3723	"0123456789012345678901234567890123456789"
3724	"0123456789\"\n");
3725
3726	lexer_test test (case_, content, NULL);
3727
3728	/ Verify that we get the expected token back. /
3729	const cpp_token *tok = test.get_token ();
3730	ASSERT_EQ (tok->type, CPP_STRING);
3731
3732	if (!should_have_column_data_p (loc: line_table->highest_location))
3733	return;
3734
3735	/ Verify ranges of individual characters. /
3736	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, `131`);
3737	for (int i = `0`; i < `131`; i++)
3738	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3739	i, `2`, `7` + i, `7` + i);
3740	}
3741
3742	/ Test of locations within a raw string that doesn't contain a newline. /
3743
3744	static void
3745	test_lexer_string_locations_raw_string_one_line (const line_table_case &case_)
3746	{
3747	/ .....................00.0000000111111111122.*
3748	.....................12.3456789012345678901. /*
3749	const char *content = ("R\"foo(0123456789)foo\"\n");
3750	lexer_test test (case_, content, NULL);
3751
3752	/ Verify that we get the expected token back. /
3753	const cpp_token *tok = test.get_token ();
3754	ASSERT_EQ (tok->type, CPP_STRING);
3755
3756	/ Verify that cpp_interpret_string works. /
3757	cpp_string dst_string;
3758	const enum cpp_ttype type = CPP_STRING;
3759	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3760	&dst_string, type);
3761	ASSERT_TRUE (result);
3762	ASSERT_STREQ ("0123456789", (const char *)dst_string.text);
3763	free (ptr: const_cast <unsigned char *> (dst_string.text));
3764
3765	if (!should_have_column_data_p (loc: line_table->highest_location))
3766	return;
3767
3768	/ 0-9, plus the nil terminator. /
3769	ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, `11`);
3770	for (int i = `0`; i < `11`; i++)
3771	ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
3772	i, `1`, `7` + i, `7` + i);
3773	}
3774
3775	/ Test of locations within a raw string that contains a newline. /
3776
3777	static void
3778	test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
3779	{
3780	/ .....................00.0000.*
3781	.....................12.3456. /*
3782	const char *content = ("R\"foo(\n"
3783	/ .....................00000.*
3784	.....................12345. /*
3785	"hello\n"
3786	"world\n"
3787	/ .....................00000.*
3788	.....................12345. /*
3789	")foo\"\n");
3790	lexer_test test (case_, content, NULL);
3791
3792	/ Verify that we get the expected token back. /
3793	const cpp_token *tok = test.get_token ();
3794	ASSERT_EQ (tok->type, CPP_STRING);
3795
3796	/ Verify that cpp_interpret_string works. /
3797	cpp_string dst_string;
3798	const enum cpp_ttype type = CPP_STRING;
3799	bool result = cpp_interpret_string (test.m_parser, &tok->val.str, `1`,
3800	&dst_string, type);
3801	ASSERT_TRUE (result);
3802	ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text);
3803	free (ptr: const_cast <unsigned char *> (dst_string.text));
3804
3805	if (!should_have_column_data_p (loc: line_table->highest_location))
3806	return;
3807
3808	/ Currently we don't support locations within raw strings that*
3809	contain newlines. /*
3810	ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
3811	"range endpoints are on different lines");
3812	}
3813
3814	/ Test of parsing an unterminated raw string. /
3815
3816	static void
3817	test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
3818	{
3819	const char content = "R\"ouch()ouCh\" / etc */";
3820
3821	lexer_diagnostic_sink diagnostics;
3822	lexer_test test (case_, content, &diagnostics);
3823	test.m_implicitly_expect_EOF = false;
3824
3825	/ Attempt to parse the raw string. /
3826	const cpp_token *tok = test.get_token ();
3827	ASSERT_EQ (tok->type, CPP_EOF);
3828
3829	ASSERT_EQ (`1`, diagnostics.m_diagnostics.length ());
3830	/ We expect the message "unterminated raw string"*
3831	in the "cpplib" translation domain.
3832	It's not clear that dgettext is available on all supported hosts,
3833	so this assertion is commented-out for now.
3834	ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
3835	diagnostics.m_diagnostics[0]);
3836	*/
3837	}
3838
3839	/ Test of lexing char constants. /
3840
3841	static void
3842	test_lexer_char_constants (const line_table_case &case_)
3843	{
3844	/ Various char constants.*
3845	.....................0000000001111111111.22222222223.
3846	.....................1234567890123456789.01234567890. /*
3847	const char *content = (" 'a'\n"
3848	" u'a'\n"
3849	" U'a'\n"
3850	" L'a'\n"
3851	" 'abc'\n");
3852	lexer_test test (case_, content, NULL);
3853
3854	/ Verify that we get the expected tokens back. /
3855	/ 'a'. /
3856	const cpp_token *tok = test.get_token ();
3857	ASSERT_EQ (tok->type, CPP_CHAR);
3858	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'");
3859
3860	unsigned int chars_seen;
3861	int unsignedp;
3862	cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
3863	&chars_seen, &unsignedp);
3864	ASSERT_EQ (cc, `'a'`);
3865	ASSERT_EQ (chars_seen, `1`);
3866
3867	/ u'a'. /
3868	tok = test.get_token ();
3869	ASSERT_EQ (tok->type, CPP_CHAR16);
3870	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'");
3871
3872	/ U'a'. /
3873	tok = test.get_token ();
3874	ASSERT_EQ (tok->type, CPP_CHAR32);
3875	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'");
3876
3877	/ L'a'. /
3878	tok = test.get_token ();
3879	ASSERT_EQ (tok->type, CPP_WCHAR);
3880	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'");
3881
3882	/ 'abc' (c-char-sequence). /
3883	tok = test.get_token ();
3884	ASSERT_EQ (tok->type, CPP_CHAR);
3885	ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'");
3886	}
3887	/ A table of interesting location_t values, giving one axis of our test*
3888	matrix. /*
3889
3890	static const location_t boundary_locations[] = {
3891	/ Zero means "don't override the default values for a new line_table". /
3892	`0`,
3893
3894	/ An arbitrary non-zero value that isn't close to one of*
3895	the boundary values below. /*
3896	`0x10000`,
3897
3898	/ Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES. /
3899	LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - `0x100`,
3900	LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - `1`,
3901	LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
3902	LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + `1`,
3903	LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + `0x100`,
3904
3905	/ Values near LINE_MAP_MAX_LOCATION_WITH_COLS. /
3906	LINE_MAP_MAX_LOCATION_WITH_COLS - `0x100`,
3907	LINE_MAP_MAX_LOCATION_WITH_COLS - `1`,
3908	LINE_MAP_MAX_LOCATION_WITH_COLS,
3909	LINE_MAP_MAX_LOCATION_WITH_COLS + `1`,
3910	LINE_MAP_MAX_LOCATION_WITH_COLS + `0x100`,
3911	};
3912
3913	/ Run TESTCASE multiple times, once for each case in our test matrix. /
3914
3915	void
3916	for_each_line_table_case (void (testcase) (const* line_table_case &))
3917	{
3918	/ As noted above in the description of struct line_table_case,*
3919	we want to explore a test matrix of interesting line_table
3920	situations, running various selftests for each case within the
3921	matrix. /*
3922
3923	/ Run all tests with:*
3924	(a) line_table->default_range_bits == 0, and
3925	(b) line_table->default_range_bits == 5. /*
3926	int num_cases_tested = `0`;
3927	for (int default_range_bits = `0`; default_range_bits <= `5`;
3928	default_range_bits += `5`)
3929	{
3930	/ ...and use each of the "interesting" location values as*
3931	the starting location within line_table. /*
3932	const int num_boundary_locations = ARRAY_SIZE (boundary_locations);
3933	for (int loc_idx = `0`; loc_idx < num_boundary_locations; loc_idx++)
3934	{
3935	line_table_case c (default_range_bits, boundary_locations[loc_idx]);
3936
3937	testcase (c);
3938
3939	num_cases_tested++;
3940	}
3941	}
3942
3943	/ Verify that we fully covered the test matrix. /
3944	ASSERT_EQ (num_cases_tested, `2` * `12`);
3945	}
3946
3947	/ Verify that when presented with a consecutive pair of locations with*
3948	a very large line offset, we don't attempt to consolidate them into
3949	a single ordinary linemap where the line offsets within the line map
3950	would lead to overflow (PR lto/88147). /*
3951
3952	static void
3953	test_line_offset_overflow ()
3954	{
3955	line_table_test ltt (line_table_case (`5`, `0`));
3956
3957	linemap_add (line_table, LC_ENTER, sysp: false, to_file: "foo.c", to_line: `0`);
3958	linemap_line_start (set: line_table, to_line: `1`, max_column_hint: `100`);
3959	location_t loc_a = linemap_line_start (set: line_table, to_line: `2578`, max_column_hint: `255`);
3960	assert_loceq (exp_filename: "foo.c", exp_linenum: `2578`, exp_colnum: `0`, loc: loc_a);
3961
3962	const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (set: line_table);
3963	ASSERT_EQ (ordmap_a->m_column_and_range_bits, `13`);
3964	ASSERT_EQ (ordmap_a->m_range_bits, `5`);
3965
3966	location_t loc_b = linemap_line_start (set: line_table, to_line: `404198`, max_column_hint: `512`);
3967	assert_loceq (exp_filename: "foo.c", exp_linenum: `404198`, exp_colnum: `0`, loc: loc_b);
3968
3969	/ We should have started a new linemap, rather than attempting to store*
3970	a very large line offset. /*
3971	const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (set: line_table);
3972	ASSERT_NE (ordmap_a, ordmap_b);
3973	}
3974
3975	void test_cpp_utf8 ()
3976	{
3977	const int def_tabstop = `8`;
3978	cpp_char_column_policy policy (def_tabstop, cpp_wcwidth);
3979
3980	/ Verify that wcwidth of invalid UTF-8 or control bytes is 1. /
3981	{
3982	int w_bad = cpp_display_width (data: "\xf0!\x9f!\x98!\x82!", data_length: `8`, policy);
3983	ASSERT_EQ (`8`, w_bad);
3984	int w_ctrl = cpp_display_width (data: "\r\n\v\0\1", data_length: `5`, policy);
3985	ASSERT_EQ (`5`, w_ctrl);
3986	}
3987
3988	/ Verify that wcwidth of valid UTF-8 is as expected. /
3989	{
3990	const int w_pi = cpp_display_width (data: "\xcf\x80", data_length: `2`, policy);
3991	ASSERT_EQ (`1`, w_pi);
3992	const int w_emoji = cpp_display_width (data: "\xf0\x9f\x98\x82", data_length: `4`, policy);
3993	ASSERT_EQ (`2`, w_emoji);
3994	const int w_umlaut_precomposed = cpp_display_width (data: "\xc3\xbf", data_length: `2`,
3995	policy);
3996	ASSERT_EQ (`1`, w_umlaut_precomposed);
3997	const int w_umlaut_combining = cpp_display_width (data: "y\xcc\x88", data_length: `3`,
3998	policy);
3999	ASSERT_EQ (`1`, w_umlaut_combining);
4000	const int w_han = cpp_display_width (data: "\xe4\xb8\xba", data_length: `3`, policy);
4001	ASSERT_EQ (`2`, w_han);
4002	const int w_ascii = cpp_display_width (data: "GCC", data_length: `3`, policy);
4003	ASSERT_EQ (`3`, w_ascii);
4004	const int w_mixed = cpp_display_width (data: "\xcf\x80 = 3.14 \xf0\x9f\x98\x82"
4005	"\x9f! \xe4\xb8\xba y\xcc\x88",
4006	data_length: `24`, policy);
4007	ASSERT_EQ (`18`, w_mixed);
4008	}
4009
4010	/ Verify that display width properly expands tabs. /
4011	{
4012	const char *tstr = "\tabc\td";
4013	ASSERT_EQ (`6`, cpp_display_width (tstr, `6`,
4014	cpp_char_column_policy (`1`, cpp_wcwidth)));
4015	ASSERT_EQ (`10`, cpp_display_width (tstr, `6`,
4016	cpp_char_column_policy (`3`, cpp_wcwidth)));
4017	ASSERT_EQ (`17`, cpp_display_width (tstr, `6`,
4018	cpp_char_column_policy (`8`, cpp_wcwidth)));
4019	ASSERT_EQ (`1`,
4020	cpp_display_column_to_byte_column
4021	(tstr, `6`, `7`, cpp_char_column_policy (`8`, cpp_wcwidth)));
4022	}
4023
4024	/ Verify that cpp_byte_column_to_display_column can go past the end,*
4025	and similar edge cases. /*
4026	{
4027	const char *str
4028	/ Display columns.*
4029	111111112345 /*
4030	= "\xcf\x80 abc";
4031	/ 111122223456*
4032	Byte columns. /*
4033
4034	ASSERT_EQ (`5`, cpp_display_width (str, `6`, policy));
4035	ASSERT_EQ (`105`,
4036	cpp_byte_column_to_display_column (str, `6`, `106`, policy));
4037	ASSERT_EQ (`10000`,
4038	cpp_byte_column_to_display_column (NULL, `0`, `10000`, policy));
4039	ASSERT_EQ (`0`,
4040	cpp_byte_column_to_display_column (NULL, `10000`, `0`, policy));
4041	}
4042
4043	/ Verify that cpp_display_column_to_byte_column can go past the end,*
4044	and similar edge cases, and check invertibility. /*
4045	{
4046	const char *str
4047	/ Display columns.*
4048	000000000000000000000000000000000000011
4049	111111112222222234444444455555555678901 /*
4050	= "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello";
4051	/ 000000000000000000000000000000000111111*
4052	111122223333444456666777788889999012345
4053	Byte columns. /*
4054	ASSERT_EQ (`4`, cpp_display_column_to_byte_column (str, `15`, `2`, policy));
4055	ASSERT_EQ (`15`,
4056	cpp_display_column_to_byte_column (str, `15`, `11`, policy));
4057	ASSERT_EQ (`115`,
4058	cpp_display_column_to_byte_column (str, `15`, `111`, policy));
4059	ASSERT_EQ (`10000`,
4060	cpp_display_column_to_byte_column (NULL, `0`, `10000`, policy));
4061	ASSERT_EQ (`0`,
4062	cpp_display_column_to_byte_column (NULL, `10000`, `0`, policy));
4063
4064	/ Verify that we do not interrupt a UTF-8 sequence. /
4065	ASSERT_EQ (`4`, cpp_display_column_to_byte_column (str, `15`, `1`, policy));
4066
4067	for (int byte_col = `1`; byte_col <= `15`; ++byte_col)
4068	{
4069	const int disp_col
4070	= cpp_byte_column_to_display_column (data: str, data_length: `15`, column: byte_col, policy);
4071	const int byte_col2
4072	= cpp_display_column_to_byte_column (data: str, data_length: `15`, display_col: disp_col, policy);
4073
4074	/ If we ask for the display column in the middle of a UTF-8*
4075	sequence, it will return the length of the partial sequence,
4076	matching the behavior of GCC before display column support.
4077	Otherwise check the round trip was successful. /*
4078	if (byte_col < `4`)
4079	ASSERT_EQ (byte_col, disp_col);
4080	else if (byte_col >= `6` && byte_col < `9`)
4081	ASSERT_EQ (`3` + (byte_col - `5`), disp_col);
4082	else
4083	ASSERT_EQ (byte_col2, byte_col);
4084	}
4085	}
4086	}
4087
4088	static bool
4089	check_cpp_valid_utf8_p (const char *str)
4090	{
4091	return cpp_valid_utf8_p (data: str, num_bytes: strlen (s: str));
4092	}
4093
4094	/ Check that cpp_valid_utf8_p works as expected. /
4095
4096	static void
4097	test_cpp_valid_utf8_p ()
4098	{
4099	ASSERT_TRUE (check_cpp_valid_utf8_p ("hello world"));
4100
4101	/ 2-byte char (pi). /
4102	ASSERT_TRUE (check_cpp_valid_utf8_p("\xcf\x80"));
4103
4104	/ 3-byte chars (the Japanese word "mojibake"). /
4105	ASSERT_TRUE (check_cpp_valid_utf8_p
4106	(
4107	/ U+6587 CJK UNIFIED IDEOGRAPH-6587*
4108	UTF-8: 0xE6 0x96 0x87
4109	C octal escaped UTF-8: \346\226\207. /*
4110	"\346\226\207"
4111	/ U+5B57 CJK UNIFIED IDEOGRAPH-5B57*
4112	UTF-8: 0xE5 0xAD 0x97
4113	C octal escaped UTF-8: \345\255\227. /*
4114	"\345\255\227"
4115	/ U+5316 CJK UNIFIED IDEOGRAPH-5316*
4116	UTF-8: 0xE5 0x8C 0x96
4117	C octal escaped UTF-8: \345\214\226. /*
4118	"\345\214\226"
4119	/ U+3051 HIRAGANA LETTER KE*
4120	UTF-8: 0xE3 0x81 0x91
4121	C octal escaped UTF-8: \343\201\221. /*
4122	"\343\201\221"));
4123
4124	/ 4-byte char: an emoji. /
4125	ASSERT_TRUE (check_cpp_valid_utf8_p ("\xf0\x9f\x98\x82"));
4126
4127	/ Control codes, including the NUL byte. /
4128	ASSERT_TRUE (cpp_valid_utf8_p ("\r\n\v\0\1", `5`));
4129
4130	ASSERT_FALSE (check_cpp_valid_utf8_p ("\xf0!\x9f!\x98!\x82!"));
4131
4132	/ Unexpected continuation bytes. /
4133	for (unsigned char continuation_byte = `0x80`;
4134	continuation_byte <= `0xbf`;
4135	continuation_byte++)
4136	ASSERT_FALSE (cpp_valid_utf8_p ((const char *)&continuation_byte, `1`));
4137
4138	/ "Lonely start characters" for 2-byte sequences. /
4139	{
4140	unsigned char buf[`2`];
4141	buf[`1`] = `' '`;
4142	for (buf[`0`] = `0xc0`;
4143	buf[`0`] <= `0xdf`;
4144	buf[`0`]++)
4145	ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, `2`));
4146	}
4147
4148	/ "Lonely start characters" for 3-byte sequences. /
4149	{
4150	unsigned char buf[`2`];
4151	buf[`1`] = `' '`;
4152	for (buf[`0`] = `0xe0`;
4153	buf[`0`] <= `0xef`;
4154	buf[`0`]++)
4155	ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, `2`));
4156	}
4157
4158	/ "Lonely start characters" for 4-byte sequences. /
4159	{
4160	unsigned char buf[`2`];
4161	buf[`1`] = `' '`;
4162	for (buf[`0`] = `0xf0`;
4163	buf[`0`] <= `0xf4`;
4164	buf[`0`]++)
4165	ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, `2`));
4166	}
4167
4168	/ Invalid start characters (formerly valid for 5-byte and 6-byte*
4169	sequences). /*
4170	{
4171	unsigned char buf[`2`];
4172	buf[`1`] = `' '`;
4173	for (buf[`0`] = `0xf5`;
4174	buf[`0`] <= `0xfd`;
4175	buf[`0`]++)
4176	ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, `2`));
4177	}
4178
4179	/ Impossible bytes. /
4180	ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc0"));
4181	ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc1"));
4182	ASSERT_FALSE (check_cpp_valid_utf8_p ("\xfe"));
4183	ASSERT_FALSE (check_cpp_valid_utf8_p ("\xff"));
4184	}
4185
4186	/ Run all of the selftests within this file. /
4187
4188	void
4189	input_cc_tests ()
4190	{
4191	test_linenum_comparisons ();
4192	test_should_have_column_data_p ();
4193	test_unknown_location ();
4194	test_builtins ();
4195	for_each_line_table_case (testcase: test_make_location_nonpure_range_endpoints);
4196
4197	for_each_line_table_case (testcase: test_accessing_ordinary_linemaps);
4198	for_each_line_table_case (testcase: test_lexer);
4199	for_each_line_table_case (testcase: test_lexer_string_locations_simple);
4200	for_each_line_table_case (testcase: test_lexer_string_locations_ebcdic);
4201	for_each_line_table_case (testcase: test_lexer_string_locations_hex);
4202	for_each_line_table_case (testcase: test_lexer_string_locations_oct);
4203	for_each_line_table_case (testcase: test_lexer_string_locations_letter_escape_1);
4204	for_each_line_table_case (testcase: test_lexer_string_locations_letter_escape_2);
4205	for_each_line_table_case (testcase: test_lexer_string_locations_ucn4);
4206	for_each_line_table_case (testcase: test_lexer_string_locations_ucn8);
4207	for_each_line_table_case (testcase: test_lexer_string_locations_wide_string);
4208	for_each_line_table_case (testcase: test_lexer_string_locations_string16);
4209	for_each_line_table_case (testcase: test_lexer_string_locations_string32);
4210	for_each_line_table_case (testcase: test_lexer_string_locations_u8);
4211	for_each_line_table_case (testcase: test_lexer_string_locations_utf8_source);
4212	for_each_line_table_case (testcase: test_lexer_string_locations_concatenation_1);
4213	for_each_line_table_case (testcase: test_lexer_string_locations_concatenation_2);
4214	for_each_line_table_case (testcase: test_lexer_string_locations_concatenation_3);
4215	for_each_line_table_case (testcase: test_lexer_string_locations_macro);
4216	for_each_line_table_case (testcase: test_lexer_string_locations_stringified_macro_argument);
4217	for_each_line_table_case (testcase: test_lexer_string_locations_non_string);
4218	for_each_line_table_case (testcase: test_lexer_string_locations_long_line);
4219	for_each_line_table_case (testcase: test_lexer_string_locations_raw_string_one_line);
4220	for_each_line_table_case (testcase: test_lexer_string_locations_raw_string_multiline);
4221	for_each_line_table_case (testcase: test_lexer_string_locations_raw_string_unterminated);
4222	for_each_line_table_case (testcase: test_lexer_char_constants);
4223
4224	test_reading_source_line ();
4225
4226	test_line_offset_overflow ();
4227
4228	test_cpp_utf8 ();
4229	test_cpp_valid_utf8_p ();
4230	}
4231
4232	} // namespace selftest
4233
4234	#endif /* CHECKING_P */
4235

source code of gcc/input.cc