1 | /* Data and functions related to line maps and input files. |
2 | Copyright (C) 2004-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free |
8 | Software Foundation; either version 3, or (at your option) any later |
9 | version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "config.h" |
21 | #include "system.h" |
22 | #include "coretypes.h" |
23 | #include "intl.h" |
24 | #include "diagnostic.h" |
25 | #include "selftest.h" |
26 | #include "cpplib.h" |
27 | |
28 | #ifndef HAVE_ICONV |
29 | #define HAVE_ICONV 0 |
30 | #endif |
31 | |
32 | const char * |
33 | special_fname_builtin () |
34 | { |
35 | return _("<built-in>" ); |
36 | } |
37 | |
38 | /* Input charset configuration. */ |
/* Input charset configuration.  The default callback requests no
   charset conversion for any file.  */
static const char *
default_charset_callback (const char *)
{
  /* A null return means "leave the input bytes untouched".  */
  return nullptr;
}
43 | |
44 | void |
45 | file_cache::initialize_input_context (diagnostic_input_charset_callback ccb, |
46 | bool should_skip_bom) |
47 | { |
48 | in_context.ccb = (ccb ? ccb : default_charset_callback); |
49 | in_context.should_skip_bom = should_skip_bom; |
50 | } |
51 | |
/* This is a cache used by get_next_line to store the content of a
   file to be searched for file lines.  */
class file_cache_slot
{
public:
  file_cache_slot ();
  ~file_cache_slot ();

  /* Read line LINE_NUM (counting from 1) of the cached file into
     *LINE/*LINE_LEN.  *LINE points into the cache and is only valid
     until the next read.  Returns true on success.  */
  bool read_line_num (size_t line_num,
		      char ** line, ssize_t *line_len);

  /* Accessors.  */
  const char *get_file_path () const { return m_file_path; }
  unsigned get_use_count () const { return m_use_count; }
  bool missing_trailing_newline_p () const
  {
    return m_missing_trailing_newline;
  }
  /* Borrowed view of the whole (decoded) file content read so far.  */
  char_span get_full_file_content ();

  void inc_use_count () { m_use_count++; }

  /* (Re)populate this slot for FILE_PATH/FP, dropping any previously
     cached content.  Returns false if the content cannot be converted
     to the input charset.  */
  bool create (const file_cache::input_context &in_context,
	       const char *file_path, FILE *fp, unsigned highest_use_count);
  /* Reset this slot to the empty state, closing the file if open.  */
  void evict ();

private:
  /* These are information used to store a line boundary.  */
  class line_info
  {
  public:
    /* The line number.  It starts from 1.  */
    size_t line_num;

    /* The position (byte count) of the beginning of the line,
       relative to the file data pointer.  This starts at zero.  */
    size_t start_pos;

    /* The position (byte count) of the last byte of the line.  This
       normally points to the '\n' character, or to one byte after the
       last byte of the file, if the file doesn't contain a '\n'
       character.  */
    size_t end_pos;

    line_info (size_t l, size_t s, size_t e)
      : line_num (l), start_pos (s), end_pos (e)
    {}

    line_info ()
      :line_num (0), start_pos (0), end_pos (0)
    {}
  };

  bool needs_read_p () const;
  bool needs_grow_p () const;
  void maybe_grow ();
  bool read_data ();
  bool maybe_read_data ();
  bool get_next_line (char **line, ssize_t *line_len);
  bool read_next_line (char ** line, ssize_t *line_len);
  bool goto_next_line ();

  /* Initial allocation size for m_data; doubled on each growth.  */
  static const size_t buffer_size = 4 * 1024;
  /* Maximum number of line_info entries kept in m_line_record.  */
  static const size_t line_record_size = 100;

  /* The number of times this file has been accessed.  This is used
     to designate which file cache to evict from the cache
     array.  */
  unsigned m_use_count;

  /* The file_path is the key for identifying a particular file in
     the cache.
     For libcpp-using code, the underlying buffer for this field is
     owned by the corresponding _cpp_file within the cpp_reader.  */
  const char *m_file_path;

  /* Handle on the underlying file; null once evicted or replaced.  */
  FILE *m_fp;

  /* This points to the content of the file that we've read so
     far.  */
  char *m_data;

  /* The allocated buffer to be freed may start a little earlier than DATA,
     e.g. if a UTF8 BOM was skipped at the beginning.  */
  int m_alloc_offset;

  /* The size of the DATA array above.  */
  size_t m_size;

  /* The number of bytes read from the underlying file so far.  This
     must be less (or equal) than SIZE above.  */
  size_t m_nb_read;

  /* The index of the beginning of the current line.  */
  size_t m_line_start_idx;

  /* The number of the previous line read.  This starts at 1.  Zero
     means we've read no line so far.  */
  size_t m_line_num;

  /* This is the total number of lines of the current file.  At the
     moment, we try to get this information from the line map
     subsystem.  Note that this is just a hint.  When using the C++
     front-end, this hint is correct because the input file is then
     completely tokenized before parsing starts; so the line map knows
     the number of lines before compilation really starts.  For e.g,
     the C front-end, it can happen that we start emitting diagnostics
     before the line map has seen the end of the file.  */
  size_t m_total_lines;

  /* Could this file be missing a trailing newline on its final line?
     Initially true (to cope with empty files), set to true/false
     as each line is read.  */
  bool m_missing_trailing_newline;

  /* This is a record of the beginning and end of the lines we've seen
     while reading the file.  This is useful to avoid walking the data
     from the beginning when we are asked to read a line that is
     before LINE_START_IDX above.  Note that the maximum size of this
     record is line_record_size, so that the memory consumption
     doesn't explode.  We thus scale total_lines down to
     line_record_size.  */
  vec<line_info, va_heap> m_line_record;

  /* Shift m_data by OFFSET bytes (shrinking m_size accordingly) and
     record the shift in m_alloc_offset, so the true allocation start
     can be recovered later, e.g. before freeing or reallocating.  */
  void offset_buffer (int offset)
  {
    gcc_assert (offset < 0 ? m_alloc_offset + offset >= 0
		: (size_t) offset <= m_size);
    gcc_assert (m_data);
    m_alloc_offset += offset;
    m_data += offset;
    m_size -= offset;
  }

};
187 | |
/* Current position in real source file.  */

location_t input_location = UNKNOWN_LOCATION;

/* The global singleton line table, mapping location_t values to
   file/line/column information.  */

class line_maps *line_table;

/* A stashed copy of "line_table" for use by selftest::line_table_test.
   This needs to be a global so that it can be a GC root, and thus
   prevent the stashed copy from being garbage-collected if the GC runs
   during a line_table_test.  */

class line_maps *saved_line_table;
200 | |
201 | /* Expand the source location LOC into a human readable location. If |
202 | LOC resolves to a builtin location, the file name of the readable |
203 | location is set to the string "<built-in>". If EXPANSION_POINT_P is |
204 | TRUE and LOC is virtual, then it is resolved to the expansion |
205 | point of the involved macro. Otherwise, it is resolved to the |
206 | spelling location of the token. |
207 | |
208 | When resolving to the spelling location of the token, if the |
209 | resulting location is for a built-in location (that is, it has no |
210 | associated line/column) in the context of a macro expansion, the |
211 | returned location is the first one (while unwinding the macro |
212 | location towards its expansion point) that is in real source |
213 | code. |
214 | |
215 | ASPECT controls which part of the location to use. */ |
216 | |
static expanded_location
expand_location_1 (const line_maps *set,
		   location_t loc,
		   bool expansion_point_p,
		   enum location_aspect aspect)
{
  expanded_location xloc;
  const line_map_ordinary *map;
  enum location_resolution_kind lrk = LRK_MACRO_EXPANSION_POINT;
  tree block = NULL;

  /* An ad-hoc location wraps a real location plus a BLOCK pointer;
     peel it off first and remember the block for xloc.data below.  */
  if (IS_ADHOC_LOC (loc))
    {
      block = LOCATION_BLOCK (loc);
      loc = LOCATION_LOCUS (loc);
    }

  memset (&xloc, 0, sizeof (xloc));

  if (loc >= RESERVED_LOCATION_COUNT)
    {
      if (!expansion_point_p)
	{
	  /* We want to resolve LOC to its spelling location.

	     But if that spelling location is a reserved location that
	     appears in the context of a macro expansion (like for a
	     location for a built-in token), let's consider the first
	     location (toward the expansion point) that is not reserved;
	     that is, the first location that is in real source code.  */
	  loc = linemap_unwind_to_first_non_reserved_loc (set,
							  loc, NULL);
	  lrk = LRK_SPELLING_LOCATION;
	}
      loc = linemap_resolve_location (set, loc, lrk, &map);

      /* loc is now either in an ordinary map, or is a reserved location.
	 If it is a compound location, the caret is in a spelling location,
	 but the start/finish might still be a virtual location.
	 Depending of what the caller asked for, we may need to recurse
	 one level in order to resolve any virtual locations in the
	 end-points.  */
      switch (aspect)
	{
	default:
	  gcc_unreachable ();
	  /* Fall through.  */
	case LOCATION_ASPECT_CARET:
	  break;
	case LOCATION_ASPECT_START:
	  {
	    /* Recurse (at most one level) to expand the start point,
	       which may still be virtual.  */
	    location_t start = get_start (loc);
	    if (start != loc)
	      return expand_location_1 (set, start, expansion_point_p, aspect);
	  }
	  break;
	case LOCATION_ASPECT_FINISH:
	  {
	    /* Likewise for the finish point.  */
	    location_t finish = get_finish (loc);
	    if (finish != loc)
	      return expand_location_1 (set, finish, expansion_point_p, aspect);
	  }
	  break;
	}
      xloc = linemap_expand_location (set, map, loc);
    }

  xloc.data = block;
  /* Reserved locations have no real file: report UNKNOWN_LOCATION with
     a null file name, and built-in locations as "<built-in>".  */
  if (loc <= BUILTINS_LOCATION)
    xloc.file = loc == UNKNOWN_LOCATION ? NULL : special_fname_builtin ();

  return xloc;
}
290 | |
291 | /* Initialize the set of cache used for files accessed by caret |
292 | diagnostic. */ |
293 | |
static void
diagnostic_file_cache_init (void)
{
  /* Lazily create the file cache on the global diagnostic context.  */
  gcc_assert (global_dc);
  global_dc->file_cache_init ();
}
300 | |
301 | void |
302 | diagnostic_context::file_cache_init () |
303 | { |
304 | if (m_file_cache == nullptr) |
305 | m_file_cache = new file_cache (); |
306 | } |
307 | |
/* Return the total number of lines that have been read so far by the
   line map (in the preprocessor).  For languages like C++ that
   entirely preprocess the input file before starting to parse, this
   equals the actual number of lines of the file.  */
312 | |
313 | static size_t |
314 | total_lines_num (const char *file_path) |
315 | { |
316 | size_t r = 0; |
317 | location_t l = 0; |
318 | if (linemap_get_file_highest_location (set: line_table, file_name: file_path, loc: &l)) |
319 | { |
320 | gcc_assert (l >= RESERVED_LOCATION_COUNT); |
321 | expanded_location xloc = expand_location (l); |
322 | r = xloc.line; |
323 | } |
324 | return r; |
325 | } |
326 | |
327 | /* Lookup the cache used for the content of a given file accessed by |
328 | caret diagnostic. Return the found cached file, or NULL if no |
329 | cached file was found. */ |
330 | |
331 | file_cache_slot * |
332 | file_cache::lookup_file (const char *file_path) |
333 | { |
334 | gcc_assert (file_path); |
335 | |
336 | /* This will contain the found cached file. */ |
337 | file_cache_slot *r = NULL; |
338 | for (unsigned i = 0; i < num_file_slots; ++i) |
339 | { |
340 | file_cache_slot *c = &m_file_slots[i]; |
341 | if (c->get_file_path () && !strcmp (s1: c->get_file_path (), s2: file_path)) |
342 | { |
343 | c->inc_use_count (); |
344 | r = c; |
345 | } |
346 | } |
347 | |
348 | if (r) |
349 | r->inc_use_count (); |
350 | |
351 | return r; |
352 | } |
353 | |
354 | /* Purge any mention of FILENAME from the cache of files used for |
355 | printing source code. For use in selftests when working |
356 | with tempfiles. */ |
357 | |
358 | void |
359 | diagnostics_file_cache_forcibly_evict_file (const char *file_path) |
360 | { |
361 | gcc_assert (file_path); |
362 | |
363 | auto file_cache = global_dc->get_file_cache (); |
364 | if (!file_cache) |
365 | return; |
366 | file_cache->forcibly_evict_file (file_path); |
367 | } |
368 | |
369 | void |
370 | file_cache::forcibly_evict_file (const char *file_path) |
371 | { |
372 | gcc_assert (file_path); |
373 | |
374 | file_cache_slot *r = lookup_file (file_path); |
375 | if (!r) |
376 | /* Not found. */ |
377 | return; |
378 | |
379 | r->evict (); |
380 | } |
381 | |
382 | void |
383 | file_cache_slot::evict () |
384 | { |
385 | m_file_path = NULL; |
386 | if (m_fp) |
387 | fclose (stream: m_fp); |
388 | m_fp = NULL; |
389 | m_nb_read = 0; |
390 | m_line_start_idx = 0; |
391 | m_line_num = 0; |
392 | m_line_record.truncate (size: 0); |
393 | m_use_count = 0; |
394 | m_total_lines = 0; |
395 | m_missing_trailing_newline = true; |
396 | } |
397 | |
398 | /* Return the file cache that has been less used, recently, or the |
399 | first empty one. If HIGHEST_USE_COUNT is non-null, |
400 | *HIGHEST_USE_COUNT is set to the highest use count of the entries |
401 | in the cache table. */ |
402 | |
file_cache_slot*
file_cache::evicted_cache_tab_entry (unsigned *highest_use_count)
{
  /* NOTE(review): a method on a live file_cache implies the cache
     already exists; confirm whether this init call is redundant.  */
  diagnostic_file_cache_init ();

  /* Candidate victim: start with slot 0, then look for anything
     cheaper to evict.  */
  file_cache_slot *to_evict = &m_file_slots[0];
  unsigned huc = to_evict->get_use_count ();
  for (unsigned i = 1; i < num_file_slots; ++i)
    {
      file_cache_slot *c = &m_file_slots[i];
      bool c_is_empty = (c->get_file_path () == NULL);

      if (c->get_use_count () < to_evict->get_use_count ()
	  || (to_evict->get_file_path () && c_is_empty))
	/* We evict C because it's either an entry with a lower use
	   count or one that is empty.  */
	to_evict = c;

      if (huc < c->get_use_count ())
	huc = c->get_use_count ();

      if (c_is_empty)
	/* We've reached the end of the cache; subsequent elements are
	   all empty.  */
	break;
    }

  if (highest_use_count)
    *highest_use_count = huc;

  return to_evict;
}
435 | |
436 | /* Create the cache used for the content of a given file to be |
437 | accessed by caret diagnostic. This cache is added to an array of |
438 | cache and can be retrieved by lookup_file_in_cache_tab. This |
439 | function returns the created cache. Note that only the last |
440 | num_file_slots files are cached. |
441 | |
442 | This can return nullptr if the FILE_PATH can't be opened for |
443 | reading, or if the content can't be converted to the input_charset. */ |
444 | |
445 | file_cache_slot* |
446 | file_cache::add_file (const char *file_path) |
447 | { |
448 | |
449 | FILE *fp = fopen (filename: file_path, modes: "r" ); |
450 | if (fp == NULL) |
451 | return NULL; |
452 | |
453 | unsigned highest_use_count = 0; |
454 | file_cache_slot *r = evicted_cache_tab_entry (highest_use_count: &highest_use_count); |
455 | if (!r->create (in_context, file_path, fp, highest_use_count)) |
456 | return NULL; |
457 | return r; |
458 | } |
459 | |
460 | /* Get a borrowed char_span to the full content of this file |
461 | as decoded according to the input charset, encoded as UTF-8. */ |
462 | |
463 | char_span |
464 | file_cache_slot::get_full_file_content () |
465 | { |
466 | char *line; |
467 | ssize_t line_len; |
468 | while (get_next_line (line: &line, line_len: &line_len)) |
469 | { |
470 | } |
471 | return char_span (m_data, m_nb_read); |
472 | } |
473 | |
474 | /* Populate this slot for use on FILE_PATH and FP, dropping any |
475 | existing cached content within it. */ |
476 | |
bool
file_cache_slot::create (const file_cache::input_context &in_context,
			 const char *file_path, FILE *fp,
			 unsigned highest_use_count)
{
  m_file_path = file_path;
  if (m_fp)
    fclose (m_fp);
  m_fp = fp;
  /* Undo any offset left over from a previous file (e.g. a skipped
     UTF-8 BOM) so m_data points at the true allocation start again.  */
  if (m_alloc_offset)
    offset_buffer (-m_alloc_offset);
  m_nb_read = 0;
  m_line_start_idx = 0;
  m_line_num = 0;
  m_line_record.truncate (0);
  /* Ensure that this cache entry doesn't get evicted next time
     add_file_to_cache_tab is called.  */
  m_use_count = ++highest_use_count;
  m_total_lines = total_lines_num (file_path);
  m_missing_trailing_newline = true;

  /* Check the input configuration to determine if we need to do any
     transformations, such as charset conversion or BOM skipping.  */
  if (const char *input_charset = in_context.ccb (file_path))
    {
      /* Need a full-blown conversion of the input charset.  */
      fclose (m_fp);
      m_fp = NULL;
      const cpp_converted_source cs
	= cpp_get_converted_source (file_path, input_charset);
      if (!cs.data)
	return false;
      if (m_data)
	XDELETEVEC (m_data);
      m_data = cs.data;
      m_nb_read = m_size = cs.len;
      /* The converted data may not start at the allocation start;
	 remember the difference so the buffer can be freed later.  */
      m_alloc_offset = cs.data - cs.to_free;
    }
  else if (in_context.should_skip_bom)
    {
      if (read_data ())
	{
	  /* Shift m_data past a leading UTF-8 BOM, if present.  */
	  const int offset = cpp_check_utf8_bom (m_data, m_nb_read);
	  offset_buffer (offset);
	  m_nb_read -= offset;
	}
    }

  return true;
}
528 | |
529 | /* file_cache's ctor. */ |
530 | |
file_cache::file_cache ()
: m_file_slots (new file_cache_slot[num_file_slots])
{
  /* Default input context: no charset conversion, no BOM skipping.  */
  initialize_input_context (nullptr, false);
}
536 | |
537 | /* file_cache's dtor. */ |
538 | |
file_cache::~file_cache ()
{
  /* Each slot's destructor closes its FILE and frees its buffer.  */
  delete[] m_file_slots;
}
543 | |
544 | /* Lookup the cache used for the content of a given file accessed by |
545 | caret diagnostic. If no cached file was found, create a new cache |
546 | for this file, add it to the array of cached file and return |
547 | it. |
548 | |
549 | This can return nullptr on a cache miss if FILE_PATH can't be opened for |
550 | reading, or if the content can't be converted to the input_charset. */ |
551 | |
552 | file_cache_slot* |
553 | file_cache::lookup_or_add_file (const char *file_path) |
554 | { |
555 | file_cache_slot *r = lookup_file (file_path); |
556 | if (r == NULL) |
557 | r = add_file (file_path); |
558 | return r; |
559 | } |
560 | |
561 | /* Default constructor for a cache of file used by caret |
562 | diagnostic. */ |
563 | |
564 | file_cache_slot::file_cache_slot () |
565 | : m_use_count (0), m_file_path (NULL), m_fp (NULL), m_data (0), |
566 | m_alloc_offset (0), m_size (0), m_nb_read (0), m_line_start_idx (0), |
567 | m_line_num (0), m_total_lines (0), m_missing_trailing_newline (true) |
568 | { |
569 | m_line_record.create (nelems: 0); |
570 | } |
571 | |
572 | /* Destructor for a cache of file used by caret diagnostic. */ |
573 | |
574 | file_cache_slot::~file_cache_slot () |
575 | { |
576 | if (m_fp) |
577 | { |
578 | fclose (stream: m_fp); |
579 | m_fp = NULL; |
580 | } |
581 | if (m_data) |
582 | { |
583 | offset_buffer (offset: -m_alloc_offset); |
584 | XDELETEVEC (m_data); |
585 | m_data = 0; |
586 | } |
587 | m_line_record.release (); |
588 | } |
589 | |
590 | /* Returns TRUE iff the cache would need to be filled with data coming |
591 | from the file. That is, either the cache is empty or full or the |
592 | current line is empty. Note that if the cache is full, it would |
593 | need to be extended and filled again. */ |
594 | |
595 | bool |
596 | file_cache_slot::needs_read_p () const |
597 | { |
598 | return m_fp && (m_nb_read == 0 |
599 | || m_nb_read == m_size |
600 | || (m_line_start_idx >= m_nb_read - 1)); |
601 | } |
602 | |
603 | /* Return TRUE iff the cache is full and thus needs to be |
604 | extended. */ |
605 | |
606 | bool |
607 | file_cache_slot::needs_grow_p () const |
608 | { |
609 | return m_nb_read == m_size; |
610 | } |
611 | |
612 | /* Grow the cache if it needs to be extended. */ |
613 | |
614 | void |
615 | file_cache_slot::maybe_grow () |
616 | { |
617 | if (!needs_grow_p ()) |
618 | return; |
619 | |
620 | if (!m_data) |
621 | { |
622 | gcc_assert (m_size == 0 && m_alloc_offset == 0); |
623 | m_size = buffer_size; |
624 | m_data = XNEWVEC (char, m_size); |
625 | } |
626 | else |
627 | { |
628 | const int offset = m_alloc_offset; |
629 | offset_buffer (offset: -offset); |
630 | m_size *= 2; |
631 | m_data = XRESIZEVEC (char, m_data, m_size); |
632 | offset_buffer (offset); |
633 | } |
634 | } |
635 | |
636 | /* Read more data into the cache. Extends the cache if need be. |
637 | Returns TRUE iff new data could be read. */ |
638 | |
639 | bool |
640 | file_cache_slot::read_data () |
641 | { |
642 | if (feof (stream: m_fp) || ferror (stream: m_fp)) |
643 | return false; |
644 | |
645 | maybe_grow (); |
646 | |
647 | char * from = m_data + m_nb_read; |
648 | size_t to_read = m_size - m_nb_read; |
649 | size_t nb_read = fread (ptr: from, size: 1, n: to_read, stream: m_fp); |
650 | |
651 | if (ferror (stream: m_fp)) |
652 | return false; |
653 | |
654 | m_nb_read += nb_read; |
655 | return !!nb_read; |
656 | } |
657 | |
/* Read new data iff the cache needs to be filled with more data
   coming from the file FP.  Return TRUE iff the cache was filled with
   more data.  */
661 | |
662 | bool |
663 | file_cache_slot::maybe_read_data () |
664 | { |
665 | if (!needs_read_p ()) |
666 | return false; |
667 | return read_data (); |
668 | } |
669 | |
670 | /* Helper function for file_cache_slot::get_next_line (), to find the end of |
671 | the next line. Returns with the memchr convention, i.e. nullptr if a line |
672 | terminator was not found. We need to determine line endings in the same |
673 | manner that libcpp does: any of \n, \r\n, or \r is a line ending. */ |
674 | |
/* Helper function for file_cache_slot::get_next_line (), to find the end of
   the next line.  Returns with the memchr convention, i.e. nullptr if a line
   terminator was not found.  We need to determine line endings in the same
   manner that libcpp does: any of \n, \r\n, or \r is a line ending.  */

static char *
find_end_of_line (char *s, size_t len)
{
  char *const limit = s + len;
  while (s != limit)
    {
      if (*s == '\n')
	return s;
      if (*s == '\r')
	{
	  char *following = s + 1;
	  if (following == limit)
	    /* Don't find the line ending if \r is the very last character
	       in the buffer; we do not know if it's the end of the file or
	       just the end of what has been read so far, and we wouldn't
	       want to break in the middle of what's actually a \r\n
	       sequence.  Instead, the case of a file ending in a \r is
	       handled by the caller.  */
	    return nullptr;
	  /* For \r\n report the \n; for a lone \r report the \r.  */
	  return *following == '\n' ? following : s;
	}
      ++s;
    }
  return nullptr;
}
700 | |
701 | /* Read a new line from file FP, using C as a cache for the data |
702 | coming from the file. Upon successful completion, *LINE is set to |
703 | the beginning of the line found. *LINE points directly in the |
704 | line cache and is only valid until the next call of get_next_line. |
705 | *LINE_LEN is set to the length of the line. Note that the line |
706 | does not contain any terminal delimiter. This function returns |
707 | true if some data was read or process from the cache, false |
708 | otherwise. Note that subsequent calls to get_next_line might |
709 | make the content of *LINE invalid. */ |
710 | |
bool
file_cache_slot::get_next_line (char **line, ssize_t *line_len)
{
  /* Fill the cache with data to process.  */
  maybe_read_data ();

  size_t remaining_size = m_nb_read - m_line_start_idx;
  if (remaining_size == 0)
    /* There is no more data to process.  */
    return false;

  char *line_start = m_data + m_line_start_idx;

  char *next_line_start = NULL;
  size_t len = 0;
  char *line_end = find_end_of_line (line_start, remaining_size);
  if (line_end == NULL)
    {
      /* We haven't found an end-of-line delimiter in the cache.
	 Fill the cache with more data from the file and look again.  */
      while (maybe_read_data ())
	{
	  /* Reading may have grown (and relocated) m_data, so recompute
	     the pointers from the stable indices.  */
	  line_start = m_data + m_line_start_idx;
	  remaining_size = m_nb_read - m_line_start_idx;
	  line_end = find_end_of_line (line_start, remaining_size);
	  if (line_end != NULL)
	    {
	      next_line_start = line_end + 1;
	      break;
	    }
	}
      if (line_end == NULL)
	{
	  /* We've loaded all the file into the cache and still no
	     terminator.  Let's say the line ends up at one byte past the
	     end of the file.  This is to stay consistent with the case
	     of when the line ends up with a terminator and line_end points to
	     that.  That consistency is useful below in the len calculation.

	     If the file ends in a \r, we didn't identify it as a line
	     terminator above, so do that now instead.  */
	  line_end = m_data + m_nb_read;
	  if (m_nb_read && line_end[-1] == '\r')
	    {
	      --line_end;
	      m_missing_trailing_newline = false;
	    }
	  else
	    m_missing_trailing_newline = true;
	}
      else
	m_missing_trailing_newline = false;
    }
  else
    {
      next_line_start = line_end + 1;
      m_missing_trailing_newline = false;
    }

  /* A read error invalidates whatever we assembled above.  */
  if (m_fp && ferror (m_fp))
    return false;

  /* At this point, we've found the end of the line.  It either points to
     the line terminator or to one byte after the last byte of the file.  */
  gcc_assert (line_end != NULL);

  len = line_end - line_start;

  if (m_line_start_idx < m_nb_read)
    *line = line_start;

  ++m_line_num;

  /* Before we update our line record, make sure the hint about the
     total number of lines of the file is correct.  If it's not, then
     we give up recording line boundaries from now on.  */
  bool update_line_record = true;
  if (m_line_num > m_total_lines)
    update_line_record = false;

  /* Now update our line record so that re-reading lines from
     before m_line_start_idx is faster.  */
  if (update_line_record
      && m_line_record.length () < line_record_size)
    {
      /* If the file lines fits in the line record, we just record all
	 its lines ...  */
      if (m_total_lines <= line_record_size
	  && m_line_num > m_line_record.length ())
	m_line_record.safe_push
	  (file_cache_slot::line_info (m_line_num,
				       m_line_start_idx,
				       line_end - m_data));
      else if (m_total_lines > line_record_size)
	{
	  /* ... otherwise, we just scale total_lines down to
	     line_record_size lines.  */
	  size_t n = (m_line_num * line_record_size) / m_total_lines;
	  if (m_line_record.length () == 0
	      || n >= m_line_record.length ())
	    m_line_record.safe_push
	      (file_cache_slot::line_info (m_line_num,
					   m_line_start_idx,
					   line_end - m_data));
	}
    }

  /* Update m_line_start_idx so that it points to the next line to be
     read.  */
  if (next_line_start)
    m_line_start_idx = next_line_start - m_data;
  else
    /* We didn't find any terminal '\n'.  Let's consider that the end
       of line is the end of the data in the cache.  The next
       invocation of get_next_line will either read more data from the
       underlying file or return false early because we've reached the
       end of the file.  */
    m_line_start_idx = m_nb_read;

  *line_len = len;

  return true;
}
834 | |
835 | /* Consume the next bytes coming from the cache (or from its |
836 | underlying file if there are remaining unread bytes in the file) |
837 | until we reach the next end-of-line (or end-of-file). There is no |
838 | copying from the cache involved. Return TRUE upon successful |
839 | completion. */ |
840 | |
841 | bool |
842 | file_cache_slot::goto_next_line () |
843 | { |
844 | char *l; |
845 | ssize_t len; |
846 | |
847 | return get_next_line (line: &l, line_len: &len); |
848 | } |
849 | |
/* Read an arbitrary line number LINE_NUM from the file cached in C.
   If the line was read successfully, *LINE points to the beginning
   of the line in the file cache and *LINE_LEN is the length of the
   line.  *LINE is not nul-terminated, but may contain zero bytes.
   *LINE is only valid until the next call of read_line_num.
   This function returns TRUE if a line was read.  */
856 | |
bool
file_cache_slot::read_line_num (size_t line_num,
				char ** line, ssize_t *line_len)
{
  gcc_assert (line_num > 0);

  if (line_num <= m_line_num)
    {
      /* We've been asked to read lines that are before m_line_num.
	 So lets use our line record (if it's not empty) to try to
	 avoid re-reading the file from the beginning again.  */

      if (m_line_record.is_empty ())
	{
	  /* No record: rewind to the start of the cached data.  */
	  m_line_start_idx = 0;
	  m_line_num = 0;
	}
      else
	{
	  file_cache_slot::line_info *i = NULL;
	  if (m_total_lines <= line_record_size)
	    {
	      /* In languages where the input file is not totally
		 preprocessed up front, the m_total_lines hint
		 can be smaller than the number of lines of the
		 file.  In that case, only the first
		 m_total_lines have been recorded.

		 Otherwise, the first m_total_lines we've read have
		 their start/end recorded here.  */
	      i = (line_num <= m_total_lines)
		? &m_line_record[line_num - 1]
		: &m_line_record[m_total_lines - 1];
	      gcc_assert (i->line_num <= line_num);
	    }
	  else
	    {
	      /* So the file had more lines than our line record
		 size.  Thus the number of lines we've recorded has
		 been scaled down to line_record_size.  Let's
		 pick the start/end of the recorded line that is
		 closest to line_num.  */
	      size_t n = (line_num <= m_total_lines)
		? line_num * line_record_size / m_total_lines
		: m_line_record.length () - 1;
	      if (n < m_line_record.length ())
		{
		  i = &m_line_record[n];
		  gcc_assert (i->line_num <= line_num);
		}
	    }

	  if (i && i->line_num == line_num)
	    {
	      /* We have the start/end of the line.  */
	      *line = m_data + i->start_pos;
	      *line_len = i->end_pos - i->start_pos;
	      return true;
	    }

	  /* Resume scanning from the recorded line nearest (but not
	     past) the requested one, or from the very beginning.  */
	  if (i)
	    {
	      m_line_start_idx = i->start_pos;
	      m_line_num = i->line_num - 1;
	    }
	  else
	    {
	      m_line_start_idx = 0;
	      m_line_num = 0;
	    }
	}
    }

  /* Let's walk from line m_line_num up to line_num - 1, without
     copying any line.  */
  while (m_line_num < line_num - 1)
    if (!goto_next_line ())
      return false;

  /* The line we want is the next one.  Let's read and copy it back to
     the caller.  */
  return get_next_line (line, line_len);
}
940 | |
941 | /* Return the physical source line that corresponds to FILE_PATH/LINE. |
942 | The line is not nul-terminated. The returned pointer is only |
943 | valid until the next call of location_get_source_line. |
944 | Note that the line can contain several null characters, |
945 | so the returned value's length has the actual length of the line. |
946 | If the function fails, a NULL char_span is returned. */ |
947 | |
948 | char_span |
949 | file_cache::get_source_line (const char *file_path, int line) |
950 | { |
951 | char *buffer = NULL; |
952 | ssize_t len; |
953 | |
954 | if (line == 0) |
955 | return char_span (NULL, 0); |
956 | |
957 | if (file_path == NULL) |
958 | return char_span (NULL, 0); |
959 | |
960 | file_cache_slot *c = lookup_or_add_file (file_path); |
961 | if (c == NULL) |
962 | return char_span (NULL, 0); |
963 | |
964 | bool read = c->read_line_num (line_num: line, line: &buffer, line_len: &len); |
965 | if (!read) |
966 | return char_span (NULL, 0); |
967 | |
968 | return char_span (buffer, len); |
969 | } |
970 | |
971 | char_span |
972 | location_get_source_line (const char *file_path, int line) |
973 | { |
974 | diagnostic_file_cache_init (); |
975 | return global_dc->get_file_cache ()->get_source_line (file_path, line); |
976 | } |
977 | |
978 | /* Return a NUL-terminated copy of the source text between two locations, or |
979 | NULL if the arguments are invalid. The caller is responsible for freeing |
980 | the return value. */ |
981 | |
982 | char * |
983 | get_source_text_between (location_t start, location_t end) |
984 | { |
985 | expanded_location expstart = |
986 | expand_location_to_spelling_point (start, aspect: LOCATION_ASPECT_START); |
987 | expanded_location expend = |
988 | expand_location_to_spelling_point (end, aspect: LOCATION_ASPECT_FINISH); |
989 | |
990 | /* If the locations are in different files or the end comes before the |
991 | start, give up and return nothing. */ |
992 | if (!expstart.file || !expend.file) |
993 | return NULL; |
994 | if (strcmp (s1: expstart.file, s2: expend.file) != 0) |
995 | return NULL; |
996 | if (expstart.line > expend.line) |
997 | return NULL; |
998 | if (expstart.line == expend.line |
999 | && expstart.column > expend.column) |
1000 | return NULL; |
1001 | /* These aren't real column numbers, give up. */ |
1002 | if (expstart.column == 0 || expend.column == 0) |
1003 | return NULL; |
1004 | |
1005 | /* For a single line we need to trim both edges. */ |
1006 | if (expstart.line == expend.line) |
1007 | { |
1008 | char_span line = location_get_source_line (file_path: expstart.file, line: expstart.line); |
1009 | if (line.length () < 1) |
1010 | return NULL; |
1011 | int s = expstart.column - 1; |
1012 | int len = expend.column - s; |
1013 | if (line.length () < (size_t)expend.column) |
1014 | return NULL; |
1015 | return line.subspan (offset: s, n_elts: len).xstrdup (); |
1016 | } |
1017 | |
1018 | struct obstack buf_obstack; |
1019 | obstack_init (&buf_obstack); |
1020 | |
1021 | /* Loop through all lines in the range and append each to buf; may trim |
1022 | parts of the start and end lines off depending on column values. */ |
1023 | for (int lnum = expstart.line; lnum <= expend.line; ++lnum) |
1024 | { |
1025 | char_span line = location_get_source_line (file_path: expstart.file, line: lnum); |
1026 | if (line.length () < 1 && (lnum != expstart.line && lnum != expend.line)) |
1027 | continue; |
1028 | |
1029 | /* For the first line in the range, only start at expstart.column */ |
1030 | if (lnum == expstart.line) |
1031 | { |
1032 | unsigned off = expstart.column - 1; |
1033 | if (line.length () < off) |
1034 | return NULL; |
1035 | line = line.subspan (offset: off, n_elts: line.length() - off); |
1036 | } |
1037 | /* For the last line, don't go past expend.column */ |
1038 | else if (lnum == expend.line) |
1039 | { |
1040 | if (line.length () < (size_t)expend.column) |
1041 | return NULL; |
1042 | line = line.subspan (offset: 0, n_elts: expend.column); |
1043 | } |
1044 | |
1045 | /* Combine spaces at the beginning of later lines. */ |
1046 | if (lnum > expstart.line) |
1047 | { |
1048 | unsigned off; |
1049 | for (off = 0; off < line.length(); ++off) |
1050 | if (line[off] != ' ' && line[off] != '\t') |
1051 | break; |
1052 | if (off > 0) |
1053 | { |
1054 | obstack_1grow (&buf_obstack, ' '); |
1055 | line = line.subspan (offset: off, n_elts: line.length() - off); |
1056 | } |
1057 | } |
1058 | |
1059 | /* This does not include any trailing newlines. */ |
1060 | obstack_grow (&buf_obstack, line.get_buffer (), line.length ()); |
1061 | } |
1062 | |
1063 | /* NUL-terminate and finish the buf obstack. */ |
1064 | obstack_1grow (&buf_obstack, 0); |
1065 | const char *buf = (const char *) obstack_finish (&buf_obstack); |
1066 | |
1067 | return xstrdup (buf); |
1068 | } |
1069 | |
1070 | |
1071 | char_span |
1072 | file_cache::get_source_file_content (const char *file_path) |
1073 | { |
1074 | file_cache_slot *c = lookup_or_add_file (file_path); |
1075 | if (c == nullptr) |
1076 | return char_span (nullptr, 0); |
1077 | return c->get_full_file_content (); |
1078 | } |
1079 | |
1080 | |
1081 | /* Get a borrowed char_span to the full content of FILE_PATH |
1082 | as decoded according to the input charset, encoded as UTF-8. */ |
1083 | |
1084 | char_span |
1085 | get_source_file_content (const char *file_path) |
1086 | { |
1087 | diagnostic_file_cache_init (); |
1088 | return global_dc->get_file_cache ()->get_source_file_content (file_path); |
1089 | } |
1090 | |
/* Determine if FILE_PATH is missing a trailing newline on its final line.
   Only valid to call once all of the file has been loaded, by
   requesting a line number beyond the end of the file. */
1094 | |
1095 | bool |
1096 | location_missing_trailing_newline (const char *file_path) |
1097 | { |
1098 | diagnostic_file_cache_init (); |
1099 | |
1100 | file_cache_slot *c = global_dc->get_file_cache ()->lookup_or_add_file (file_path); |
1101 | if (c == NULL) |
1102 | return false; |
1103 | |
1104 | return c->missing_trailing_newline_p (); |
1105 | } |
1106 | |
/* Test if the location originates from the spelling location of a
   built-in token.  That is, return TRUE if LOC is a (possibly
   virtual) location of a built-in token that appears in the expansion
   list of a macro.  Note that this function also works on tokens
   that result from built-in tokens.  For instance, it would return
   true if passed a token "4" that is the result of the expansion of
   the built-in __LINE__ macro. */
1114 | bool |
1115 | is_location_from_builtin_token (location_t loc) |
1116 | { |
1117 | const line_map_ordinary *map = NULL; |
1118 | loc = linemap_resolve_location (line_table, loc, |
1119 | lrk: LRK_SPELLING_LOCATION, loc_map: &map); |
1120 | return loc == BUILTINS_LOCATION; |
1121 | } |
1122 | |
1123 | /* Expand the source location LOC into a human readable location. If |
1124 | LOC is virtual, it resolves to the expansion point of the involved |
1125 | macro. If LOC resolves to a builtin location, the file name of the |
1126 | readable location is set to the string "<built-in>". */ |
1127 | |
1128 | expanded_location |
1129 | expand_location (location_t loc) |
1130 | { |
1131 | return expand_location_1 (set: line_table, loc, /*expansion_point_p=*/true, |
1132 | aspect: LOCATION_ASPECT_CARET); |
1133 | } |
1134 | |
1135 | /* Expand the source location LOC into a human readable location. If |
1136 | LOC is virtual, it resolves to the expansion location of the |
1137 | relevant macro. If LOC resolves to a builtin location, the file |
1138 | name of the readable location is set to the string |
1139 | "<built-in>". */ |
1140 | |
1141 | expanded_location |
1142 | expand_location_to_spelling_point (location_t loc, |
1143 | enum location_aspect aspect) |
1144 | { |
1145 | return expand_location_1 (set: line_table, loc, /*expansion_point_p=*/false, |
1146 | aspect); |
1147 | } |
1148 | |
1149 | /* The rich_location class within libcpp requires a way to expand |
1150 | location_t instances, and relies on the client code |
1151 | providing a symbol named |
1152 | linemap_client_expand_location_to_spelling_point |
1153 | to do this. |
1154 | |
1155 | This is the implementation for libcommon.a (all host binaries), |
1156 | which simply calls into expand_location_1. */ |
1157 | |
1158 | expanded_location |
1159 | linemap_client_expand_location_to_spelling_point (const line_maps *set, |
1160 | location_t loc, |
1161 | enum location_aspect aspect) |
1162 | { |
1163 | return expand_location_1 (set, loc, /*expansion_point_p=*/false, aspect); |
1164 | } |
1165 | |
1166 | |
1167 | /* If LOCATION is in a system header and if it is a virtual location |
1168 | for a token coming from the expansion of a macro, unwind it to |
1169 | the location of the expansion point of the macro. If the expansion |
1170 | point is also in a system header return the original LOCATION. |
1171 | Otherwise, return the location of the expansion point. |
1172 | |
1173 | This is used for instance when we want to emit diagnostics about a |
1174 | token that may be located in a macro that is itself defined in a |
1175 | system header, for example, for the NULL macro. In such a case, if |
1176 | LOCATION were passed directly to diagnostic functions such as |
1177 | warning_at, the diagnostic would be suppressed (unless |
1178 | -Wsystem-headers). */ |
1179 | |
1180 | location_t |
1181 | (location_t location) |
1182 | { |
1183 | if (!in_system_header_at (loc: location)) |
1184 | return location; |
1185 | |
1186 | location_t xloc = linemap_resolve_location (line_table, loc: location, |
1187 | lrk: LRK_MACRO_EXPANSION_POINT, |
1188 | NULL); |
1189 | return in_system_header_at (loc: xloc) ? location : xloc; |
1190 | } |
1191 | |
1192 | /* If LOCATION is a virtual location for a token coming from the expansion |
1193 | of a macro, unwind to the location of the expansion point of the macro. */ |
1194 | |
1195 | location_t |
1196 | expansion_point_location (location_t location) |
1197 | { |
1198 | return linemap_resolve_location (line_table, loc: location, |
1199 | lrk: LRK_MACRO_EXPANSION_POINT, NULL); |
1200 | } |
1201 | |
1202 | /* Construct a location with caret at CARET, ranging from START to |
1203 | FINISH. |
1204 | |
1205 | For example, consider: |
1206 | |
1207 | 11111111112 |
1208 | 12345678901234567890 |
1209 | 522 |
1210 | 523 return foo + bar; |
1211 | ~~~~^~~~~ |
1212 | 524 |
1213 | |
1214 | The location's caret is at the "+", line 523 column 15, but starts |
1215 | earlier, at the "f" of "foo" at column 11. The finish is at the "r" |
1216 | of "bar" at column 19. */ |
1217 | |
1218 | location_t |
1219 | make_location (location_t caret, location_t start, location_t finish) |
1220 | { |
1221 | return line_table->make_location (caret, start, finish); |
1222 | } |
1223 | |
1224 | /* Same as above, but taking a source range rather than two locations. */ |
1225 | |
1226 | location_t |
1227 | make_location (location_t caret, source_range src_range) |
1228 | { |
1229 | location_t pure_loc = get_pure_location (loc: caret); |
1230 | return line_table->get_or_create_combined_loc (locus: pure_loc, src_range, |
1231 | data: nullptr, discriminator: 0); |
1232 | } |
1233 | |
1234 | /* An expanded_location stores the column in byte units. This function |
1235 | converts that column to display units. That requires reading the associated |
1236 | source line in order to calculate the display width. If that cannot be done |
1237 | for any reason, then returns the byte column as a fallback. */ |
1238 | int |
1239 | location_compute_display_column (expanded_location exploc, |
1240 | const cpp_char_column_policy &policy) |
1241 | { |
1242 | if (!(exploc.file && *exploc.file && exploc.line && exploc.column)) |
1243 | return exploc.column; |
1244 | char_span line = location_get_source_line (file_path: exploc.file, line: exploc.line); |
1245 | /* If line is NULL, this function returns exploc.column which is the |
1246 | desired fallback. */ |
1247 | return cpp_byte_column_to_display_column (data: line.get_buffer (), data_length: line.length (), |
1248 | column: exploc.column, policy); |
1249 | } |
1250 | |
1251 | /* Dump statistics to stderr about the memory usage of the line_table |
1252 | set of line maps. This also displays some statistics about macro |
1253 | expansion. */ |
1254 | |
1255 | void |
1256 | dump_line_table_statistics (void) |
1257 | { |
1258 | struct linemap_stats s; |
1259 | long total_used_map_size, |
1260 | macro_maps_size, |
1261 | total_allocated_map_size; |
1262 | |
1263 | memset (s: &s, c: 0, n: sizeof (s)); |
1264 | |
1265 | linemap_get_statistics (line_table, &s); |
1266 | |
1267 | macro_maps_size = s.macro_maps_used_size |
1268 | + s.macro_maps_locations_size; |
1269 | |
1270 | total_allocated_map_size = s.ordinary_maps_allocated_size |
1271 | + s.macro_maps_allocated_size |
1272 | + s.macro_maps_locations_size; |
1273 | |
1274 | total_used_map_size = s.ordinary_maps_used_size |
1275 | + s.macro_maps_used_size |
1276 | + s.macro_maps_locations_size; |
1277 | |
1278 | fprintf (stderr, format: "Number of expanded macros: %5ld\n" , |
1279 | s.num_expanded_macros); |
1280 | if (s.num_expanded_macros != 0) |
1281 | fprintf (stderr, format: "Average number of tokens per macro expansion: %5ld\n" , |
1282 | s.num_macro_tokens / s.num_expanded_macros); |
1283 | fprintf (stderr, |
1284 | format: "\nLine Table allocations during the " |
1285 | "compilation process\n" ); |
1286 | fprintf (stderr, format: "Number of ordinary maps used: " PRsa (5) "\n" , |
1287 | SIZE_AMOUNT (s.num_ordinary_maps_used)); |
1288 | fprintf (stderr, format: "Ordinary map used size: " PRsa (5) "\n" , |
1289 | SIZE_AMOUNT (s.ordinary_maps_used_size)); |
1290 | fprintf (stderr, format: "Number of ordinary maps allocated: " PRsa (5) "\n" , |
1291 | SIZE_AMOUNT (s.num_ordinary_maps_allocated)); |
1292 | fprintf (stderr, format: "Ordinary maps allocated size: " PRsa (5) "\n" , |
1293 | SIZE_AMOUNT (s.ordinary_maps_allocated_size)); |
1294 | fprintf (stderr, format: "Number of macro maps used: " PRsa (5) "\n" , |
1295 | SIZE_AMOUNT (s.num_macro_maps_used)); |
1296 | fprintf (stderr, format: "Macro maps used size: " PRsa (5) "\n" , |
1297 | SIZE_AMOUNT (s.macro_maps_used_size)); |
1298 | fprintf (stderr, format: "Macro maps locations size: " PRsa (5) "\n" , |
1299 | SIZE_AMOUNT (s.macro_maps_locations_size)); |
1300 | fprintf (stderr, format: "Macro maps size: " PRsa (5) "\n" , |
1301 | SIZE_AMOUNT (macro_maps_size)); |
1302 | fprintf (stderr, format: "Duplicated maps locations size: " PRsa (5) "\n" , |
1303 | SIZE_AMOUNT (s.duplicated_macro_maps_locations_size)); |
1304 | fprintf (stderr, format: "Total allocated maps size: " PRsa (5) "\n" , |
1305 | SIZE_AMOUNT (total_allocated_map_size)); |
1306 | fprintf (stderr, format: "Total used maps size: " PRsa (5) "\n" , |
1307 | SIZE_AMOUNT (total_used_map_size)); |
1308 | fprintf (stderr, format: "Ad-hoc table size: " PRsa (5) "\n" , |
1309 | SIZE_AMOUNT (s.adhoc_table_size)); |
1310 | fprintf (stderr, format: "Ad-hoc table entries used: " PRsa (5) "\n" , |
1311 | SIZE_AMOUNT (s.adhoc_table_entries_used)); |
1312 | fprintf (stderr, format: "optimized_ranges: " PRsa (5) "\n" , |
1313 | SIZE_AMOUNT (line_table->m_num_optimized_ranges)); |
1314 | fprintf (stderr, format: "unoptimized_ranges: " PRsa (5) "\n" , |
1315 | SIZE_AMOUNT (line_table->m_num_unoptimized_ranges)); |
1316 | |
1317 | fprintf (stderr, format: "\n" ); |
1318 | } |
1319 | |
1320 | /* Get location one beyond the final location in ordinary map IDX. */ |
1321 | |
1322 | static location_t |
1323 | get_end_location (class line_maps *set, unsigned int idx) |
1324 | { |
1325 | if (idx == LINEMAPS_ORDINARY_USED (set) - 1) |
1326 | return set->highest_location; |
1327 | |
1328 | struct line_map *next_map = LINEMAPS_ORDINARY_MAP_AT (set, index: idx + 1); |
1329 | return MAP_START_LOCATION (map: next_map); |
1330 | } |
1331 | |
1332 | /* Helper function for write_digit_row. */ |
1333 | |
/* Helper function for write_digit_row: write the units digit of DIGIT
   to STREAM.  */

static void
write_digit (FILE *stream, int digit)
{
  fputc ('0' + (digit % 10), stream);
}
1339 | |
1340 | /* Helper function for dump_location_info. |
1341 | Write a row of numbers to STREAM, numbering a source line, |
1342 | giving the units, tens, hundreds etc of the column number. */ |
1343 | |
1344 | static void |
1345 | write_digit_row (FILE *stream, int indent, |
1346 | const line_map_ordinary *map, |
1347 | location_t loc, int max_col, int divisor) |
1348 | { |
1349 | fprintf (stream: stream, format: "%*c" , indent, ' '); |
1350 | fprintf (stream: stream, format: "|" ); |
1351 | for (int column = 1; column < max_col; column++) |
1352 | { |
1353 | location_t column_loc = loc + (column << map->m_range_bits); |
1354 | write_digit (stream, digit: column_loc / divisor); |
1355 | } |
1356 | fprintf (stream: stream, format: "\n" ); |
1357 | } |
1358 | |
1359 | /* Write a half-closed (START) / half-open (END) interval of |
1360 | location_t to STREAM. */ |
1361 | |
1362 | static void |
1363 | dump_location_range (FILE *stream, |
1364 | location_t start, location_t end) |
1365 | { |
1366 | fprintf (stream: stream, |
1367 | format: " location_t interval: %u <= loc < %u\n" , |
1368 | start, end); |
1369 | } |
1370 | |
1371 | /* Write a labelled description of a half-closed (START) / half-open (END) |
1372 | interval of location_t to STREAM. */ |
1373 | |
1374 | static void |
1375 | dump_labelled_location_range (FILE *stream, |
1376 | const char *name, |
1377 | location_t start, location_t end) |
1378 | { |
1379 | fprintf (stream: stream, format: "%s\n" , name); |
1380 | dump_location_range (stream, start, end); |
1381 | fprintf (stream: stream, format: "\n" ); |
1382 | } |
1383 | |
1384 | /* Write a visualization of the locations in the line_table to STREAM. */ |
1385 | |
1386 | void |
1387 | dump_location_info (FILE *stream) |
1388 | { |
1389 | /* Visualize the reserved locations. */ |
1390 | dump_labelled_location_range (stream, name: "RESERVED LOCATIONS" , |
1391 | start: 0, end: RESERVED_LOCATION_COUNT); |
1392 | |
1393 | /* Visualize the ordinary line_map instances, rendering the sources. */ |
1394 | for (unsigned int idx = 0; idx < LINEMAPS_ORDINARY_USED (set: line_table); idx++) |
1395 | { |
1396 | location_t end_location = get_end_location (set: line_table, idx); |
1397 | /* half-closed: doesn't include this one. */ |
1398 | |
1399 | const line_map_ordinary *map |
1400 | = LINEMAPS_ORDINARY_MAP_AT (set: line_table, index: idx); |
1401 | fprintf (stream: stream, format: "ORDINARY MAP: %i\n" , idx); |
1402 | dump_location_range (stream, |
1403 | start: MAP_START_LOCATION (map), end: end_location); |
1404 | fprintf (stream: stream, format: " file: %s\n" , ORDINARY_MAP_FILE_NAME (ord_map: map)); |
1405 | fprintf (stream: stream, format: " starting at line: %i\n" , |
1406 | ORDINARY_MAP_STARTING_LINE_NUMBER (ord_map: map)); |
1407 | fprintf (stream: stream, format: " column and range bits: %i\n" , |
1408 | map->m_column_and_range_bits); |
1409 | fprintf (stream: stream, format: " column bits: %i\n" , |
1410 | map->m_column_and_range_bits - map->m_range_bits); |
1411 | fprintf (stream: stream, format: " range bits: %i\n" , |
1412 | map->m_range_bits); |
1413 | const char * reason; |
1414 | switch (map->reason) { |
1415 | case LC_ENTER: |
1416 | reason = "LC_ENTER" ; |
1417 | break; |
1418 | case LC_LEAVE: |
1419 | reason = "LC_LEAVE" ; |
1420 | break; |
1421 | case LC_RENAME: |
1422 | reason = "LC_RENAME" ; |
1423 | break; |
1424 | case LC_RENAME_VERBATIM: |
1425 | reason = "LC_RENAME_VERBATIM" ; |
1426 | break; |
1427 | case LC_ENTER_MACRO: |
1428 | reason = "LC_RENAME_MACRO" ; |
1429 | break; |
1430 | default: |
1431 | reason = "Unknown" ; |
1432 | } |
1433 | fprintf (stream: stream, format: " reason: %d (%s)\n" , map->reason, reason); |
1434 | |
1435 | const line_map_ordinary *includer_map |
1436 | = linemap_included_from_linemap (set: line_table, map); |
1437 | fprintf (stream: stream, format: " included from location: %d" , |
1438 | linemap_included_from (ord_map: map)); |
1439 | if (includer_map) { |
1440 | fprintf (stream: stream, format: " (in ordinary map %d)" , |
1441 | int (includer_map - line_table->info_ordinary.maps)); |
1442 | } |
1443 | fprintf (stream: stream, format: "\n" ); |
1444 | |
1445 | /* Render the span of source lines that this "map" covers. */ |
1446 | for (location_t loc = MAP_START_LOCATION (map); |
1447 | loc < end_location; |
1448 | loc += (1 << map->m_range_bits) ) |
1449 | { |
1450 | gcc_assert (pure_location_p (line_table, loc) ); |
1451 | |
1452 | expanded_location exploc |
1453 | = linemap_expand_location (line_table, map, loc); |
1454 | |
1455 | if (exploc.column == 0) |
1456 | { |
1457 | /* Beginning of a new source line: draw the line. */ |
1458 | |
1459 | char_span line_text = location_get_source_line (file_path: exploc.file, |
1460 | line: exploc.line); |
1461 | if (!line_text) |
1462 | break; |
1463 | fprintf (stream: stream, |
1464 | format: "%s:%3i|loc:%5i|%.*s\n" , |
1465 | exploc.file, exploc.line, |
1466 | loc, |
1467 | (int)line_text.length (), line_text.get_buffer ()); |
1468 | |
1469 | /* "loc" is at column 0, which means "the whole line". |
1470 | Render the locations *within* the line, by underlining |
1471 | it, showing the location_t numeric values |
1472 | at each column. */ |
1473 | size_t max_col = (1 << map->m_column_and_range_bits) - 1; |
1474 | if (max_col > line_text.length ()) |
1475 | max_col = line_text.length () + 1; |
1476 | |
1477 | int len_lnum = num_digits (exploc.line); |
1478 | if (len_lnum < 3) |
1479 | len_lnum = 3; |
1480 | int len_loc = num_digits (loc); |
1481 | if (len_loc < 5) |
1482 | len_loc = 5; |
1483 | |
1484 | int indent = 6 + strlen (s: exploc.file) + len_lnum + len_loc; |
1485 | |
1486 | /* Thousands. */ |
1487 | if (end_location > 999) |
1488 | write_digit_row (stream, indent, map, loc, max_col, divisor: 1000); |
1489 | |
1490 | /* Hundreds. */ |
1491 | if (end_location > 99) |
1492 | write_digit_row (stream, indent, map, loc, max_col, divisor: 100); |
1493 | |
1494 | /* Tens. */ |
1495 | write_digit_row (stream, indent, map, loc, max_col, divisor: 10); |
1496 | |
1497 | /* Units. */ |
1498 | write_digit_row (stream, indent, map, loc, max_col, divisor: 1); |
1499 | } |
1500 | } |
1501 | fprintf (stream: stream, format: "\n" ); |
1502 | } |
1503 | |
1504 | /* Visualize unallocated values. */ |
1505 | dump_labelled_location_range (stream, name: "UNALLOCATED LOCATIONS" , |
1506 | start: line_table->highest_location, |
1507 | end: LINEMAPS_MACRO_LOWEST_LOCATION (set: line_table)); |
1508 | |
1509 | /* Visualize the macro line_map instances, rendering the sources. */ |
1510 | for (unsigned int i = 0; i < LINEMAPS_MACRO_USED (set: line_table); i++) |
1511 | { |
1512 | /* Each macro map that is allocated owns location_t values |
1513 | that are *lower* that the one before them. |
1514 | Hence it's meaningful to view them either in order of ascending |
1515 | source locations, or in order of ascending macro map index. */ |
1516 | const bool ascending_location_ts = true; |
1517 | unsigned int idx = (ascending_location_ts |
1518 | ? (LINEMAPS_MACRO_USED (set: line_table) - (i + 1)) |
1519 | : i); |
1520 | const line_map_macro *map = LINEMAPS_MACRO_MAP_AT (set: line_table, index: idx); |
1521 | fprintf (stream: stream, format: "MACRO %i: %s (%u tokens)\n" , |
1522 | idx, |
1523 | linemap_map_get_macro_name (map), |
1524 | MACRO_MAP_NUM_MACRO_TOKENS (macro_map: map)); |
1525 | dump_location_range (stream, |
1526 | start: map->start_location, |
1527 | end: (map->start_location |
1528 | + MACRO_MAP_NUM_MACRO_TOKENS (macro_map: map))); |
1529 | inform (map->get_expansion_point_location (), |
1530 | "expansion point is location %i" , |
1531 | map->get_expansion_point_location ()); |
1532 | fprintf (stream: stream, format: " map->start_location: %u\n" , |
1533 | map->start_location); |
1534 | |
1535 | fprintf (stream: stream, format: " macro_locations:\n" ); |
1536 | for (unsigned int i = 0; i < MACRO_MAP_NUM_MACRO_TOKENS (macro_map: map); i++) |
1537 | { |
1538 | location_t x = MACRO_MAP_LOCATIONS (macro_map: map)[2 * i]; |
1539 | location_t y = MACRO_MAP_LOCATIONS (macro_map: map)[(2 * i) + 1]; |
1540 | |
1541 | /* linemap_add_macro_token encodes token numbers in an expansion |
1542 | by putting them after MAP_START_LOCATION. */ |
1543 | |
1544 | /* I'm typically seeing 4 uninitialized entries at the end of |
1545 | 0xafafafaf. |
1546 | This appears to be due to macro.cc:replace_args |
1547 | adding 2 extra args for padding tokens; presumably there may |
1548 | be a leading and/or trailing padding token injected, |
1549 | each for 2 more location slots. |
1550 | This would explain there being up to 4 location_ts slots |
1551 | that may be uninitialized. */ |
1552 | |
1553 | fprintf (stream: stream, format: " %u: %u, %u\n" , |
1554 | i, |
1555 | x, |
1556 | y); |
1557 | if (x == y) |
1558 | { |
1559 | if (x < MAP_START_LOCATION (map)) |
1560 | inform (x, "token %u has %<x-location == y-location == %u%>" , |
1561 | i, x); |
1562 | else |
1563 | fprintf (stream: stream, |
1564 | format: "x-location == y-location == %u encodes token # %u\n" , |
1565 | x, x - MAP_START_LOCATION (map)); |
1566 | } |
1567 | else |
1568 | { |
1569 | inform (x, "token %u has %<x-location == %u%>" , i, x); |
1570 | inform (x, "token %u has %<y-location == %u%>" , i, y); |
1571 | } |
1572 | } |
1573 | fprintf (stream: stream, format: "\n" ); |
1574 | } |
1575 | |
1576 | /* It appears that MAX_LOCATION_T itself is never assigned to a |
1577 | macro map, presumably due to an off-by-one error somewhere |
1578 | between the logic in linemap_enter_macro and |
1579 | LINEMAPS_MACRO_LOWEST_LOCATION. */ |
1580 | dump_labelled_location_range (stream, name: "MAX_LOCATION_T" , |
1581 | start: MAX_LOCATION_T, |
1582 | end: MAX_LOCATION_T + 1); |
1583 | |
1584 | /* Visualize ad-hoc values. */ |
1585 | dump_labelled_location_range (stream, name: "AD-HOC LOCATIONS" , |
1586 | start: MAX_LOCATION_T + 1, UINT_MAX); |
1587 | } |
1588 | |
1589 | /* string_concat's constructor. */ |
1590 | |
1591 | string_concat::string_concat (int num, location_t *locs) |
1592 | : m_num (num) |
1593 | { |
1594 | m_locs = ggc_vec_alloc <location_t> (c: num); |
1595 | for (int i = 0; i < num; i++) |
1596 | m_locs[i] = locs[i]; |
1597 | } |
1598 | |
1599 | /* string_concat_db's constructor. */ |
1600 | |
1601 | string_concat_db::string_concat_db () |
1602 | { |
1603 | m_table = hash_map <location_hash, string_concat *>::create_ggc (size: 64); |
1604 | } |
1605 | |
1606 | /* Record that a string concatenation occurred, covering NUM |
1607 | string literal tokens. LOCS is an array of size NUM, containing the |
1608 | locations of the tokens. A copy of LOCS is taken. */ |
1609 | |
1610 | void |
1611 | string_concat_db::record_string_concatenation (int num, location_t *locs) |
1612 | { |
1613 | gcc_assert (num > 1); |
1614 | gcc_assert (locs); |
1615 | |
1616 | location_t key_loc = get_key_loc (loc: locs[0]); |
1617 | /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values: |
1618 | any data now recorded under key 'key_loc' would be overwritten by a |
1619 | subsequent call with the same key 'key_loc'. */ |
1620 | if (RESERVED_LOCATION_P (key_loc)) |
1621 | return; |
1622 | |
1623 | string_concat *concat |
1624 | = new (ggc_alloc <string_concat> ()) string_concat (num, locs); |
1625 | m_table->put (k: key_loc, v: concat); |
1626 | } |
1627 | |
1628 | /* Determine if LOC was the location of the initial token of a |
1629 | concatenation of string literal tokens. |
1630 | If so, *OUT_NUM is written to with the number of tokens, and |
1631 | *OUT_LOCS with the location of an array of locations of the |
1632 | tokens, and return true. *OUT_LOCS is a borrowed pointer to |
1633 | storage owned by the string_concat_db. |
1634 | Otherwise, return false. */ |
1635 | |
1636 | bool |
1637 | string_concat_db::get_string_concatenation (location_t loc, |
1638 | int *out_num, |
1639 | location_t **out_locs) |
1640 | { |
1641 | gcc_assert (out_num); |
1642 | gcc_assert (out_locs); |
1643 | |
1644 | location_t key_loc = get_key_loc (loc); |
1645 | /* We don't record data for 'RESERVED_LOCATION_P (key_loc)' key values; see |
1646 | discussion in 'string_concat_db::record_string_concatenation'. */ |
1647 | if (RESERVED_LOCATION_P (key_loc)) |
1648 | return false; |
1649 | |
1650 | string_concat **concat = m_table->get (k: key_loc); |
1651 | if (!concat) |
1652 | return false; |
1653 | |
1654 | *out_num = (*concat)->m_num; |
1655 | *out_locs =(*concat)->m_locs; |
1656 | return true; |
1657 | } |
1658 | |
1659 | /* Internal function. Canonicalize LOC into a form suitable for |
1660 | use as a key within the database, stripping away macro expansion, |
1661 | ad-hoc information, and range information, using the location of |
1662 | the start of LOC within an ordinary linemap. */ |
1663 | |
1664 | location_t |
1665 | string_concat_db::get_key_loc (location_t loc) |
1666 | { |
1667 | loc = linemap_resolve_location (line_table, loc, lrk: LRK_SPELLING_LOCATION, |
1668 | NULL); |
1669 | |
1670 | loc = get_range_from_loc (set: line_table, loc).m_start; |
1671 | |
1672 | return loc; |
1673 | } |
1674 | |
/* Helper class for use within get_substring_ranges_for_loc.
   A vec of cpp_string that takes responsibility for releasing the
   str->text buffer of each str in the vector. */
1678 | |
1679 | class auto_cpp_string_vec : public auto_vec <cpp_string> |
1680 | { |
1681 | public: |
1682 | auto_cpp_string_vec (int alloc) |
1683 | : auto_vec <cpp_string> (alloc) {} |
1684 | |
1685 | ~auto_cpp_string_vec () |
1686 | { |
1687 | /* Clean up the copies within this vec. */ |
1688 | int i; |
1689 | cpp_string *str; |
1690 | FOR_EACH_VEC_ELT (*this, i, str) |
1691 | free (ptr: const_cast <unsigned char *> (str->text)); |
1692 | } |
1693 | }; |
1694 | |
1695 | /* Attempt to populate RANGES with source location information on the |
1696 | individual characters within the string literal found at STRLOC. |
1697 | If CONCATS is non-NULL, then any string literals that the token at |
1698 | STRLOC was concatenated with are also added to RANGES. |
1699 | |
1700 | Return NULL if successful, or an error message if any errors occurred (in |
1701 | which case RANGES may be only partially populated and should not |
1702 | be used). |
1703 | |
1704 | This is implemented by re-parsing the relevant source line(s). */ |
1705 | |
static const char *
get_substring_ranges_for_loc (cpp_reader *pfile,
			      string_concat_db *concats,
			      location_t strloc,
			      enum cpp_ttype type,
			      cpp_substring_ranges &ranges)
{
  gcc_assert (pfile);

  if (strloc == UNKNOWN_LOCATION)
    return "unknown location" ;

  /* Reparsing the strings requires accurate location information.
     If -ftrack-macro-expansion has been overridden from its default
     of 2, then we might have a location of a macro expansion point,
     rather than the location of the literal itself.
     Avoid this by requiring that we have full macro expansion tracking
     for substring locations to be available.  */
  if (cpp_get_options (pfile)->track_macro_expansion != 2)
    return "track_macro_expansion != 2" ;

  /* If #line or # 44 "file"-style directives are present, then there's
     no guarantee that the line numbers we have can be used to locate
     the strings.  For example, we might have a .i file with # directives
     pointing back to lines within a .c file, but the .c file might
     have been edited since the .i file was created.
     In such a case, the safest course is to disable on-demand substring
     locations.  */
  if (line_table->seen_line_directive)
    return "seen line directive" ;

  /* If string concatenation has occurred at STRLOC, get the locations
     of all of the literal tokens making up the compound string.
     Otherwise, just use STRLOC.  */
  int num_locs = 1;
  location_t *strlocs = &strloc;
  if (concats)
    concats->get_string_concatenation (loc: strloc, out_num: &num_locs, out_locs: &strlocs);

  /* Per-token copies of the literal text and per-token location readers;
     the loop below pushes one entry of each per literal token.  */
  auto_cpp_string_vec strs (num_locs);
  auto_vec <cpp_string_location_reader> loc_readers (num_locs);
  for (int i = 0; i < num_locs; i++)
    {
      /* Get range of strloc.  We will use it to locate the start and finish
	 of the literal token within the line.  */
      source_range src_range = get_range_from_loc (set: line_table, loc: strlocs[i]);

      if (src_range.m_start >= LINEMAPS_MACRO_LOWEST_LOCATION (set: line_table))
	{
	  /* If the string token was within a macro expansion, then we can
	     cope with it for the simple case where we have a single token.
	     Otherwise, bail out.  */
	  if (src_range.m_start != src_range.m_finish)
	    return "macro expansion" ;
	}
      else
	{
	  if (src_range.m_start >= LINE_MAP_MAX_LOCATION_WITH_COLS)
	    /* If so, we can't reliably determine where the token started within
	       its line.  */
	    return "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS" ;

	  if (src_range.m_finish >= LINE_MAP_MAX_LOCATION_WITH_COLS)
	    /* If so, we can't reliably determine where the token finished
	       within its line.  */
	    return "range ends after LINE_MAP_MAX_LOCATION_WITH_COLS" ;
	}

      /* Expand both endpoints to (file, line, column) spelling locations,
	 and sanity-check that they describe one in-order span on a single
	 line of a single file.  */
      expanded_location start
	= expand_location_to_spelling_point (loc: src_range.m_start,
					     aspect: LOCATION_ASPECT_START);
      expanded_location finish
	= expand_location_to_spelling_point (loc: src_range.m_finish,
					     aspect: LOCATION_ASPECT_FINISH);
      if (start.file != finish.file)
	return "range endpoints are in different files" ;
      if (start.line != finish.line)
	return "range endpoints are on different lines" ;
      if (start.column > finish.column)
	return "range endpoints are reversed" ;

      char_span line = location_get_source_line (file_path: start.file, line: start.line);
      if (!line)
	return "unable to read source line" ;

      /* Determine the location of the literal (including quotes
	 and leading prefix chars, such as the 'u' in a u""
	 token).  */
      size_t literal_length = finish.column - start.column + 1;

      /* Ensure that we don't crash if we got the wrong location.  */
      if (start.column < 1)
	return "zero start column" ;
      if (line.length () < (start.column - 1 + literal_length))
	return "line is not wide enough" ;

      char_span literal = line.subspan (offset: start.column - 1, n_elts: literal_length);

      cpp_string from;
      from.len = literal_length;
      /* Make a copy of the literal, to avoid having to rely on
	 the lifetime of the copy of the line within the cache.
	 This will be released by the auto_cpp_string_vec dtor.  */
      from.text = (unsigned char *)literal.xstrdup ();
      strs.safe_push (obj: from);

      /* For very long lines, a new linemap could have started
	 halfway through the token.
	 Ensure that the loc_reader uses the linemap of the
	 *end* of the token for its start location.  */
      const line_map_ordinary *start_ord_map;
      linemap_resolve_location (line_table, loc: src_range.m_start,
				lrk: LRK_SPELLING_LOCATION, loc_map: &start_ord_map);
      const line_map_ordinary *final_ord_map;
      linemap_resolve_location (line_table, loc: src_range.m_finish,
				lrk: LRK_SPELLING_LOCATION, loc_map: &final_ord_map);
      if (start_ord_map == NULL || final_ord_map == NULL)
	return "failed to get ordinary maps" ;
      /* Bulletproofing.  We ought to only have different ordinary maps
	 for start vs finish due to line-length jumps.  */
      if (start_ord_map != final_ord_map
	  && start_ord_map->to_file != final_ord_map->to_file)
	return "start and finish are spelled in different ordinary maps" ;
      /* The file from linemap_resolve_location ought to match that from
	 expand_location_to_spelling_point.  */
      if (start_ord_map->to_file != start.file)
	return "mismatching file after resolving linemap" ;

      location_t start_loc
	= linemap_position_for_line_and_column (set: line_table, final_ord_map,
						start.line, start.column);

      cpp_string_location_reader loc_reader (start_loc, line_table);
      loc_readers.safe_push (obj: loc_reader);
    }

  /* Rerun cpp_interpret_string, or rather, a modified version of it.  */
  const char *err = cpp_interpret_string_ranges (pfile, from: strs.address (),
						 loc_readers.address (),
						 count: num_locs, out: &ranges, type);
  if (err)
    return err;

  /* Success: "ranges" should now contain information on the string.  */
  return NULL;
}
1852 | |
1853 | /* Attempt to populate *OUT_LOC with source location information on the |
1854 | given characters within the string literal found at STRLOC. |
1855 | CARET_IDX, START_IDX, and END_IDX refer to offsets within the execution |
1856 | character set. |
1857 | |
1858 | For example, given CARET_IDX = 4, START_IDX = 3, END_IDX = 7 |
1859 | and string literal "012345\n789" |
1860 | *OUT_LOC is written to with: |
1861 | "012345\n789" |
1862 | ~^~~~~ |
1863 | |
1864 | If CONCATS is non-NULL, then any string literals that the token at |
1865 | STRLOC was concatenated with are also considered. |
1866 | |
1867 | This is implemented by re-parsing the relevant source line(s). |
1868 | |
1869 | Return NULL if successful, or an error message if any errors occurred. |
1870 | Error messages are intended for GCC developers (to help debugging) rather |
1871 | than for end-users. */ |
1872 | |
1873 | const char * |
1874 | get_location_within_string (cpp_reader *pfile, |
1875 | string_concat_db *concats, |
1876 | location_t strloc, |
1877 | enum cpp_ttype type, |
1878 | int caret_idx, int start_idx, int end_idx, |
1879 | location_t *out_loc) |
1880 | { |
1881 | gcc_checking_assert (caret_idx >= 0); |
1882 | gcc_checking_assert (start_idx >= 0); |
1883 | gcc_checking_assert (end_idx >= 0); |
1884 | gcc_assert (out_loc); |
1885 | |
1886 | cpp_substring_ranges ranges; |
1887 | const char *err |
1888 | = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges); |
1889 | if (err) |
1890 | return err; |
1891 | |
1892 | if (caret_idx >= ranges.get_num_ranges ()) |
1893 | return "caret_idx out of range" ; |
1894 | if (start_idx >= ranges.get_num_ranges ()) |
1895 | return "start_idx out of range" ; |
1896 | if (end_idx >= ranges.get_num_ranges ()) |
1897 | return "end_idx out of range" ; |
1898 | |
1899 | *out_loc = make_location (caret: ranges.get_range (idx: caret_idx).m_start, |
1900 | start: ranges.get_range (idx: start_idx).m_start, |
1901 | finish: ranges.get_range (idx: end_idx).m_finish); |
1902 | return NULL; |
1903 | } |
1904 | |
1905 | /* Associate the DISCRIMINATOR with LOCUS, and return a new locus. */ |
1906 | |
1907 | location_t |
1908 | location_with_discriminator (location_t locus, int discriminator) |
1909 | { |
1910 | tree block = LOCATION_BLOCK (locus); |
1911 | source_range src_range = get_range_from_loc (set: line_table, loc: locus); |
1912 | locus = get_pure_location (loc: locus); |
1913 | |
1914 | if (locus == UNKNOWN_LOCATION) |
1915 | return locus; |
1916 | |
1917 | return line_table->get_or_create_combined_loc (locus, src_range, data: block, |
1918 | discriminator); |
1919 | } |
1920 | |
1921 | /* Return TRUE if LOCUS represents a location with a discriminator. */ |
1922 | |
1923 | bool |
1924 | has_discriminator (location_t locus) |
1925 | { |
1926 | return get_discriminator_from_loc (locus) != 0; |
1927 | } |
1928 | |
1929 | /* Return the discriminator for LOCUS. */ |
1930 | |
1931 | int |
1932 | get_discriminator_from_loc (location_t locus) |
1933 | { |
1934 | return get_discriminator_from_loc (set: line_table, loc: locus); |
1935 | } |
1936 | |
1937 | #if CHECKING_P |
1938 | |
1939 | namespace selftest { |
1940 | |
1941 | /* Selftests of location handling. */ |
1942 | |
1943 | /* Attempt to populate *OUT_RANGE with source location information on the |
1944 | given character within the string literal found at STRLOC. |
1945 | CHAR_IDX refers to an offset within the execution character set. |
1946 | If CONCATS is non-NULL, then any string literals that the token at |
1947 | STRLOC was concatenated with are also considered. |
1948 | |
1949 | This is implemented by re-parsing the relevant source line(s). |
1950 | |
1951 | Return NULL if successful, or an error message if any errors occurred. |
1952 | Error messages are intended for GCC developers (to help debugging) rather |
1953 | than for end-users. */ |
1954 | |
1955 | static const char * |
1956 | get_source_range_for_char (cpp_reader *pfile, |
1957 | string_concat_db *concats, |
1958 | location_t strloc, |
1959 | enum cpp_ttype type, |
1960 | int char_idx, |
1961 | source_range *out_range) |
1962 | { |
1963 | gcc_checking_assert (char_idx >= 0); |
1964 | gcc_assert (out_range); |
1965 | |
1966 | cpp_substring_ranges ranges; |
1967 | const char *err |
1968 | = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges); |
1969 | if (err) |
1970 | return err; |
1971 | |
1972 | if (char_idx >= ranges.get_num_ranges ()) |
1973 | return "char_idx out of range" ; |
1974 | |
1975 | *out_range = ranges.get_range (idx: char_idx); |
1976 | return NULL; |
1977 | } |
1978 | |
1979 | /* As get_source_range_for_char, but write to *OUT the number |
1980 | of ranges that are available. */ |
1981 | |
1982 | static const char * |
1983 | get_num_source_ranges_for_substring (cpp_reader *pfile, |
1984 | string_concat_db *concats, |
1985 | location_t strloc, |
1986 | enum cpp_ttype type, |
1987 | int *out) |
1988 | { |
1989 | gcc_assert (out); |
1990 | |
1991 | cpp_substring_ranges ranges; |
1992 | const char *err |
1993 | = get_substring_ranges_for_loc (pfile, concats, strloc, type, ranges); |
1994 | |
1995 | if (err) |
1996 | return err; |
1997 | |
1998 | *out = ranges.get_num_ranges (); |
1999 | return NULL; |
2000 | } |
2001 | |
2002 | /* Selftests of location handling. */ |
2003 | |
2004 | /* Verify that compare() on linenum_type handles comparisons over the full |
2005 | range of the type. */ |
2006 | |
2007 | static void |
2008 | test_linenum_comparisons () |
2009 | { |
2010 | linenum_type min_line (0); |
2011 | linenum_type max_line (0xffffffff); |
2012 | ASSERT_EQ (0, compare (min_line, min_line)); |
2013 | ASSERT_EQ (0, compare (max_line, max_line)); |
2014 | |
2015 | ASSERT_GT (compare (max_line, min_line), 0); |
2016 | ASSERT_LT (compare (min_line, max_line), 0); |
2017 | } |
2018 | |
2019 | /* Helper function for verifying location data: when location_t |
2020 | values are > LINE_MAP_MAX_LOCATION_WITH_COLS, they are treated |
2021 | as having column 0. */ |
2022 | |
2023 | static bool |
2024 | should_have_column_data_p (location_t loc) |
2025 | { |
2026 | if (IS_ADHOC_LOC (loc)) |
2027 | loc = get_location_from_adhoc_loc (line_table, loc); |
2028 | if (loc > LINE_MAP_MAX_LOCATION_WITH_COLS) |
2029 | return false; |
2030 | return true; |
2031 | } |
2032 | |
2033 | /* Selftest for should_have_column_data_p. */ |
2034 | |
static void
test_should_have_column_data_p ()
{
  /* The lowest non-reserved location should have column data.  */
  ASSERT_TRUE (should_have_column_data_p (RESERVED_LOCATION_COUNT));
  /* The threshold itself is inclusive...  */
  ASSERT_TRUE
    (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS));
  /* ...but anything beyond it lacks column data.  */
  ASSERT_FALSE
    (should_have_column_data_p (LINE_MAP_MAX_LOCATION_WITH_COLS + 1));
}
2044 | |
2045 | /* Verify the result of LOCATION_FILE/LOCATION_LINE/LOCATION_COLUMN |
2046 | on LOC. */ |
2047 | |
2048 | static void |
2049 | assert_loceq (const char *exp_filename, int exp_linenum, int exp_colnum, |
2050 | location_t loc) |
2051 | { |
2052 | ASSERT_STREQ (exp_filename, LOCATION_FILE (loc)); |
2053 | ASSERT_EQ (exp_linenum, LOCATION_LINE (loc)); |
2054 | /* If location_t values are sufficiently high, then column numbers |
2055 | will be unavailable and LOCATION_COLUMN (loc) will be 0. |
2056 | When close to the threshold, column numbers *may* be present: if |
2057 | the final linemap before the threshold contains a line that straddles |
2058 | the threshold, locations in that line have column information. */ |
2059 | if (should_have_column_data_p (loc)) |
2060 | ASSERT_EQ (exp_colnum, LOCATION_COLUMN (loc)); |
2061 | } |
2062 | |
2063 | /* Various selftests involve constructing a line table and one or more |
2064 | line maps within it. |
2065 | |
2066 | For maximum test coverage we want to run these tests with a variety |
2067 | of situations: |
2068 | - line_table->default_range_bits: some frontends use a non-zero value |
2069 | and others use zero |
2070 | - the fallback modes within line-map.cc: there are various threshold |
     values for location_t beyond which line-map.cc changes
2072 | behavior (disabling of the range-packing optimization, disabling |
2073 | of column-tracking). We can exercise these by starting the line_table |
2074 | at interesting values at or near these thresholds. |
2075 | |
2076 | The following struct describes a particular case within our test |
2077 | matrix. */ |
2078 | |
class line_table_case
{
public:
  line_table_case (int default_range_bits, int base_location)
  : m_default_range_bits (default_range_bits),
    m_base_location (base_location)
  {}

  /* Value to install as line_table->default_range_bits.  */
  int m_default_range_bits;
  /* Location at which to start the table's highest_location/highest_line,
     for exercising threshold behavior; 0 means "leave at the default"
     (see line_table_test's constructor).  */
  int m_base_location;
};
2090 | |
2091 | /* Constructor. Store the old value of line_table, and create a new |
2092 | one, using sane defaults. */ |
2093 | |
line_table_test::line_table_test ()
{
  /* Only one line_table_test may be alive at a time; stash the real
     global table so the dtor can restore it.  */
  gcc_assert (saved_line_table == NULL);
  saved_line_table = line_table;
  line_table = ggc_alloc<line_maps> ();
  linemap_init (set: line_table, BUILTINS_LOCATION);
  /* Borrow the allocation hooks from the real line_table.  */
  gcc_assert (saved_line_table->m_reallocator);
  line_table->m_reallocator = saved_line_table->m_reallocator;
  gcc_assert (saved_line_table->m_round_alloc_size);
  line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
  line_table->default_range_bits = 0;
}
2106 | |
2107 | /* Constructor. Store the old value of line_table, and create a new |
   one, using the situation described in CASE_. */
2109 | |
line_table_test::line_table_test (const line_table_case &case_)
{
  /* Only one line_table_test may be alive at a time; stash the real
     global table so the dtor can restore it.  */
  gcc_assert (saved_line_table == NULL);
  saved_line_table = line_table;
  line_table = ggc_alloc<line_maps> ();
  linemap_init (set: line_table, BUILTINS_LOCATION);
  /* Borrow the allocation hooks from the real line_table.  */
  gcc_assert (saved_line_table->m_reallocator);
  line_table->m_reallocator = saved_line_table->m_reallocator;
  gcc_assert (saved_line_table->m_round_alloc_size);
  line_table->m_round_alloc_size = saved_line_table->m_round_alloc_size;
  line_table->default_range_bits = case_.m_default_range_bits;
  /* A nonzero base location starts the table near an interesting
     location_t threshold (see the line_table_case description).  */
  if (case_.m_base_location)
    {
      line_table->highest_location = case_.m_base_location;
      line_table->highest_line = case_.m_base_location;
    }
}
2127 | |
2128 | /* Destructor. Restore the old value of line_table. */ |
2129 | |
line_table_test::~line_table_test ()
{
  /* Reinstate the global table saved by the constructor.  */
  gcc_assert (saved_line_table != NULL);
  line_table = saved_line_table;
  saved_line_table = NULL;
}
2136 | |
2137 | /* Verify basic operation of ordinary linemaps. */ |
2138 | |
static void
test_accessing_ordinary_linemaps (const line_table_case &case_)
{
  line_table_test ltt (case_);

  /* Build a simple linemap describing some locations. */
  linemap_add (line_table, LC_ENTER, sysp: false, to_file: "foo.c" , to_line: 0);

  linemap_line_start (set: line_table, to_line: 1, max_column_hint: 100);
  location_t loc_a = linemap_position_for_column (line_table, 1);
  location_t loc_b = linemap_position_for_column (line_table, 23);

  linemap_line_start (set: line_table, to_line: 2, max_column_hint: 100);
  location_t loc_c = linemap_position_for_column (line_table, 1);
  location_t loc_d = linemap_position_for_column (line_table, 17);

  /* Example of a very long line.  */
  linemap_line_start (set: line_table, to_line: 3, max_column_hint: 2000);
  location_t loc_e = linemap_position_for_column (line_table, 700);

  /* Transitioning back to a short line.  */
  linemap_line_start (set: line_table, to_line: 4, max_column_hint: 0);
  location_t loc_back_to_short = linemap_position_for_column (line_table, 100);

  if (should_have_column_data_p (loc: loc_back_to_short))
    {
      /* Verify that we switched to short lines in the linemap.  */
      line_map_ordinary *map = LINEMAPS_LAST_ORDINARY_MAP (set: line_table);
      ASSERT_EQ (7, map->m_column_and_range_bits - map->m_range_bits);
    }

  /* Example of a line that will eventually be seen to be longer
     than LINE_MAP_MAX_COLUMN_NUMBER; the initially seen width is
     below that.  */
  linemap_line_start (set: line_table, to_line: 5, max_column_hint: 2000);

  location_t loc_start_of_very_long_line
    = linemap_position_for_column (line_table, 2000);
  location_t loc_too_wide
    = linemap_position_for_column (line_table, 4097);
  location_t loc_too_wide_2
    = linemap_position_for_column (line_table, 4098);

  /* ...and back to a sane line length.  */
  linemap_line_start (set: line_table, to_line: 6, max_column_hint: 100);
  location_t loc_sane_again = linemap_position_for_column (line_table, 10);

  linemap_add (line_table, LC_LEAVE, sysp: false, NULL, to_line: 0);

  /* Multiple files.  */
  linemap_add (line_table, LC_ENTER, sysp: false, to_file: "bar.c" , to_line: 0);
  linemap_line_start (set: line_table, to_line: 1, max_column_hint: 200);
  location_t loc_f = linemap_position_for_column (line_table, 150);
  linemap_add (line_table, LC_LEAVE, sysp: false, NULL, to_line: 0);

  /* Verify that we can recover the location info.  */
  assert_loceq (exp_filename: "foo.c" , exp_linenum: 1, exp_colnum: 1, loc: loc_a);
  assert_loceq (exp_filename: "foo.c" , exp_linenum: 1, exp_colnum: 23, loc: loc_b);
  assert_loceq (exp_filename: "foo.c" , exp_linenum: 2, exp_colnum: 1, loc: loc_c);
  assert_loceq (exp_filename: "foo.c" , exp_linenum: 2, exp_colnum: 17, loc: loc_d);
  assert_loceq (exp_filename: "foo.c" , exp_linenum: 3, exp_colnum: 700, loc: loc_e);
  assert_loceq (exp_filename: "foo.c" , exp_linenum: 4, exp_colnum: 100, loc: loc_back_to_short);

  /* In the very wide line, the initial location should be fully tracked.  */
  assert_loceq (exp_filename: "foo.c" , exp_linenum: 5, exp_colnum: 2000, loc: loc_start_of_very_long_line);
  /* ...but once we exceed LINE_MAP_MAX_COLUMN_NUMBER column-tracking should
     be disabled.  */
  assert_loceq (exp_filename: "foo.c" , exp_linenum: 5, exp_colnum: 0, loc: loc_too_wide);
  assert_loceq (exp_filename: "foo.c" , exp_linenum: 5, exp_colnum: 0, loc: loc_too_wide_2);
  /*...and column-tracking should be re-enabled for subsequent lines. */
  assert_loceq (exp_filename: "foo.c" , exp_linenum: 6, exp_colnum: 10, loc: loc_sane_again);

  assert_loceq (exp_filename: "bar.c" , exp_linenum: 1, exp_colnum: 150, loc: loc_f);

  /* A plain token location is not from <built-in> and carries no
     ad-hoc range/block data.  */
  ASSERT_FALSE (is_location_from_builtin_token (loc_a));
  ASSERT_TRUE (pure_location_p (line_table, loc_a));

  /* Verify using make_location to build a range, and extracting data
     back from it.  */
  location_t range_c_b_d = make_location (caret: loc_c, start: loc_b, finish: loc_d);
  ASSERT_FALSE (pure_location_p (line_table, range_c_b_d));
  ASSERT_EQ (loc_c, get_location_from_adhoc_loc (line_table, range_c_b_d));
  source_range src_range = get_range_from_loc (set: line_table, loc: range_c_b_d);
  ASSERT_EQ (loc_b, src_range.m_start);
  ASSERT_EQ (loc_d, src_range.m_finish);
}
2225 | |
2226 | /* Verify various properties of UNKNOWN_LOCATION. */ |
2227 | |
static void
test_unknown_location ()
{
  /* UNKNOWN_LOCATION has no file, and line and column of 0.  */
  ASSERT_EQ (NULL, LOCATION_FILE (UNKNOWN_LOCATION));
  ASSERT_EQ (0, LOCATION_LINE (UNKNOWN_LOCATION));
  ASSERT_EQ (0, LOCATION_COLUMN (UNKNOWN_LOCATION));
}
2235 | |
2236 | /* Verify various properties of BUILTINS_LOCATION. */ |
2237 | |
static void
test_builtins ()
{
  /* BUILTINS_LOCATION is line 0, column 0 of the special built-in file.  */
  assert_loceq (exp_filename: special_fname_builtin (), exp_linenum: 0, exp_colnum: 0, BUILTINS_LOCATION);
  ASSERT_PRED1 (is_location_from_builtin_token, BUILTINS_LOCATION);
}
2244 | |
2245 | /* Regression test for make_location. |
2246 | Ensure that we use pure locations for the start/finish of the range, |
2247 | rather than storing a packed or ad-hoc range as the start/finish. */ |
2248 | |
static void
test_make_location_nonpure_range_endpoints (const line_table_case &case_)
{
  /* Issue seen with testsuite/c-c++-common/Wlogical-not-parentheses-2.c
     with C++ frontend.
     ....................0000000001111111111222.
     ....................1234567890123456789012. */
  const char *content = " r += !aaa == bbb;\n" ;
  temp_source_file tmp (SELFTEST_LOCATION, ".C" , content);
  line_table_test ltt (case_);
  linemap_add (line_table, LC_ENTER, sysp: false, to_file: tmp.get_filename (), to_line: 1);

  const location_t c11 = linemap_position_for_column (line_table, 11);
  const location_t c12 = linemap_position_for_column (line_table, 12);
  const location_t c13 = linemap_position_for_column (line_table, 13);
  const location_t c14 = linemap_position_for_column (line_table, 14);
  const location_t c21 = linemap_position_for_column (line_table, 21);

  /* Skip the test if column data is unavailable at these locations.  */
  if (c21 > LINE_MAP_MAX_LOCATION_WITH_COLS)
    return;

  /* Use column 13 for the caret location, arbitrarily, to verify that we
     handle start != caret.  */
  const location_t aaa = make_location (caret: c13, start: c12, finish: c14);
  ASSERT_EQ (c13, get_pure_location (aaa));
  ASSERT_EQ (c12, get_start (aaa));
  ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa)));
  ASSERT_EQ (c14, get_finish (aaa));
  ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa)));

  /* Make a location using a location with a range as the start-point.  */
  const location_t not_aaa = make_location (caret: c11, start: aaa, finish: c14);
  ASSERT_EQ (c11, get_pure_location (not_aaa));
  /* It should use the start location of the range, not store the range
     itself.  */
  ASSERT_EQ (c12, get_start (not_aaa));
  ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa)));
  ASSERT_EQ (c14, get_finish (not_aaa));
  ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa)));

  /* Similarly, make a location with a range as the end-point.  */
  const location_t aaa_eq_bbb = make_location (caret: c12, start: c12, finish: c21);
  ASSERT_EQ (c12, get_pure_location (aaa_eq_bbb));
  ASSERT_EQ (c12, get_start (aaa_eq_bbb));
  ASSERT_FALSE (IS_ADHOC_LOC (get_start (aaa_eq_bbb)));
  ASSERT_EQ (c21, get_finish (aaa_eq_bbb));
  ASSERT_FALSE (IS_ADHOC_LOC (get_finish (aaa_eq_bbb)));
  const location_t not_aaa_eq_bbb = make_location (caret: c11, start: c12, finish: aaa_eq_bbb);
  /* It should use the finish location of the range, not store the range
     itself.  */
  ASSERT_EQ (c11, get_pure_location (not_aaa_eq_bbb));
  ASSERT_EQ (c12, get_start (not_aaa_eq_bbb));
  ASSERT_FALSE (IS_ADHOC_LOC (get_start (not_aaa_eq_bbb)));
  ASSERT_EQ (c21, get_finish (not_aaa_eq_bbb));
  ASSERT_FALSE (IS_ADHOC_LOC (get_finish (not_aaa_eq_bbb)));
}
2305 | |
2306 | /* Verify reading of input files (e.g. for caret-based diagnostics). */ |
2307 | |
static void
test_reading_source_line ()
{
  /* Create a tempfile and write some text to it.
     Note: the final line deliberately has no trailing newline.  */
  temp_source_file tmp (SELFTEST_LOCATION, ".txt" ,
			"01234567890123456789\n"
			"This is the test text\n"
			"This is the 3rd line" );

  /* Read back a specific line from the tempfile.  */
  char_span source_line = location_get_source_line (file_path: tmp.get_filename (), line: 3);
  ASSERT_TRUE (source_line);
  ASSERT_TRUE (source_line.get_buffer () != NULL);
  /* 20 chars: the returned span excludes any newline.  */
  ASSERT_EQ (20, source_line.length ());
  ASSERT_TRUE (!strncmp ("This is the 3rd line" ,
			 source_line.get_buffer (), source_line.length ()));

  source_line = location_get_source_line (file_path: tmp.get_filename (), line: 2);
  ASSERT_TRUE (source_line);
  ASSERT_TRUE (source_line.get_buffer () != NULL);
  ASSERT_EQ (21, source_line.length ());
  ASSERT_TRUE (!strncmp ("This is the test text" ,
			 source_line.get_buffer (), source_line.length ()));

  /* Reading past the end of the file yields an empty span.  */
  source_line = location_get_source_line (file_path: tmp.get_filename (), line: 4);
  ASSERT_FALSE (source_line);
  ASSERT_TRUE (source_line.get_buffer () == NULL);
}
2336 | |
2337 | /* Tests of lexing. */ |
2338 | |
/* Verify that token TOK from PARSER has cpp_token_as_text
   equal to EXPECTED_TEXT.  Evaluates PARSER and TOK once each;
   must be used in statement context.  */

#define ASSERT_TOKEN_AS_TEXT_EQ(PARSER, TOK, EXPECTED_TEXT)	\
  SELFTEST_BEGIN_STMT						\
    unsigned char *actual_txt = cpp_token_as_text ((PARSER), (TOK)); \
    ASSERT_STREQ ((EXPECTED_TEXT), (const char *)actual_txt);	\
  SELFTEST_END_STMT
2347 | |
2348 | /* Verify that TOK's src_loc is within EXP_FILENAME at EXP_LINENUM, |
2349 | and ranges from EXP_START_COL to EXP_FINISH_COL. |
2350 | Use LOC as the effective location of the selftest. */ |
2351 | |
static void
assert_token_loc_eq (const location &loc,
		     const cpp_token *tok,
		     const char *exp_filename, int exp_linenum,
		     int exp_start_col, int exp_finish_col)
{
  /* File and line of the token's src_loc are always checkable.  */
  location_t tok_loc = tok->src_loc;
  ASSERT_STREQ_AT (loc, exp_filename, LOCATION_FILE (tok_loc));
  ASSERT_EQ_AT (loc, exp_linenum, LOCATION_LINE (tok_loc));

  /* If location_t values are sufficiently high, then column numbers
     will be unavailable.  */
  if (!should_have_column_data_p (loc: tok_loc))
    return;

  /* Check the caret column, then both endpoints of the token's range.  */
  ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_loc));
  source_range tok_range = get_range_from_loc (set: line_table, loc: tok_loc);
  ASSERT_EQ_AT (loc, exp_start_col, LOCATION_COLUMN (tok_range.m_start));
  ASSERT_EQ_AT (loc, exp_finish_col, LOCATION_COLUMN (tok_range.m_finish));
}
2372 | |
/* Use assert_token_loc_eq to verify the TOK->src_loc, using
   SELFTEST_LOCATION as the effective location of the selftest.
   Must be used in statement context.  */

#define ASSERT_TOKEN_LOC_EQ(TOK, EXP_FILENAME, EXP_LINENUM,	\
			    EXP_START_COL, EXP_FINISH_COL)	\
  assert_token_loc_eq (SELFTEST_LOCATION, (TOK), (EXP_FILENAME), \
		       (EXP_LINENUM), (EXP_START_COL), (EXP_FINISH_COL))
2380 | |
2381 | /* Test of lexing a file using libcpp, verifying tokens and their |
2382 | location information. */ |
2383 | |
static void
test_lexer (const line_table_case &case_)
{
  /* Create a tempfile and write some text to it.  */
  const char *content =
    /*00000000011111111112222222222333333.3333444444444.455555555556
     12345678901234567890123456789012345.6789012345678.901234567890. */
    ("test_name /* c-style comment */\n"
     " \"test literal\"\n"
     " // test c++-style comment\n"
     " 42\n" );
  temp_source_file tmp (SELFTEST_LOCATION, ".txt" , content);

  line_table_test ltt (case_);

  cpp_reader *parser = cpp_create_reader (CLK_GNUC89, NULL, line_table);

  const char *fname = cpp_read_main_file (parser, tmp.get_filename ());
  ASSERT_NE (fname, NULL);

  /* Verify that we get the expected tokens back, with the correct
     location information.
     Note that the comments in the fixture are not returned as tokens.  */

  location_t loc;
  const cpp_token *tok;
  tok = cpp_get_token_with_location (parser, &loc);
  ASSERT_NE (tok, NULL);
  ASSERT_EQ (tok->type, CPP_NAME);
  ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "test_name" );
  ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 1, 1, 9);

  tok = cpp_get_token_with_location (parser, &loc);
  ASSERT_NE (tok, NULL);
  ASSERT_EQ (tok->type, CPP_STRING);
  ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "\"test literal\"" );
  ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 2, 35, 48);

  tok = cpp_get_token_with_location (parser, &loc);
  ASSERT_NE (tok, NULL);
  ASSERT_EQ (tok->type, CPP_NUMBER);
  ASSERT_TOKEN_AS_TEXT_EQ (parser, tok, "42" );
  ASSERT_TOKEN_LOC_EQ (tok, tmp.get_filename (), 4, 4, 5);

  /* End of input.  */
  tok = cpp_get_token_with_location (parser, &loc);
  ASSERT_NE (tok, NULL);
  ASSERT_EQ (tok->type, CPP_EOF);

  cpp_finish (parser, NULL);
  cpp_destroy (parser);
}
2434 | |
2435 | /* Forward decls. */ |
2436 | |
2437 | class lexer_test; |
2438 | class lexer_test_options; |
2439 | |
2440 | /* A class for specifying options of a lexer_test. |
2441 | The "apply" vfunc is called during the lexer_test constructor. */ |
2442 | |
2443 | class lexer_test_options |
2444 | { |
2445 | public: |
2446 | virtual void apply (lexer_test &) = 0; |
2447 | }; |
2448 | |
/* Wrapper around a cpp_reader *, which calls cpp_finish and cpp_destroy
2450 | in its dtor. |
2451 | |
2452 | This is needed by struct lexer_test to ensure that the cleanup of the |
2453 | cpp_reader happens *after* the cleanup of the temp_source_file. */ |
2454 | |
2455 | class cpp_reader_ptr |
2456 | { |
2457 | public: |
2458 | cpp_reader_ptr (cpp_reader *ptr) : m_ptr (ptr) {} |
2459 | |
2460 | ~cpp_reader_ptr () |
2461 | { |
2462 | cpp_finish (m_ptr, NULL); |
2463 | cpp_destroy (m_ptr); |
2464 | } |
2465 | |
2466 | operator cpp_reader * () const { return m_ptr; } |
2467 | |
2468 | private: |
2469 | cpp_reader *m_ptr; |
2470 | }; |
2471 | |
2472 | /* A struct for writing lexer tests. */ |
2473 | |
class lexer_test
{
public:
  lexer_test (const line_table_case &case_, const char *content,
	      lexer_test_options *options);
  ~lexer_test ();

  /* Get the next token from the parser.  */
  const cpp_token *get_token ();

  /* The ordering of these fields matters.
     The line_table_test must be first, since the cpp_reader_ptr
     uses it.
     The cpp_reader must be cleaned up *after* the temp_source_file
     since the filenames in input.cc's input cache are owned by the
     cpp_reader; in particular, when ~temp_source_file evicts the
     filename the filenames must still be alive.  */
  line_table_test m_ltt;
  cpp_reader_ptr m_parser;
  temp_source_file m_tempfile;
  /* Database of string concatenations, for substring-location tests.  */
  string_concat_db m_concats;
  /* NOTE(review): presumably the dtor checks for CPP_EOF when this is
     true — confirm in ~lexer_test (not visible in this chunk).  */
  bool m_implicitly_expect_EOF;
};
2496 | |
2497 | /* Use an EBCDIC encoding for the execution charset, specifically |
2498 | IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047"). |
2499 | |
2500 | This exercises iconv integration within libcpp. |
2501 | Not every build of iconv supports the given charset, |
2502 | so we need to flag this error and handle it gracefully. */ |
2503 | |
2504 | class ebcdic_execution_charset : public lexer_test_options |
2505 | { |
2506 | public: |
2507 | ebcdic_execution_charset () : m_num_iconv_errors (0) |
2508 | { |
2509 | gcc_assert (s_singleton == NULL); |
2510 | s_singleton = this; |
2511 | } |
2512 | ~ebcdic_execution_charset () |
2513 | { |
2514 | gcc_assert (s_singleton == this); |
2515 | s_singleton = NULL; |
2516 | } |
2517 | |
2518 | void apply (lexer_test &test) final override |
2519 | { |
2520 | cpp_options *cpp_opts = cpp_get_options (test.m_parser); |
2521 | cpp_opts->narrow_charset = "IBM1047" ; |
2522 | |
2523 | cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser); |
2524 | callbacks->diagnostic = on_diagnostic; |
2525 | } |
2526 | |
2527 | static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED, |
2528 | enum cpp_diagnostic_level level ATTRIBUTE_UNUSED, |
2529 | enum cpp_warning_reason reason ATTRIBUTE_UNUSED, |
2530 | rich_location *richloc ATTRIBUTE_UNUSED, |
2531 | const char *msgid, va_list *ap ATTRIBUTE_UNUSED) |
2532 | ATTRIBUTE_FPTR_PRINTF(5,0) |
2533 | { |
2534 | gcc_assert (s_singleton); |
2535 | /* Avoid exgettext from picking this up, it is translated in libcpp. */ |
2536 | const char *msg = "conversion from %s to %s not supported by iconv" ; |
2537 | #ifdef ENABLE_NLS |
2538 | msg = dgettext (domainname: "cpplib" , msgid: msg); |
2539 | #endif |
2540 | /* Detect and record errors emitted by libcpp/charset.cc:init_iconv_desc |
2541 | when the local iconv build doesn't support the conversion. */ |
2542 | if (strcmp (s1: msgid, s2: msg) == 0) |
2543 | { |
2544 | s_singleton->m_num_iconv_errors++; |
2545 | return true; |
2546 | } |
2547 | |
2548 | /* Otherwise, we have an unexpected error. */ |
2549 | abort (); |
2550 | } |
2551 | |
2552 | bool iconv_errors_occurred_p () const { return m_num_iconv_errors > 0; } |
2553 | |
2554 | private: |
2555 | static ebcdic_execution_charset *s_singleton; |
2556 | int m_num_iconv_errors; |
2557 | }; |
2558 | |
2559 | ebcdic_execution_charset *ebcdic_execution_charset::s_singleton; |
2560 | |
2561 | /* A lexer_test_options subclass that records a list of diagnostic |
2562 | messages emitted by the lexer. */ |
2563 | |
2564 | class lexer_diagnostic_sink : public lexer_test_options |
2565 | { |
2566 | public: |
2567 | lexer_diagnostic_sink () |
2568 | { |
2569 | gcc_assert (s_singleton == NULL); |
2570 | s_singleton = this; |
2571 | } |
2572 | ~lexer_diagnostic_sink () |
2573 | { |
2574 | gcc_assert (s_singleton == this); |
2575 | s_singleton = NULL; |
2576 | |
2577 | int i; |
2578 | char *str; |
2579 | FOR_EACH_VEC_ELT (m_diagnostics, i, str) |
2580 | free (ptr: str); |
2581 | } |
2582 | |
2583 | void apply (lexer_test &test) final override |
2584 | { |
2585 | cpp_callbacks *callbacks = cpp_get_callbacks (test.m_parser); |
2586 | callbacks->diagnostic = on_diagnostic; |
2587 | } |
2588 | |
2589 | static bool on_diagnostic (cpp_reader *pfile ATTRIBUTE_UNUSED, |
2590 | enum cpp_diagnostic_level level ATTRIBUTE_UNUSED, |
2591 | enum cpp_warning_reason reason ATTRIBUTE_UNUSED, |
2592 | rich_location *richloc ATTRIBUTE_UNUSED, |
2593 | const char *msgid, va_list *ap) |
2594 | ATTRIBUTE_FPTR_PRINTF(5,0) |
2595 | { |
2596 | char *msg = xvasprintf (msgid, *ap); |
2597 | s_singleton->m_diagnostics.safe_push (obj: msg); |
2598 | return true; |
2599 | } |
2600 | |
2601 | auto_vec<char *> m_diagnostics; |
2602 | |
2603 | private: |
2604 | static lexer_diagnostic_sink *s_singleton; |
2605 | }; |
2606 | |
2607 | lexer_diagnostic_sink *lexer_diagnostic_sink::s_singleton; |
2608 | |
/* Constructor.  Override line_table with a new instance based on CASE_,
   and write CONTENT to a tempfile.  Create a cpp_reader, and use it to
   start parsing the tempfile.  */

lexer_test::lexer_test (const line_table_case &case_, const char *content,
			lexer_test_options *options)
: m_ltt (case_),
  m_parser (cpp_create_reader (CLK_GNUC99, NULL, line_table)),
  /* Create a tempfile and write the text to it.  */
  m_tempfile (SELFTEST_LOCATION, ".c", content),
  m_concats (),
  m_implicitly_expect_EOF (true)
{
  /* Apply OPTIONS before cpp_init_iconv: e.g. ebcdic_execution_charset
     sets the narrow charset here, presumably so that the iconv setup
     below sees it — confirm against cpp_init_iconv if reordering.  */
  if (options)
    options->apply (*this);

  cpp_init_iconv (m_parser);

  /* Parse the file.  */
  const char *fname = cpp_read_main_file (m_parser,
					  m_tempfile.get_filename ());
  ASSERT_NE (fname, NULL);
}
2632 | |
2633 | /* Destructor. By default, verify that the next token in m_parser is EOF. */ |
2634 | |
2635 | lexer_test::~lexer_test () |
2636 | { |
2637 | location_t loc; |
2638 | const cpp_token *tok; |
2639 | |
2640 | if (m_implicitly_expect_EOF) |
2641 | { |
2642 | tok = cpp_get_token_with_location (m_parser, &loc); |
2643 | ASSERT_NE (tok, NULL); |
2644 | ASSERT_EQ (tok->type, CPP_EOF); |
2645 | } |
2646 | } |
2647 | |
2648 | /* Get the next token from m_parser. */ |
2649 | |
2650 | const cpp_token * |
2651 | lexer_test::get_token () |
2652 | { |
2653 | location_t loc; |
2654 | const cpp_token *tok; |
2655 | |
2656 | tok = cpp_get_token_with_location (m_parser, &loc); |
2657 | ASSERT_NE (tok, NULL); |
2658 | return tok; |
2659 | } |
2660 | |
2661 | /* Verify that locations within string literals are correctly handled. */ |
2662 | |
2663 | /* Verify get_source_range_for_substring for token(s) at STRLOC, |
2664 | using the string concatenation database for TEST. |
2665 | |
2666 | Assert that the character at index IDX is on EXPECTED_LINE, |
2667 | and that it begins at column EXPECTED_START_COL and ends at |
2668 | EXPECTED_FINISH_COL (unless the locations are beyond |
2669 | LINE_MAP_MAX_LOCATION_WITH_COLS, in which case don't check their |
2670 | columns). */ |
2671 | |
2672 | static void |
2673 | assert_char_at_range (const location &loc, |
2674 | lexer_test& test, |
2675 | location_t strloc, enum cpp_ttype type, int idx, |
2676 | int expected_line, int expected_start_col, |
2677 | int expected_finish_col) |
2678 | { |
2679 | cpp_reader *pfile = test.m_parser; |
2680 | string_concat_db *concats = &test.m_concats; |
2681 | |
2682 | source_range actual_range = source_range(); |
2683 | const char *err |
2684 | = get_source_range_for_char (pfile, concats, strloc, type, char_idx: idx, |
2685 | out_range: &actual_range); |
2686 | if (should_have_column_data_p (loc: strloc)) |
2687 | ASSERT_EQ_AT (loc, NULL, err); |
2688 | else |
2689 | { |
2690 | ASSERT_STREQ_AT (loc, |
2691 | "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS" , |
2692 | err); |
2693 | return; |
2694 | } |
2695 | |
2696 | int actual_start_line = LOCATION_LINE (actual_range.m_start); |
2697 | ASSERT_EQ_AT (loc, expected_line, actual_start_line); |
2698 | int actual_finish_line = LOCATION_LINE (actual_range.m_finish); |
2699 | ASSERT_EQ_AT (loc, expected_line, actual_finish_line); |
2700 | |
2701 | if (should_have_column_data_p (loc: actual_range.m_start)) |
2702 | { |
2703 | int actual_start_col = LOCATION_COLUMN (actual_range.m_start); |
2704 | ASSERT_EQ_AT (loc, expected_start_col, actual_start_col); |
2705 | } |
2706 | if (should_have_column_data_p (loc: actual_range.m_finish)) |
2707 | { |
2708 | int actual_finish_col = LOCATION_COLUMN (actual_range.m_finish); |
2709 | ASSERT_EQ_AT (loc, expected_finish_col, actual_finish_col); |
2710 | } |
2711 | } |
2712 | |
/* Macro for calling assert_char_at_range, supplying SELFTEST_LOCATION for
   the effective location of any errors, so that failures are reported at
   the macro's call site.  */

#define ASSERT_CHAR_AT_RANGE(LEXER_TEST, STRLOC, TYPE, IDX, EXPECTED_LINE, \
			     EXPECTED_START_COL, EXPECTED_FINISH_COL) \
  assert_char_at_range (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), (TYPE), \
			(IDX), (EXPECTED_LINE), (EXPECTED_START_COL), \
			(EXPECTED_FINISH_COL))
2721 | |
2722 | /* Verify get_num_source_ranges_for_substring for token(s) at STRLOC, |
2723 | using the string concatenation database for TEST. |
2724 | |
2725 | Assert that the token(s) at STRLOC contain EXPECTED_NUM_RANGES. */ |
2726 | |
2727 | static void |
2728 | assert_num_substring_ranges (const location &loc, |
2729 | lexer_test& test, |
2730 | location_t strloc, |
2731 | enum cpp_ttype type, |
2732 | int expected_num_ranges) |
2733 | { |
2734 | cpp_reader *pfile = test.m_parser; |
2735 | string_concat_db *concats = &test.m_concats; |
2736 | |
2737 | int actual_num_ranges = -1; |
2738 | const char *err |
2739 | = get_num_source_ranges_for_substring (pfile, concats, strloc, type, |
2740 | out: &actual_num_ranges); |
2741 | if (should_have_column_data_p (loc: strloc)) |
2742 | ASSERT_EQ_AT (loc, NULL, err); |
2743 | else |
2744 | { |
2745 | ASSERT_STREQ_AT (loc, |
2746 | "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS" , |
2747 | err); |
2748 | return; |
2749 | } |
2750 | ASSERT_EQ_AT (loc, expected_num_ranges, actual_num_ranges); |
2751 | } |
2752 | |
/* Macro for calling assert_num_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors, so that
   failures are reported at the macro's call site.  */

#define ASSERT_NUM_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, \
				    EXPECTED_NUM_RANGES) \
  assert_num_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), (STRLOC), \
			       (TYPE), (EXPECTED_NUM_RANGES))
2760 | |
2761 | |
2762 | /* Verify that get_num_source_ranges_for_substring for token(s) at STRLOC |
2763 | returns an error (using the string concatenation database for TEST). */ |
2764 | |
2765 | static void |
2766 | assert_has_no_substring_ranges (const location &loc, |
2767 | lexer_test& test, |
2768 | location_t strloc, |
2769 | enum cpp_ttype type, |
2770 | const char *expected_err) |
2771 | { |
2772 | cpp_reader *pfile = test.m_parser; |
2773 | string_concat_db *concats = &test.m_concats; |
2774 | cpp_substring_ranges ranges; |
2775 | const char *actual_err |
2776 | = get_substring_ranges_for_loc (pfile, concats, strloc, |
2777 | type, ranges); |
2778 | if (should_have_column_data_p (loc: strloc)) |
2779 | ASSERT_STREQ_AT (loc, expected_err, actual_err); |
2780 | else |
2781 | ASSERT_STREQ_AT (loc, |
2782 | "range starts after LINE_MAP_MAX_LOCATION_WITH_COLS" , |
2783 | actual_err); |
2784 | } |
2785 | |
/* Macro for calling assert_has_no_substring_ranges, supplying
   SELFTEST_LOCATION for the effective location of any errors.  */

#define ASSERT_HAS_NO_SUBSTRING_RANGES(LEXER_TEST, STRLOC, TYPE, ERR) \
  assert_has_no_substring_ranges (SELFTEST_LOCATION, (LEXER_TEST), \
				  (STRLOC), (TYPE), (ERR))
2789 | |
2790 | /* Lex a simple string literal. Verify the substring location data, before |
2791 | and after running cpp_interpret_string on it. */ |
2792 | |
2793 | static void |
2794 | test_lexer_string_locations_simple (const line_table_case &case_) |
2795 | { |
2796 | /* Digits 0-9 (with 0 at column 10), the simple way. |
2797 | ....................000000000.11111111112.2222222223333333333 |
2798 | ....................123456789.01234567890.1234567890123456789 |
2799 | We add a trailing comment to ensure that we correctly locate |
2800 | the end of the string literal token. */ |
2801 | const char *content = " \"0123456789\" /* not a string */\n" ; |
2802 | lexer_test test (case_, content, NULL); |
2803 | |
2804 | /* Verify that we get the expected token back, with the correct |
2805 | location information. */ |
2806 | const cpp_token *tok = test.get_token (); |
2807 | ASSERT_EQ (tok->type, CPP_STRING); |
2808 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"" ); |
2809 | ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20); |
2810 | |
2811 | /* At this point in lexing, the quote characters are treated as part of |
2812 | the string (they are stripped off by cpp_interpret_string). */ |
2813 | |
2814 | ASSERT_EQ (tok->val.str.len, 12); |
2815 | |
2816 | /* Verify that cpp_interpret_string works. */ |
2817 | cpp_string dst_string; |
2818 | const enum cpp_ttype type = CPP_STRING; |
2819 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, |
2820 | &dst_string, type); |
2821 | ASSERT_TRUE (result); |
2822 | ASSERT_STREQ ("0123456789" , (const char *)dst_string.text); |
2823 | free (ptr: const_cast <unsigned char *> (dst_string.text)); |
2824 | |
2825 | /* Verify ranges of individual characters. This no longer includes the |
2826 | opening quote, but does include the closing quote. */ |
2827 | for (int i = 0; i <= 10; i++) |
2828 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, |
2829 | 10 + i, 10 + i); |
2830 | |
2831 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); |
2832 | } |
2833 | |
2834 | /* As test_lexer_string_locations_simple, but use an EBCDIC execution |
2835 | encoding. */ |
2836 | |
2837 | static void |
2838 | test_lexer_string_locations_ebcdic (const line_table_case &case_) |
2839 | { |
2840 | /* EBCDIC support requires iconv. */ |
2841 | if (!HAVE_ICONV) |
2842 | return; |
2843 | |
2844 | /* Digits 0-9 (with 0 at column 10), the simple way. |
2845 | ....................000000000.11111111112.2222222223333333333 |
2846 | ....................123456789.01234567890.1234567890123456789 |
2847 | We add a trailing comment to ensure that we correctly locate |
2848 | the end of the string literal token. */ |
2849 | const char *content = " \"0123456789\" /* not a string */\n" ; |
2850 | ebcdic_execution_charset use_ebcdic; |
2851 | lexer_test test (case_, content, &use_ebcdic); |
2852 | |
2853 | /* Verify that we get the expected token back, with the correct |
2854 | location information. */ |
2855 | const cpp_token *tok = test.get_token (); |
2856 | ASSERT_EQ (tok->type, CPP_STRING); |
2857 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"" ); |
2858 | ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 20); |
2859 | |
2860 | /* At this point in lexing, the quote characters are treated as part of |
2861 | the string (they are stripped off by cpp_interpret_string). */ |
2862 | |
2863 | ASSERT_EQ (tok->val.str.len, 12); |
2864 | |
2865 | /* The remainder of the test requires an iconv implementation that |
2866 | can convert from UTF-8 to the EBCDIC encoding requested above. */ |
2867 | if (use_ebcdic.iconv_errors_occurred_p ()) |
2868 | return; |
2869 | |
2870 | /* Verify that cpp_interpret_string works. */ |
2871 | cpp_string dst_string; |
2872 | const enum cpp_ttype type = CPP_STRING; |
2873 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, |
2874 | &dst_string, type); |
2875 | ASSERT_TRUE (result); |
2876 | /* We should now have EBCDIC-encoded text, specifically |
2877 | IBM1047-encoded (aka "EBCDIC 1047", or "Code page 1047"). |
2878 | The digits 0-9 are encoded as 240-249 i.e. 0xf0-0xf9. */ |
2879 | ASSERT_STREQ ("\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9" , |
2880 | (const char *)dst_string.text); |
2881 | free (ptr: const_cast <unsigned char *> (dst_string.text)); |
2882 | |
2883 | /* Verify that we don't attempt to record substring location information |
2884 | for such cases. */ |
2885 | ASSERT_HAS_NO_SUBSTRING_RANGES |
2886 | (test, tok->src_loc, type, |
2887 | "execution character set != source character set" ); |
2888 | } |
2889 | |
2890 | /* Lex a string literal containing a hex-escaped character. |
2891 | Verify the substring location data, before and after running |
2892 | cpp_interpret_string on it. */ |
2893 | |
2894 | static void |
2895 | test_lexer_string_locations_hex (const line_table_case &case_) |
2896 | { |
2897 | /* Digits 0-9, expressing digit 5 in ASCII as "\x35" |
2898 | and with a space in place of digit 6, to terminate the escaped |
2899 | hex code. |
2900 | ....................000000000.111111.11112222. |
2901 | ....................123456789.012345.67890123. */ |
2902 | const char *content = " \"01234\\x35 789\"\n" ; |
2903 | lexer_test test (case_, content, NULL); |
2904 | |
2905 | /* Verify that we get the expected token back, with the correct |
2906 | location information. */ |
2907 | const cpp_token *tok = test.get_token (); |
2908 | ASSERT_EQ (tok->type, CPP_STRING); |
2909 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\x35 789\"" ); |
2910 | ASSERT_TOKEN_LOC_EQ (tok, test.m_tempfile.get_filename (), 1, 9, 23); |
2911 | |
2912 | /* At this point in lexing, the quote characters are treated as part of |
2913 | the string (they are stripped off by cpp_interpret_string). */ |
2914 | ASSERT_EQ (tok->val.str.len, 15); |
2915 | |
2916 | /* Verify that cpp_interpret_string works. */ |
2917 | cpp_string dst_string; |
2918 | const enum cpp_ttype type = CPP_STRING; |
2919 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, |
2920 | &dst_string, type); |
2921 | ASSERT_TRUE (result); |
2922 | ASSERT_STREQ ("012345 789" , (const char *)dst_string.text); |
2923 | free (ptr: const_cast <unsigned char *> (dst_string.text)); |
2924 | |
2925 | /* Verify ranges of individual characters. This no longer includes the |
2926 | opening quote, but does include the closing quote. */ |
2927 | for (int i = 0; i <= 4; i++) |
2928 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); |
2929 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18); |
2930 | for (int i = 6; i <= 10; i++) |
2931 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i); |
2932 | |
2933 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); |
2934 | } |
2935 | |
2936 | /* Lex a string literal containing an octal-escaped character. |
2937 | Verify the substring location data after running cpp_interpret_string |
2938 | on it. */ |
2939 | |
2940 | static void |
2941 | test_lexer_string_locations_oct (const line_table_case &case_) |
2942 | { |
2943 | /* Digits 0-9, expressing digit 5 in ASCII as "\065" |
2944 | and with a space in place of digit 6, to terminate the escaped |
2945 | octal code. |
2946 | ....................000000000.111111.11112222.2222223333333333444 |
2947 | ....................123456789.012345.67890123.4567890123456789012 */ |
2948 | const char *content = " \"01234\\065 789\" /* not a string */\n" ; |
2949 | lexer_test test (case_, content, NULL); |
2950 | |
2951 | /* Verify that we get the expected token back, with the correct |
2952 | location information. */ |
2953 | const cpp_token *tok = test.get_token (); |
2954 | ASSERT_EQ (tok->type, CPP_STRING); |
2955 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\065 789\"" ); |
2956 | |
2957 | /* Verify that cpp_interpret_string works. */ |
2958 | cpp_string dst_string; |
2959 | const enum cpp_ttype type = CPP_STRING; |
2960 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, |
2961 | &dst_string, type); |
2962 | ASSERT_TRUE (result); |
2963 | ASSERT_STREQ ("012345 789" , (const char *)dst_string.text); |
2964 | free (ptr: const_cast <unsigned char *> (dst_string.text)); |
2965 | |
2966 | /* Verify ranges of individual characters. This no longer includes the |
2967 | opening quote, but does include the closing quote. */ |
2968 | for (int i = 0; i < 5; i++) |
2969 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); |
2970 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 5, 1, 15, 18); |
2971 | for (int i = 6; i <= 10; i++) |
2972 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 13 + i, 13 + i); |
2973 | |
2974 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 11); |
2975 | } |
2976 | |
2977 | /* Test of string literal containing letter escapes. */ |
2978 | |
2979 | static void |
2980 | test_lexer_string_locations_letter_escape_1 (const line_table_case &case_) |
2981 | { |
2982 | /* The string "\tfoo\\\nbar" i.e. tab, "foo", backslash, newline, bar. |
2983 | .....................000000000.1.11111.1.1.11222.22222223333333 |
2984 | .....................123456789.0.12345.6.7.89012.34567890123456. */ |
2985 | const char *content = (" \"\\tfoo\\\\\\nbar\" /* non-str */\n" ); |
2986 | lexer_test test (case_, content, NULL); |
2987 | |
2988 | /* Verify that we get the expected tokens back. */ |
2989 | const cpp_token *tok = test.get_token (); |
2990 | ASSERT_EQ (tok->type, CPP_STRING); |
2991 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"\\tfoo\\\\\\nbar\"" ); |
2992 | |
2993 | /* Verify ranges of individual characters. */ |
2994 | /* "\t". */ |
2995 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, |
2996 | 0, 1, 10, 11); |
2997 | /* "foo". */ |
2998 | for (int i = 1; i <= 3; i++) |
2999 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, |
3000 | i, 1, 11 + i, 11 + i); |
3001 | /* "\\" and "\n". */ |
3002 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, |
3003 | 4, 1, 15, 16); |
3004 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, |
3005 | 5, 1, 17, 18); |
3006 | |
3007 | /* "bar" and closing quote for nul-terminator. */ |
3008 | for (int i = 6; i <= 9; i++) |
3009 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, |
3010 | i, 1, 13 + i, 13 + i); |
3011 | |
3012 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 10); |
3013 | } |
3014 | |
3015 | /* Another test of a string literal containing a letter escape. |
3016 | Based on string seen in |
3017 | printf ("%-%\n"); |
3018 | in gcc.dg/format/c90-printf-1.c. */ |
3019 | |
3020 | static void |
3021 | test_lexer_string_locations_letter_escape_2 (const line_table_case &case_) |
3022 | { |
3023 | /* .....................000000000.1111.11.1111.22222222223. |
3024 | .....................123456789.0123.45.6789.01234567890. */ |
3025 | const char *content = (" \"%-%\\n\" /* non-str */\n" ); |
3026 | lexer_test test (case_, content, NULL); |
3027 | |
3028 | /* Verify that we get the expected tokens back. */ |
3029 | const cpp_token *tok = test.get_token (); |
3030 | ASSERT_EQ (tok->type, CPP_STRING); |
3031 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"%-%\\n\"" ); |
3032 | |
3033 | /* Verify ranges of individual characters. */ |
3034 | /* "%-%". */ |
3035 | for (int i = 0; i < 3; i++) |
3036 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, |
3037 | i, 1, 10 + i, 10 + i); |
3038 | /* "\n". */ |
3039 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, |
3040 | 3, 1, 13, 14); |
3041 | |
3042 | /* Closing quote for nul-terminator. */ |
3043 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, |
3044 | 4, 1, 15, 15); |
3045 | |
3046 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 5); |
3047 | } |
3048 | |
3049 | /* Lex a string literal containing UCN 4 characters. |
3050 | Verify the substring location data after running cpp_interpret_string |
3051 | on it. */ |
3052 | |
3053 | static void |
3054 | test_lexer_string_locations_ucn4 (const line_table_case &case_) |
3055 | { |
3056 | /* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed |
3057 | as UCN 4. |
3058 | ....................000000000.111111.111122.222222223.33333333344444 |
3059 | ....................123456789.012345.678901.234567890.12345678901234 */ |
3060 | const char *content = " \"01234\\u2174\\u2175789\" /* non-str */\n" ; |
3061 | lexer_test test (case_, content, NULL); |
3062 | |
3063 | /* Verify that we get the expected token back, with the correct |
3064 | location information. */ |
3065 | const cpp_token *tok = test.get_token (); |
3066 | ASSERT_EQ (tok->type, CPP_STRING); |
3067 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"01234\\u2174\\u2175789\"" ); |
3068 | |
3069 | /* Verify that cpp_interpret_string works. |
3070 | The string should be encoded in the execution character |
3071 | set. Assuming that is UTF-8, we should have the following: |
3072 | ----------- ---- ----- ------- ---------------- |
3073 | Byte offset Byte Octal Unicode Source Column(s) |
3074 | ----------- ---- ----- ------- ---------------- |
3075 | 0 0x30 '0' 10 |
3076 | 1 0x31 '1' 11 |
3077 | 2 0x32 '2' 12 |
3078 | 3 0x33 '3' 13 |
3079 | 4 0x34 '4' 14 |
3080 | 5 0xE2 \342 U+2174 15-20 |
3081 | 6 0x85 \205 (cont) 15-20 |
3082 | 7 0xB4 \264 (cont) 15-20 |
3083 | 8 0xE2 \342 U+2175 21-26 |
3084 | 9 0x85 \205 (cont) 21-26 |
3085 | 10 0xB5 \265 (cont) 21-26 |
3086 | 11 0x37 '7' 27 |
3087 | 12 0x38 '8' 28 |
3088 | 13 0x39 '9' 29 |
3089 | 14 0x00 30 (closing quote) |
3090 | ----------- ---- ----- ------- ---------------. */ |
3091 | |
3092 | cpp_string dst_string; |
3093 | const enum cpp_ttype type = CPP_STRING; |
3094 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, |
3095 | &dst_string, type); |
3096 | ASSERT_TRUE (result); |
3097 | ASSERT_STREQ ("01234\342\205\264\342\205\265789" , |
3098 | (const char *)dst_string.text); |
3099 | free (ptr: const_cast <unsigned char *> (dst_string.text)); |
3100 | |
3101 | /* Verify ranges of individual characters. This no longer includes the |
3102 | opening quote, but does include the closing quote. |
3103 | '01234'. */ |
3104 | for (int i = 0; i <= 4; i++) |
3105 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); |
3106 | /* U+2174. */ |
3107 | for (int i = 5; i <= 7; i++) |
3108 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 20); |
3109 | /* U+2175. */ |
3110 | for (int i = 8; i <= 10; i++) |
3111 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 21, 26); |
3112 | /* '789' and nul terminator */ |
3113 | for (int i = 11; i <= 14; i++) |
3114 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 16 + i, 16 + i); |
3115 | |
3116 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15); |
3117 | } |
3118 | |
3119 | /* Lex a string literal containing UCN 8 characters. |
3120 | Verify the substring location data after running cpp_interpret_string |
3121 | on it. */ |
3122 | |
3123 | static void |
3124 | test_lexer_string_locations_ucn8 (const line_table_case &case_) |
3125 | { |
3126 | /* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8. |
3127 | ....................000000000.111111.1111222222.2222333333333.344444 |
3128 | ....................123456789.012345.6789012345.6789012345678.901234 */ |
3129 | const char *content = " \"01234\\U00002174\\U00002175789\" /* */\n" ; |
3130 | lexer_test test (case_, content, NULL); |
3131 | |
3132 | /* Verify that we get the expected token back, with the correct |
3133 | location information. */ |
3134 | const cpp_token *tok = test.get_token (); |
3135 | ASSERT_EQ (tok->type, CPP_STRING); |
3136 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, |
3137 | "\"01234\\U00002174\\U00002175789\"" ); |
3138 | |
3139 | /* Verify that cpp_interpret_string works. |
3140 | The UTF-8 encoding of the string is identical to that from |
3141 | the ucn4 testcase above; the only difference is the column |
3142 | locations. */ |
3143 | cpp_string dst_string; |
3144 | const enum cpp_ttype type = CPP_STRING; |
3145 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, |
3146 | &dst_string, type); |
3147 | ASSERT_TRUE (result); |
3148 | ASSERT_STREQ ("01234\342\205\264\342\205\265789" , |
3149 | (const char *)dst_string.text); |
3150 | free (ptr: const_cast <unsigned char *> (dst_string.text)); |
3151 | |
3152 | /* Verify ranges of individual characters. This no longer includes the |
3153 | opening quote, but does include the closing quote. |
3154 | '01234'. */ |
3155 | for (int i = 0; i <= 4; i++) |
3156 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); |
3157 | /* U+2174. */ |
3158 | for (int i = 5; i <= 7; i++) |
3159 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 15, 24); |
3160 | /* U+2175. */ |
3161 | for (int i = 8; i <= 10; i++) |
3162 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 25, 34); |
3163 | /* '789' at columns 35-37 */ |
3164 | for (int i = 11; i <= 13; i++) |
3165 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 24 + i, 24 + i); |
3166 | /* Closing quote/nul-terminator at column 38. */ |
3167 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 14, 1, 38, 38); |
3168 | |
3169 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 15); |
3170 | } |
3171 | |
/* Fetch a big-endian 32-bit value and convert to host endianness.  */

static uint32_t
uint32_from_big_endian (const uint32_t *ptr_be_value)
{
  /* Assemble the value byte-by-byte, most-significant first, so the
     result is independent of host endianness.  */
  const unsigned char *bytes = (const unsigned char *)ptr_be_value;
  uint32_t result = 0;
  for (int byte_idx = 0; byte_idx < 4; byte_idx++)
    result = (result << 8) | (uint32_t) bytes[byte_idx];
  return result;
}
3183 | |
3184 | /* Lex a wide string literal and verify that attempts to read substring |
3185 | location data from it fail gracefully. */ |
3186 | |
3187 | static void |
3188 | test_lexer_string_locations_wide_string (const line_table_case &case_) |
3189 | { |
3190 | /* Digits 0-9. |
3191 | ....................000000000.11111111112.22222222233333 |
3192 | ....................123456789.01234567890.12345678901234 */ |
3193 | const char *content = " L\"0123456789\" /* non-str */\n" ; |
3194 | lexer_test test (case_, content, NULL); |
3195 | |
3196 | /* Verify that we get the expected token back, with the correct |
3197 | location information. */ |
3198 | const cpp_token *tok = test.get_token (); |
3199 | ASSERT_EQ (tok->type, CPP_WSTRING); |
3200 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L\"0123456789\"" ); |
3201 | |
3202 | /* Verify that cpp_interpret_string works, using CPP_WSTRING. */ |
3203 | cpp_string dst_string; |
3204 | const enum cpp_ttype type = CPP_WSTRING; |
3205 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, |
3206 | &dst_string, type); |
3207 | ASSERT_TRUE (result); |
3208 | /* The cpp_reader defaults to big-endian with |
3209 | CHAR_BIT * sizeof (int) for the wchar_precision, so dst_string should |
3210 | now be encoded as UTF-32BE. */ |
3211 | const uint32_t *be32_chars = (const uint32_t *)dst_string.text; |
3212 | ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0])); |
3213 | ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5])); |
3214 | ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9])); |
3215 | ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10])); |
3216 | free (ptr: const_cast <unsigned char *> (dst_string.text)); |
3217 | |
3218 | /* We don't yet support generating substring location information |
3219 | for L"" strings. */ |
3220 | ASSERT_HAS_NO_SUBSTRING_RANGES |
3221 | (test, tok->src_loc, type, |
3222 | "execution character set != source character set" ); |
3223 | } |
3224 | |
/* Fetch a big-endian 16-bit value and convert to host endianness.  */

static uint16_t
uint16_from_big_endian (const uint16_t *ptr_be_value)
{
  /* Read the two bytes most-significant first, independent of host
     endianness.  */
  const unsigned char *bytes = (const unsigned char *)ptr_be_value;
  unsigned int hi = bytes[0];
  unsigned int lo = bytes[1];
  return (uint16_t) ((hi << 8) | lo);
}
3233 | |
/* Lex a u"" string literal and verify that attempts to read substring
   location data from it fail gracefully.  */

static void
test_lexer_string_locations_string16 (const line_table_case &case_)
{
  /* Digits 0-9.
     ....................000000000.11111111112.22222222233333
     ....................123456789.01234567890.12345678901234  */
  const char *content = " u\"0123456789\" /* non-str */\n" ;
  lexer_test test (case_, content, NULL);

  /* Verify that we get the expected token back, with the correct
     location information.  */
  const cpp_token *tok = test.get_token ();
  ASSERT_EQ (tok->type, CPP_STRING16);
  ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u\"0123456789\"" );

  /* Verify that cpp_interpret_string works, using CPP_STRING16.  */
  cpp_string dst_string;
  const enum cpp_ttype type = CPP_STRING16;
  bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
				      &dst_string, type);
  ASSERT_TRUE (result);

  /* The cpp_reader defaults to big-endian, so dst_string should
     now be encoded as UTF-16BE.  Check the first, middle, last digit
     and the NUL terminator.  */
  const uint16_t *be16_chars = (const uint16_t *)dst_string.text;
  ASSERT_EQ ('0', uint16_from_big_endian (&be16_chars[0]));
  ASSERT_EQ ('5', uint16_from_big_endian (&be16_chars[5]));
  ASSERT_EQ ('9', uint16_from_big_endian (&be16_chars[9]));
  ASSERT_EQ (0, uint16_from_big_endian (&be16_chars[10]));
  free (ptr: const_cast <unsigned char *> (dst_string.text));

  /* We don't yet support generating substring location information
     for u"" strings.  */
  ASSERT_HAS_NO_SUBSTRING_RANGES
    (test, tok->src_loc, type,
     "execution character set != source character set" );
}
3274 | |
/* Lex a U"" string literal and verify that attempts to read substring
   location data from it fail gracefully.  */

static void
test_lexer_string_locations_string32 (const line_table_case &case_)
{
  /* Digits 0-9.
     ....................000000000.11111111112.22222222233333
     ....................123456789.01234567890.12345678901234  */
  const char *content = " U\"0123456789\" /* non-str */\n" ;
  lexer_test test (case_, content, NULL);

  /* Verify that we get the expected token back, with the correct
     location information.  */
  const cpp_token *tok = test.get_token ();
  ASSERT_EQ (tok->type, CPP_STRING32);
  ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U\"0123456789\"" );

  /* Verify that cpp_interpret_string works, using CPP_STRING32.  */
  cpp_string dst_string;
  const enum cpp_ttype type = CPP_STRING32;
  bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
				      &dst_string, type);
  ASSERT_TRUE (result);

  /* The cpp_reader defaults to big-endian, so dst_string should
     now be encoded as UTF-32BE.  Check the first, middle, last digit
     and the NUL terminator.  */
  const uint32_t *be32_chars = (const uint32_t *)dst_string.text;
  ASSERT_EQ ('0', uint32_from_big_endian (&be32_chars[0]));
  ASSERT_EQ ('5', uint32_from_big_endian (&be32_chars[5]));
  ASSERT_EQ ('9', uint32_from_big_endian (&be32_chars[9]));
  ASSERT_EQ (0, uint32_from_big_endian (&be32_chars[10]));
  free (ptr: const_cast <unsigned char *> (dst_string.text));

  /* We don't yet support generating substring location information
     for U"" strings.  */
  ASSERT_HAS_NO_SUBSTRING_RANGES
    (test, tok->src_loc, type,
     "execution character set != source character set" );
}
3315 | |
3316 | /* Lex a u8-string literal. |
3317 | Verify the substring location data after running cpp_interpret_string |
3318 | on it. */ |
3319 | |
3320 | static void |
3321 | test_lexer_string_locations_u8 (const line_table_case &case_) |
3322 | { |
3323 | /* Digits 0-9. |
3324 | ....................000000000.11111111112.22222222233333 |
3325 | ....................123456789.01234567890.12345678901234 */ |
3326 | const char *content = " u8\"0123456789\" /* non-str */\n" ; |
3327 | lexer_test test (case_, content, NULL); |
3328 | |
3329 | /* Verify that we get the expected token back, with the correct |
3330 | location information. */ |
3331 | const cpp_token *tok = test.get_token (); |
3332 | ASSERT_EQ (tok->type, CPP_UTF8STRING); |
3333 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u8\"0123456789\"" ); |
3334 | |
3335 | /* Verify that cpp_interpret_string works. */ |
3336 | cpp_string dst_string; |
3337 | const enum cpp_ttype type = CPP_STRING; |
3338 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, |
3339 | &dst_string, type); |
3340 | ASSERT_TRUE (result); |
3341 | ASSERT_STREQ ("0123456789" , (const char *)dst_string.text); |
3342 | free (ptr: const_cast <unsigned char *> (dst_string.text)); |
3343 | |
3344 | /* Verify ranges of individual characters. This no longer includes the |
3345 | opening quote, but does include the closing quote. */ |
3346 | for (int i = 0; i <= 10; i++) |
3347 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i); |
3348 | } |
3349 | |
/* Lex a string literal containing UTF-8 source characters.
   Verify the substring location data after running cpp_interpret_string
   on it.  */

static void
test_lexer_string_locations_utf8_source (const line_table_case &case_)
{
  /* This string literal is written out to the source file as UTF-8,
     and is of the form "before mojibake after", where "mojibake"
     is written as the following four unicode code points:
	U+6587 CJK UNIFIED IDEOGRAPH-6587
	U+5B57 CJK UNIFIED IDEOGRAPH-5B57
	U+5316 CJK UNIFIED IDEOGRAPH-5316
	U+3051 HIRAGANA LETTER KE.
     Each of these is 3 bytes wide when encoded in UTF-8, whereas the
     "before" and "after" are 1 byte per unicode character.

     The numbering shown are "columns", which are *byte* numbers within
     the line, rather than unicode character numbers.

     .................... 000000000.1111111.
     .................... 123456789.0123456.  */
  const char *content = ("        \"before "
			 /* U+6587 CJK UNIFIED IDEOGRAPH-6587
			    UTF-8: 0xE6 0x96 0x87
			    C octal escaped UTF-8: \346\226\207
			    "column" numbers: 17-19.  */
			 "\346\226\207"

			 /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57
			    UTF-8: 0xE5 0xAD 0x97
			    C octal escaped UTF-8: \345\255\227
			    "column" numbers: 20-22.  */
			 "\345\255\227"

			 /* U+5316 CJK UNIFIED IDEOGRAPH-5316
			    UTF-8: 0xE5 0x8C 0x96
			    C octal escaped UTF-8: \345\214\226
			    "column" numbers: 23-25.  */
			 "\345\214\226"

			 /* U+3051 HIRAGANA LETTER KE
			    UTF-8: 0xE3 0x81 0x91
			    C octal escaped UTF-8: \343\201\221
			    "column" numbers: 26-28.  */
			 "\343\201\221"

			 /* column numbers 29 onwards
			    2333333.33334444444444
			    9012345.67890123456789.  */
			 " after\" /* non-str */\n" );
  lexer_test test (case_, content, NULL);

  /* Verify that we get the expected token back, with the correct
     location information.  */
  const cpp_token *tok = test.get_token ();
  ASSERT_EQ (tok->type, CPP_STRING);
  ASSERT_TOKEN_AS_TEXT_EQ
    (test.m_parser, tok,
     "\"before \346\226\207\345\255\227\345\214\226\343\201\221 after\"" );

  /* Verify that cpp_interpret_string works; the UTF-8 bytes should
     pass through unchanged.  */
  cpp_string dst_string;
  const enum cpp_ttype type = CPP_STRING;
  bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
				      &dst_string, type);
  ASSERT_TRUE (result);
  ASSERT_STREQ
    ("before \346\226\207\345\255\227\345\214\226\343\201\221 after" ,
     (const char *)dst_string.text);
  free (ptr: const_cast <unsigned char *> (dst_string.text));

  /* Verify ranges of individual characters.  This no longer includes the
     opening quote, but does include the closing quote.
     Assuming that both source and execution encodings are UTF-8, we have
     a run of 25 octets in each, plus the NUL terminator.  */
  for (int i = 0; i < 25; i++)
    ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, i, 1, 10 + i, 10 + i);
  /* NUL-terminator should use the closing quote at column 35.  */
  ASSERT_CHAR_AT_RANGE (test, tok->src_loc, type, 25, 1, 35, 35);

  /* 25 octets plus the NUL terminator.  */
  ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, type, 26);
}
3433 | |
/* Test of string literal concatenation.  */

static void
test_lexer_string_locations_concatenation_1 (const line_table_case &case_)
{
  /* Digits 0-9.
     .....................000000000.111111.11112222222222
     .....................123456789.012345.67890123456789.  */
  const char *content = ("        \"01234\" /* non-str */\n"
			 "        \"56789\" /* non-str */\n" );
  lexer_test test (case_, content, NULL);

  location_t input_locs[2];

  /* Verify that we get the expected tokens back.  */
  auto_vec <cpp_string> input_strings;
  const cpp_token *tok_a = test.get_token ();
  ASSERT_EQ (tok_a->type, CPP_STRING);
  ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_a, "\"01234\"" );
  input_strings.safe_push (obj: tok_a->val.str);
  input_locs[0] = tok_a->src_loc;

  const cpp_token *tok_b = test.get_token ();
  ASSERT_EQ (tok_b->type, CPP_STRING);
  ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok_b, "\"56789\"" );
  input_strings.safe_push (obj: tok_b->val.str);
  input_locs[1] = tok_b->src_loc;

  /* Verify that cpp_interpret_string works when concatenating the
     two input strings.  */
  cpp_string dst_string;
  const enum cpp_ttype type = CPP_STRING;
  bool result = cpp_interpret_string (test.m_parser,
				      input_strings.address (), 2,
				      &dst_string, type);
  ASSERT_TRUE (result);
  ASSERT_STREQ ("0123456789" , (const char *)dst_string.text);
  free (ptr: const_cast <unsigned char *> (dst_string.text));

  /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
  test.m_concats.record_string_concatenation (num: 2, locs: input_locs);

  location_t initial_loc = input_locs[0];

  /* "01234" on line 1.  */
  for (int i = 0; i <= 4; i++)
    ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
  /* "56789" in line 2, plus its closing quote for the nul terminator.  */
  for (int i = 5; i <= 10; i++)
    ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 2, 5 + i, 5 + i);

  /* 10 digits plus the NUL terminator.  */
  ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
}
3486 | |
/* Another test of string literal concatenation.  */

static void
test_lexer_string_locations_concatenation_2 (const line_table_case &case_)
{
  /* Digits 0-9.
     .....................000000000.111.11111112222222
     .....................123456789.012.34567890123456.  */
  const char *content = ("        \"01\" /* non-str */\n"
			 "        \"23\" /* non-str */\n"
			 "        \"45\" /* non-str */\n"
			 "        \"67\" /* non-str */\n"
			 "        \"89\" /* non-str */\n" );
  lexer_test test (case_, content, NULL);

  auto_vec <cpp_string> input_strings;
  location_t input_locs[5];

  /* Verify that we get the expected tokens back: one string token
     per line, recording each token's string and location.  */
  for (int i = 0; i < 5; i++)
    {
      const cpp_token *tok = test.get_token ();
      ASSERT_EQ (tok->type, CPP_STRING);
      input_strings.safe_push (obj: tok->val.str);
      input_locs[i] = tok->src_loc;
    }

  /* Verify that cpp_interpret_string works when concatenating all
     five input strings.  */
  cpp_string dst_string;
  const enum cpp_ttype type = CPP_STRING;
  bool result = cpp_interpret_string (test.m_parser,
				      input_strings.address (), 5,
				      &dst_string, type);
  ASSERT_TRUE (result);
  ASSERT_STREQ ("0123456789" , (const char *)dst_string.text);
  free (ptr: const_cast <unsigned char *> (dst_string.text));

  /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
  test.m_concats.record_string_concatenation (num: 5, locs: input_locs);

  location_t initial_loc = input_locs[0];

  /* Within ASSERT_CHAR_AT_RANGE (actually assert_char_at_range), we can
     detect if the initial loc is after LINE_MAP_MAX_LOCATION_WITH_COLS
     and expect get_source_range_for_substring to fail.
     However, for a string concatenation test, we can have a case
     where the initial string is fully before LINE_MAP_MAX_LOCATION_WITH_COLS,
     but subsequent strings can be after it.
     Attempting to detect this within assert_char_at_range
     would overcomplicate the logic for the common test cases, so
     we detect it here.  */
  if (should_have_column_data_p (loc: input_locs[0])
      && !should_have_column_data_p (loc: input_locs[4]))
    {
      /* Verify that get_source_range_for_substring gracefully rejects
	 this case.  */
      source_range actual_range;
      const char *err
	= get_source_range_for_char (pfile: test.m_parser, concats: &test.m_concats,
				     strloc: initial_loc, type, char_idx: 0, out_range: &actual_range);
      ASSERT_STREQ ("range starts after LINE_MAP_MAX_LOCATION_WITH_COLS" , err);
      return;
    }

  /* Two digits per input line, at columns 10 and 11.  */
  for (int i = 0; i < 5; i++)
    for (int j = 0; j < 2; j++)
      ASSERT_CHAR_AT_RANGE (test, initial_loc, type, (i * 2) + j,
			    i + 1, 10 + j, 10 + j);

  /* NUL-terminator should use the final closing quote at line 5 column 12.  */
  ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 5, 12, 12);

  /* 10 digits plus the NUL terminator.  */
  ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
}
3561 | |
/* Another test of string literal concatenation, this time combined with
   various kinds of escaped characters.  */

static void
test_lexer_string_locations_concatenation_3 (const line_table_case &case_)
{
  /* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
     digit 6 in ASCII as octal "\066", concatenating multiple strings.  */
  const char *content
    /* .000000000.111111.111.1.2222.222.2.2233.333.3333.34444444444555
       .123456789.012345.678.9.0123.456.7.8901.234.5678.90123456789012.  */
    = ("        \"01234\"  \"\\x35\"  \"\\066\"  \"789\" /* non-str */\n" );
  lexer_test test (case_, content, NULL);

  auto_vec <cpp_string> input_strings;
  location_t input_locs[4];

  /* Verify that we get the expected tokens back: four string tokens,
     recording each token's string and location.  */
  for (int i = 0; i < 4; i++)
    {
      const cpp_token *tok = test.get_token ();
      ASSERT_EQ (tok->type, CPP_STRING);
      input_strings.safe_push (obj: tok->val.str);
      input_locs[i] = tok->src_loc;
    }

  /* Verify that cpp_interpret_string works when concatenating the
     four input strings, decoding the hex and octal escapes.  */
  cpp_string dst_string;
  const enum cpp_ttype type = CPP_STRING;
  bool result = cpp_interpret_string (test.m_parser,
				      input_strings.address (), 4,
				      &dst_string, type);
  ASSERT_TRUE (result);
  ASSERT_STREQ ("0123456789" , (const char *)dst_string.text);
  free (ptr: const_cast <unsigned char *> (dst_string.text));

  /* Simulate c-lex.cc's lex_string in order to record concatenation.  */
  test.m_concats.record_string_concatenation (num: 4, locs: input_locs);

  location_t initial_loc = input_locs[0];

  /* Characters 0-4 are the unescaped digits at columns 10-14.  */
  for (int i = 0; i <= 4; i++)
    ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 10 + i, 10 + i);
  /* Character 5 is the hex escape "\x35", spanning columns 19-22.  */
  ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 5, 1, 19, 22);
  /* Character 6 is the octal escape "\066", spanning columns 27-30.  */
  ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 6, 1, 27, 30);
  /* Characters 7-9 are the digits of the final "789" literal.  */
  for (int i = 7; i <= 9; i++)
    ASSERT_CHAR_AT_RANGE (test, initial_loc, type, i, 1, 28 + i, 28 + i);

  /* NUL-terminator should use the location of the final closing quote.  */
  ASSERT_CHAR_AT_RANGE (test, initial_loc, type, 10, 1, 38, 38);

  /* 10 digits plus the NUL terminator.  */
  ASSERT_NUM_SUBSTRING_RANGES (test, initial_loc, type, 11);
}
3615 | |
3616 | /* Test of string literal in a macro. */ |
3617 | |
3618 | static void |
3619 | test_lexer_string_locations_macro (const line_table_case &case_) |
3620 | { |
3621 | /* Digits 0-9. |
3622 | .....................0000000001111111111.22222222223. |
3623 | .....................1234567890123456789.01234567890. */ |
3624 | const char *content = ("#define MACRO \"0123456789\" /* non-str */\n" |
3625 | " MACRO" ); |
3626 | lexer_test test (case_, content, NULL); |
3627 | |
3628 | /* Verify that we get the expected tokens back. */ |
3629 | const cpp_token *tok = test.get_token (); |
3630 | ASSERT_EQ (tok->type, CPP_PADDING); |
3631 | |
3632 | tok = test.get_token (); |
3633 | ASSERT_EQ (tok->type, CPP_STRING); |
3634 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"0123456789\"" ); |
3635 | |
3636 | /* Verify ranges of individual characters. We ought to |
3637 | see columns within the macro definition. */ |
3638 | for (int i = 0; i <= 10; i++) |
3639 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, |
3640 | i, 1, 20 + i, 20 + i); |
3641 | |
3642 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11); |
3643 | |
3644 | tok = test.get_token (); |
3645 | ASSERT_EQ (tok->type, CPP_PADDING); |
3646 | } |
3647 | |
3648 | /* Test of stringification of a macro argument. */ |
3649 | |
3650 | static void |
3651 | test_lexer_string_locations_stringified_macro_argument |
3652 | (const line_table_case &case_) |
3653 | { |
3654 | /* .....................000000000111111111122222222223. |
3655 | .....................123456789012345678901234567890. */ |
3656 | const char *content = ("#define MACRO(X) #X /* non-str */\n" |
3657 | "MACRO(foo)\n" ); |
3658 | lexer_test test (case_, content, NULL); |
3659 | |
3660 | /* Verify that we get the expected token back. */ |
3661 | const cpp_token *tok = test.get_token (); |
3662 | ASSERT_EQ (tok->type, CPP_PADDING); |
3663 | |
3664 | tok = test.get_token (); |
3665 | ASSERT_EQ (tok->type, CPP_STRING); |
3666 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "\"foo\"" ); |
3667 | |
3668 | /* We don't support getting the location of a stringified macro |
3669 | argument. Verify that it fails gracefully. */ |
3670 | ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, |
3671 | "cpp_interpret_string_1 failed" ); |
3672 | |
3673 | tok = test.get_token (); |
3674 | ASSERT_EQ (tok->type, CPP_PADDING); |
3675 | |
3676 | tok = test.get_token (); |
3677 | ASSERT_EQ (tok->type, CPP_PADDING); |
3678 | } |
3679 | |
/* Ensure that we fail gracefully if something attempts to pass
   in a location that isn't a string literal token.  Seen on this code:
3682 | |
3683 | const char a[] = " %d "; |
3684 | __builtin_printf (a, 0.5); |
3685 | ^ |
3686 | |
3687 | when c-format.cc erroneously used the indicated one-character |
3688 | location as the format string location, leading to a read past the |
3689 | end of a string buffer in cpp_interpret_string_1. */ |
3690 | |
3691 | static void |
3692 | test_lexer_string_locations_non_string (const line_table_case &case_) |
3693 | { |
3694 | /* .....................000000000111111111122222222223. |
3695 | .....................123456789012345678901234567890. */ |
3696 | const char *content = (" a\n" ); |
3697 | lexer_test test (case_, content, NULL); |
3698 | |
3699 | /* Verify that we get the expected token back. */ |
3700 | const cpp_token *tok = test.get_token (); |
3701 | ASSERT_EQ (tok->type, CPP_NAME); |
3702 | ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "a" ); |
3703 | |
3704 | /* At this point, libcpp is attempting to interpret the name as a |
3705 | string literal, despite it not starting with a quote. We don't detect |
3706 | that, but we should at least fail gracefully. */ |
3707 | ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, |
3708 | "cpp_interpret_string_1 failed" ); |
3709 | } |
3710 | |
/* Ensure that we can read substring information for a token which
   starts in one linemap and ends in another.  Adapted from
   gcc.dg/cpp/pr69985.c.  */

static void
test_lexer_string_locations_long_line (const line_table_case &case_)
{
  /* .....................000000.000111111111
     .....................123456.789012345678.  */
  const char *content = ("/* A very long line, so that we start a new line map. */\n"
			 "     \"0123456789012345678901234567890123456789"
			 "0123456789012345678901234567890123456789"
			 "0123456789012345678901234567890123456789"
			 "0123456789\"\n" );

  lexer_test test (case_, content, NULL);

  /* Verify that we get the expected token back.  */
  const cpp_token *tok = test.get_token ();
  ASSERT_EQ (tok->type, CPP_STRING);

  /* Without column data we cannot check the per-character ranges.  */
  if (!should_have_column_data_p (loc: line_table->highest_location))
    return;

  /* Verify ranges of individual characters: 130 digits plus the NUL
     terminator, all on line 2 starting at column 7.  */
  ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 131);
  for (int i = 0; i < 131; i++)
    ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING,
			  i, 2, 7 + i, 7 + i);
}
3741 | |
3742 | /* Test of locations within a raw string that doesn't contain a newline. */ |
3743 | |
3744 | static void |
3745 | test_lexer_string_locations_raw_string_one_line (const line_table_case &case_) |
3746 | { |
3747 | /* .....................00.0000000111111111122. |
3748 | .....................12.3456789012345678901. */ |
3749 | const char *content = ("R\"foo(0123456789)foo\"\n" ); |
3750 | lexer_test test (case_, content, NULL); |
3751 | |
3752 | /* Verify that we get the expected token back. */ |
3753 | const cpp_token *tok = test.get_token (); |
3754 | ASSERT_EQ (tok->type, CPP_STRING); |
3755 | |
3756 | /* Verify that cpp_interpret_string works. */ |
3757 | cpp_string dst_string; |
3758 | const enum cpp_ttype type = CPP_STRING; |
3759 | bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, |
3760 | &dst_string, type); |
3761 | ASSERT_TRUE (result); |
3762 | ASSERT_STREQ ("0123456789" , (const char *)dst_string.text); |
3763 | free (ptr: const_cast <unsigned char *> (dst_string.text)); |
3764 | |
3765 | if (!should_have_column_data_p (loc: line_table->highest_location)) |
3766 | return; |
3767 | |
3768 | /* 0-9, plus the nil terminator. */ |
3769 | ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11); |
3770 | for (int i = 0; i < 11; i++) |
3771 | ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, |
3772 | i, 1, 7 + i, 7 + i); |
3773 | } |
3774 | |
/* Test of locations within a raw string that contains a newline.  */

static void
test_lexer_string_locations_raw_string_multiline (const line_table_case &case_)
{
  /* .....................00.0000.
     .....................12.3456.  */
  const char *content = ("R\"foo(\n"
			 /* .....................00000.
			    .....................12345.  */
			 "hello\n"
			 "world\n"
			 /* .....................00000.
			    .....................12345.  */
			 ")foo\"\n" );
  lexer_test test (case_, content, NULL);

  /* Verify that we get the expected token back.  */
  const cpp_token *tok = test.get_token ();
  ASSERT_EQ (tok->type, CPP_STRING);

  /* Verify that cpp_interpret_string works; the embedded newlines
     should be preserved verbatim in the interpreted string.  */
  cpp_string dst_string;
  const enum cpp_ttype type = CPP_STRING;
  bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1,
				      &dst_string, type);
  ASSERT_TRUE (result);
  ASSERT_STREQ ("\nhello\nworld\n" , (const char *)dst_string.text);
  free (ptr: const_cast <unsigned char *> (dst_string.text));

  /* Without column data we cannot check substring ranges.  */
  if (!should_have_column_data_p (loc: line_table->highest_location))
    return;

  /* Currently we don't support locations within raw strings that
     contain newlines; verify that the failure is graceful.  */
  ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type,
				  "range endpoints are on different lines" );
}
3813 | |
/* Test of parsing an unterminated raw string.  */

static void
test_lexer_string_locations_raw_string_unterminated (const line_table_case &case_)
{
  /* The delimiter is "ouch"; ")ouCh\"" does not match it (different
     case), so the raw string is never terminated.  */
  const char *content = "R\"ouch()ouCh\" /* etc */" ;

  /* Collect diagnostics rather than asserting there are none.  */
  lexer_diagnostic_sink diagnostics;
  lexer_test test (case_, content, &diagnostics);
  test.m_implicitly_expect_EOF = false;

  /* Attempt to parse the raw string; lexing should give up and
     return EOF.  */
  const cpp_token *tok = test.get_token ();
  ASSERT_EQ (tok->type, CPP_EOF);

  /* Exactly one diagnostic should have been emitted.  */
  ASSERT_EQ (1, diagnostics.m_diagnostics.length ());
  /* We expect the message "unterminated raw string"
     in the "cpplib" translation domain.
     It's not clear that dgettext is available on all supported hosts,
     so this assertion is commented-out for now.
       ASSERT_STREQ (dgettext ("cpplib", "unterminated raw string"),
		     diagnostics.m_diagnostics[0]);
  */
}
3838 | |
/* Test of lexing char constants.  */

static void
test_lexer_char_constants (const line_table_case &case_)
{
  /* Various char constants.
     .....................0000000001111111111.22222222223.
     .....................1234567890123456789.01234567890.  */
  const char *content = (" 'a'\n"
			 " u'a'\n"
			 " U'a'\n"
			 " L'a'\n"
			 " 'abc'\n" );
  lexer_test test (case_, content, NULL);

  /* Verify that we get the expected tokens back.  */
  /* 'a'.  */
  const cpp_token *tok = test.get_token ();
  ASSERT_EQ (tok->type, CPP_CHAR);
  ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'a'" );

  /* Interpret the plain char constant; a single c-char should have
     been seen within the quotes.  */
  unsigned int chars_seen;
  int unsignedp;
  cppchar_t cc = cpp_interpret_charconst (test.m_parser, tok,
					  &chars_seen, &unsignedp);
  ASSERT_EQ (cc, 'a');
  ASSERT_EQ (chars_seen, 1);

  /* u'a': a char16_t constant.  */
  tok = test.get_token ();
  ASSERT_EQ (tok->type, CPP_CHAR16);
  ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "u'a'" );

  /* U'a': a char32_t constant.  */
  tok = test.get_token ();
  ASSERT_EQ (tok->type, CPP_CHAR32);
  ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "U'a'" );

  /* L'a': a wchar_t constant.  */
  tok = test.get_token ();
  ASSERT_EQ (tok->type, CPP_WCHAR);
  ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "L'a'" );

  /* 'abc' (c-char-sequence): a multi-character constant, still lexed
     as a single CPP_CHAR token.  */
  tok = test.get_token ();
  ASSERT_EQ (tok->type, CPP_CHAR);
  ASSERT_TOKEN_AS_TEXT_EQ (test.m_parser, tok, "'abc'" );
}
/* A table of interesting location_t values, giving one axis of our test
   matrix.  for_each_line_table_case runs every registered testcase once
   per entry here (for each default_range_bits setting).  */

static const location_t boundary_locations[] = {
  /* Zero means "don't override the default values for a new line_table".  */
  0,

  /* An arbitrary non-zero value that isn't close to one of
     the boundary values below.  */
  0x10000,

  /* Values near LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES.  */
  LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 0x100,
  LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - 1,
  LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
  LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 1,
  LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES + 0x100,

  /* Values near LINE_MAP_MAX_LOCATION_WITH_COLS.  */
  LINE_MAP_MAX_LOCATION_WITH_COLS - 0x100,
  LINE_MAP_MAX_LOCATION_WITH_COLS - 1,
  LINE_MAP_MAX_LOCATION_WITH_COLS,
  LINE_MAP_MAX_LOCATION_WITH_COLS + 1,
  LINE_MAP_MAX_LOCATION_WITH_COLS + 0x100,
};
3912 | |
3913 | /* Run TESTCASE multiple times, once for each case in our test matrix. */ |
3914 | |
3915 | void |
3916 | for_each_line_table_case (void (*testcase) (const line_table_case &)) |
3917 | { |
3918 | /* As noted above in the description of struct line_table_case, |
3919 | we want to explore a test matrix of interesting line_table |
3920 | situations, running various selftests for each case within the |
3921 | matrix. */ |
3922 | |
3923 | /* Run all tests with: |
3924 | (a) line_table->default_range_bits == 0, and |
3925 | (b) line_table->default_range_bits == 5. */ |
3926 | int num_cases_tested = 0; |
3927 | for (int default_range_bits = 0; default_range_bits <= 5; |
3928 | default_range_bits += 5) |
3929 | { |
3930 | /* ...and use each of the "interesting" location values as |
3931 | the starting location within line_table. */ |
3932 | const int num_boundary_locations = ARRAY_SIZE (boundary_locations); |
3933 | for (int loc_idx = 0; loc_idx < num_boundary_locations; loc_idx++) |
3934 | { |
3935 | line_table_case c (default_range_bits, boundary_locations[loc_idx]); |
3936 | |
3937 | testcase (c); |
3938 | |
3939 | num_cases_tested++; |
3940 | } |
3941 | } |
3942 | |
3943 | /* Verify that we fully covered the test matrix. */ |
3944 | ASSERT_EQ (num_cases_tested, 2 * 12); |
3945 | } |
3946 | |
/* Verify that when presented with a consecutive pair of locations with
   a very large line offset, we don't attempt to consolidate them into
   a single ordinary linemap where the line offsets within the line map
   would lead to overflow (PR lto/88147).  */

static void
test_line_offset_overflow ()
{
  line_table_test ltt (line_table_case (5, 0));

  linemap_add (line_table, LC_ENTER, sysp: false, to_file: "foo.c" , to_line: 0);
  linemap_line_start (set: line_table, to_line: 1, max_column_hint: 100);
  location_t loc_a = linemap_line_start (set: line_table, to_line: 2578, max_column_hint: 255);
  assert_loceq (exp_filename: "foo.c" , exp_linenum: 2578, exp_colnum: 0, loc: loc_a);

  /* The column hint of 255 requires 8 column bits; together with the
     5 range bits this gives m_column_and_range_bits of 13.  */
  const line_map_ordinary *ordmap_a = LINEMAPS_LAST_ORDINARY_MAP (set: line_table);
  ASSERT_EQ (ordmap_a->m_column_and_range_bits, 13);
  ASSERT_EQ (ordmap_a->m_range_bits, 5);

  /* Jump ahead by a very large number of lines.  */
  location_t loc_b = linemap_line_start (set: line_table, to_line: 404198, max_column_hint: 512);
  assert_loceq (exp_filename: "foo.c" , exp_linenum: 404198, exp_colnum: 0, loc: loc_b);

  /* We should have started a new linemap, rather than attempting to store
     a very large line offset.  */
  const line_map_ordinary *ordmap_b = LINEMAPS_LAST_ORDINARY_MAP (set: line_table);
  ASSERT_NE (ordmap_a, ordmap_b);
}
3974 | |
3975 | void test_cpp_utf8 () |
3976 | { |
3977 | const int def_tabstop = 8; |
3978 | cpp_char_column_policy policy (def_tabstop, cpp_wcwidth); |
3979 | |
3980 | /* Verify that wcwidth of invalid UTF-8 or control bytes is 1. */ |
3981 | { |
3982 | int w_bad = cpp_display_width (data: "\xf0!\x9f!\x98!\x82!" , data_length: 8, policy); |
3983 | ASSERT_EQ (8, w_bad); |
3984 | int w_ctrl = cpp_display_width (data: "\r\n\v\0\1" , data_length: 5, policy); |
3985 | ASSERT_EQ (5, w_ctrl); |
3986 | } |
3987 | |
3988 | /* Verify that wcwidth of valid UTF-8 is as expected. */ |
3989 | { |
3990 | const int w_pi = cpp_display_width (data: "\xcf\x80" , data_length: 2, policy); |
3991 | ASSERT_EQ (1, w_pi); |
3992 | const int w_emoji = cpp_display_width (data: "\xf0\x9f\x98\x82" , data_length: 4, policy); |
3993 | ASSERT_EQ (2, w_emoji); |
3994 | const int w_umlaut_precomposed = cpp_display_width (data: "\xc3\xbf" , data_length: 2, |
3995 | policy); |
3996 | ASSERT_EQ (1, w_umlaut_precomposed); |
3997 | const int w_umlaut_combining = cpp_display_width (data: "y\xcc\x88" , data_length: 3, |
3998 | policy); |
3999 | ASSERT_EQ (1, w_umlaut_combining); |
4000 | const int w_han = cpp_display_width (data: "\xe4\xb8\xba" , data_length: 3, policy); |
4001 | ASSERT_EQ (2, w_han); |
4002 | const int w_ascii = cpp_display_width (data: "GCC" , data_length: 3, policy); |
4003 | ASSERT_EQ (3, w_ascii); |
4004 | const int w_mixed = cpp_display_width (data: "\xcf\x80 = 3.14 \xf0\x9f\x98\x82" |
4005 | "\x9f! \xe4\xb8\xba y\xcc\x88" , |
4006 | data_length: 24, policy); |
4007 | ASSERT_EQ (18, w_mixed); |
4008 | } |
4009 | |
4010 | /* Verify that display width properly expands tabs. */ |
4011 | { |
4012 | const char *tstr = "\tabc\td" ; |
4013 | ASSERT_EQ (6, cpp_display_width (tstr, 6, |
4014 | cpp_char_column_policy (1, cpp_wcwidth))); |
4015 | ASSERT_EQ (10, cpp_display_width (tstr, 6, |
4016 | cpp_char_column_policy (3, cpp_wcwidth))); |
4017 | ASSERT_EQ (17, cpp_display_width (tstr, 6, |
4018 | cpp_char_column_policy (8, cpp_wcwidth))); |
4019 | ASSERT_EQ (1, |
4020 | cpp_display_column_to_byte_column |
4021 | (tstr, 6, 7, cpp_char_column_policy (8, cpp_wcwidth))); |
4022 | } |
4023 | |
4024 | /* Verify that cpp_byte_column_to_display_column can go past the end, |
4025 | and similar edge cases. */ |
4026 | { |
4027 | const char *str |
4028 | /* Display columns. |
4029 | 111111112345 */ |
4030 | = "\xcf\x80 abc" ; |
4031 | /* 111122223456 |
4032 | Byte columns. */ |
4033 | |
4034 | ASSERT_EQ (5, cpp_display_width (str, 6, policy)); |
4035 | ASSERT_EQ (105, |
4036 | cpp_byte_column_to_display_column (str, 6, 106, policy)); |
4037 | ASSERT_EQ (10000, |
4038 | cpp_byte_column_to_display_column (NULL, 0, 10000, policy)); |
4039 | ASSERT_EQ (0, |
4040 | cpp_byte_column_to_display_column (NULL, 10000, 0, policy)); |
4041 | } |
4042 | |
4043 | /* Verify that cpp_display_column_to_byte_column can go past the end, |
4044 | and similar edge cases, and check invertibility. */ |
4045 | { |
4046 | const char *str |
4047 | /* Display columns. |
4048 | 000000000000000000000000000000000000011 |
4049 | 111111112222222234444444455555555678901 */ |
4050 | = "\xf0\x9f\x98\x82 \xf0\x9f\x98\x82 hello" ; |
4051 | /* 000000000000000000000000000000000111111 |
4052 | 111122223333444456666777788889999012345 |
4053 | Byte columns. */ |
4054 | ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 2, policy)); |
4055 | ASSERT_EQ (15, |
4056 | cpp_display_column_to_byte_column (str, 15, 11, policy)); |
4057 | ASSERT_EQ (115, |
4058 | cpp_display_column_to_byte_column (str, 15, 111, policy)); |
4059 | ASSERT_EQ (10000, |
4060 | cpp_display_column_to_byte_column (NULL, 0, 10000, policy)); |
4061 | ASSERT_EQ (0, |
4062 | cpp_display_column_to_byte_column (NULL, 10000, 0, policy)); |
4063 | |
4064 | /* Verify that we do not interrupt a UTF-8 sequence. */ |
4065 | ASSERT_EQ (4, cpp_display_column_to_byte_column (str, 15, 1, policy)); |
4066 | |
4067 | for (int byte_col = 1; byte_col <= 15; ++byte_col) |
4068 | { |
4069 | const int disp_col |
4070 | = cpp_byte_column_to_display_column (data: str, data_length: 15, column: byte_col, policy); |
4071 | const int byte_col2 |
4072 | = cpp_display_column_to_byte_column (data: str, data_length: 15, display_col: disp_col, policy); |
4073 | |
4074 | /* If we ask for the display column in the middle of a UTF-8 |
4075 | sequence, it will return the length of the partial sequence, |
4076 | matching the behavior of GCC before display column support. |
4077 | Otherwise check the round trip was successful. */ |
4078 | if (byte_col < 4) |
4079 | ASSERT_EQ (byte_col, disp_col); |
4080 | else if (byte_col >= 6 && byte_col < 9) |
4081 | ASSERT_EQ (3 + (byte_col - 5), disp_col); |
4082 | else |
4083 | ASSERT_EQ (byte_col2, byte_col); |
4084 | } |
4085 | } |
4086 | } |
4087 | |
4088 | static bool |
4089 | check_cpp_valid_utf8_p (const char *str) |
4090 | { |
4091 | return cpp_valid_utf8_p (data: str, num_bytes: strlen (s: str)); |
4092 | } |
4093 | |
4094 | /* Check that cpp_valid_utf8_p works as expected. */ |
4095 | |
4096 | static void |
4097 | test_cpp_valid_utf8_p () |
4098 | { |
4099 | ASSERT_TRUE (check_cpp_valid_utf8_p ("hello world" )); |
4100 | |
4101 | /* 2-byte char (pi). */ |
4102 | ASSERT_TRUE (check_cpp_valid_utf8_p("\xcf\x80" )); |
4103 | |
4104 | /* 3-byte chars (the Japanese word "mojibake"). */ |
4105 | ASSERT_TRUE (check_cpp_valid_utf8_p |
4106 | ( |
4107 | /* U+6587 CJK UNIFIED IDEOGRAPH-6587 |
4108 | UTF-8: 0xE6 0x96 0x87 |
4109 | C octal escaped UTF-8: \346\226\207. */ |
4110 | "\346\226\207" |
4111 | /* U+5B57 CJK UNIFIED IDEOGRAPH-5B57 |
4112 | UTF-8: 0xE5 0xAD 0x97 |
4113 | C octal escaped UTF-8: \345\255\227. */ |
4114 | "\345\255\227" |
4115 | /* U+5316 CJK UNIFIED IDEOGRAPH-5316 |
4116 | UTF-8: 0xE5 0x8C 0x96 |
4117 | C octal escaped UTF-8: \345\214\226. */ |
4118 | "\345\214\226" |
4119 | /* U+3051 HIRAGANA LETTER KE |
4120 | UTF-8: 0xE3 0x81 0x91 |
4121 | C octal escaped UTF-8: \343\201\221. */ |
4122 | "\343\201\221" )); |
4123 | |
4124 | /* 4-byte char: an emoji. */ |
4125 | ASSERT_TRUE (check_cpp_valid_utf8_p ("\xf0\x9f\x98\x82" )); |
4126 | |
4127 | /* Control codes, including the NUL byte. */ |
4128 | ASSERT_TRUE (cpp_valid_utf8_p ("\r\n\v\0\1" , 5)); |
4129 | |
4130 | ASSERT_FALSE (check_cpp_valid_utf8_p ("\xf0!\x9f!\x98!\x82!" )); |
4131 | |
4132 | /* Unexpected continuation bytes. */ |
4133 | for (unsigned char continuation_byte = 0x80; |
4134 | continuation_byte <= 0xbf; |
4135 | continuation_byte++) |
4136 | ASSERT_FALSE (cpp_valid_utf8_p ((const char *)&continuation_byte, 1)); |
4137 | |
4138 | /* "Lonely start characters" for 2-byte sequences. */ |
4139 | { |
4140 | unsigned char buf[2]; |
4141 | buf[1] = ' '; |
4142 | for (buf[0] = 0xc0; |
4143 | buf[0] <= 0xdf; |
4144 | buf[0]++) |
4145 | ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2)); |
4146 | } |
4147 | |
4148 | /* "Lonely start characters" for 3-byte sequences. */ |
4149 | { |
4150 | unsigned char buf[2]; |
4151 | buf[1] = ' '; |
4152 | for (buf[0] = 0xe0; |
4153 | buf[0] <= 0xef; |
4154 | buf[0]++) |
4155 | ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2)); |
4156 | } |
4157 | |
4158 | /* "Lonely start characters" for 4-byte sequences. */ |
4159 | { |
4160 | unsigned char buf[2]; |
4161 | buf[1] = ' '; |
4162 | for (buf[0] = 0xf0; |
4163 | buf[0] <= 0xf4; |
4164 | buf[0]++) |
4165 | ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2)); |
4166 | } |
4167 | |
4168 | /* Invalid start characters (formerly valid for 5-byte and 6-byte |
4169 | sequences). */ |
4170 | { |
4171 | unsigned char buf[2]; |
4172 | buf[1] = ' '; |
4173 | for (buf[0] = 0xf5; |
4174 | buf[0] <= 0xfd; |
4175 | buf[0]++) |
4176 | ASSERT_FALSE (cpp_valid_utf8_p ((const char *)buf, 2)); |
4177 | } |
4178 | |
4179 | /* Impossible bytes. */ |
4180 | ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc0" )); |
4181 | ASSERT_FALSE (check_cpp_valid_utf8_p ("\xc1" )); |
4182 | ASSERT_FALSE (check_cpp_valid_utf8_p ("\xfe" )); |
4183 | ASSERT_FALSE (check_cpp_valid_utf8_p ("\xff" )); |
4184 | } |
4185 | |
4186 | /* Run all of the selftests within this file. */ |
4187 | |
4188 | void |
4189 | input_cc_tests () |
4190 | { |
4191 | test_linenum_comparisons (); |
4192 | test_should_have_column_data_p (); |
4193 | test_unknown_location (); |
4194 | test_builtins (); |
4195 | for_each_line_table_case (testcase: test_make_location_nonpure_range_endpoints); |
4196 | |
4197 | for_each_line_table_case (testcase: test_accessing_ordinary_linemaps); |
4198 | for_each_line_table_case (testcase: test_lexer); |
4199 | for_each_line_table_case (testcase: test_lexer_string_locations_simple); |
4200 | for_each_line_table_case (testcase: test_lexer_string_locations_ebcdic); |
4201 | for_each_line_table_case (testcase: test_lexer_string_locations_hex); |
4202 | for_each_line_table_case (testcase: test_lexer_string_locations_oct); |
4203 | for_each_line_table_case (testcase: test_lexer_string_locations_letter_escape_1); |
4204 | for_each_line_table_case (testcase: test_lexer_string_locations_letter_escape_2); |
4205 | for_each_line_table_case (testcase: test_lexer_string_locations_ucn4); |
4206 | for_each_line_table_case (testcase: test_lexer_string_locations_ucn8); |
4207 | for_each_line_table_case (testcase: test_lexer_string_locations_wide_string); |
4208 | for_each_line_table_case (testcase: test_lexer_string_locations_string16); |
4209 | for_each_line_table_case (testcase: test_lexer_string_locations_string32); |
4210 | for_each_line_table_case (testcase: test_lexer_string_locations_u8); |
4211 | for_each_line_table_case (testcase: test_lexer_string_locations_utf8_source); |
4212 | for_each_line_table_case (testcase: test_lexer_string_locations_concatenation_1); |
4213 | for_each_line_table_case (testcase: test_lexer_string_locations_concatenation_2); |
4214 | for_each_line_table_case (testcase: test_lexer_string_locations_concatenation_3); |
4215 | for_each_line_table_case (testcase: test_lexer_string_locations_macro); |
4216 | for_each_line_table_case (testcase: test_lexer_string_locations_stringified_macro_argument); |
4217 | for_each_line_table_case (testcase: test_lexer_string_locations_non_string); |
4218 | for_each_line_table_case (testcase: test_lexer_string_locations_long_line); |
4219 | for_each_line_table_case (testcase: test_lexer_string_locations_raw_string_one_line); |
4220 | for_each_line_table_case (testcase: test_lexer_string_locations_raw_string_multiline); |
4221 | for_each_line_table_case (testcase: test_lexer_string_locations_raw_string_unterminated); |
4222 | for_each_line_table_case (testcase: test_lexer_char_constants); |
4223 | |
4224 | test_reading_source_line (); |
4225 | |
4226 | test_line_offset_overflow (); |
4227 | |
4228 | test_cpp_utf8 (); |
4229 | test_cpp_valid_utf8_p (); |
4230 | } |
4231 | |
4232 | } // namespace selftest |
4233 | |
4234 | #endif /* CHECKING_P */ |
4235 | |