1 | /* Definitions for CPP library. |
2 | Copyright (C) 1995-2024 Free Software Foundation, Inc. |
3 | Written by Per Bothner, 1994-95. |
4 | |
5 | This program is free software; you can redistribute it and/or modify it |
6 | under the terms of the GNU General Public License as published by the |
7 | Free Software Foundation; either version 3, or (at your option) any |
8 | later version. |
9 | |
10 | This program is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | GNU General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU General Public License |
16 | along with this program; see the file COPYING3. If not see |
17 | <http://www.gnu.org/licenses/>. |
18 | |
19 | In other words, you are welcome to use, share and improve this program. |
20 | You are forbidden to forbid anyone else to use, share and improve |
21 | what you give them. Help stamp out software-hoarding! */ |
22 | #ifndef LIBCPP_CPPLIB_H |
23 | #define LIBCPP_CPPLIB_H |
24 | |
25 | #include <sys/types.h> |
26 | #include "symtab.h" |
27 | #include "line-map.h" |
28 | |
/* Forward declarations of the libcpp structure types.  Each typedef
   makes the structure usable by its plain name, so pointers to it can
   be declared without the full definition being in view.  */
typedef struct cpp_buffer cpp_buffer;
typedef struct cpp_callbacks cpp_callbacks;
typedef struct cpp_dir cpp_dir;
typedef struct cpp_hashnode cpp_hashnode;
typedef struct cpp_macro cpp_macro;
typedef struct cpp_options cpp_options;
typedef struct cpp_reader cpp_reader;
typedef struct cpp_string cpp_string;
typedef struct cpp_token cpp_token;

/* Introduced by tag only; no typedef name is provided for it here.  */
struct _cpp_file;
40 | |
/* Forward declaration of a C++ class.  The visible part of this header
   only ever uses it through a pointer (see the diagnostic callback in
   struct cpp_callbacks), so the incomplete type is sufficient here.  */
class rich_location;
42 | |
/* Master list of token types.  Clients define OP (for operators, which
   carry their spelling) and TK (for every other token) and then expand
   TTYPE_TABLE.

   Ordering constraints:

   - The first three groups, apart from '=', can appear in preprocessor
     expressions (+= and -= are used to indicate unary + and - resp.).
     This allows a lookup table to be implemented in _cpp_parse_expr.

   - The first group, up to CPP_LAST_EQ, can be immediately followed by
     an '='.  The lexer needs operators ending in '=', like ">>=", to be
     in the same order as their counterparts without the '=', like ">>".

   See the cpp_operator table optab in expr.cc if you change the order
   or add or remove anything in the first group.  */

#define TTYPE_TABLE                                                     \
  OP(EQ,                 "=")                                           \
  OP(NOT,                "!")                                           \
  OP(GREATER,            ">")    /* compare */                          \
  OP(LESS,               "<")                                           \
  OP(PLUS,               "+")    /* math */                             \
  OP(MINUS,              "-")                                           \
  OP(MULT,               "*")                                           \
  OP(DIV,                "/")                                           \
  OP(MOD,                "%")                                           \
  OP(AND,                "&")    /* bit ops */                          \
  OP(OR,                 "|")                                           \
  OP(XOR,                "^")                                           \
  OP(RSHIFT,             ">>")                                          \
  OP(LSHIFT,             "<<")                                          \
                                                                        \
  OP(COMPL,              "~")                                           \
  OP(AND_AND,            "&&")   /* logical */                          \
  OP(OR_OR,              "||")                                          \
  OP(QUERY,              "?")                                           \
  OP(COLON,              ":")                                           \
  OP(COMMA,              ",")    /* grouping */                         \
  OP(OPEN_PAREN,         "(")                                           \
  OP(CLOSE_PAREN,        ")")                                           \
  TK(EOF,                NONE)                                          \
  OP(EQ_EQ,              "==")   /* compare */                          \
  OP(NOT_EQ,             "!=")                                          \
  OP(GREATER_EQ,         ">=")                                          \
  OP(LESS_EQ,            "<=")                                          \
  OP(SPACESHIP,          "<=>")                                         \
                                                                        \
  /* These two are unary + / - in preprocessor expressions.  */        \
  OP(PLUS_EQ,            "+=")   /* math */                             \
  OP(MINUS_EQ,           "-=")                                          \
                                                                        \
  OP(MULT_EQ,            "*=")                                          \
  OP(DIV_EQ,             "/=")                                          \
  OP(MOD_EQ,             "%=")                                          \
  OP(AND_EQ,             "&=")   /* bit ops */                          \
  OP(OR_EQ,              "|=")                                          \
  OP(XOR_EQ,             "^=")                                          \
  OP(RSHIFT_EQ,          ">>=")                                         \
  OP(LSHIFT_EQ,          "<<=")                                         \
  /* Digraphs together, beginning with CPP_FIRST_DIGRAPH.  */          \
  OP(HASH,               "#")    /* digraphs */                         \
  OP(PASTE,              "##")                                          \
  OP(OPEN_SQUARE,        "[")                                           \
  OP(CLOSE_SQUARE,       "]")                                           \
  OP(OPEN_BRACE,         "{")                                           \
  OP(CLOSE_BRACE,        "}")                                           \
  /* The remainder of the punctuation.  Order is not significant.  */  \
  OP(SEMICOLON,          ";")    /* structure */                        \
  OP(ELLIPSIS,           "...")                                         \
  OP(PLUS_PLUS,          "++")   /* increment */                        \
  OP(MINUS_MINUS,        "--")                                          \
  OP(DEREF,              "->")   /* accessors */                        \
  OP(DOT,                ".")                                           \
  OP(SCOPE,              "::")                                          \
  OP(DEREF_STAR,         "->*")                                         \
  OP(DOT_STAR,           ".*")                                          \
  OP(ATSIGN,             "@")    /* used in Objective-C */              \
                                                                        \
  TK(NAME,               IDENT)   /* word */                            \
  TK(AT_NAME,            IDENT)   /* @word - Objective-C */             \
  TK(NUMBER,             LITERAL) /* 34_be+ta  */                       \
                                                                        \
  TK(CHAR,               LITERAL) /* 'char' */                          \
  TK(WCHAR,              LITERAL) /* L'char' */                         \
  TK(CHAR16,             LITERAL) /* u'char' */                         \
  TK(CHAR32,             LITERAL) /* U'char' */                         \
  TK(UTF8CHAR,           LITERAL) /* u8'char' */                        \
  TK(OTHER,              LITERAL) /* stray punctuation */               \
                                                                        \
  TK(STRING,             LITERAL) /* "string" */                        \
  TK(WSTRING,            LITERAL) /* L"string" */                       \
  TK(STRING16,           LITERAL) /* u"string" */                       \
  TK(STRING32,           LITERAL) /* U"string" */                       \
  TK(UTF8STRING,         LITERAL) /* u8"string" */                      \
  TK(OBJC_STRING,        LITERAL) /* @"string" - Objective-C */         \
  TK(HEADER_NAME,        LITERAL) /* <stdio.h> in #include */           \
  TK(UNEVAL_STRING,      LITERAL) /* unevaluated "string" - C++26 */    \
                                                                        \
  TK(CHAR_USERDEF,       LITERAL) /* 'char'_suffix - C++11 */           \
  TK(WCHAR_USERDEF,      LITERAL) /* L'char'_suffix - C++11 */          \
  TK(CHAR16_USERDEF,     LITERAL) /* u'char'_suffix - C++11 */          \
  TK(CHAR32_USERDEF,     LITERAL) /* U'char'_suffix - C++11 */          \
  TK(UTF8CHAR_USERDEF,   LITERAL) /* u8'char'_suffix - C++11 */         \
  TK(STRING_USERDEF,     LITERAL) /* "string"_suffix - C++11 */         \
  TK(WSTRING_USERDEF,    LITERAL) /* L"string"_suffix - C++11 */        \
  TK(STRING16_USERDEF,   LITERAL) /* u"string"_suffix - C++11 */        \
  TK(STRING32_USERDEF,   LITERAL) /* U"string"_suffix - C++11 */        \
  TK(UTF8STRING_USERDEF, LITERAL) /* u8"string"_suffix - C++11 */       \
                                                                        \
  TK(COMMENT,            LITERAL) /* Only if output comments.  */       \
                                  /* SPELL_LITERAL happens to DTRT.  */ \
  TK(MACRO_ARG,          NONE)    /* Macro argument.  */                \
  TK(PRAGMA,             NONE)    /* Only for deferred pragmas.  */     \
  TK(PRAGMA_EOL,         NONE)    /* End-of-line for deferred pragmas.  */ \
  TK(PADDING,            NONE)    /* Whitespace for -E.  */
153 | |
154 | #define OP(e, s) CPP_ ## e, |
155 | #define TK(e, s) CPP_ ## e, |
156 | enum cpp_ttype |
157 | { |
158 | TTYPE_TABLE |
159 | N_TTYPES, |
160 | |
161 | /* A token type for keywords, as opposed to ordinary identifiers. */ |
162 | CPP_KEYWORD, |
163 | |
164 | /* Positions in the table. */ |
165 | CPP_LAST_EQ = CPP_LSHIFT, |
166 | CPP_FIRST_DIGRAPH = CPP_HASH, |
167 | CPP_LAST_PUNCTUATOR= CPP_ATSIGN, |
168 | CPP_LAST_CPP_OP = CPP_LESS_EQ |
169 | }; |
170 | #undef OP |
171 | #undef TK |
172 | |
/* C language kind, used when calling cpp_create_reader.  */
enum c_lang
{
  /* C with GNU extensions.  */
  CLK_GNUC89 = 0,
  CLK_GNUC99,
  CLK_GNUC11,
  CLK_GNUC17,
  CLK_GNUC23,

  /* Standard C.  */
  CLK_STDC89,
  CLK_STDC94,
  CLK_STDC99,
  CLK_STDC11,
  CLK_STDC17,
  CLK_STDC23,

  /* C++, alternating GNU and standard variants per revision.  */
  CLK_GNUCXX,
  CLK_CXX98,
  CLK_GNUCXX11,
  CLK_CXX11,
  CLK_GNUCXX14,
  CLK_CXX14,
  CLK_GNUCXX17,
  CLK_CXX17,
  CLK_GNUCXX20,
  CLK_CXX20,
  CLK_GNUCXX23,
  CLK_CXX23,
  CLK_GNUCXX26,
  CLK_CXX26,

  CLK_ASM
};
181 | |
182 | /* Payload of a NUMBER, STRING, CHAR or COMMENT token. */ |
183 | struct GTY(()) cpp_string { |
184 | unsigned int len; |
185 | |
186 | /* TEXT is always null terminated (terminator not included in len); but this |
187 | GTY markup arranges that PCH streaming works properly even if there is a |
188 | null byte in the middle of the string. */ |
189 | const unsigned char * GTY((string_length ("1 + %h.len" ))) text; |
190 | }; |
191 | |
/* Flags for the cpp_token structure.  Each flag occupies one bit;
   COLON_SCOPE deliberately shares its bit with PURE_ZERO.  */
#define PREV_WHITE       (1 << 0)  /* If whitespace before this token.  */
#define DIGRAPH          (1 << 1)  /* If it was a digraph.  */
#define STRINGIFY_ARG    (1 << 2)  /* If macro argument to be stringified.  */
#define PASTE_LEFT       (1 << 3)  /* If on LHS of a ## operator.  */
#define NAMED_OP         (1 << 4)  /* C++ named operators.  */
#define PREV_FALLTHROUGH (1 << 5)  /* On a token preceded by FALLTHROUGH
                                      comment.  */
#define DECIMAL_INT      (1 << 6)  /* Decimal integer, set in c-lex.cc.  */
#define PURE_ZERO        (1 << 7)  /* Single 0 digit, used by the C++
                                      frontend, set in c-lex.cc.  */
#define COLON_SCOPE      PURE_ZERO /* Adjacent colons in C < 23.  */
#define SP_DIGRAPH       (1 << 8)  /* # or ## token was a digraph.  */
#define SP_PREV_WHITE    (1 << 9)  /* If whitespace before a ##
                                      operator, or before this token
                                      after a # operator.  */
#define NO_EXPAND        (1 << 10) /* Do not macro-expand this token.  */
#define PRAGMA_OP        (1 << 11) /* _Pragma token.  */
#define BOL              (1 << 12) /* Token at beginning of line.  */
211 | |
/* Discriminator for the cpp_token value union: names which member, if
   any, is in use.  The enumerator names double as the GTY tags on the
   union members.  */
enum cpp_token_fld_kind
{
  CPP_TOKEN_FLD_NODE = 0,
  CPP_TOKEN_FLD_SOURCE = 1,
  CPP_TOKEN_FLD_STR = 2,
  CPP_TOKEN_FLD_ARG_NO = 3,
  CPP_TOKEN_FLD_TOKEN_NO = 4,
  CPP_TOKEN_FLD_PRAGMA = 5,
  CPP_TOKEN_FLD_NONE = 6
};
223 | |
224 | /* A macro argument in the cpp_token union. */ |
225 | struct GTY(()) cpp_macro_arg { |
226 | /* Argument number. */ |
227 | unsigned int arg_no; |
228 | /* The original spelling of the macro argument token. */ |
229 | cpp_hashnode * |
230 | GTY ((nested_ptr (union tree_node, |
231 | "%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL" , |
232 | "%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL" ))) |
233 | spelling; |
234 | }; |
235 | |
236 | /* An identifier in the cpp_token union. */ |
237 | struct GTY(()) cpp_identifier { |
238 | /* The canonical (UTF-8) spelling of the identifier. */ |
239 | cpp_hashnode * |
240 | GTY ((nested_ptr (union tree_node, |
241 | "%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL" , |
242 | "%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL" ))) |
243 | node; |
244 | /* The original spelling of the identifier. */ |
245 | cpp_hashnode * |
246 | GTY ((nested_ptr (union tree_node, |
247 | "%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL" , |
248 | "%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL" ))) |
249 | spelling; |
250 | }; |
251 | |
252 | /* A preprocessing token. This has been carefully packed and should |
253 | occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts. */ |
254 | struct GTY(()) cpp_token { |
255 | |
256 | /* Location of first char of token, together with range of full token. */ |
257 | location_t src_loc; |
258 | |
259 | ENUM_BITFIELD(cpp_ttype) type : CHAR_BIT; /* token type */ |
260 | unsigned short flags; /* flags - see above */ |
261 | |
262 | union cpp_token_u |
263 | { |
264 | /* An identifier. */ |
265 | struct cpp_identifier GTY ((tag ("CPP_TOKEN_FLD_NODE" ))) node; |
266 | |
267 | /* Inherit padding from this token. */ |
268 | cpp_token * GTY ((tag ("CPP_TOKEN_FLD_SOURCE" ))) source; |
269 | |
270 | /* A string, or number. */ |
271 | struct cpp_string GTY ((tag ("CPP_TOKEN_FLD_STR" ))) str; |
272 | |
273 | /* Argument no. (and original spelling) for a CPP_MACRO_ARG. */ |
274 | struct cpp_macro_arg GTY ((tag ("CPP_TOKEN_FLD_ARG_NO" ))) macro_arg; |
275 | |
276 | /* Original token no. for a CPP_PASTE (from a sequence of |
277 | consecutive paste tokens in a macro expansion). */ |
278 | unsigned int GTY ((tag ("CPP_TOKEN_FLD_TOKEN_NO" ))) token_no; |
279 | |
280 | /* Caller-supplied identifier for a CPP_PRAGMA. */ |
281 | unsigned int GTY ((tag ("CPP_TOKEN_FLD_PRAGMA" ))) pragma; |
282 | } GTY ((desc ("cpp_token_val_index (&%1)" ))) val; |
283 | }; |
284 | |
285 | /* Say which field is in use. */ |
286 | extern enum cpp_token_fld_kind cpp_token_val_index (const cpp_token *tok); |
287 | |
288 | /* A type wide enough to hold any multibyte source character. |
289 | cpplib's character constant interpreter requires an unsigned type. |
290 | Also, a typedef for the signed equivalent. |
291 | The width of this type is capped at 32 bits; there do exist targets |
292 | where wchar_t is 64 bits, but only in a non-default mode, and there |
293 | would be no meaningful interpretation for a wchar_t value greater |
294 | than 2^32 anyway -- the widest wide-character encoding around is |
295 | ISO 10646, which stops at 2^31. */ |
296 | #if CHAR_BIT * SIZEOF_INT >= 32 |
297 | # define CPPCHAR_SIGNED_T int |
298 | #elif CHAR_BIT * SIZEOF_LONG >= 32 |
299 | # define CPPCHAR_SIGNED_T long |
300 | #else |
301 | # error "Cannot find a least-32-bit signed integer type" |
302 | #endif |
303 | typedef unsigned CPPCHAR_SIGNED_T cppchar_t; |
304 | typedef CPPCHAR_SIGNED_T cppchar_signed_t; |
305 | |
/* Style of header dependencies to generate.  */
enum cpp_deps_style
{
  DEPS_NONE = 0,
  DEPS_USER,
  DEPS_SYSTEM
};
308 | |
/* Structured format of module dependencies to generate.  */
enum cpp_fdeps_format
{
  FDEPS_FMT_NONE = 0,
  FDEPS_FMT_P1689R5
};
311 | |
/* The possible normalization levels, from most restrictive to least.  */
enum cpp_normalize_level
{
  normalized_KC = 0,        /* In NFKC.  */
  normalized_C,             /* In NFC.  */
  normalized_identifier_C,  /* In NFC, except for subsequences where
                               being in NFC would make the identifier
                               invalid.  */
  normalized_none           /* Not normalized at all.  */
};
324 | |
/* Values for the main_search member of cpp_options.  */
enum cpp_main_search
{
  CMS_none,    /* A regular source file.  */
  CMS_header,  /* Is a directly-specified header file (eg PCH or
		  header-unit).  */
  CMS_user,    /* Search the user INCLUDE path.  */
  CMS_system   /* Search the system INCLUDE path.  */
};
333 | |
/* The possible bidirectional control characters checking levels.  The
   values are distinct bits apart from bidirectional_none, which is
   zero.  */
enum cpp_bidirectional_level
{
  bidirectional_none     = 0,  /* No checking.  */
  bidirectional_unpaired = 1,  /* Only detect unpaired uses of
                                  bidirectional control characters.  */
  bidirectional_any      = 2,  /* Detect any use of bidirectional control
                                  characters.  */
  bidirectional_ucn      = 4   /* Also warn about UCNs.  */
};
345 | |
346 | /* This structure is nested inside struct cpp_reader, and |
347 | carries all the options visible to the command line. */ |
348 | struct cpp_options |
349 | { |
350 | /* The language we're preprocessing. */ |
351 | enum c_lang lang; |
352 | |
353 | /* Nonzero means use extra default include directories for C++. */ |
354 | unsigned char cplusplus; |
355 | |
356 | /* Nonzero means handle cplusplus style comments. */ |
357 | unsigned char ; |
358 | |
359 | /* Nonzero means define __OBJC__, treat @ as a special token, use |
360 | the OBJC[PLUS]_INCLUDE_PATH environment variable, and allow |
361 | "#import". */ |
362 | unsigned char objc; |
363 | |
364 | /* Nonzero means don't copy comments into the output file. */ |
365 | unsigned char ; |
366 | |
367 | /* Nonzero means don't copy comments into the output file during |
368 | macro expansion. */ |
369 | unsigned char ; |
370 | |
371 | /* Nonzero means process the ISO trigraph sequences. */ |
372 | unsigned char trigraphs; |
373 | |
374 | /* Nonzero means process the ISO digraph sequences. */ |
375 | unsigned char digraphs; |
376 | |
377 | /* Nonzero means to allow hexadecimal floats and LL suffixes. */ |
378 | unsigned char extended_numbers; |
379 | |
380 | /* Nonzero means process u/U prefix literals (UTF-16/32). */ |
381 | unsigned char uliterals; |
382 | |
383 | /* Nonzero means process u8 prefixed character literals (UTF-8). */ |
384 | unsigned char utf8_char_literals; |
385 | |
386 | /* Nonzero means process r/R raw strings. If this is set, uliterals |
387 | must be set as well. */ |
388 | unsigned char rliterals; |
389 | |
390 | /* Nonzero means print names of header files (-H). */ |
391 | unsigned char print_include_names; |
392 | |
393 | /* Nonzero means complain about deprecated features. */ |
394 | unsigned char cpp_warn_deprecated; |
395 | |
396 | /* Nonzero means warn if slash-star appears in a comment. */ |
397 | unsigned char ; |
398 | |
399 | /* Nonzero means to warn about __DATA__, __TIME__ and __TIMESTAMP__ usage. */ |
400 | unsigned char warn_date_time; |
401 | |
402 | /* Nonzero means warn if a user-supplied include directory does not |
403 | exist. */ |
404 | unsigned char warn_missing_include_dirs; |
405 | |
406 | /* Nonzero means warn if there are any trigraphs. */ |
407 | unsigned char warn_trigraphs; |
408 | |
409 | /* Nonzero means warn about multicharacter charconsts. */ |
410 | unsigned char warn_multichar; |
411 | |
412 | /* Nonzero means warn about various incompatibilities with |
413 | traditional C. */ |
414 | unsigned char cpp_warn_traditional; |
415 | |
416 | /* Nonzero means warn about long long numeric constants. */ |
417 | unsigned char cpp_warn_long_long; |
418 | |
419 | /* Nonzero means warn about text after an #endif (or #else). */ |
420 | unsigned char warn_endif_labels; |
421 | |
422 | /* Nonzero means warn about implicit sign changes owing to integer |
423 | promotions. */ |
424 | unsigned char warn_num_sign_change; |
425 | |
426 | /* Zero means don't warn about __VA_ARGS__ usage in c89 pedantic mode. |
427 | Presumably the usage is protected by the appropriate #ifdef. */ |
428 | unsigned char warn_variadic_macros; |
429 | |
430 | /* Nonzero means warn about builtin macros that are redefined or |
431 | explicitly undefined. */ |
432 | unsigned char warn_builtin_macro_redefined; |
433 | |
434 | /* Different -Wimplicit-fallthrough= levels. */ |
435 | unsigned char cpp_warn_implicit_fallthrough; |
436 | |
437 | /* Nonzero means we should look for header.gcc files that remap file |
438 | names. */ |
439 | unsigned char remap; |
440 | |
441 | /* Zero means dollar signs are punctuation. */ |
442 | unsigned char dollars_in_ident; |
443 | |
444 | /* Nonzero means UCNs are accepted in identifiers. */ |
445 | unsigned char extended_identifiers; |
446 | |
447 | /* True if we should warn about dollars in identifiers or numbers |
448 | for this translation unit. */ |
449 | unsigned char warn_dollars; |
450 | |
451 | /* Nonzero means warn if undefined identifiers are evaluated in an #if. */ |
452 | unsigned char warn_undef; |
453 | |
454 | /* Nonzero means warn if "defined" is encountered in a place other than |
455 | an #if. */ |
456 | unsigned char warn_expansion_to_defined; |
457 | |
458 | /* Nonzero means warn of unused macros from the main file. */ |
459 | unsigned char warn_unused_macros; |
460 | |
461 | /* Nonzero for the 1999 C Standard, including corrigenda and amendments. */ |
462 | unsigned char c99; |
463 | |
464 | /* Nonzero if we are conforming to a specific C or C++ standard. */ |
465 | unsigned char std; |
466 | |
467 | /* Nonzero means give all the error messages the ANSI standard requires. */ |
468 | unsigned char cpp_pedantic; |
469 | |
470 | /* Nonzero means we're looking at already preprocessed code, so don't |
471 | bother trying to do macro expansion and whatnot. */ |
472 | unsigned char preprocessed; |
473 | |
474 | /* Nonzero means we are going to emit debugging logs during |
475 | preprocessing. */ |
476 | unsigned char debug; |
477 | |
478 | /* Nonzero means we are tracking locations of tokens involved in |
479 | macro expansion. 1 Means we track the location in degraded mode |
480 | where we do not track locations of tokens resulting from the |
481 | expansion of arguments of function-like macro. 2 Means we do |
482 | track all macro expansions. This last option is the one that |
483 | consumes the highest amount of memory. */ |
484 | unsigned char track_macro_expansion; |
485 | |
486 | /* Nonzero means handle C++ alternate operator names. */ |
487 | unsigned char operator_names; |
488 | |
489 | /* Nonzero means warn about use of C++ alternate operator names. */ |
490 | unsigned char warn_cxx_operator_names; |
491 | |
492 | /* True for traditional preprocessing. */ |
493 | unsigned char traditional; |
494 | |
495 | /* Nonzero for C++ 2011 Standard user-defined literals. */ |
496 | unsigned char user_literals; |
497 | |
498 | /* Nonzero means warn when a string or character literal is followed by a |
499 | ud-suffix which does not beging with an underscore. */ |
500 | unsigned char warn_literal_suffix; |
501 | |
502 | /* Nonzero means interpret imaginary, fixed-point, or other gnu extension |
503 | literal number suffixes as user-defined literal number suffixes. */ |
504 | unsigned char ext_numeric_literals; |
505 | |
506 | /* Nonzero means extended identifiers allow the characters specified |
507 | in C11. */ |
508 | unsigned char c11_identifiers; |
509 | |
510 | /* Nonzero means extended identifiers allow the characters specified |
511 | by Unicode XID_Start and XID_Continue properties. */ |
512 | unsigned char xid_identifiers; |
513 | |
514 | /* Nonzero for C++ 2014 Standard binary constants. */ |
515 | unsigned char binary_constants; |
516 | |
517 | /* Nonzero for C++ 2014 Standard digit separators. */ |
518 | unsigned char digit_separators; |
519 | |
520 | /* Nonzero for C23 decimal floating-point constants. */ |
521 | unsigned char dfp_constants; |
522 | |
523 | /* Nonzero for C++20 __VA_OPT__ feature. */ |
524 | unsigned char va_opt; |
525 | |
526 | /* Nonzero for the '::' token. */ |
527 | unsigned char scope; |
528 | |
529 | /* Nonzero for the '#elifdef' and '#elifndef' directives. */ |
530 | unsigned char elifdef; |
531 | |
532 | /* Nonzero for the '#warning' directive. */ |
533 | unsigned char warning_directive; |
534 | |
535 | /* Nonzero means tokenize C++20 module directives. */ |
536 | unsigned char module_directives; |
537 | |
538 | /* Nonzero for C++23 size_t literals. */ |
539 | unsigned char size_t_literals; |
540 | |
541 | /* Nonzero for C++23 delimited escape sequences. */ |
542 | unsigned char delimited_escape_seqs; |
543 | |
544 | /* Nonzero for 'true' and 'false' in #if expressions. */ |
545 | unsigned char true_false; |
546 | |
547 | /* Holds the name of the target (execution) character set. */ |
548 | const char *narrow_charset; |
549 | |
550 | /* Holds the name of the target wide character set. */ |
551 | const char *wide_charset; |
552 | |
553 | /* Holds the name of the input character set. */ |
554 | const char *input_charset; |
555 | |
556 | /* The minimum permitted level of normalization before a warning |
557 | is generated. See enum cpp_normalize_level. */ |
558 | int warn_normalize; |
559 | |
560 | /* True to warn about precompiled header files we couldn't use. */ |
561 | bool warn_invalid_pch; |
562 | |
563 | /* True if dependencies should be restored from a precompiled header. */ |
564 | bool restore_pch_deps; |
565 | |
566 | /* True if warn about differences between C90 and C99. */ |
567 | signed char cpp_warn_c90_c99_compat; |
568 | |
569 | /* True if warn about differences between C11 and C23. */ |
570 | signed char cpp_warn_c11_c23_compat; |
571 | |
572 | /* True if warn about differences between C++98 and C++11. */ |
573 | bool cpp_warn_cxx11_compat; |
574 | |
575 | /* True if warn about differences between C++17 and C++20. */ |
576 | bool cpp_warn_cxx20_compat; |
577 | |
578 | /* Nonzero if bidirectional control characters checking is on. See enum |
579 | cpp_bidirectional_level. */ |
580 | unsigned char cpp_warn_bidirectional; |
581 | |
582 | /* True if libcpp should warn about invalid UTF-8 characters in comments. |
583 | 2 if it should be a pedwarn. */ |
584 | unsigned char cpp_warn_invalid_utf8; |
585 | |
586 | /* True if libcpp should warn about invalid forms of delimited or named |
587 | escape sequences. */ |
588 | bool cpp_warn_unicode; |
589 | |
590 | /* True if -finput-charset= option has been used explicitly. */ |
591 | bool cpp_input_charset_explicit; |
592 | |
593 | /* Dependency generation. */ |
594 | struct |
595 | { |
596 | /* Style of header dependencies to generate. */ |
597 | enum cpp_deps_style style; |
598 | |
599 | /* Structured format of module dependencies to generate. */ |
600 | enum cpp_fdeps_format fdeps_format; |
601 | |
602 | /* Assume missing files are generated files. */ |
603 | bool missing_files; |
604 | |
605 | /* Generate phony targets for each dependency apart from the first |
606 | one. */ |
607 | bool phony_targets; |
608 | |
609 | /* Generate dependency info for modules. */ |
610 | bool modules; |
611 | |
612 | /* If true, no dependency is generated on the main file. */ |
613 | bool ignore_main_file; |
614 | |
615 | /* If true, intend to use the preprocessor output (e.g., for compilation) |
616 | in addition to the dependency info. */ |
617 | bool need_preprocessor_output; |
618 | } deps; |
619 | |
620 | /* Target-specific features set by the front end or client. */ |
621 | |
622 | /* Precision for target CPP arithmetic, target characters, target |
623 | ints and target wide characters, respectively. */ |
624 | size_t precision, char_precision, int_precision, wchar_precision; |
625 | |
626 | /* True means chars (wide chars, UTF-8 chars) are unsigned. */ |
627 | bool unsigned_char, unsigned_wchar, unsigned_utf8char; |
628 | |
629 | /* True if the most significant byte in a word has the lowest |
630 | address in memory. */ |
631 | bool bytes_big_endian; |
632 | |
633 | /* Nonzero means __STDC__ should have the value 0 in system headers. */ |
634 | unsigned char ; |
635 | |
636 | /* True disables tokenization outside of preprocessing directives. */ |
637 | bool directives_only; |
638 | |
639 | /* True enables canonicalization of system header file paths. */ |
640 | bool ; |
641 | |
642 | /* The maximum depth of the nested #include. */ |
643 | unsigned int max_include_depth; |
644 | |
645 | cpp_main_search main_search : 8; |
646 | }; |
647 | |
/* Diagnostic levels.  To get a diagnostic without associating a
   position in the translation unit with it, use cpp_error_with_line
   with a line number of zero.  */

enum cpp_diagnostic_level
{
  CPP_DL_WARNING = 0,     /* Warning, an error with -Werror.  */
  CPP_DL_WARNING_SYSHDR,  /* Same as CPP_DL_WARNING, except it is not
                             suppressed in system headers.  */
  CPP_DL_PEDWARN,         /* Warning, an error with -pedantic-errors or
                             -Werror.  */
  CPP_DL_ERROR,           /* An error.  */
  CPP_DL_ICE,             /* An internal consistency check failed.
                             Prints "internal error: ", otherwise the
                             same as CPP_DL_ERROR.  */
  CPP_DL_NOTE,            /* An informative note following a warning.  */
  CPP_DL_FATAL            /* A fatal error.  */
};
669 | |
/* Warning reason codes.  Use a reason code of CPP_W_NONE for unclassified
   warnings and diagnostics that are not warnings.  The enumerators take
   consecutive values starting from zero, so new codes belong at the
   end unless renumbering is intended.  */

enum cpp_warning_reason {
  CPP_W_NONE = 0,
  CPP_W_DEPRECATED,
  CPP_W_COMMENTS,
  CPP_W_MISSING_INCLUDE_DIRS,
  CPP_W_TRIGRAPHS,
  CPP_W_MULTICHAR,
  CPP_W_TRADITIONAL,
  CPP_W_LONG_LONG,
  CPP_W_ENDIF_LABELS,
  CPP_W_NUM_SIGN_CHANGE,
  CPP_W_VARIADIC_MACROS,
  CPP_W_BUILTIN_MACRO_REDEFINED,
  CPP_W_DOLLARS,
  CPP_W_UNDEF,
  CPP_W_UNUSED_MACROS,
  CPP_W_CXX_OPERATOR_NAMES,
  CPP_W_NORMALIZE,
  CPP_W_INVALID_PCH,
  CPP_W_WARNING_DIRECTIVE,
  CPP_W_LITERAL_SUFFIX,
  CPP_W_SIZE_T_LITERALS,
  CPP_W_DATE_TIME,
  CPP_W_PEDANTIC,
  CPP_W_C90_C99_COMPAT,
  CPP_W_C11_C23_COMPAT,
  CPP_W_CXX11_COMPAT,
  CPP_W_CXX20_COMPAT,
  CPP_W_EXPANSION_TO_DEFINED,
  CPP_W_BIDIRECTIONAL,
  CPP_W_INVALID_UTF8,
  CPP_W_UNICODE
};
706 | |
707 | /* Callback for header lookup for HEADER, which is the name of a |
708 | source file. It is used as a method of last resort to find headers |
709 | that are not otherwise found during the normal include processing. |
710 | The return value is the malloced name of a header to try and open, |
711 | if any, or NULL otherwise. This callback is called only if the |
712 | header is otherwise unfound. */ |
713 | typedef const char *(*)(cpp_reader *, const char *, cpp_dir **); |
714 | |
715 | /* Call backs to cpplib client. */ |
716 | struct cpp_callbacks |
717 | { |
718 | /* Called when a new line of preprocessed output is started. */ |
719 | void (*line_change) (cpp_reader *, const cpp_token *, int); |
720 | |
721 | /* Called when switching to/from a new file. |
722 | The line_map is for the new file. It is NULL if there is no new file. |
723 | (In C this happens when done with <built-in>+<command line> and also |
724 | when done with a main file.) This can be used for resource cleanup. */ |
725 | void (*file_change) (cpp_reader *, const line_map_ordinary *); |
726 | |
727 | void (*dir_change) (cpp_reader *, const char *); |
728 | void (*include) (cpp_reader *, location_t, const unsigned char *, |
729 | const char *, int, const cpp_token **); |
730 | void (*define) (cpp_reader *, location_t, cpp_hashnode *); |
731 | void (*undef) (cpp_reader *, location_t, cpp_hashnode *); |
732 | void (*ident) (cpp_reader *, location_t, const cpp_string *); |
733 | void (*def_pragma) (cpp_reader *, location_t); |
734 | int (*valid_pch) (cpp_reader *, const char *, int); |
735 | void (*read_pch) (cpp_reader *, const char *, int, const char *); |
736 | missing_header_cb ; |
737 | |
738 | /* Context-sensitive macro support. Returns macro (if any) that should |
739 | be expanded. */ |
740 | cpp_hashnode * (*macro_to_expand) (cpp_reader *, const cpp_token *); |
741 | |
742 | /* Called to emit a diagnostic. This callback receives the |
743 | translated message. */ |
744 | bool (*diagnostic) (cpp_reader *, |
745 | enum cpp_diagnostic_level, |
746 | enum cpp_warning_reason, |
747 | rich_location *, |
748 | const char *, va_list *) |
749 | ATTRIBUTE_FPTR_PRINTF(5,0); |
750 | |
751 | /* Callbacks for when a macro is expanded, or tested (whether |
752 | defined or not at the time) in #ifdef, #ifndef or "defined". */ |
753 | void (*used_define) (cpp_reader *, location_t, cpp_hashnode *); |
754 | void (*used_undef) (cpp_reader *, location_t, cpp_hashnode *); |
755 | /* Called before #define and #undef or other macro definition |
756 | changes are processed. */ |
757 | void (*before_define) (cpp_reader *); |
758 | /* Called whenever a macro is expanded or tested. |
759 | Second argument is the location of the start of the current expansion. */ |
760 | void (*used) (cpp_reader *, location_t, cpp_hashnode *); |
761 | |
762 | /* Callback to identify whether an attribute exists. */ |
763 | int (*has_attribute) (cpp_reader *, bool); |
764 | |
765 | /* Callback to determine whether a built-in function is recognized. */ |
766 | int (*has_builtin) (cpp_reader *); |
767 | |
768 | /* Callback to determine whether a feature is available. */ |
769 | int (*has_feature) (cpp_reader *, bool); |
770 | |
771 | /* Callback that can change a user lazy into normal macro. */ |
772 | void (*user_lazy_macro) (cpp_reader *, cpp_macro *, unsigned); |
773 | |
774 | /* Callback to handle deferred cpp_macros. */ |
775 | cpp_macro *(*user_deferred_macro) (cpp_reader *, location_t, cpp_hashnode *); |
776 | |
777 | /* Callback to parse SOURCE_DATE_EPOCH from environment. */ |
778 | time_t (*get_source_date_epoch) (cpp_reader *); |
779 | |
780 | /* Callback for providing suggestions for misspelled directives. */ |
781 | const char *(*get_suggestion) (cpp_reader *, const char *, const char *const *); |
782 | |
783 | /* Callback for when a comment is encountered, giving the location |
784 | of the opening slash, a pointer to the content (which is not |
785 | necessarily 0-terminated), and the length of the content. |
786 | The content contains the opening slash-star (or slash-slash), |
787 | and for C-style comments contains the closing star-slash. For |
788 | C++-style comments it does not include the terminating newline. */ |
789 | void (*) (cpp_reader *, location_t, const unsigned char *, |
790 | size_t); |
791 | |
792 | /* Callback for filename remapping in __FILE__ and __BASE_FILE__ macro |
793 | expansions. */ |
794 | const char *(*remap_filename) (const char*); |
795 | |
796 | /* Maybe translate a #include into something else. Return a |
797 | cpp_buffer containing the translation if translating. */ |
798 | char *(*translate_include) (cpp_reader *, line_maps *, location_t, |
799 | const char *path); |
800 | }; |
801 | |
/* Declarator for the inode member of struct cpp_dir.  The type is
   host-dependent: VMS uses an array of three ino_t, AIX with a
   4-byte ino_t uses ino64_t, and everything else uses plain ino_t.
   In every case the member is called `ino'.  */
#ifdef VMS
#define INO_T_CPP ino_t ino[3]
#elif defined (_AIX) && SIZEOF_INO_T == 4
#define INO_T_CPP ino64_t ino
#else
#define INO_T_CPP ino_t ino
#endif

/* Declarator for the device member of struct cpp_dir.  AIX with a
   4-byte dev_t uses dev64_t; everything else uses plain dev_t.  The
   member is always called `dev'.  */
#if defined (_AIX) && SIZEOF_DEV_T == 4
#define DEV_T_CPP dev64_t dev
#else
#define DEV_T_CPP dev_t dev
#endif
815 | |
/* Chain of directories to look for include files in.  Each node
   describes one directory of the search path.  */
struct cpp_dir
{
  /* NULL-terminated singly-linked list.  */
  struct cpp_dir *next;

  /* NAME of the directory, NUL-terminated.  */
  char *name;
  /* NOTE(review): presumably the length of NAME, not counting the
     terminating NUL -- confirm against the code that fills it in.  */
  unsigned int len;

  /* One if a system header, two if a system header that has extern
     "C" guards for C++.  */
  unsigned char sysp;

  /* Is this a user-supplied directory?  */
  bool user_supplied_p;

  /* The canonicalized NAME as determined by lrealpath.  This field
     is only used by hosts that lack reliable inode numbers.  */
  char *canonical_name;

  /* Mapping of file names for this directory for MS-DOS and related
     platforms.  A NULL-terminated array of (from, to) pairs.  */
  const char **name_map;

  /* Routine to construct pathname, given the search path name and the
     HEADER we are trying to find, return a constructed pathname to
     try and open.  If this is NULL, the constructed pathname is as
     constructed by append_file_to_dir.  */
  char *(*construct) (const char *, cpp_dir *dir);

  /* The C front end uses these to recognize duplicated
     directories in the search path.  The two members (`ino' and
     `dev') are declared through INO_T_CPP and DEV_T_CPP because
     their types are host-dependent.  */
  INO_T_CPP;
  DEV_T_CPP;
};
852 | |
/* Discriminator for the flavor of a cpp_macro.  The values are
   spelled out because they are stored in a 2-bit field of
   cpp_macro.  */
enum cpp_macro_kind
{
  /* An ISO macro (token expansion).  */
  cmk_macro = 0,

  /* An assertion.  */
  cmk_assert = 1,

  /* A traditional macro (text expansion).  */
  cmk_traditional = 2
};
859 | |
860 | /* Each macro definition is recorded in a cpp_macro structure. |
861 | Variadic macros cannot occur with traditional cpp. */ |
862 | struct GTY(()) cpp_macro { |
863 | union cpp_parm_u |
864 | { |
865 | /* Parameters, if any. If parameter names use extended identifiers, |
866 | the original spelling of those identifiers, not the canonical |
867 | UTF-8 spelling, goes here. */ |
868 | cpp_hashnode ** GTY ((tag ("false" ), |
869 | nested_ptr (union tree_node, |
870 | "%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL" , |
871 | "%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL" ), |
872 | length ("%1.paramc" ))) params; |
873 | |
874 | /* If this is an assertion, the next one in the chain. */ |
875 | cpp_macro *GTY ((tag ("true" ))) next; |
876 | } GTY ((desc ("%1.kind == cmk_assert" ))) parm; |
877 | |
878 | /* Definition line number. */ |
879 | location_t line; |
880 | |
881 | /* Number of tokens in body, or bytes for traditional macros. */ |
882 | /* Do we really need 2^32-1 range here? */ |
883 | unsigned int count; |
884 | |
885 | /* Number of parameters. */ |
886 | unsigned short paramc; |
887 | |
888 | /* Non-zero if this is a user-lazy macro, value provided by user. */ |
889 | unsigned char lazy; |
890 | |
891 | /* The kind of this macro (ISO, trad or assert) */ |
892 | unsigned kind : 2; |
893 | |
894 | /* If a function-like macro. */ |
895 | unsigned int fun_like : 1; |
896 | |
897 | /* If a variadic macro. */ |
898 | unsigned int variadic : 1; |
899 | |
900 | /* If macro defined in system header. */ |
901 | unsigned int syshdr : 1; |
902 | |
903 | /* Nonzero if it has been expanded or had its existence tested. */ |
904 | unsigned int used : 1; |
905 | |
906 | /* Indicate whether the tokens include extra CPP_PASTE tokens at the |
907 | end to track invalid redefinitions with consecutive CPP_PASTE |
908 | tokens. */ |
909 | unsigned int : 1; |
910 | |
911 | /* Imported C++20 macro (from a header unit). */ |
912 | unsigned int imported_p : 1; |
913 | |
914 | /* 0 bits spare (32-bit). 32 on 64-bit target. */ |
915 | |
916 | union cpp_exp_u |
917 | { |
918 | /* Trailing array of replacement tokens (ISO), or assertion body value. */ |
919 | cpp_token GTY ((tag ("false" ), length ("%1.count" ))) tokens[1]; |
920 | |
921 | /* Pointer to replacement text (traditional). See comment at top |
922 | of cpptrad.c for how traditional function-like macros are |
923 | encoded. */ |
924 | const unsigned char *GTY ((tag ("true" ))) text; |
925 | } GTY ((desc ("%1.kind == cmk_traditional" ))) exp; |
926 | }; |
927 | |
/* Poisoned identifiers are flagged NODE_POISONED.  NODE_OPERATOR (C++
   only) indicates an identifier that behaves like an operator such as
   "xor".  NODE_DIAGNOSTIC is for speed in lex_token: it indicates a
   diagnostic may be required for this node.  Currently this only
   applies to __VA_ARGS__, poisoned identifiers, and -Wc++-compat
   warnings about NODE_OPERATOR.  */

/* Hash node flags.  These are stored in the 9-bit `flags' field of
   cpp_hashnode, so bit 8 (NODE_MODULE) is the highest that fits;
   adding another flag requires widening that field.  */
#define NODE_OPERATOR   (1 << 0)        /* C++ named operator.  */
#define NODE_POISONED   (1 << 1)        /* Poisoned identifier.  */
#define NODE_DIAGNOSTIC (1 << 2)        /* Possible diagnostic when lexed.  */
#define NODE_WARN       (1 << 3)        /* Warn if redefined or undefined.  */
#define NODE_DISABLED   (1 << 4)        /* A disabled macro.  */
#define NODE_USED       (1 << 5)        /* Dumped with -dU.  */
#define NODE_CONDITIONAL (1 << 6)       /* Conditional macro */
#define NODE_WARN_OPERATOR (1 << 7)     /* Warn about C++ named operator.  */
#define NODE_MODULE (1 << 8)            /* C++-20 module-related name.  */
945 | |
/* Different flavors of hash node.  The numbering matters: the two
   macro flavors (2 and 3) both have bit 1 set while the other two
   flavors do not, which is what lets NT_MACRO_MASK test for "any
   kind of macro" with a single AND.  */
enum node_type
{
  /* Maybe an assert?  */
  NT_VOID = 0,

  /* A macro arg.  */
  NT_MACRO_ARG = 1,

  /* A user macro.  */
  NT_USER_MACRO = 2,

  /* A builtin macro.  */
  NT_BUILTIN_MACRO = 3,

  /* Mask for either macro kind.  */
  NT_MACRO_MASK = NT_USER_MACRO
};
955 | |
/* Different flavors of builtin macro.  _Pragma is an operator, but we
   handle it with the builtin code for efficiency reasons.  A value of
   this type is what a hash node carries when its type is
   NT_BUILTIN_MACRO.  */
enum cpp_builtin_type
{
  BT_SPECLINE = 0,              /* `__LINE__' */
  BT_DATE,                      /* `__DATE__' */
  BT_FILE,                      /* `__FILE__' */
  BT_FILE_NAME,                 /* `__FILE_NAME__' */
  BT_BASE_FILE,                 /* `__BASE_FILE__' */
  BT_INCLUDE_LEVEL,             /* `__INCLUDE_LEVEL__' */
  BT_TIME,                      /* `__TIME__' */
  BT_STDC,                      /* `__STDC__' */
  BT_PRAGMA,                    /* `_Pragma' operator */
  BT_TIMESTAMP,                 /* `__TIMESTAMP__' */
  BT_COUNTER,                   /* `__COUNTER__' */
  BT_HAS_ATTRIBUTE,             /* `__has_attribute(x)' */
  BT_HAS_STD_ATTRIBUTE,         /* `__has_c_attribute(x)' */
  BT_HAS_BUILTIN,               /* `__has_builtin(x)' */
  BT_HAS_INCLUDE,               /* `__has_include(x)' */
  BT_HAS_INCLUDE_NEXT,          /* `__has_include_next(x)' */
  BT_HAS_FEATURE,               /* `__has_feature(x)' */
  BT_HAS_EXTENSION              /* `__has_extension(x)' */
};
979 | |
/* Conversions between a cpp_hashnode and its embedded ht_identifier.
   CPP_HASHNODE casts a pointer to cpp_hashnode *; HT_NODE takes the
   address of the node's `ident' member.  NODE_LEN and NODE_NAME
   apply HT_LEN and HT_STR to that embedded identifier.  */
#define CPP_HASHNODE(HNODE)     ((cpp_hashnode *) (HNODE))
#define HT_NODE(NODE)           (&(NODE)->ident)
#define NODE_LEN(NODE)          HT_LEN (HT_NODE (NODE))
#define NODE_NAME(NODE)         HT_STR (HT_NODE (NODE))
984 | |
/* The common part of an identifier node shared amongst all 3 C front
   ends.  Also used to store CPP identifiers, which are a superset of
   identifiers in the grammatical sense.  */

/* Payload of a cpp_hashnode.  The active member is selected by the
   node's `type' field (see the GTY desc on cpp_hashnode::value); the
   tag on each member names the node_type it belongs to.  */
union GTY(()) _cpp_hashnode_value {
  /* Assert (maybe NULL) */
  cpp_macro * GTY((tag ("NT_VOID"))) answers;
  /* Macro (maybe NULL) */
  cpp_macro * GTY((tag ("NT_USER_MACRO"))) macro;
  /* Code for a builtin macro.  */
  enum cpp_builtin_type GTY ((tag ("NT_BUILTIN_MACRO"))) builtin;
  /* Macro argument index.  */
  unsigned short GTY ((tag ("NT_MACRO_ARG"))) arg_index;
};
999 | |
/* One identifier known to the preprocessor.  The embedded
   ht_identifier comes first; the bit-fields below use 27 bits in
   total (1 + 7 + 8 + 9 + 2).  */
struct GTY(()) cpp_hashnode {
  struct ht_identifier ident;
  unsigned int is_directive : 1;
  unsigned int directive_index : 7;     /* If is_directive,
                                           then index into directive table.
                                           Otherwise, a NODE_OPERATOR.  */
  unsigned int rid_code : 8;            /* Rid code - for front ends.  */
  unsigned int flags : 9;               /* CPP flags (NODE_* bits).  */
  ENUM_BITFIELD(node_type) type : 2;    /* CPP node type.  */

  /* 5 bits spare.  */

  /* The deferred cookie is applicable to NT_USER_MACRO or NT_VOID.
     The latter for when a macro had a prevailing undef.
     On a 64-bit system there would be 32-bits of padding to the value
     field.  So placing the deferred index here is not costly.  */
  unsigned deferred;                    /* Deferred cookie */

  /* Payload; which member is live is determined by TYPE.  */
  union _cpp_hashnode_value GTY ((desc ("%1.type"))) value;
};
1020 | |
1021 | /* Extra information we may need to store per identifier, which is needed rarely |
1022 | enough that it's not worth adding directly into the main identifier hash. */ |
1023 | struct GTY(()) |
1024 | { |
1025 | struct ht_identifier ; |
1026 | location_t ; |
1027 | }; |
1028 | |
/* A class for iterating through the source locations within a
   string token (before escapes are interpreted, and before
   concatenation).  */

class cpp_string_location_reader {
 public:
  /* Start reading at SRC_LOC.  LINE_TABLE is only a constructor
     argument; no member of the class keeps a pointer to it.  */
  cpp_string_location_reader (location_t src_loc,
                              line_maps *line_table);

  /* Return the next source range.  NOTE(review): presumably advances
     the reader by one character each call -- confirm against the
     definition.  */
  source_range get_next ();

 private:
  location_t m_loc;
  int m_offset_per_column;
};
1044 | |
1045 | /* A class for storing the source ranges of all of the characters within |
1046 | a string literal, after escapes are interpreted, and after |
1047 | concatenation. |
1048 | |
1049 | This is not GTY-marked, as instances are intended to be temporary. */ |
1050 | |
1051 | class cpp_substring_ranges |
1052 | { |
1053 | public: |
1054 | cpp_substring_ranges (); |
1055 | ~cpp_substring_ranges (); |
1056 | |
1057 | int get_num_ranges () const { return m_num_ranges; } |
1058 | source_range get_range (int idx) const |
1059 | { |
1060 | linemap_assert (idx < m_num_ranges); |
1061 | return m_ranges[idx]; |
1062 | } |
1063 | |
1064 | void add_range (source_range range); |
1065 | void add_n_ranges (int num, cpp_string_location_reader &loc_reader); |
1066 | |
1067 | private: |
1068 | source_range *m_ranges; |
1069 | int m_num_ranges; |
1070 | int m_alloc_ranges; |
1071 | }; |
1072 | |
/* Call this first to get a handle to pass to other functions.

   The first hash table argument is for associating a struct cpp_hashnode
   with each identifier.  The second hash table argument is for associating
   a struct cpp_hashnode_extra with each identifier that needs one.  For
   either, pass in a NULL pointer if you want cpplib to create and manage
   the hash table itself, or else pass a suitably initialized hash table to
   be managed external to libcpp, as is done by the C-family frontends.

   The second hash table argument may be omitted; it defaults to
   nullptr.  */
extern cpp_reader *cpp_create_reader (enum c_lang, struct ht *,
                                      class line_maps *,
                                      struct ht * = nullptr);
1084 | |
/* Reset the cpp_reader's line_map.  This is only used after reading a
   PCH file.  */
extern void cpp_set_line_map (cpp_reader *, class line_maps *);

/* Call this to change the selected language standard (e.g. because of
   command line options).  */
extern void cpp_set_lang (cpp_reader *, enum c_lang);

/* Set the include paths.  NOTE(review): the meaning of the two
   cpp_dir chains and of the int argument is not documented here;
   check the definition before relying on their order.  */
extern void cpp_set_include_chains (cpp_reader *, cpp_dir *, cpp_dir *, int);
1095 | |
/* Call these to get pointers to the options, callback, and deps
   structures for a given reader.  These pointers are good until you
   call cpp_finish on that reader.  You can either edit the callbacks
   through the pointer returned from cpp_get_callbacks, or set them
   with cpp_set_callbacks.  The three getters are declared
   ATTRIBUTE_PURE.  */
extern cpp_options *cpp_get_options (cpp_reader *) ATTRIBUTE_PURE;
extern cpp_callbacks *cpp_get_callbacks (cpp_reader *) ATTRIBUTE_PURE;
extern void cpp_set_callbacks (cpp_reader *, cpp_callbacks *);
extern class mkdeps *cpp_get_deps (cpp_reader *) ATTRIBUTE_PURE;
1105 | |
1106 | extern const char * (cpp_reader *, const char *file, |
1107 | bool angle_p, location_t); |
1108 | |
/* Call these to get name data about the various compile-time
   charsets.  */
extern const char *cpp_get_narrow_charset_name (cpp_reader *) ATTRIBUTE_PURE;
extern const char *cpp_get_wide_charset_name (cpp_reader *) ATTRIBUTE_PURE;

/* This function reads the file, but does not start preprocessing.  It
   returns the name of the original file; this is the same as the
   input file, except for preprocessed input.  This will generate at
   least one file change callback, and possibly a line change callback
   too.  If there was an error opening the file, it returns NULL.
   INJECTING defaults to false.  */
extern const char *cpp_read_main_file (cpp_reader *, const char *,
                                       bool injecting = false);
/* NOTE(review): presumably the location associated with the main
   file -- confirm against the definition.  */
extern location_t cpp_main_loc (const cpp_reader *);

/* Adjust for the main file to be an include.  */
extern void cpp_retrofit_as_include (cpp_reader *);
1125 | |
/* Set up built-ins with special behavior.  Use cpp_init_builtins()
   instead unless you know what you are doing.  */
extern void cpp_init_special_builtins (cpp_reader *);

/* Set up built-ins like __FILE__.  */
extern void cpp_init_builtins (cpp_reader *, int);

/* This is called after options have been parsed, and partially
   processed.  */
extern void cpp_post_options (cpp_reader *);

/* Set up translation to the target character set.  */
extern void cpp_init_iconv (cpp_reader *);

/* Call this to finish preprocessing.  If you requested dependency
   generation, pass open stream(s) to write the information to,
   otherwise NULL.  It is your responsibility to close the stream(s).
   FDEPS_STREAM defaults to NULL.  */
extern void cpp_finish (cpp_reader *, FILE *deps_stream, FILE *fdeps_stream = NULL);

/* Call this to release the handle at the end of preprocessing.  Any
   use of the handle after this function returns is invalid.  */
extern void cpp_destroy (cpp_reader *);
1148 | |
/* Token inspection, spelling, pragma registration and token
   retrieval.  */
extern unsigned int cpp_token_len (const cpp_token *);
extern unsigned char *cpp_token_as_text (cpp_reader *, const cpp_token *);
extern unsigned char *cpp_spell_token (cpp_reader *, const cpp_token *,
                                       unsigned char *, bool);
/* Register a pragma; the function pointer argument is the handler,
   which receives the reader.  */
extern void cpp_register_pragma (cpp_reader *, const char *, const char *,
                                 void (*) (cpp_reader *), bool);
extern void cpp_register_deferred_pragma (cpp_reader *, const char *,
                                          const char *, unsigned, bool, bool);
extern int cpp_avoid_paste (cpp_reader *, const cpp_token *,
                            const cpp_token *);
extern const cpp_token *cpp_get_token (cpp_reader *);
/* As cpp_get_token, with an additional location_t out-pointer.  */
extern const cpp_token *cpp_get_token_with_location (cpp_reader *,
                                                     location_t *);
1162 | inline bool cpp_user_macro_p (const cpp_hashnode *node) |
1163 | { |
1164 | return node->type == NT_USER_MACRO; |
1165 | } |
1166 | inline bool cpp_builtin_macro_p (const cpp_hashnode *node) |
1167 | { |
1168 | return node->type == NT_BUILTIN_MACRO; |
1169 | } |
1170 | inline bool cpp_macro_p (const cpp_hashnode *node) |
1171 | { |
1172 | return node->type & NT_MACRO_MASK; |
1173 | } |
1174 | inline cpp_macro *cpp_set_deferred_macro (cpp_hashnode *node, |
1175 | cpp_macro *forced = NULL) |
1176 | { |
1177 | cpp_macro *old = node->value.macro; |
1178 | |
1179 | node->value.macro = forced; |
1180 | node->type = NT_USER_MACRO; |
1181 | node->flags &= ~NODE_USED; |
1182 | |
1183 | return old; |
1184 | } |
/* NOTE(review): presumably resolves the definition of a deferred
   macro for the given node at the given location -- confirm against
   the definition.  */
cpp_macro *cpp_get_deferred_macro (cpp_reader *, cpp_hashnode *, location_t);
1186 | |
1187 | /* Returns true if NODE is a function-like user macro. */ |
1188 | inline bool cpp_fun_like_macro_p (cpp_hashnode *node) |
1189 | { |
1190 | return cpp_user_macro_p (node) && node->value.macro->fun_like; |
1191 | } |
1192 | |
/* Two overloads; the second additionally takes the cpp_macro to
   use.  NOTE(review): presumably both return the textual form of the
   macro's definition -- confirm against the definitions.  */
extern const unsigned char *cpp_macro_definition (cpp_reader *, cpp_hashnode *);
extern const unsigned char *cpp_macro_definition (cpp_reader *, cpp_hashnode *,
                                                  const cpp_macro *);
1196 | inline location_t cpp_macro_definition_location (cpp_hashnode *node) |
1197 | { |
1198 | const cpp_macro *macro = node->value.macro; |
1199 | return macro ? macro->line : 0; |
1200 | } |
/* Result kind returned by cpp_get_date.  All enumerators are
   negative.  */
enum class CPP_time_kind
{
  FIXED = -1,   /* Fixed time via source epoch.  */
  DYNAMIC = -2, /* Dynamic via time(2).  */
  UNKNOWN = -3  /* Wibbly wobbly, timey wimey.  */
};
/* Return an idempotent time stamp (possibly from SOURCE_DATE_EPOCH).
   The time value is passed back through the time_t pointer; the
   return value says where it came from.  */
extern CPP_time_kind cpp_get_date (cpp_reader *, time_t *);
1209 | |
/* Token look-behind and look-ahead.  NOTE(review): presumably the
   unsigned argument is a count of tokens to back up and the int
   argument a look-ahead index -- confirm against the definitions.  */
extern void _cpp_backup_tokens (cpp_reader *, unsigned int);
extern const cpp_token *cpp_peek_token (cpp_reader *, int);
1212 | |
/* Evaluate a CPP_*CHAR* token.  */
extern cppchar_t cpp_interpret_charconst (cpp_reader *, const cpp_token *,
                                          unsigned int *, int *);
/* Evaluate a vector of CPP_*STRING* tokens.  */
extern bool cpp_interpret_string (cpp_reader *,
                                  const cpp_string *, size_t,
                                  cpp_string *, enum cpp_ttype);
/* Variant that takes location readers and fills in OUT, a
   cpp_substring_ranges.  NOTE(review): the meaning of the returned
   string is not visible here -- check the definition.  */
extern const char *cpp_interpret_string_ranges (cpp_reader *pfile,
                                                const cpp_string *from,
                                                cpp_string_location_reader *,
                                                size_t count,
                                                cpp_substring_ranges *out,
                                                enum cpp_ttype type);
/* Same signature as cpp_interpret_string.  NOTE(review): presumably
   skips translation to the execution character set -- confirm.  */
extern bool cpp_interpret_string_notranslate (cpp_reader *,
                                              const cpp_string *, size_t,
                                              cpp_string *, enum cpp_ttype);

/* Convert a host character constant to the execution character set.  */
extern cppchar_t cpp_host_to_exec_charset (cpp_reader *, cppchar_t);
1232 | |
/* Used to register macros and assertions, perhaps from the command line.
   The text is the same as the command line argument.  The
   *_formatted variants take a printf-style format string and
   arguments instead of a fixed text.  */
extern void cpp_define (cpp_reader *, const char *);
extern void cpp_define_unused (cpp_reader *, const char *);
extern void cpp_define_formatted (cpp_reader *pfile,
                                  const char *fmt, ...) ATTRIBUTE_PRINTF_2;
extern void cpp_define_formatted_unused (cpp_reader *pfile,
                                         const char *fmt,
                                         ...) ATTRIBUTE_PRINTF_2;
extern void cpp_assert (cpp_reader *, const char *);
extern void cpp_undef (cpp_reader *, const char *);
extern void cpp_unassert (cpp_reader *, const char *);

/* Mark a node as a lazily defined macro.  */
extern void cpp_define_lazily (cpp_reader *, cpp_hashnode *node, unsigned N);

/* Undefine all macros and assertions.  */
extern void cpp_undef_all (cpp_reader *);

/* Buffer pushing and definedness query.  NOTE(review): parameter
   meanings are not documented here; see the definitions.  */
extern cpp_buffer *cpp_push_buffer (cpp_reader *, const unsigned char *,
                                    size_t, int);
extern int cpp_defined (cpp_reader *, const unsigned char *, int);
1255 | |
/* A preprocessing number.  Code assumes that any unused high bits of
   the double integer are set to zero.  */

/* This type has to be equal to unsigned HOST_WIDE_INT, see
   gcc/c-family/c-lex.cc.  */
typedef uint64_t cpp_num_part;
typedef struct cpp_num cpp_num;
struct cpp_num
{
  /* The two halves of the double-width integer.  */
  cpp_num_part high;
  cpp_num_part low;
  bool unsignedp;  /* True if value should be treated as unsigned.  */
  bool overflow;   /* True if the most recent calculation overflowed.  */
};
1270 | |
/* cpplib provides two interfaces for interpretation of preprocessing
   numbers.

   cpp_classify_number categorizes numeric constants according to
   their field (integer, floating point, or invalid), radix (decimal,
   octal, hexadecimal), and type suffixes.  */

/* Category; CPP_N_CATEGORY is the mask for this group.  */
#define CPP_N_CATEGORY  0x000F
#define CPP_N_INVALID   0x0000
#define CPP_N_INTEGER   0x0001
#define CPP_N_FLOATING  0x0002

/* Width; CPP_N_WIDTH is the mask for this group.  */
#define CPP_N_WIDTH     0x00F0
#define CPP_N_SMALL     0x0010  /* int, float, short _Fract/Accum  */
#define CPP_N_MEDIUM    0x0020  /* long, double, long _Fract/_Accum.  */
#define CPP_N_LARGE     0x0040  /* long long, long double,
                                   long long _Fract/Accum.  */

#define CPP_N_WIDTH_MD  0xF0000 /* machine defined.  */
#define CPP_N_MD_W      0x10000
#define CPP_N_MD_Q      0x20000

/* Radix; CPP_N_RADIX is the mask for this group.  */
#define CPP_N_RADIX     0x0F00
#define CPP_N_DECIMAL   0x0100
#define CPP_N_HEX       0x0200
#define CPP_N_OCTAL     0x0400
#define CPP_N_BINARY    0x0800

#define CPP_N_UNSIGNED  0x1000  /* Properties.  */
#define CPP_N_IMAGINARY 0x2000
#define CPP_N_DFLOAT    0x4000
#define CPP_N_DEFAULT   0x8000

#define CPP_N_FRACT     0x100000 /* Fract types.  */
#define CPP_N_ACCUM     0x200000 /* Accum types.  */
#define CPP_N_FLOATN    0x400000 /* _FloatN types.  */
#define CPP_N_FLOATNX   0x800000 /* _FloatNx types.  */

#define CPP_N_USERDEF   0x1000000 /* C++11 user-defined literal.  */

#define CPP_N_SIZE_T    0x2000000 /* C++23 size_t literal.  */
#define CPP_N_BFLOAT16  0x4000000 /* std::bfloat16_t type.  */
#define CPP_N_BITINT    0x8000000 /* C23 _BitInt literal.  */

/* CPP_FLOATN_MAX shifted left by CPP_FLOATN_SHIFT equals
   CPP_N_WIDTH_FLOATN_NX, i.e. the top four bits of the flag word
   hold N / 16.  */
#define CPP_N_WIDTH_FLOATN_NX   0xF0000000 /* _FloatN / _FloatNx value
                                              of N, divided by 16.  */
#define CPP_FLOATN_SHIFT        24
#define CPP_FLOATN_MAX  0xF0
1319 | |
/* Classify a CPP_NUMBER token.  The return value is a combination of
   the flags from the above sets.  */
extern unsigned cpp_classify_number (cpp_reader *, const cpp_token *,
                                     const char **, location_t);

/* Return the classification flags for a float suffix.  The suffix is
   passed as a pointer and a length.  */
extern unsigned int cpp_interpret_float_suffix (cpp_reader *, const char *,
                                                size_t);

/* Return the classification flags for an int suffix.  The suffix is
   passed as a pointer and a length.  */
extern unsigned int cpp_interpret_int_suffix (cpp_reader *, const char *,
                                              size_t);

/* Evaluate a token classified as category CPP_N_INTEGER.  */
extern cpp_num cpp_interpret_integer (cpp_reader *, const cpp_token *,
                                      unsigned int);

/* Sign extend a number, with PRECISION significant bits and all
   others assumed clear, to fill out a cpp_num structure.  */
cpp_num cpp_num_sign_extend (cpp_num, size_t);
1340 | |
/* Output a diagnostic of some kind.  MSGID is a printf-style format
   string; the remaining arguments are its operands.  */
extern bool cpp_error (cpp_reader *, enum cpp_diagnostic_level,
                       const char *msgid, ...)
  ATTRIBUTE_PRINTF_3;
extern bool cpp_warning (cpp_reader *, enum cpp_warning_reason,
                         const char *msgid, ...)
  ATTRIBUTE_PRINTF_3;
extern bool cpp_pedwarning (cpp_reader *, enum cpp_warning_reason,
                            const char *msgid, ...)
  ATTRIBUTE_PRINTF_3;
extern bool cpp_warning_syshdr (cpp_reader *, enum cpp_warning_reason reason,
                                const char *msgid, ...)
  ATTRIBUTE_PRINTF_3;

/* As their counterparts above, but use RICHLOC.  */
extern bool cpp_warning_at (cpp_reader *, enum cpp_warning_reason,
                            rich_location *richloc, const char *msgid, ...)
  ATTRIBUTE_PRINTF_4;
extern bool cpp_pedwarning_at (cpp_reader *, enum cpp_warning_reason,
                               rich_location *richloc, const char *msgid, ...)
  ATTRIBUTE_PRINTF_4;
1362 | |
/* Output a diagnostic with "MSGID: " preceding the
   error string of errno.  No location is printed.  */
extern bool cpp_errno (cpp_reader *, enum cpp_diagnostic_level,
                       const char *msgid);
/* Similarly, but with "FILENAME: " instead of "MSGID: ", where
   the filename is not localized.  Unlike cpp_errno this variant
   also takes a location.  */
extern bool cpp_errno_filename (cpp_reader *, enum cpp_diagnostic_level,
                                const char *filename, location_t loc);
1371 | |
/* Same as cpp_error, except additionally specifies a position as a
   (translation unit) physical line and physical column.  If the line is
   zero, then no location is printed.  */
extern bool cpp_error_with_line (cpp_reader *, enum cpp_diagnostic_level,
                                 location_t, unsigned,
                                 const char *msgid, ...)
  ATTRIBUTE_PRINTF_5;
extern bool cpp_warning_with_line (cpp_reader *, enum cpp_warning_reason,
                                   location_t, unsigned,
                                   const char *msgid, ...)
  ATTRIBUTE_PRINTF_5;
extern bool cpp_pedwarning_with_line (cpp_reader *, enum cpp_warning_reason,
                                      location_t, unsigned,
                                      const char *msgid, ...)
  ATTRIBUTE_PRINTF_5;
extern bool cpp_warning_with_line_syshdr (cpp_reader *, enum cpp_warning_reason,
                                          location_t, unsigned,
                                          const char *msgid, ...)
  ATTRIBUTE_PRINTF_5;

/* cpp_error_at is overloaded: one form takes a plain location_t, the
   other a rich_location.  */
extern bool cpp_error_at (cpp_reader * pfile, enum cpp_diagnostic_level,
                          location_t src_loc, const char *msgid, ...)
  ATTRIBUTE_PRINTF_4;

extern bool cpp_error_at (cpp_reader * pfile, enum cpp_diagnostic_level,
                          rich_location *richloc, const char *msgid, ...)
  ATTRIBUTE_PRINTF_4;
1399 | |
/* In lex.cc */
extern int cpp_ideq (const cpp_token *, const char *);
extern void cpp_output_line (cpp_reader *, FILE *);
extern unsigned char *cpp_output_line_to_string (cpp_reader *,
                                                 const unsigned char *);
extern const unsigned char *cpp_alloc_token_string
  (cpp_reader *, const unsigned char *, unsigned);
extern void cpp_output_token (const cpp_token *, FILE *);
extern const char *cpp_type2name (enum cpp_ttype, unsigned char flags);
/* Returns the value of an escape sequence, truncated to the correct
   target precision.  PSTR points to the input pointer, which is just
   after the backslash.  LIMIT is how much text we have.  WIDE is true
   if the escape sequence is part of a wide character constant or
   string literal.  Handles all relevant diagnostics.  */
extern cppchar_t cpp_parse_escape (cpp_reader *, const unsigned char ** pstr,
                                   const unsigned char *limit, int wide);
1416 | |
1417 | /* Structure used to hold a comment block at a given location in the |
1418 | source code. */ |
1419 | |
1420 | typedef struct |
1421 | { |
1422 | /* Text of the comment including the terminators. */ |
1423 | char *; |
1424 | |
1425 | /* source location for the given comment. */ |
1426 | location_t ; |
1427 | } ; |
1428 | |
1429 | /* Structure holding all comments for a given cpp_reader. */ |
1430 | |
1431 | typedef struct |
1432 | { |
1433 | /* table of comment entries. */ |
1434 | cpp_comment *; |
1435 | |
1436 | /* number of actual entries entered in the table. */ |
1437 | int ; |
1438 | |
1439 | /* number of entries allocated currently. */ |
1440 | int ; |
1441 | } ; |
1442 | |
1443 | /* Returns the table of comments encountered by the preprocessor. This |
1444 | table is only populated when pfile->state.save_comments is true. */ |
1445 | extern cpp_comment_table * (cpp_reader *); |
1446 | |
/* In hash.c */
/* NOTE(review): the other section headers in this file use a .cc
   suffix; the file name above may be stale -- confirm where these
   functions are defined.  */

/* Lookup an identifier in the hashtable.  Puts the identifier in the
   table if it is not already there.  The identifier is passed as a
   pointer and a length.  */
extern cpp_hashnode *cpp_lookup (cpp_reader *, const unsigned char *,
                                 unsigned int);

/* Callback type for cpp_forall_identifiers: receives the reader, a
   node, and the opaque pointer given to cpp_forall_identifiers.  */
typedef int (*cpp_cb) (cpp_reader *, cpp_hashnode *, void *);
extern void cpp_forall_identifiers (cpp_reader *, cpp_cb, void *);
1456 | |
/* In macro.cc */
extern void cpp_scan_nooutput (cpp_reader *);
extern int  cpp_sys_macro_p (cpp_reader *);
extern unsigned char *cpp_quote_string (unsigned char *, const unsigned char *,
                                        unsigned int);
/* NOTE(review): the sense of the boolean result (true for equal or
   true for different) is not visible here -- check the definition
   before use.  */
extern bool cpp_compare_macros (const cpp_macro *macro1,
                                const cpp_macro *macro2);
1464 | |
1465 | /* In files.cc */ |
1466 | extern bool cpp_included (cpp_reader *, const char *); |
1467 | extern bool cpp_included_before (cpp_reader *, const char *, location_t); |
1468 | extern void (cpp_reader *, int, int); |
1469 | extern bool cpp_push_include (cpp_reader *, const char *); |
1470 | extern bool cpp_push_default_include (cpp_reader *, const char *); |
1471 | extern void cpp_change_file (cpp_reader *, enum lc_reason, const char *); |
1472 | extern const char *cpp_get_path (struct _cpp_file *); |
1473 | extern cpp_dir *cpp_get_dir (struct _cpp_file *); |
1474 | extern cpp_buffer *cpp_get_buffer (cpp_reader *); |
1475 | extern struct _cpp_file *cpp_get_file (cpp_buffer *); |
1476 | extern cpp_buffer *cpp_get_prev (cpp_buffer *); |
1477 | extern void cpp_clear_file_cache (cpp_reader *); |
1478 | |
/* cpp_get_converted_source returns the contents of the given file, as it exists
   after cpplib has read it and converted it from the input charset to the
   source charset.  Return struct will be zero-filled if the data could not be
   read for any reason.  The data starts at the DATA pointer, but the TO_FREE
   pointer is what should be passed to free(), as there may be an offset.  */
struct cpp_converted_source
{
  char *to_free;        /* Pass this, not DATA, to free ().  */
  char *data;           /* Start of the converted contents.  */
  size_t len;           /* NOTE(review): presumably the number of bytes
                           available at DATA -- confirm.  */
};
cpp_converted_source cpp_get_converted_source (const char *fname,
                                               const char *input_charset);
1492 | |
/* In pch.cc */
/* Opaque here; only pointers to it appear in this interface.  */
struct save_macro_data;
extern int cpp_save_state (cpp_reader *, FILE *);
extern int cpp_write_pch_deps (cpp_reader *, FILE *);
extern int cpp_write_pch_state (cpp_reader *, FILE *);
extern int cpp_valid_state (cpp_reader *, const char *, int);
/* cpp_prepare_state hands back a save_macro_data pointer through its
   second argument; cpp_read_state accepts such a pointer.  */
extern void cpp_prepare_state (cpp_reader *, struct save_macro_data **);
extern int cpp_read_state (cpp_reader *, const char *, FILE *,
                           struct save_macro_data *);
1502 | |
/* In lex.cc */
/* Start and stop forcing token locations.  NOTE(review): presumably
   every token lexed in between gets the given location -- confirm
   against the definition.  */
extern void cpp_force_token_locations (cpp_reader *, location_t);
extern void cpp_stop_forcing_token_locations (cpp_reader *);
/* Task selector passed to the cpp_directive_only_process
   callback.  */
enum CPP_DO_task
{
  CPP_DO_print = 0,
  CPP_DO_location = 1,
  CPP_DO_token = 2
};
1512 | |
/* Process PFILE in directives-only mode, invoking CB with the
   reader, a CPP_DO_task, a data pointer and task-dependent variadic
   arguments.  NOTE(review): presumably the DATA passed here is what
   CB receives -- confirm against the definition.  */
extern void cpp_directive_only_process (cpp_reader *pfile,
                                        void *data,
                                        void (*cb) (cpp_reader *,
                                                    CPP_DO_task,
                                                    void *data, ...));
1518 | |
/* In expr.cc */
/* Helpers for user-defined literal token types: map a cpp_ttype to
   or from its user-defined counterpart, test whether a type is a
   user-defined string or char, and fetch the suffix of a token.  */
extern enum cpp_ttype cpp_userdef_string_remove_type
  (enum cpp_ttype type);
extern enum cpp_ttype cpp_userdef_string_add_type
  (enum cpp_ttype type);
extern enum cpp_ttype cpp_userdef_char_remove_type
  (enum cpp_ttype type);
extern enum cpp_ttype cpp_userdef_char_add_type
  (enum cpp_ttype type);
extern bool cpp_userdef_string_p
  (enum cpp_ttype type);
extern bool cpp_userdef_char_p
  (enum cpp_ttype type);
extern const char * cpp_get_userdef_suffix
  (const cpp_token *);
1534 | |
/* In charset.cc */

/* The result of attempting to decode a run of UTF-8 bytes.  */

struct cpp_decoded_char
{
  /* NOTE(review): presumably the first byte of the decoded run and
     the byte just past it -- confirm against the decoder.  */
  const char *m_start_byte;
  const char *m_next_byte;

  /* NOTE(review): presumably M_CH is only meaningful when M_VALID_CH
     is true -- confirm against the decoder.  */
  bool m_valid_ch;
  cppchar_t m_ch;
};
1547 | |
1548 | /* Information for mapping between code points and display columns. |
1549 | |
1550 | This is a tabstop value, along with a callback for getting the |
1551 | widths of characters. Normally this callback is cpp_wcwidth, but we |
1552 | support other schemes for escaping non-ASCII unicode as a series of |
ASCII chars when printing the user's source code in diagnostic-show-locus.cc.
1554 | |
1555 | For example, consider: |
1556 | - the Unicode character U+03C0 "GREEK SMALL LETTER PI" (UTF-8: 0xCF 0x80) |
1557 | - the Unicode character U+1F642 "SLIGHTLY SMILING FACE" |
1558 | (UTF-8: 0xF0 0x9F 0x99 0x82) |
1559 | - the byte 0xBF (a stray trailing byte of a UTF-8 character) |
1560 | Normally U+03C0 would occupy one display column, U+1F642 |
1561 | would occupy two display columns, and the stray byte would be |
1562 | printed verbatim as one display column. |
1563 | |
1564 | However when escaping them as unicode code points as "<U+03C0>" |
1565 | and "<U+1F642>" they occupy 8 and 9 display columns respectively, |
1566 | and when escaping them as bytes as "<CF><80>" and "<F0><9F><99><82>" |
1567 | they occupy 8 and 16 display columns respectively. In both cases |
1568 | the stray byte is escaped to <BF> as 4 display columns. */ |
1569 | |
1570 | struct cpp_char_column_policy |
1571 | { |
1572 | cpp_char_column_policy (int tabstop, |
1573 | int (*width_cb) (cppchar_t c)) |
1574 | : m_tabstop (tabstop), |
1575 | m_undecoded_byte_width (1), |
1576 | m_width_cb (width_cb) |
1577 | {} |
1578 | |
1579 | int m_tabstop; |
1580 | /* Width in display columns of a stray byte that isn't decodable |
1581 | as UTF-8. */ |
1582 | int m_undecoded_byte_width; |
1583 | int (*m_width_cb) (cppchar_t c); |
1584 | }; |
1585 | |
1586 | /* A class to manage the state while converting a UTF-8 sequence to cppchar_t |
1587 | and computing the display width one character at a time. */ |
1588 | class cpp_display_width_computation { |
1589 | public: |
1590 | cpp_display_width_computation (const char *data, int data_length, |
1591 | const cpp_char_column_policy &policy); |
1592 | const char *next_byte () const { return m_next; } |
1593 | int bytes_processed () const { return m_next - m_begin; } |
1594 | int bytes_left () const { return m_bytes_left; } |
1595 | bool done () const { return !bytes_left (); } |
1596 | int display_cols_processed () const { return m_display_cols; } |
1597 | |
1598 | int process_next_codepoint (cpp_decoded_char *out); |
1599 | int advance_display_cols (int n); |
1600 | |
1601 | private: |
1602 | const char *const m_begin; |
1603 | const char *m_next; |
1604 | size_t m_bytes_left; |
1605 | const cpp_char_column_policy &m_policy; |
1606 | int m_display_cols; |
1607 | }; |
1608 | |
1609 | /* Convenience functions that are simple use cases for class |
1610 | cpp_display_width_computation. Tab characters will be expanded to spaces |
1611 | as determined by POLICY.m_tabstop, and non-printable-ASCII characters |
1612 | will be escaped as per POLICY. */ |
1613 | |
/* Map the byte column COLUMN within the DATA_LENGTH bytes at DATA to a
   display column under POLICY.  cpp_display_width uses this with COLUMN
   equal to DATA_LENGTH to measure a whole buffer.
   NOTE(review): whether COLUMN is zero- or one-based is not visible from
   this header -- confirm against charset.cc.  */
int cpp_byte_column_to_display_column (const char *data, int data_length,
                                       int column,
                                       const cpp_char_column_policy &policy);
1617 | inline int cpp_display_width (const char *data, int data_length, |
1618 | const cpp_char_column_policy &policy) |
1619 | { |
1620 | return cpp_byte_column_to_display_column (data, data_length, column: data_length, |
1621 | policy); |
1622 | } |
/* The reverse mapping of cpp_byte_column_to_display_column: takes a
   display column DISPLAY_COL within the DATA_LENGTH bytes at DATA and
   yields a byte column under POLICY.
   NOTE(review): the result when DISPLAY_COL falls inside a multi-column
   character or beyond the end of DATA is not visible from this header --
   confirm against charset.cc.  */
int cpp_display_column_to_byte_column (const char *data, int data_length,
                                       int display_col,
                                       const cpp_char_column_policy &policy);
/* Width of the character C; this is the callback normally installed as
   cpp_char_column_policy::m_width_cb.  */
int cpp_wcwidth (cppchar_t c);
1627 | |
/* NOTE(review): presumably true when reading INPUT_CHARSET needs no
   conversion step -- confirm against charset.cc.  */
bool cpp_input_conversion_is_trivial (const char *input_charset);
/* Inspects the DATA_LENGTH bytes at DATA for a UTF-8 byte order mark.
   NOTE(review): the meaning of the int result is not visible from this
   header -- confirm against charset.cc.  */
int cpp_check_utf8_bom (const char *data, size_t data_length);
/* NOTE(review): presumably true when the NUM_BYTES bytes at DATA form
   valid UTF-8 -- confirm against charset.cc.  */
bool cpp_valid_utf8_p (const char *data, size_t num_bytes);

/* Predicates classifying the single character C.  */
bool cpp_is_combining_char (cppchar_t c);
bool cpp_is_printable_char (cppchar_t c);
1634 | |
/* XID properties of a character.  Each enumerator occupies its own bit.
   NOTE(review): presumably these are OR-ed together in the result of
   cpp_check_xid_property -- confirm against charset.cc.  */
enum cpp_xid_property
{
  CPP_XID_START = 1 << 0,
  CPP_XID_CONTINUE = 1 << 1
};
1639 | |
/* NOTE(review): presumably returns a mask built from cpp_xid_property
   values describing the character C -- confirm against charset.cc.  */
unsigned int cpp_check_xid_property (cppchar_t c);
1641 | |
1642 | #endif /* ! LIBCPP_CPPLIB_H */ |
1643 | |