1/* CPP Library - traditional lexical analysis and macro expansion.
2 Copyright (C) 2002-2024 Free Software Foundation, Inc.
3 Contributed by Neil Booth, May 2002
4
5This program is free software; you can redistribute it and/or modify it
6under the terms of the GNU General Public License as published by the
7Free Software Foundation; either version 3, or (at your option) any
8later version.
9
10This program is distributed in the hope that it will be useful,
11but WITHOUT ANY WARRANTY; without even the implied warranty of
12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13GNU General Public License for more details.
14
15You should have received a copy of the GNU General Public License
16along with this program; see the file COPYING3. If not see
17<http://www.gnu.org/licenses/>. */
18
19#include "config.h"
20#include "system.h"
21#include "cpplib.h"
22#include "internal.h"
23
24/* The replacement text of a function-like macro is stored as a
25 contiguous sequence of aligned blocks, each representing the text
26 between subsequent parameters.
27
28 Each block comprises the text between its surrounding parameters,
29 the length of that text, and the one-based index of the following
30 parameter. The final block in the replacement text is easily
31 recognizable as it has an argument index of zero. */
32
33struct block
34{
35 unsigned int text_len;
36 unsigned short arg_index;
37 uchar text[1];
38};
39
40#define BLOCK_HEADER_LEN offsetof (struct block, text)
41#define BLOCK_LEN(TEXT_LEN) CPP_ALIGN (BLOCK_HEADER_LEN + (TEXT_LEN))
42
43/* Structure holding information about a function-like macro
44 invocation. */
45struct fun_macro
46{
47 /* Memory buffer holding the trad_arg array. */
48 _cpp_buff *buff;
49
50 /* An array of size the number of macro parameters + 1, containing
51 the offsets of the start of each macro argument in the output
52 buffer. The argument continues until the character before the
53 start of the next one. */
54 size_t *args;
55
56 /* The hashnode of the macro. */
57 cpp_hashnode *node;
58
59 /* The offset of the macro name in the output buffer. */
60 size_t offset;
61
62 /* The line the macro name appeared on. */
63 location_t line;
64
65 /* Number of parameters. */
66 unsigned int paramc;
67
68 /* Zero-based index of argument being currently lexed. */
69 unsigned int argc;
70};
71
/* State of the traditional lexer.  Its main purpose is to suppress
   macro expansion in places where it must not happen.  */
enum ls
{
  ls_none = 0,          /* Normal state.  */
  ls_fun_open,          /* When looking for '('.  */
  ls_fun_close,         /* When looking for ')'.  */
  ls_defined,           /* After defined.  */
  ls_defined_close,     /* Looking for ')' of defined().  */
  ls_hash,              /* After # in preprocessor conditional.  */
  ls_predicate,         /* After the predicate, maybe paren?  */
  ls_answer             /* In answer to predicate.  */
};
82
83/* Lexing TODO: Maybe handle space in escaped newlines. Stop lex.cc
84 from recognizing comments and directives during its lexing pass. */
85
86static const uchar *skip_whitespace (cpp_reader *, const uchar *, int);
87static cpp_hashnode *lex_identifier (cpp_reader *, const uchar *);
88static const uchar *copy_comment (cpp_reader *, const uchar *, int);
89static void check_output_buffer (cpp_reader *, size_t);
90static void push_replacement_text (cpp_reader *, cpp_hashnode *);
91static bool scan_parameters (cpp_reader *, unsigned *);
92static bool recursive_macro (cpp_reader *, cpp_hashnode *);
93static void save_replacement_text (cpp_reader *, cpp_macro *, unsigned int);
94static void maybe_start_funlike (cpp_reader *, cpp_hashnode *, const uchar *,
95 struct fun_macro *);
96static void save_argument (struct fun_macro *, size_t);
97static void replace_args_and_push (cpp_reader *, struct fun_macro *);
98static size_t canonicalize_text (uchar *, const uchar *, size_t, uchar *);
99
100/* Ensures we have N bytes' space in the output buffer, and
101 reallocates it if not. */
102static void
103check_output_buffer (cpp_reader *pfile, size_t n)
104{
105 /* We might need two bytes to terminate an unterminated comment, and
106 one more to terminate the line with a NUL. */
107 n += 2 + 1;
108
109 if (n > (size_t) (pfile->out.limit - pfile->out.cur))
110 {
111 size_t size = pfile->out.cur - pfile->out.base;
112 size_t new_size = (size + n) * 3 / 2;
113
114 pfile->out.base = XRESIZEVEC (unsigned char, pfile->out.base, new_size);
115 pfile->out.limit = pfile->out.base + new_size;
116 pfile->out.cur = pfile->out.base + size;
117 }
118}
119
120/* Skip a C-style block comment in a macro as a result of -CC.
121 PFILE->buffer->cur points to the initial asterisk of the comment,
122 change it to point to after the '*' and '/' characters that terminate it.
123 Return true if the macro has not been termined, in that case set
124 PFILE->buffer->cur to the end of the buffer. */
125static bool
126skip_macro_block_comment (cpp_reader *pfile)
127{
128 const uchar *cur = pfile->buffer->cur;
129
130 cur++;
131 if (*cur == '/')
132 cur++;
133
134 /* People like decorating comments with '*', so check for '/'
135 instead for efficiency. */
136 while (! (*cur++ == '/' && cur[-2] == '*'))
137 if (cur[-1] == '\n')
138 {
139 pfile->buffer->cur = cur - 1;
140 return true;
141 }
142
143 pfile->buffer->cur = cur;
144 return false;
145}
146
147/* CUR points to the asterisk introducing a comment in the current
148 context. IN_DEFINE is true if we are in the replacement text of a
149 macro.
150
151 The asterisk and following comment is copied to the buffer pointed
152 to by pfile->out.cur, which must be of sufficient size.
153 Unterminated comments are diagnosed, and correctly terminated in
154 the output. pfile->out.cur is updated depending upon IN_DEFINE,
155 -C, -CC and pfile->state.in_directive.
156
157 Returns a pointer to the first character after the comment in the
158 input buffer. */
159static const uchar *
160copy_comment (cpp_reader *pfile, const uchar *cur, int in_define)
161{
162 bool unterminated, copy = false;
163 location_t src_loc = pfile->line_table->highest_line;
164 cpp_buffer *buffer = pfile->buffer;
165
166 buffer->cur = cur;
167 if (pfile->context->prev)
168 unterminated = skip_macro_block_comment (pfile);
169 else
170 unterminated = _cpp_skip_block_comment (pfile);
171
172 if (unterminated)
173 cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
174 msgid: "unterminated comment");
175
176 /* Comments in directives become spaces so that tokens are properly
177 separated when the ISO preprocessor re-lexes the line. The
178 exception is #define. */
179 if (pfile->state.in_directive)
180 {
181 if (in_define)
182 {
183 if (CPP_OPTION (pfile, discard_comments_in_macro_exp))
184 pfile->out.cur--;
185 else
186 copy = true;
187 }
188 else
189 pfile->out.cur[-1] = ' ';
190 }
191 else if (CPP_OPTION (pfile, discard_comments))
192 pfile->out.cur--;
193 else
194 copy = true;
195
196 if (copy)
197 {
198 size_t len = (size_t) (buffer->cur - cur);
199 memcpy (dest: pfile->out.cur, src: cur, n: len);
200 pfile->out.cur += len;
201 if (unterminated)
202 {
203 *pfile->out.cur++ = '*';
204 *pfile->out.cur++ = '/';
205 }
206 }
207
208 return buffer->cur;
209}
210
211/* CUR points to any character in the input buffer. Skips over all
212 contiguous horizontal white space and NULs, including comments if
213 SKIP_COMMENTS, until reaching the first non-horizontal-whitespace
214 character or the end of the current context. Escaped newlines are
215 removed.
216
217 The whitespace is copied verbatim to the output buffer, except that
218 comments are handled as described in copy_comment().
219 pfile->out.cur is updated.
220
221 Returns a pointer to the first character after the whitespace in
222 the input buffer. */
223static const uchar *
224skip_whitespace (cpp_reader *pfile, const uchar *cur, int skip_comments)
225{
226 uchar *out = pfile->out.cur;
227
228 for (;;)
229 {
230 unsigned int c = *cur++;
231 *out++ = c;
232
233 if (is_nvspace (c))
234 continue;
235
236 if (c == '/' && *cur == '*' && skip_comments)
237 {
238 pfile->out.cur = out;
239 cur = copy_comment (pfile, cur, in_define: false /* in_define */);
240 out = pfile->out.cur;
241 continue;
242 }
243
244 out--;
245 break;
246 }
247
248 pfile->out.cur = out;
249 return cur - 1;
250}
251
252/* Lexes and outputs an identifier starting at CUR, which is assumed
253 to point to a valid first character of an identifier. Returns
254 the hashnode, and updates out.cur. */
255static cpp_hashnode *
256lex_identifier (cpp_reader *pfile, const uchar *cur)
257{
258 size_t len;
259 uchar *out = pfile->out.cur;
260 cpp_hashnode *result;
261
262 do
263 *out++ = *cur++;
264 while (is_numchar (*cur));
265
266 CUR (pfile->context) = cur;
267 len = out - pfile->out.cur;
268 result = CPP_HASHNODE (ht_lookup (pfile->hash_table, pfile->out.cur,
269 len, HT_ALLOC));
270 pfile->out.cur = out;
271 return result;
272}
273
274/* Overlays the true file buffer temporarily with text of length LEN
275 starting at START. The true buffer is restored upon calling
276 restore_buff(). */
277void
278_cpp_overlay_buffer (cpp_reader *pfile, const uchar *start, size_t len)
279{
280 cpp_buffer *buffer = pfile->buffer;
281
282 pfile->overlaid_buffer = buffer;
283 pfile->saved_cur = buffer->cur;
284 pfile->saved_rlimit = buffer->rlimit;
285 pfile->saved_line_base = buffer->next_line;
286 buffer->need_line = false;
287
288 buffer->cur = start;
289 buffer->line_base = start;
290 buffer->rlimit = start + len;
291}
292
293/* Restores a buffer overlaid by _cpp_overlay_buffer(). */
294void
295_cpp_remove_overlay (cpp_reader *pfile)
296{
297 cpp_buffer *buffer = pfile->overlaid_buffer;
298
299 buffer->cur = pfile->saved_cur;
300 buffer->rlimit = pfile->saved_rlimit;
301 buffer->line_base = pfile->saved_line_base;
302 buffer->need_line = true;
303
304 pfile->overlaid_buffer = NULL;
305}
306
307/* Reads a logical line into the output buffer. Returns TRUE if there
308 is more text left in the buffer. */
309bool
310_cpp_read_logical_line_trad (cpp_reader *pfile)
311{
312 do
313 {
314 if (pfile->buffer->need_line && !_cpp_get_fresh_line (pfile))
315 {
316 /* Now pop the buffer that _cpp_get_fresh_line did not. */
317 _cpp_pop_buffer (pfile);
318 return false;
319 }
320 }
321 while (!_cpp_scan_out_logical_line (pfile, NULL, false)
322 || pfile->state.skipping);
323
324 return pfile->buffer != NULL;
325}
326
327/* Return true if NODE is a fun_like macro. */
328static inline bool
329fun_like_macro (cpp_hashnode *node)
330{
331 if (cpp_builtin_macro_p (node))
332 return (node->value.builtin == BT_HAS_ATTRIBUTE
333 || node->value.builtin == BT_HAS_STD_ATTRIBUTE
334 || node->value.builtin == BT_HAS_BUILTIN
335 || node->value.builtin == BT_HAS_INCLUDE
336 || node->value.builtin == BT_HAS_INCLUDE_NEXT);
337 return node->value.macro->fun_like;
338}
339
340/* Set up state for finding the opening '(' of a function-like
341 macro. */
342static void
343maybe_start_funlike (cpp_reader *pfile, cpp_hashnode *node, const uchar *start,
344 struct fun_macro *macro)
345{
346 unsigned int n;
347 if (cpp_builtin_macro_p (node))
348 n = 1;
349 else
350 n = node->value.macro->paramc;
351
352 if (macro->buff)
353 _cpp_release_buff (pfile, macro->buff);
354 macro->buff = _cpp_get_buff (pfile, (n + 1) * sizeof (size_t));
355 macro->args = (size_t *) BUFF_FRONT (macro->buff);
356 macro->node = node;
357 macro->offset = start - pfile->out.base;
358 macro->paramc = n;
359 macro->argc = 0;
360}
361
362/* Save the OFFSET of the start of the next argument to MACRO. */
363static void
364save_argument (struct fun_macro *macro, size_t offset)
365{
366 macro->argc++;
367 if (macro->argc <= macro->paramc)
368 macro->args[macro->argc] = offset;
369}
370
371/* Copies the next logical line in the current buffer (starting at
372 buffer->cur) to the output buffer. The output is guaranteed to
373 terminate with a NUL character. buffer->cur is updated.
374
375 If MACRO is non-NULL, then we are scanning the replacement list of
376 MACRO, and we call save_replacement_text() every time we meet an
377 argument.
378
379 If BUILTIN_MACRO_ARG is true, this is called to macro expand
380 arguments of builtin function-like macros. */
381bool
382_cpp_scan_out_logical_line (cpp_reader *pfile, cpp_macro *macro,
383 bool builtin_macro_arg)
384{
385 bool result = true;
386 cpp_context *context;
387 const uchar *cur;
388 uchar *out;
389 struct fun_macro fmacro;
390 unsigned int c, paren_depth = 0, quote;
391 enum ls lex_state = ls_none;
392 bool header_ok;
393 const uchar *start_of_input_line;
394
395 fmacro.buff = NULL;
396 fmacro.args = NULL;
397 fmacro.node = NULL;
398 fmacro.offset = 0;
399 fmacro.line = 0;
400 fmacro.paramc = 0;
401 fmacro.argc = 0;
402
403 quote = 0;
404 header_ok = pfile->state.angled_headers;
405 CUR (pfile->context) = pfile->buffer->cur;
406 RLIMIT (pfile->context) = pfile->buffer->rlimit;
407 if (!builtin_macro_arg)
408 {
409 pfile->out.cur = pfile->out.base;
410 pfile->out.first_line = pfile->line_table->highest_line;
411 }
412 /* start_of_input_line is needed to make sure that directives really,
413 really start at the first character of the line. */
414 start_of_input_line = pfile->buffer->cur;
415 new_context:
416 context = pfile->context;
417 cur = CUR (context);
418 check_output_buffer (pfile, RLIMIT (context) - cur);
419 out = pfile->out.cur;
420
421 for (;;)
422 {
423 if (!context->prev
424 && !builtin_macro_arg
425 && cur >= pfile->buffer->notes[pfile->buffer->cur_note].pos)
426 {
427 pfile->buffer->cur = cur;
428 _cpp_process_line_notes (pfile, false);
429 }
430 c = *cur++;
431 *out++ = c;
432
433 /* Whitespace should "continue" out of the switch,
434 non-whitespace should "break" out of it. */
435 switch (c)
436 {
437 case ' ':
438 case '\t':
439 case '\f':
440 case '\v':
441 case '\0':
442 continue;
443
444 case '\n':
445 /* If this is a macro's expansion, pop it. */
446 if (context->prev)
447 {
448 pfile->out.cur = out - 1;
449 _cpp_pop_context (pfile);
450 goto new_context;
451 }
452
453 /* Omit the newline from the output buffer. */
454 pfile->out.cur = out - 1;
455 pfile->buffer->cur = cur;
456 if (builtin_macro_arg)
457 goto done;
458 pfile->buffer->need_line = true;
459 CPP_INCREMENT_LINE (pfile, 0);
460
461 if ((lex_state == ls_fun_open || lex_state == ls_fun_close)
462 && !pfile->state.in_directive
463 && _cpp_get_fresh_line (pfile))
464 {
465 /* Newlines in arguments become a space, but we don't
466 clear any in-progress quote. */
467 if (lex_state == ls_fun_close)
468 out[-1] = ' ';
469 cur = pfile->buffer->cur;
470 continue;
471 }
472 goto done;
473
474 case '<':
475 if (header_ok)
476 quote = '>';
477 break;
478 case '>':
479 if (c == quote)
480 quote = 0;
481 break;
482
483 case '"':
484 case '\'':
485 if (c == quote)
486 quote = 0;
487 else if (!quote)
488 quote = c;
489 break;
490
491 case '\\':
492 /* Skip escaped quotes here, it's easier than above. */
493 if (*cur == '\\' || *cur == '"' || *cur == '\'')
494 *out++ = *cur++;
495 break;
496
497 case '/':
498 /* Traditional CPP does not recognize comments within
499 literals. */
500 if (!quote && *cur == '*')
501 {
502 pfile->out.cur = out;
503 cur = copy_comment (pfile, cur, in_define: macro != 0);
504 out = pfile->out.cur;
505 continue;
506 }
507 break;
508
509 case '_':
510 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
511 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
512 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
513 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
514 case 'y': case 'z':
515 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
516 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
517 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
518 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
519 case 'Y': case 'Z':
520 if (!pfile->state.skipping && (quote == 0 || macro))
521 {
522 cpp_hashnode *node;
523 uchar *out_start = out - 1;
524
525 pfile->out.cur = out_start;
526 node = lex_identifier (pfile, cur: cur - 1);
527 out = pfile->out.cur;
528 cur = CUR (context);
529
530 if (cpp_macro_p (node)
531 /* Should we expand for ls_answer? */
532 && (lex_state == ls_none || lex_state == ls_fun_open)
533 && !pfile->state.prevent_expansion)
534 {
535 /* Macros invalidate MI optimization. */
536 pfile->mi_valid = false;
537 if (fun_like_macro (node))
538 {
539 maybe_start_funlike (pfile, node, start: out_start, macro: &fmacro);
540 lex_state = ls_fun_open;
541 fmacro.line = pfile->line_table->highest_line;
542 continue;
543 }
544 else if (!recursive_macro (pfile, node))
545 {
546 /* Remove the object-like macro's name from the
547 output, and push its replacement text. */
548 pfile->out.cur = out_start;
549 push_replacement_text (pfile, node);
550 lex_state = ls_none;
551 goto new_context;
552 }
553 }
554 else if (macro && node->type == NT_MACRO_ARG)
555 {
556 /* Found a parameter in the replacement text of a
557 #define. Remove its name from the output. */
558 pfile->out.cur = out_start;
559 save_replacement_text (pfile, macro, node->value.arg_index);
560 out = pfile->out.base;
561 }
562 else if (lex_state == ls_hash)
563 {
564 lex_state = ls_predicate;
565 continue;
566 }
567 else if (pfile->state.in_expression
568 && node == pfile->spec_nodes.n_defined)
569 {
570 lex_state = ls_defined;
571 continue;
572 }
573 }
574 break;
575
576 case '(':
577 if (quote == 0)
578 {
579 paren_depth++;
580 if (lex_state == ls_fun_open)
581 {
582 if (recursive_macro (pfile, fmacro.node))
583 lex_state = ls_none;
584 else
585 {
586 lex_state = ls_fun_close;
587 paren_depth = 1;
588 out = pfile->out.base + fmacro.offset;
589 fmacro.args[0] = fmacro.offset;
590 }
591 }
592 else if (lex_state == ls_predicate)
593 lex_state = ls_answer;
594 else if (lex_state == ls_defined)
595 lex_state = ls_defined_close;
596 }
597 break;
598
599 case ',':
600 if (quote == 0 && lex_state == ls_fun_close && paren_depth == 1)
601 save_argument (macro: &fmacro, offset: out - pfile->out.base);
602 break;
603
604 case ')':
605 if (quote == 0)
606 {
607 paren_depth--;
608 if (lex_state == ls_fun_close && paren_depth == 0)
609 {
610 if (cpp_builtin_macro_p (node: fmacro.node))
611 {
612 /* Handle builtin function-like macros like
613 __has_attribute. The already parsed arguments
614 are put into a buffer, which is then preprocessed
615 and the result is fed to _cpp_push_text_context
616 with disabled expansion, where the ISO preprocessor
617 parses it. While in traditional preprocessing
618 macro arguments aren't immediately expanded, they in
619 the end are because the macro with replaced arguments
620 is preprocessed again. For the builtin function-like
621 macros we need the argument immediately though,
622 if we don't preprocess them, they would behave
623 very differently from ISO preprocessor handling
624 of those builtin macros. So, this handling is
625 more similar to traditional preprocessing of
626 #if directives, where we also keep preprocessing
627 until everything is expanded, and then feed the
628 result with disabled expansion to ISO preprocessor
629 for handling the directives. */
630 lex_state = ls_none;
631 save_argument (macro: &fmacro, offset: out - pfile->out.base);
632 cpp_macro m;
633 memset (s: &m, c: '\0', n: sizeof (m));
634 m.paramc = fmacro.paramc;
635 if (_cpp_arguments_ok (pfile, &m, fmacro.node,
636 fmacro.argc))
637 {
638 size_t len = fmacro.args[1] - fmacro.args[0];
639 uchar *buf;
640
641 /* Remove the macro's invocation from the
642 output, and push its replacement text. */
643 pfile->out.cur = pfile->out.base + fmacro.offset;
644 CUR (context) = cur;
645 buf = _cpp_unaligned_alloc (pfile, len + 2);
646 buf[0] = '(';
647 memcpy (dest: buf + 1, src: pfile->out.base + fmacro.args[0],
648 n: len);
649 buf[len + 1] = '\n';
650
651 const unsigned char *ctx_rlimit = RLIMIT (context);
652 const unsigned char *saved_cur = pfile->buffer->cur;
653 const unsigned char *saved_rlimit
654 = pfile->buffer->rlimit;
655 const unsigned char *saved_line_base
656 = pfile->buffer->line_base;
657 bool saved_need_line = pfile->buffer->need_line;
658 cpp_buffer *saved_overlaid_buffer
659 = pfile->overlaid_buffer;
660 pfile->buffer->cur = buf;
661 pfile->buffer->line_base = buf;
662 pfile->buffer->rlimit = buf + len + 1;
663 pfile->buffer->need_line = false;
664 pfile->overlaid_buffer = pfile->buffer;
665 bool saved_in_directive = pfile->state.in_directive;
666 pfile->state.in_directive = true;
667 cpp_context *saved_prev_context = context->prev;
668 context->prev = NULL;
669
670 _cpp_scan_out_logical_line (pfile, NULL, builtin_macro_arg: true);
671
672 pfile->state.in_directive = saved_in_directive;
673 check_output_buffer (pfile, n: 1);
674 *pfile->out.cur = '\n';
675 pfile->buffer->cur = pfile->out.base + fmacro.offset;
676 pfile->buffer->line_base = pfile->buffer->cur;
677 pfile->buffer->rlimit = pfile->out.cur;
678 CUR (context) = pfile->buffer->cur;
679 RLIMIT (context) = pfile->buffer->rlimit;
680
681 pfile->state.prevent_expansion++;
682 const uchar *text
683 = _cpp_builtin_macro_text (pfile, fmacro.node);
684 pfile->state.prevent_expansion--;
685
686 context->prev = saved_prev_context;
687 pfile->buffer->cur = saved_cur;
688 pfile->buffer->rlimit = saved_rlimit;
689 pfile->buffer->line_base = saved_line_base;
690 pfile->buffer->need_line = saved_need_line;
691 pfile->overlaid_buffer = saved_overlaid_buffer;
692 pfile->out.cur = pfile->out.base + fmacro.offset;
693 CUR (context) = cur;
694 RLIMIT (context) = ctx_rlimit;
695 len = ustrlen (s1: text);
696 buf = _cpp_unaligned_alloc (pfile, len + 1);
697 memcpy (dest: buf, src: text, n: len);
698 buf[len] = '\n';
699 text = buf;
700 _cpp_push_text_context (pfile, fmacro.node,
701 text, len);
702 goto new_context;
703 }
704 break;
705 }
706
707 cpp_macro *m = fmacro.node->value.macro;
708
709 m->used = 1;
710 lex_state = ls_none;
711 save_argument (macro: &fmacro, offset: out - pfile->out.base);
712
713 /* A single zero-length argument is no argument. */
714 if (fmacro.argc == 1
715 && m->paramc == 0
716 && out == pfile->out.base + fmacro.offset + 1)
717 fmacro.argc = 0;
718
719 if (_cpp_arguments_ok (pfile, m, fmacro.node, fmacro.argc))
720 {
721 /* Remove the macro's invocation from the
722 output, and push its replacement text. */
723 pfile->out.cur = pfile->out.base + fmacro.offset;
724 CUR (context) = cur;
725 replace_args_and_push (pfile, &fmacro);
726 goto new_context;
727 }
728 }
729 else if (lex_state == ls_answer || lex_state == ls_defined_close)
730 lex_state = ls_none;
731 }
732 break;
733
734 case '#':
735 if (cur - 1 == start_of_input_line
736 /* A '#' from a macro doesn't start a directive. */
737 && !pfile->context->prev
738 && !pfile->state.in_directive)
739 {
740 /* A directive. With the way _cpp_handle_directive
741 currently works, we only want to call it if either we
742 know the directive is OK, or we want it to fail and
743 be removed from the output. If we want it to be
744 passed through (the assembler case) then we must not
745 call _cpp_handle_directive. */
746 pfile->out.cur = out;
747 cur = skip_whitespace (pfile, cur, skip_comments: true /* skip_comments */);
748 out = pfile->out.cur;
749
750 if (*cur == '\n')
751 {
752 /* Null directive. Ignore it and don't invalidate
753 the MI optimization. */
754 pfile->buffer->need_line = true;
755 CPP_INCREMENT_LINE (pfile, 0);
756 result = false;
757 goto done;
758 }
759 else
760 {
761 bool do_it = false;
762
763 if (is_numstart (*cur)
764 && CPP_OPTION (pfile, lang) != CLK_ASM)
765 do_it = true;
766 else if (is_idstart (*cur))
767 /* Check whether we know this directive, but don't
768 advance. */
769 do_it = lex_identifier (pfile, cur)->is_directive;
770
771 if (do_it || CPP_OPTION (pfile, lang) != CLK_ASM)
772 {
773 /* This is a kludge. We want to have the ISO
774 preprocessor lex the next token. */
775 pfile->buffer->cur = cur;
776 _cpp_handle_directive (pfile, false /* indented */);
777 result = false;
778 goto done;
779 }
780 }
781 }
782
783 if (pfile->state.in_expression)
784 {
785 lex_state = ls_hash;
786 continue;
787 }
788 break;
789
790 default:
791 break;
792 }
793
794 /* Non-whitespace disables MI optimization and stops treating
795 '<' as a quote in #include. */
796 header_ok = false;
797 if (!pfile->state.in_directive)
798 pfile->mi_valid = false;
799
800 if (lex_state == ls_none)
801 continue;
802
803 /* Some of these transitions of state are syntax errors. The
804 ISO preprocessor will issue errors later. */
805 if (lex_state == ls_fun_open)
806 /* Missing '('. */
807 lex_state = ls_none;
808 else if (lex_state == ls_hash
809 || lex_state == ls_predicate
810 || lex_state == ls_defined)
811 lex_state = ls_none;
812
813 /* ls_answer and ls_defined_close keep going until ')'. */
814 }
815
816 done:
817 if (fmacro.buff)
818 _cpp_release_buff (pfile, fmacro.buff);
819
820 if (lex_state == ls_fun_close)
821 cpp_error_with_line (pfile, CPP_DL_ERROR, fmacro.line, 0,
822 msgid: "unterminated argument list invoking macro \"%s\"",
823 NODE_NAME (fmacro.node));
824 return result;
825}
826
827/* Push a context holding the replacement text of the macro NODE on
828 the context stack. NODE is either object-like, or a function-like
829 macro with no arguments. */
830static void
831push_replacement_text (cpp_reader *pfile, cpp_hashnode *node)
832{
833 size_t len;
834 const uchar *text;
835 uchar *buf;
836
837 if (cpp_builtin_macro_p (node))
838 {
839 text = _cpp_builtin_macro_text (pfile, node);
840 len = ustrlen (s1: text);
841 buf = _cpp_unaligned_alloc (pfile, len + 1);
842 memcpy (dest: buf, src: text, n: len);
843 buf[len] = '\n';
844 text = buf;
845 }
846 else
847 {
848 cpp_macro *macro = node->value.macro;
849 macro->used = 1;
850 text = macro->exp.text;
851 len = macro->count;
852 }
853
854 _cpp_push_text_context (pfile, node, text, len);
855}
856
857/* Returns TRUE if traditional macro recursion is detected. */
858static bool
859recursive_macro (cpp_reader *pfile, cpp_hashnode *node)
860{
861 bool recursing = !!(node->flags & NODE_DISABLED);
862
863 /* Object-like macros that are already expanding are necessarily
864 recursive.
865
866 However, it is possible to have traditional function-like macros
867 that are not infinitely recursive but recurse to any given depth.
868 Further, it is easy to construct examples that get ever longer
869 until the point they stop recursing. So there is no easy way to
870 detect true recursion; instead we assume any expansion more than
871 20 deep since the first invocation of this macro must be
872 recursing. */
873 if (recursing && fun_like_macro (node))
874 {
875 size_t depth = 0;
876 cpp_context *context = pfile->context;
877
878 do
879 {
880 depth++;
881 if (context->c.macro == node && depth > 20)
882 break;
883 context = context->prev;
884 }
885 while (context);
886 recursing = context != NULL;
887 }
888
889 if (recursing)
890 cpp_error (pfile, CPP_DL_ERROR,
891 msgid: "detected recursion whilst expanding macro \"%s\"",
892 NODE_NAME (node));
893
894 return recursing;
895}
896
897/* Return the length of the replacement text of a function-like or
898 object-like non-builtin macro. */
899size_t
900_cpp_replacement_text_len (const cpp_macro *macro)
901{
902 size_t len;
903
904 if (macro->fun_like && (macro->paramc != 0))
905 {
906 const uchar *exp;
907
908 len = 0;
909 for (exp = macro->exp.text;;)
910 {
911 struct block *b = (struct block *) exp;
912
913 len += b->text_len;
914 if (b->arg_index == 0)
915 break;
916 len += NODE_LEN (macro->parm.params[b->arg_index - 1]);
917 exp += BLOCK_LEN (b->text_len);
918 }
919 }
920 else
921 len = macro->count;
922
923 return len;
924}
925
926/* Copy the replacement text of MACRO to DEST, which must be of
927 sufficient size. It is not NUL-terminated. The next character is
928 returned. */
929uchar *
930_cpp_copy_replacement_text (const cpp_macro *macro, uchar *dest)
931{
932 if (macro->fun_like && (macro->paramc != 0))
933 {
934 const uchar *exp;
935
936 for (exp = macro->exp.text;;)
937 {
938 struct block *b = (struct block *) exp;
939 cpp_hashnode *param;
940
941 memcpy (dest: dest, src: b->text, n: b->text_len);
942 dest += b->text_len;
943 if (b->arg_index == 0)
944 break;
945 param = macro->parm.params[b->arg_index - 1];
946 memcpy (dest: dest, NODE_NAME (param), NODE_LEN (param));
947 dest += NODE_LEN (param);
948 exp += BLOCK_LEN (b->text_len);
949 }
950 }
951 else
952 {
953 memcpy (dest: dest, src: macro->exp.text, n: macro->count);
954 dest += macro->count;
955 }
956
957 return dest;
958}
959
960/* Push a context holding the replacement text of the macro NODE on
961 the context stack. NODE is either object-like, or a function-like
962 macro with no arguments. */
963static void
964replace_args_and_push (cpp_reader *pfile, struct fun_macro *fmacro)
965{
966 cpp_macro *macro = fmacro->node->value.macro;
967
968 if (macro->paramc == 0)
969 push_replacement_text (pfile, node: fmacro->node);
970 else
971 {
972 const uchar *exp;
973 uchar *p;
974 _cpp_buff *buff;
975 size_t len = 0;
976 int cxtquote = 0;
977
978 /* Get an estimate of the length of the argument-replaced text.
979 This is a worst case estimate, assuming that every replacement
980 text character needs quoting. */
981 for (exp = macro->exp.text;;)
982 {
983 struct block *b = (struct block *) exp;
984
985 len += b->text_len;
986 if (b->arg_index == 0)
987 break;
988 len += 2 * (fmacro->args[b->arg_index]
989 - fmacro->args[b->arg_index - 1] - 1);
990 exp += BLOCK_LEN (b->text_len);
991 }
992
993 /* Allocate room for the expansion plus \n. */
994 buff = _cpp_get_buff (pfile, len + 1);
995
996 /* Copy the expansion and replace arguments. */
997 /* Accumulate actual length, including quoting as necessary */
998 p = BUFF_FRONT (buff);
999 len = 0;
1000 for (exp = macro->exp.text;;)
1001 {
1002 struct block *b = (struct block *) exp;
1003 size_t arglen;
1004 int argquote;
1005 uchar *base;
1006 uchar *in;
1007
1008 len += b->text_len;
1009 /* Copy the non-argument text literally, keeping
1010 track of whether matching quotes have been seen. */
1011 for (arglen = b->text_len, in = b->text; arglen > 0; arglen--)
1012 {
1013 if (*in == '"')
1014 cxtquote = ! cxtquote;
1015 *p++ = *in++;
1016 }
1017 /* Done if no more arguments */
1018 if (b->arg_index == 0)
1019 break;
1020 arglen = (fmacro->args[b->arg_index]
1021 - fmacro->args[b->arg_index - 1] - 1);
1022 base = pfile->out.base + fmacro->args[b->arg_index - 1];
1023 in = base;
1024#if 0
1025 /* Skip leading whitespace in the text for the argument to
1026 be substituted. To be compatible with gcc 2.95, we would
1027 also need to trim trailing whitespace. Gcc 2.95 trims
1028 leading and trailing whitespace, which may be a bug. The
1029 current gcc testsuite explicitly checks that this leading
1030 and trailing whitespace in actual arguments is
1031 preserved. */
1032 while (arglen > 0 && is_space (*in))
1033 {
1034 in++;
1035 arglen--;
1036 }
1037#endif
1038 for (argquote = 0; arglen > 0; arglen--)
1039 {
1040 if (cxtquote && *in == '"')
1041 {
1042 if (in > base && *(in-1) != '\\')
1043 argquote = ! argquote;
1044 /* Always add backslash before double quote if argument
1045 is expanded in a quoted context */
1046 *p++ = '\\';
1047 len++;
1048 }
1049 else if (cxtquote && argquote && *in == '\\')
1050 {
1051 /* Always add backslash before a backslash in an argument
1052 that is expanded in a quoted context and also in the
1053 range of a quoted context in the argument itself. */
1054 *p++ = '\\';
1055 len++;
1056 }
1057 *p++ = *in++;
1058 len++;
1059 }
1060 exp += BLOCK_LEN (b->text_len);
1061 }
1062
1063 /* \n-terminate. */
1064 *p = '\n';
1065 _cpp_push_text_context (pfile, fmacro->node, BUFF_FRONT (buff), len);
1066
1067 /* So we free buffer allocation when macro is left. */
1068 pfile->context->buff = buff;
1069 }
1070}
1071
1072/* Read and record the parameters, if any, of a function-like macro
1073 definition. Destroys pfile->out.cur.
1074
1075 Returns true on success, false on failure (syntax error or a
1076 duplicate parameter). On success, CUR (pfile->context) is just
1077 past the closing parenthesis. */
1078static bool
1079scan_parameters (cpp_reader *pfile, unsigned *n_ptr)
1080{
1081 const uchar *cur = CUR (pfile->context) + 1;
1082 bool ok;
1083
1084 unsigned nparms = 0;
1085 for (;;)
1086 {
1087 cur = skip_whitespace (pfile, cur, skip_comments: true /* skip_comments */);
1088
1089 if (is_idstart (*cur))
1090 {
1091 struct cpp_hashnode *id = lex_identifier (pfile, cur);
1092 ok = false;
1093 if (!_cpp_save_parameter (pfile, nparms, id, id))
1094 break;
1095 nparms++;
1096 cur = skip_whitespace (pfile, CUR (pfile->context),
1097 skip_comments: true /* skip_comments */);
1098 if (*cur == ',')
1099 {
1100 cur++;
1101 continue;
1102 }
1103 ok = (*cur == ')');
1104 break;
1105 }
1106
1107 ok = (*cur == ')' && !nparms);
1108 break;
1109 }
1110
1111 *n_ptr = nparms;
1112
1113 if (!ok)
1114 cpp_error (pfile, CPP_DL_ERROR, msgid: "syntax error in macro parameter list");
1115
1116 CUR (pfile->context) = cur + (*cur == ')');
1117
1118 return ok;
1119}
1120
1121/* Save the text from pfile->out.base to pfile->out.cur as
1122 the replacement text for the current macro, followed by argument
1123 ARG_INDEX, with zero indicating the end of the replacement
1124 text. */
1125static void
1126save_replacement_text (cpp_reader *pfile, cpp_macro *macro,
1127 unsigned int arg_index)
1128{
1129 size_t len = pfile->out.cur - pfile->out.base;
1130 uchar *exp;
1131
1132 if (macro->paramc == 0)
1133 {
1134 /* Object-like and function-like macros without parameters
1135 simply store their \n-terminated replacement text. */
1136 exp = _cpp_unaligned_alloc (pfile, len + 1);
1137 memcpy (dest: exp, src: pfile->out.base, n: len);
1138 exp[len] = '\n';
1139 macro->exp.text = exp;
1140 macro->count = len;
1141 }
1142 else
1143 {
1144 /* Store the text's length (unsigned int), the argument index
1145 (unsigned short, base 1) and then the text. */
1146 size_t blen = BLOCK_LEN (len);
1147 struct block *block;
1148
1149 if (macro->count + blen > BUFF_ROOM (pfile->a_buff))
1150 _cpp_extend_buff (pfile, &pfile->a_buff, macro->count + blen);
1151
1152 exp = BUFF_FRONT (pfile->a_buff);
1153 block = (struct block *) (exp + macro->count);
1154 macro->exp.text = exp;
1155
1156 /* Write out the block information. */
1157 block->text_len = len;
1158 block->arg_index = arg_index;
1159 memcpy (dest: block->text, src: pfile->out.base, n: len);
1160
1161 /* Lex the rest into the start of the output buffer. */
1162 pfile->out.cur = pfile->out.base;
1163
1164 macro->count += blen;
1165
1166 /* If we've finished, commit the memory. */
1167 if (arg_index == 0)
1168 BUFF_FRONT (pfile->a_buff) += macro->count;
1169 }
1170}
1171
1172/* Analyze and save the replacement text of a macro. Returns true on
1173 success. */
1174cpp_macro *
1175_cpp_create_trad_definition (cpp_reader *pfile)
1176{
1177 const uchar *cur;
1178 uchar *limit;
1179 cpp_context *context = pfile->context;
1180 unsigned nparms = 0;
1181 int fun_like = 0;
1182 cpp_hashnode **params = NULL;
1183
1184 /* The context has not been set up for command line defines, and CUR
1185 has not been updated for the macro name for in-file defines. */
1186 pfile->out.cur = pfile->out.base;
1187 CUR (context) = pfile->buffer->cur;
1188 RLIMIT (context) = pfile->buffer->rlimit;
1189 check_output_buffer (pfile, RLIMIT (context) - CUR (context));
1190
1191 /* Is this a function-like macro? */
1192 if (* CUR (context) == '(')
1193 {
1194 fun_like = +1;
1195 if (scan_parameters (pfile, n_ptr: &nparms))
1196 params = (cpp_hashnode **)_cpp_commit_buff
1197 (pfile, size: sizeof (cpp_hashnode *) * nparms);
1198 else
1199 fun_like = -1;
1200 }
1201
1202 cpp_macro *macro = NULL;
1203
1204 if (fun_like >= 0)
1205 {
1206 macro = _cpp_new_macro (pfile, cmk_traditional,
1207 _cpp_aligned_alloc (pfile, sizeof (cpp_macro)));
1208 macro->parm.params = params;
1209 macro->paramc = nparms;
1210 macro->fun_like = fun_like != 0;
1211 }
1212
1213 /* Skip leading whitespace in the replacement text. */
1214 pfile->buffer->cur
1215 = skip_whitespace (pfile, CUR (context),
1216 CPP_OPTION (pfile, discard_comments_in_macro_exp));
1217
1218 pfile->state.prevent_expansion++;
1219 _cpp_scan_out_logical_line (pfile, macro, builtin_macro_arg: false);
1220 pfile->state.prevent_expansion--;
1221
1222 _cpp_unsave_parameters (pfile, nparms);
1223
1224 if (macro)
1225 {
1226 /* Skip trailing white space. */
1227 cur = pfile->out.base;
1228 limit = pfile->out.cur;
1229 while (limit > cur && is_space (limit[-1]))
1230 limit--;
1231 pfile->out.cur = limit;
1232 save_replacement_text (pfile, macro, arg_index: 0);
1233 }
1234
1235 return macro;
1236}
1237
1238/* Copy SRC of length LEN to DEST, but convert all contiguous
1239 whitespace to a single space, provided it is not in quotes. The
1240 quote currently in effect is pointed to by PQUOTE, and is updated
1241 by the function. Returns the number of bytes copied. */
1242static size_t
1243canonicalize_text (uchar *dest, const uchar *src, size_t len, uchar *pquote)
1244{
1245 uchar *orig_dest = dest;
1246 uchar quote = *pquote;
1247
1248 while (len)
1249 {
1250 if (is_space (*src) && !quote)
1251 {
1252 do
1253 src++, len--;
1254 while (len && is_space (*src));
1255 *dest++ = ' ';
1256 }
1257 else
1258 {
1259 if (*src == '\'' || *src == '"')
1260 {
1261 if (!quote)
1262 quote = *src;
1263 else if (quote == *src)
1264 quote = 0;
1265 }
1266 *dest++ = *src++, len--;
1267 }
1268 }
1269
1270 *pquote = quote;
1271 return dest - orig_dest;
1272}
1273
1274/* Returns true if MACRO1 and MACRO2 have expansions different other
1275 than in the form of their whitespace. */
1276bool
1277_cpp_expansions_different_trad (const cpp_macro *macro1,
1278 const cpp_macro *macro2)
1279{
1280 uchar *p1 = XNEWVEC (uchar, macro1->count + macro2->count);
1281 uchar *p2 = p1 + macro1->count;
1282 uchar quote1 = 0, quote2 = 0;
1283 bool mismatch;
1284 size_t len1, len2;
1285
1286 if (macro1->paramc > 0)
1287 {
1288 const uchar *exp1 = macro1->exp.text, *exp2 = macro2->exp.text;
1289
1290 mismatch = true;
1291 for (;;)
1292 {
1293 struct block *b1 = (struct block *) exp1;
1294 struct block *b2 = (struct block *) exp2;
1295
1296 if (b1->arg_index != b2->arg_index)
1297 break;
1298
1299 len1 = canonicalize_text (dest: p1, src: b1->text, len: b1->text_len, pquote: &quote1);
1300 len2 = canonicalize_text (dest: p2, src: b2->text, len: b2->text_len, pquote: &quote2);
1301 if (len1 != len2 || memcmp (s1: p1, s2: p2, n: len1))
1302 break;
1303 if (b1->arg_index == 0)
1304 {
1305 mismatch = false;
1306 break;
1307 }
1308 exp1 += BLOCK_LEN (b1->text_len);
1309 exp2 += BLOCK_LEN (b2->text_len);
1310 }
1311 }
1312 else
1313 {
1314 len1 = canonicalize_text (dest: p1, src: macro1->exp.text, len: macro1->count, pquote: &quote1);
1315 len2 = canonicalize_text (dest: p2, src: macro2->exp.text, len: macro2->count, pquote: &quote2);
1316 mismatch = (len1 != len2 || memcmp (s1: p1, s2: p2, n: len1));
1317 }
1318
1319 free (ptr: p1);
1320 return mismatch;
1321}
1322

source code of libcpp/traditional.cc