MacroArgs.cpp source code [clang/lib/Lex/MacroArgs.cpp]

1	//===--- MacroArgs.cpp - Formal argument info for Macros ------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the MacroArgs interface.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "clang/Lex/MacroArgs.h"
14	#include "clang/Lex/LexDiagnostic.h"
15	#include "clang/Lex/MacroInfo.h"
16	#include "clang/Lex/Preprocessor.h"
17	#include "llvm/Support/SaveAndRestore.h"
18	#include <algorithm>
19
20	using namespace clang;
21
22	/// MacroArgs ctor function - This destroys the vector passed in.
23	MacroArgs MacroArgs::create(const* MacroInfo *MI,
24	ArrayRef<Token> UnexpArgTokens,
25	bool VarargsElided, Preprocessor &PP) {
26	assert(MI->isFunctionLike() &&
27	"Can't have args for an object-like macro!");
28	MacroArgs ResultEnt = nullptr**;
29	unsigned ClosestMatch = ~`0U`;
30
31	// See if we have an entry with a big enough argument list to reuse on the
32	// free list. If so, reuse it.
33	for (MacroArgs *Entry = &PP.MacroArgCache; Entry;
34	Entry = &(*Entry)->ArgCache) {
35	if ((*Entry)->NumUnexpArgTokens >= UnexpArgTokens.size() &&
36	(*Entry)->NumUnexpArgTokens < ClosestMatch) {
37	ResultEnt = Entry;
38
39	// If we have an exact match, use it.
40	if ((*Entry)->NumUnexpArgTokens == UnexpArgTokens.size())
41	break;
42	// Otherwise, use the best fit.
43	ClosestMatch = (*Entry)->NumUnexpArgTokens;
44	}
45	}
46	MacroArgs *Result;
47	if (!ResultEnt) {
48	// Allocate memory for a MacroArgs object with the lexer tokens at the end,
49	// and construct the MacroArgs object.
50	Result = new (
51	llvm::safe_malloc(Sz: totalSizeToAlloc<Token>(Counts: UnexpArgTokens.size())))
52	MacroArgs (UnexpArgTokens.size(), VarargsElided, MI->getNumParams());
53	} else {
54	Result = *ResultEnt;
55	// Unlink this node from the preprocessors singly linked list.
56	*ResultEnt = Result->ArgCache;
57	Result->NumUnexpArgTokens = UnexpArgTokens.size();
58	Result->VarargsElided = VarargsElided;
59	Result->NumMacroArgs = MI->getNumParams();
60	}
61
62	// Copy the actual unexpanded tokens to immediately after the result ptr.
63	if (!UnexpArgTokens.empty()) {
64	static_assert(std::is_trivial_v<Token>,
65	"assume trivial copyability if copying into the "
66	"uninitialized array (as opposed to reusing a cached "
67	"MacroArgs)");
68	std::copy(first: UnexpArgTokens.begin(), last: UnexpArgTokens.end(),
69	result: Result->getTrailingObjects());
70	}
71
72	return Result;
73	}
74
75	/// destroy - Destroy and deallocate the memory for this object.
76	///
77	void MacroArgs::destroy(Preprocessor &PP) {
78	// Don't clear PreExpArgTokens, just clear the entries. Clearing the entries
79	// would deallocate the element vectors.
80	for (unsigned i = `0`, e = PreExpArgTokens.size(); i != e; ++i)
81	PreExpArgTokens [i].clear();
82
83	// Add this to the preprocessor's free list.
84	ArgCache = PP.MacroArgCache;
85	PP.MacroArgCache = this;
86	}
87
88	/// deallocate - This should only be called by the Preprocessor when managing
89	/// its freelist.
90	MacroArgs *MacroArgs::deallocate() {
91	MacroArgs *Next = ArgCache;
92
93	// Run the dtor to deallocate the vectors.
94	this->~MacroArgs();
95	// Release the memory for the object.
96	static_assert(std::is_trivially_destructible_v<Token>,
97	"assume trivially destructible and forego destructors");
98	free(ptr: this);
99
100	return Next;
101	}
102
103
104	/// getArgLength - Given a pointer to an expanded or unexpanded argument,
105	/// return the number of tokens, not counting the EOF, that make up the
106	/// argument.
107	unsigned MacroArgs::getArgLength(const Token *ArgPtr) {
108	unsigned NumArgTokens = `0`;
109	for (; ArgPtr->isNot(K: tok::eof); ++ArgPtr)
110	++NumArgTokens;
111	return NumArgTokens;
112	}
113
114
115	/// getUnexpArgument - Return the unexpanded tokens for the specified formal.
116	///
117	const Token MacroArgs::getUnexpArgument(unsigned* Arg) const {
118
119	assert(Arg < getNumMacroArguments() && "Invalid arg #");
120	// The unexpanded argument tokens start immediately after the MacroArgs object
121	// in memory.
122	const Token *Start = getTrailingObjects();
123	const Token *Result = Start;
124
125	// Scan to find Arg.
126	for (; Arg; ++Result) {
127	assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
128	if (Result->is(K: tok::eof))
129	--Arg;
130	}
131	assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
132	return Result;
133	}
134
135	bool MacroArgs::invokedWithVariadicArgument(const MacroInfo *const MI,
136	Preprocessor &PP) {
137	if (!MI->isVariadic())
138	return false;
139	const int VariadicArgIndex = getNumMacroArguments() - `1`;
140	return getPreExpArgument(Arg: VariadicArgIndex, PP).front().isNot(K: tok::eof);
141	}
142
143	/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
144	/// by pre-expansion, return false. Otherwise, conservatively return true.
145	bool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok,
146	Preprocessor &PP) const {
147	// If there are no identifiers in the argument list, or if the identifiers are
148	// known to not be macros, pre-expansion won't modify it.
149	for (; ArgTok->isNot(K: tok::eof); ++ArgTok)
150	if (IdentifierInfo *II = ArgTok->getIdentifierInfo())
151	if (II->hasMacroDefinition())
152	// Return true even though the macro could be a function-like macro
153	// without a following '(' token, or could be disabled, or not visible.
154	return true;
155	return false;
156	}
157
158	/// getPreExpArgument - Return the pre-expanded form of the specified
159	/// argument.
160	const std::vector<Token> &MacroArgs::getPreExpArgument(unsigned Arg,
161	Preprocessor &PP) {
162	assert(Arg < getNumMacroArguments() && "Invalid argument number!");
163
164	// If we have already computed this, return it.
165	if (PreExpArgTokens.size() < getNumMacroArguments())
166	PreExpArgTokens.resize(new_size: getNumMacroArguments());
167
168	std::vector<Token> &Result = PreExpArgTokens [Arg];
169	if (!Result.empty()) return Result;
170
171	SaveAndRestore PreExpandingMacroArgs(PP.InMacroArgPreExpansion, true);
172
173	const Token *AT = getUnexpArgument(Arg);
174	unsigned NumToks = getArgLength(ArgPtr: AT)+`1`; // Include the EOF.
175
176	// Otherwise, we have to pre-expand this argument, populating Result. To do
177	// this, we set up a fake TokenLexer to lex from the unexpanded argument
178	// list. With this installed, we lex expanded tokens until we hit the EOF
179	// token at the end of the unexp list.
180	PP.EnterTokenStream(Toks: AT, NumToks, DisableMacroExpansion: false /disable expand/,
181	OwnsTokens: false /owns tokens/, IsReinject: false /is reinject/);
182
183	// Lex all of the macro-expanded tokens into Result.
184	do {
185	Result.push_back(x: Token ());
186	Token &Tok = Result.back();
187	PP.Lex(Result&: Tok);
188	} while (Result.back().isNot(K: tok::eof));
189
190	// Pop the token stream off the top of the stack. We know that the internal
191	// pointer inside of it is to the "end" of the token stream, but the stack
192	// will not otherwise be popped until the next token is lexed. The problem is
193	// that the token may be lexed sometime after the vector of tokens itself is
194	// destroyed, which would be badness.
195	if (PP.InCachingLexMode())
196	PP.ExitCachingLexMode();
197	PP.RemoveTopOfLexerStack();
198	return Result;
199	}
200
201
202	/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
203	/// tokens into the literal string token that should be produced by the C #
204	/// preprocessor operator. If Charify is true, then it should be turned into
205	/// a character literal for the Microsoft charize (#@) extension.
206	///
207	Token MacroArgs::StringifyArgument(const Token *ArgToks,
208	Preprocessor &PP, bool Charify,
209	SourceLocation ExpansionLocStart,
210	SourceLocation ExpansionLocEnd) {
211	Token Tok;
212	Tok.startToken();
213	Tok.setKind(Charify ? tok::char_constant : tok::string_literal);
214
215	const Token *ArgTokStart = ArgToks;
216
217	// Stringify all the tokens.
218	SmallString<`128`> Result;
219	Result += "\"";
220
221	bool isFirst = true;
222	for (; ArgToks->isNot(K: tok::eof); ++ArgToks) {
223	const Token &Tok = *ArgToks;
224	if (!isFirst && (Tok.hasLeadingSpace() \|\| Tok.isAtStartOfLine()))
225	Result += `' '`;
226	isFirst = false;
227
228	// If this is a string or character constant, escape the token as specified
229	// by 6.10.3.2p2.
230	if (tok::isStringLiteral(K: Tok.getKind()) \|\| // "foo", u8R"x(foo)x"_bar, etc.
231	Tok.is(K: tok::char_constant) \|\| // 'x'
232	Tok.is(K: tok::wide_char_constant) \|\| // L'x'.
233	Tok.is(K: tok::utf8_char_constant) \|\| // u8'x'.
234	Tok.is(K: tok::utf16_char_constant) \|\| // u'x'.
235	Tok.is(K: tok::utf32_char_constant)) { // U'x'.
236	bool Invalid = false;
237	std::string TokStr = PP.getSpelling(Tok, Invalid: &Invalid);
238	if (!Invalid) {
239	std::string Str = Lexer::Stringify(Str: TokStr);
240	Result.append(in_start: Str.begin(), in_end: Str.end());
241	}
242	} else if (Tok.is(K: tok::code_completion)) {
243	PP.CodeCompleteNaturalLanguage();
244	} else {
245	// Otherwise, just append the token. Do some gymnastics to get the token
246	// in place and avoid copies where possible.
247	unsigned CurStrLen = Result.size();
248	Result.resize(N: CurStrLen+Tok.getLength());
249	const char *BufPtr = Result.data() + CurStrLen;
250	bool Invalid = false;
251	unsigned ActualTokLen = PP.getSpelling(Tok, Buffer&: BufPtr, Invalid: &Invalid);
252
253	if (!Invalid) {
254	// If getSpelling returned a pointer to an already uniqued version of
255	// the string instead of filling in BufPtr, memcpy it onto our string.
256	if (ActualTokLen && BufPtr != &Result [CurStrLen])
257	memcpy(dest: &Result [CurStrLen], src: BufPtr, n: ActualTokLen);
258
259	// If the token was dirty, the spelling may be shorter than the token.
260	if (ActualTokLen != Tok.getLength())
261	Result.resize(N: CurStrLen+ActualTokLen);
262	}
263	}
264	}
265
266	// If the last character of the string is a \, and if it isn't escaped, this
267	// is an invalid string literal, diagnose it as specified in C99.
268	if (Result.back() == `'\\'`) {
269	// Count the number of consecutive \ characters. If even, then they are
270	// just escaped backslashes, otherwise it's an error.
271	unsigned FirstNonSlash = Result.size()-`2`;
272	// Guaranteed to find the starting " if nothing else.
273	while (Result [FirstNonSlash] == `'\\'`)
274	--FirstNonSlash;
275	if ((Result.size()-`1`-FirstNonSlash) & `1`) {
276	// Diagnose errors for things like: #define F(X) #X / F(\)
277	PP.Diag(ArgToks[-`1`], diag::pp_invalid_string_literal);
278	Result.pop_back(); // remove one of the \'s.
279	}
280	}
281	Result += `'"'`;
282
283	// If this is the charify operation and the result is not a legal character
284	// constant, diagnose it.
285	if (Charify) {
286	// First step, turn double quotes into single quotes:
287	Result [`0`] = `'\''`;
288	Result [Result.size()-`1`] = `'\''`;
289
290	// Check for bogus character.
291	bool isBad = false;
292	if (Result.size() == `3`)
293	isBad = Result [`1`] == `'\''`; // ''' is not legal. '\' already fixed above.
294	else
295	isBad = (Result.size() != `4` \|\| Result [`1`] != `'\\'`); // Not '\x'
296
297	if (isBad) {
298	PP.Diag(ArgTokStart[`0`], diag::err_invalid_character_to_charify);
299	Result = "' '"; // Use something arbitrary, but legal.
300	}
301	}
302
303	PP.CreateString(Str: Result, Tok,
304	ExpansionLocStart, ExpansionLocEnd);
305	return Tok;
306	}
307

source code of clang/lib/Lex/MacroArgs.cpp