DependencyDirectivesScanner.cpp source code [clang/lib/Lex/DependencyDirectivesScanner.cpp]

1	//===- DependencyDirectivesScanner.cpp ------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// This is the interface for scanning header and source files to get the
11	/// minimum necessary preprocessor directives for evaluating includes. It
12	/// reduces the source down to #define, #include, #import, @import, and any
13	/// conditional preprocessor logic that contains one of those.
14	///
15	//===----------------------------------------------------------------------===//
16
17	#include "clang/Lex/DependencyDirectivesScanner.h"
18	#include "clang/Basic/CharInfo.h"
19	#include "clang/Basic/Diagnostic.h"
20	#include "clang/Lex/LexDiagnostic.h"
21	#include "clang/Lex/Lexer.h"
22	#include "clang/Lex/Pragma.h"
23	#include "llvm/ADT/ScopeExit.h"
24	#include "llvm/ADT/SmallString.h"
25	#include "llvm/ADT/StringMap.h"
26	#include "llvm/ADT/StringSwitch.h"
27	#include <optional>
28
29	using namespace clang;
30	using namespace clang::dependency_directives_scan;
31	using namespace llvm;
32
33	namespace {
34
35	struct DirectiveWithTokens {
36	DirectiveKind Kind;
37	unsigned NumTokens;
38
39	DirectiveWithTokens(DirectiveKind Kind, unsigned NumTokens)
40	: Kind(Kind), NumTokens(NumTokens) {}
41	};
42
43	/// Does an efficient "scan" of the sources to detect the presence of
44	/// preprocessor (or module import) directives and collects the raw lexed tokens
45	/// for those directives so that the \p Lexer can "replay" them when the file is
46	/// included.
47	///
48	/// Note that the behavior of the raw lexer is affected by the language mode,
49	/// while at this point we want to do a scan and collect tokens once,
50	/// irrespective of the language mode that the file will get included in. To
51	/// compensate for that the \p Lexer, while "replaying", will adjust a token
52	/// where appropriate, when it could affect the preprocessor's state.
53	/// For example in a directive like
54	///
55	/// \code
56	/// #if __has_cpp_attribute(clang::fallthrough)
57	/// \endcode
58	///
59	/// The preprocessor needs to see '::' as 'tok::coloncolon' instead of 2
60	/// 'tok::colon'. The \p Lexer will adjust if it sees consecutive 'tok::colon'
61	/// while in C++ mode.
62	struct Scanner {
63	Scanner(StringRef Input,
64	SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
65	DiagnosticsEngine *Diags, SourceLocation InputSourceLoc)
66	: Input (Input), Tokens(Tokens), Diags(Diags),
67	InputSourceLoc (InputSourceLoc), LangOpts(getLangOptsForDepScanning()),
68	TheLexer (InputSourceLoc, LangOpts, Input.begin(), Input.begin(),
69	Input.end()) {}
70
71	static LangOptions getLangOptsForDepScanning() {
72	LangOptions LangOpts;
73	// Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'.
74	LangOpts.ObjC = true;
75	LangOpts.LineComment = true;
76	LangOpts.RawStringLiterals = true;
77	// FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"".
78	return LangOpts;
79	}
80
81	/// Lex the provided source and emit the directive tokens.
82	///
83	/// \returns True on error.
84	bool scan(SmallVectorImpl<Directive> &Directives);
85
86	private:
87	/// Lexes next token and advances \p First and the \p Lexer.
88	[[nodiscard]] dependency_directives_scan::Token &
89	lexToken(const char &First, const* char *const End);
90
91	[[nodiscard]] dependency_directives_scan::Token &
92	lexIncludeFilename(const char &First, const* char *const End);
93
94	void skipLine(const char &First, const* char *const End);
95	void skipDirective(StringRef Name, const char &First, const* char *const End);
96
97	/// Returns the spelling of a string literal or identifier after performing
98	/// any processing needed to handle \c clang::Token::NeedsCleaning.
99	StringRef cleanStringIfNeeded(const dependency_directives_scan::Token &Tok);
100
101	/// Lexes next token and if it is identifier returns its string, otherwise
102	/// it skips the current line and returns \p std::nullopt.
103	///
104	/// In any case (whatever the token kind) \p First and the \p Lexer will
105	/// advance beyond the token.
106	[[nodiscard]] std::optional<StringRef>
107	tryLexIdentifierOrSkipLine(const char &First, const* char *const End);
108
109	/// Used when it is certain that next token is an identifier.
110	[[nodiscard]] StringRef lexIdentifier(const char *&First,
111	const char *const End);
112
113	/// Lexes next token and returns true iff it is an identifier that matches \p
114	/// Id, otherwise it skips the current line and returns false.
115	///
116	/// In any case (whatever the token kind) \p First and the \p Lexer will
117	/// advance beyond the token.
118	[[nodiscard]] bool isNextIdentifierOrSkipLine(StringRef Id,
119	const char *&First,
120	const char *const End);
121
122	/// Lexes next token and returns true iff it matches the kind \p K.
123	/// Otherwise it skips the current line and returns false.
124	///
125	/// In any case (whatever the token kind) \p First and the \p Lexer will
126	/// advance beyond the token.
127	[[nodiscard]] bool isNextTokenOrSkipLine(tok::TokenKind K, const char *&First,
128	const char *const End);
129
130	/// Lexes next token and if it is string literal, returns its string.
131	/// Otherwise, it skips the current line and returns \p std::nullopt.
132	///
133	/// In any case (whatever the token kind) \p First and the \p Lexer will
134	/// advance beyond the token.
135	[[nodiscard]] std::optional<StringRef>
136	tryLexStringLiteralOrSkipLine(const char &First, const* char *const End);
137
138	[[nodiscard]] bool scanImpl(const char First, const* char *const End);
139	[[nodiscard]] bool lexPPLine(const char &First, const* char *const End);
140	[[nodiscard]] bool lexAt(const char &First, const* char *const End);
141	[[nodiscard]] bool lexModule(const char &First, const* char *const End);
142	[[nodiscard]] bool lexDefine(const char HashLoc, const* char *&First,
143	const char *const End);
144	[[nodiscard]] bool lexPragma(const char &First, const* char *const End);
145	[[nodiscard]] bool lex_Pragma(const char &First, const* char *const End);
146	[[nodiscard]] bool lexEndif(const char &First, const* char *const End);
147	[[nodiscard]] bool lexDefault(DirectiveKind Kind, const char *&First,
148	const char *const End);
149	[[nodiscard]] bool lexModuleDirectiveBody(DirectiveKind Kind,
150	const char *&First,
151	const char *const End);
152	void lexPPDirectiveBody(const char &First, const* char *const End);
153
154	DirectiveWithTokens &pushDirective(DirectiveKind Kind) {
155	Tokens.append(RHS: CurDirToks);
156	DirsWithToks.emplace_back(Args&: Kind, Args: CurDirToks.size());
157	CurDirToks.clear();
158	return DirsWithToks.back();
159	}
160	void popDirective() {
161	Tokens.pop_back_n(NumItems: DirsWithToks.pop_back_val().NumTokens);
162	}
163	DirectiveKind topDirective() const {
164	return DirsWithToks.empty() ? pp_none : DirsWithToks.back().Kind;
165	}
166
167	unsigned getOffsetAt(const char CurPtr) const* {
168	return CurPtr - Input.data();
169	}
170
171	/// Reports a diagnostic if the diagnostic engine is provided. Always returns
172	/// true at the end.
173	bool reportError(const char CurPtr, unsigned* Err);
174
175	StringMap<char> SplitIds;
176	StringRef Input;
177	SmallVectorImpl<dependency_directives_scan::Token> &Tokens;
178	DiagnosticsEngine *Diags;
179	SourceLocation InputSourceLoc;
180
181	const char LastTokenPtr = nullptr*;
182	/// Keeps track of the tokens for the currently lexed directive. Once a
183	/// directive is fully lexed and "committed" then the tokens get appended to
184	/// \p Tokens and \p CurDirToks is cleared for the next directive.
185	SmallVector<dependency_directives_scan::Token, `32`> CurDirToks;
186	/// The directives that were lexed along with the number of tokens that each
187	/// directive contains. The tokens of all the directives are kept in \p Tokens
188	/// vector, in the same order as the directives order in \p DirsWithToks.
189	SmallVector<DirectiveWithTokens, `64`> DirsWithToks;
190	LangOptions LangOpts;
191	Lexer TheLexer;
192	};
193
194	} // end anonymous namespace
195
196	bool Scanner::reportError(const char CurPtr, unsigned* Err) {
197	if (!Diags)
198	return true;
199	assert(CurPtr >= Input.data() && "invalid buffer ptr");
200	Diags->Report(Loc: InputSourceLoc.getLocWithOffset(Offset: getOffsetAt(CurPtr)), DiagID: Err);
201	return true;
202	}
203
204	static void skipOverSpaces(const char &First, const* char *const End) {
205	while (First != End && isHorizontalWhitespace(c: *First))
206	++First;
207	}
208
209	[[nodiscard]] static bool isRawStringLiteral(const char *First,
210	const char *Current) {
211	assert(First <= Current);
212
213	// Check if we can even back up.
214	if (*Current != `'"'` \|\| First == Current)
215	return false;
216
217	// Check for an "R".
218	--Current;
219	if (*Current != `'R'`)
220	return false;
221	if (First == Current \|\| !isAsciiIdentifierContinue(c: *--Current))
222	return true;
223
224	// Check for a prefix of "u", "U", or "L".
225	if (Current == `'u'` \|\| Current == `'U'` \|\| *Current == `'L'`)
226	return First == Current \|\| !isAsciiIdentifierContinue(c: *--Current);
227
228	// Check for a prefix of "u8".
229	if (Current != `'8'` \|\| First == Current \|\| Current-- != `'u'`)
230	return false;
231	return First == Current \|\| !isAsciiIdentifierContinue(c: *--Current);
232	}
233
234	static void skipRawString(const char &First, const* char *const End) {
235	assert(First[`0`] == `'"'`);
236	assert(First[-`1`] == `'R'`);
237
238	const char *Last = ++First;
239	while (Last != End && *Last != `'('`)
240	++Last;
241	if (Last == End) {
242	First = Last; // Hit the end... just give up.
243	return;
244	}
245
246	StringRef Terminator(First, Last - First);
247	for (;;) {
248	// Move First to just past the next ")".
249	First = Last;
250	while (First != End && *First != `')'`)
251	++First;
252	if (First == End)
253	return;
254	++First;
255
256	// Look ahead for the terminator sequence.
257	Last = First;
258	while (Last != End && size_t(Last - First) < Terminator.size() &&
259	Terminator [Last - First] == *Last)
260	++Last;
261
262	// Check if we hit it (or the end of the file).
263	if (Last == End) {
264	First = Last;
265	return;
266	}
267	if (size_t(Last - First) < Terminator.size())
268	continue;
269	if (*Last != `'"'`)
270	continue;
271	First = Last + `1`;
272	return;
273	}
274	}
275
276	// Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n)
277	static unsigned isEOL(const char First, const* char *const End) {
278	if (First == End)
279	return `0`;
280	if (End - First > `1` && isVerticalWhitespace(c: First[`0`]) &&
281	isVerticalWhitespace(c: First[`1`]) && First[`0`] != First[`1`])
282	return `2`;
283	return !!isVerticalWhitespace(c: First[`0`]);
284	}
285
286	static void skipString(const char &First, const* char *const End) {
287	assert(First == `'\''` \|\| First == `'"'` \|\| *First == `'<'`);
288	const char Terminator = First == `'<'` ? `'>'` : First;
289	for (++First; First != End && *First != Terminator; ++First) {
290	// String and character literals don't extend past the end of the line.
291	if (isVerticalWhitespace(c: *First))
292	return;
293	if (*First != `'\\'`)
294	continue;
295	// Skip past backslash to the next character. This ensures that the
296	// character right after it is skipped as well, which matters if it's
297	// the terminator.
298	if (++First == End)
299	return;
300	if (!isWhitespace(c: *First))
301	continue;
302	// Whitespace after the backslash might indicate a line continuation.
303	const char *FirstAfterBackslashPastSpace = First;
304	skipOverSpaces(First&: FirstAfterBackslashPastSpace, End);
305	if (unsigned NLSize = isEOL(First: FirstAfterBackslashPastSpace, End)) {
306	// Advance the character pointer to the next line for the next
307	// iteration.
308	First = FirstAfterBackslashPastSpace + NLSize - `1`;
309	}
310	}
311	if (First != End)
312	++First; // Finish off the string.
313	}
314
315	// Returns the length of the skipped newline
316	static unsigned skipNewline(const char &First, const* char *End) {
317	if (First == End)
318	return `0`;
319	assert(isVerticalWhitespace(*First));
320	unsigned Len = isEOL(First, End);
321	assert(Len && "expected newline");
322	First += Len;
323	return Len;
324	}
325
/// Tells whether the newline that was just skipped was escaped by a backslash.
///
/// \param First  Position immediately after the skipped newline.
/// \param EOLLen Length of that newline in characters.
/// \returns True if the character right before the newline is a backslash.
static bool wasLineContinuation(const char *First, unsigned EOLLen) {
  return *(First - (int)EOLLen - 1) == '\\';
}
329
330	static void skipToNewlineRaw(const char &First, const* char *const End) {
331	for (;;) {
332	if (First == End)
333	return;
334
335	unsigned Len = isEOL(First, End);
336	if (Len)
337	return;
338
339	do {
340	if (++First == End)
341	return;
342	Len = isEOL(First, End);
343	} while (!Len);
344
345	if (First[-`1`] != `'\\'`)
346	return;
347
348	First += Len;
349	// Keep skipping lines...
350	}
351	}
352
353	static void skipLineComment(const char &First, const* char *const End) {
354	assert(First[`0`] == `'/'` && First[`1`] == `'/'`);
355	First += `2`;
356	skipToNewlineRaw(First, End);
357	}
358
/// Skips a `/* ... */` comment. On entry \p First points at the opening
/// slash; on exit it points just past the closing `*/`, or at \p End if the
/// comment is unterminated.
static void skipBlockComment(const char *&First, const char *const End) {
  assert(First[0] == '/' && First[1] == '*');
  // The shortest complete comment is "/**/"; anything shorter cannot close.
  if (End - First < 4) {
    First = End;
    return;
  }
  // Start at offset 3 so that the '*' of the opener is never mistaken for the
  // '*' of the closer (i.e. "/*/" does not terminate the comment).
  for (First += 3; First != End; ++First)
    if (First[-1] == '*' && First[0] == '/') {
      ++First;
      return;
    }
}
371
372	/// \returns True if the current single quotation mark character is a C++14
373	/// digit separator.
374	static bool isQuoteCppDigitSeparator(const char *const Start,
375	const char *const Cur,
376	const char *const End) {
377	assert(*Cur == `'\''` && "expected quotation character");
378	// skipLine called in places where we don't expect a valid number
379	// body before `start` on the same line, so always return false at the start.
380	if (Start == Cur)
381	return false;
382	// The previous character must be a valid PP number character.
383	// Make sure that the L, u, U, u8 prefixes don't get marked as a
384	// separator though.
385	char Prev = *(Cur - `1`);
386	if (Prev == `'L'` \|\| Prev == `'U'` \|\| Prev == `'u'`)
387	return false;
388	if (Prev == `'8'` && (Cur - `1` != Start) && *(Cur - `2`) == `'u'`)
389	return false;
390	if (!isPreprocessingNumberBody(c: Prev))
391	return false;
392	// The next character should be a valid identifier body character.
393	return (Cur + `1`) < End && isAsciiIdentifierContinue(c: *(Cur + `1`));
394	}
395
396	void Scanner::skipLine(const char &First, const* char *const End) {
397	for (;;) {
398	assert(First <= End);
399	if (First == End)
400	return;
401
402	if (isVerticalWhitespace(c: *First)) {
403	skipNewline(First, End);
404	return;
405	}
406	const char *Start = First;
407	while (First != End && !isVerticalWhitespace(c: *First)) {
408	// Iterate over strings correctly to avoid comments and newlines.
409	if (*First == `'"'` \|\|
410	(*First == `'\''` && !isQuoteCppDigitSeparator(Start, Cur: First, End))) {
411	LastTokenPtr = First;
412	if (isRawStringLiteral(First: Start, Current: First))
413	skipRawString(First, End);
414	else
415	skipString(First, End);
416	continue;
417	}
418
419	// Iterate over comments correctly.
420	if (*First != `'/'` \|\| End - First < `2`) {
421	LastTokenPtr = First;
422	++First;
423	continue;
424	}
425
426	if (First[`1`] == `'/'`) {
427	// "//...".
428	skipLineComment(First, End);
429	continue;
430	}
431
432	if (First[`1`] != `'*'`) {
433	LastTokenPtr = First;
434	++First;
435	continue;
436	}
437
438	// "/.../".
439	skipBlockComment(First, End);
440	}
441	if (First == End)
442	return;
443
444	// Skip over the newline.
445	unsigned Len = skipNewline(First, End);
446	if (!wasLineContinuation(First, EOLLen: Len)) // Continue past line-continuations.
447	break;
448	}
449	}
450
451	void Scanner::skipDirective(StringRef Name, const char *&First,
452	const char *const End) {
453	if (llvm::StringSwitch<bool>(Name)
454	.Case(S: "warning", Value: true)
455	.Case(S: "error", Value: true)
456	.Default(Value: false))
457	// Do not process quotes or comments.
458	skipToNewlineRaw(First, End);
459	else
460	skipLine(First, End);
461	}
462
463	static void skipWhitespace(const char &First, const* char *const End) {
464	for (;;) {
465	assert(First <= End);
466	skipOverSpaces(First, End);
467
468	if (End - First < `2`)
469	return;
470
471	if (First[`0`] == `'\\'` && isVerticalWhitespace(c: First[`1`])) {
472	skipNewline(First&: ++First, End);
473	continue;
474	}
475
476	// Check for a non-comment character.
477	if (First[`0`] != `'/'`)
478	return;
479
480	// "// ...".
481	if (First[`1`] == `'/'`) {
482	skipLineComment(First, End);
483	return;
484	}
485
486	// Cannot be a comment.
487	if (First[`1`] != `'*'`)
488	return;
489
490	// "/.../".
491	skipBlockComment(First, End);
492	}
493	}
494
495	bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First,
496	const char *const End) {
497	const char *DirectiveLoc = Input.data() + CurDirToks.front().Offset;
498	for (;;) {
499	// Keep a copy of the First char incase it needs to be reset.
500	const char *Previous = First;
501	const dependency_directives_scan::Token &Tok = lexToken(First, End);
502	if ((Tok.is(K: tok::hash) \|\| Tok.is(K: tok::at)) &&
503	(Tok.Flags & clang::Token::StartOfLine)) {
504	CurDirToks.pop_back();
505	First = Previous;
506	return false;
507	}
508	if (Tok.is(K: tok::eof))
509	return reportError(
510	CurPtr: DirectiveLoc,
511	diag::Err: err_dep_source_scanner_missing_semi_after_at_import);
512	if (Tok.is(K: tok::semi))
513	break;
514	}
515	pushDirective(Kind);
516	skipWhitespace(First, End);
517	if (First == End)
518	return false;
519	if (!isVerticalWhitespace(c: *First))
520	return reportError(
521	CurPtr: DirectiveLoc, diag::Err: err_dep_source_scanner_unexpected_tokens_at_import);
522	skipNewline(First, End);
523	return false;
524	}
525
526	dependency_directives_scan::Token &Scanner::lexToken(const char *&First,
527	const char *const End) {
528	clang::Token Tok;
529	TheLexer.LexFromRawLexer(Result&: Tok);
530	First = Input.data() + TheLexer.getCurrentBufferOffset();
531	assert(First <= End);
532
533	unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength();
534	CurDirToks.emplace_back(Args&: Offset, Args: Tok.getLength(), Args: Tok.getKind(),
535	Args: Tok.getFlags());
536	return CurDirToks.back();
537	}
538
539	dependency_directives_scan::Token &
540	Scanner::lexIncludeFilename(const char &First, const* char *const End) {
541	clang::Token Tok;
542	TheLexer.LexIncludeFilename(FilenameTok&: Tok);
543	First = Input.data() + TheLexer.getCurrentBufferOffset();
544	assert(First <= End);
545
546	unsigned Offset = TheLexer.getCurrentBufferOffset() - Tok.getLength();
547	CurDirToks.emplace_back(Args&: Offset, Args: Tok.getLength(), Args: Tok.getKind(),
548	Args: Tok.getFlags());
549	return CurDirToks.back();
550	}
551
552	void Scanner::lexPPDirectiveBody(const char &First, const* char *const End) {
553	while (true) {
554	const dependency_directives_scan::Token &Tok = lexToken(First, End);
555	if (Tok.is(K: tok::eod) \|\| Tok.is(K: tok::eof))
556	break;
557	}
558	}
559
560	StringRef
561	Scanner::cleanStringIfNeeded(const dependency_directives_scan::Token &Tok) {
562	bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning;
563	if (LLVM_LIKELY(!NeedsCleaning))
564	return Input.slice(Start: Tok.Offset, End: Tok.getEnd());
565
566	SmallString<`64`> Spelling;
567	Spelling.resize(N: Tok.Length);
568
569	// FIXME: C++11 raw string literals need special handling (see getSpellingSlow
570	// in the Lexer). Currently we cannot see them due to our LangOpts.
571
572	unsigned SpellingLength = `0`;
573	const char *BufPtr = Input.begin() + Tok.Offset;
574	const char *AfterIdent = Input.begin() + Tok.getEnd();
575	while (BufPtr < AfterIdent) {
576	auto [Char, Size] = Lexer::getCharAndSizeNoWarn(Ptr: BufPtr, LangOpts);
577	Spelling [SpellingLength++] = Char;
578	BufPtr += Size;
579	}
580
581	return SplitIds.try_emplace(Key: StringRef(Spelling.begin(), SpellingLength), Args: `0`)
582	.first ->first();
583	}
584
585	std::optional<StringRef>
586	Scanner::tryLexIdentifierOrSkipLine(const char &First, const* char *const End) {
587	const dependency_directives_scan::Token &Tok = lexToken(First, End);
588	if (Tok.isNot(K: tok::raw_identifier)) {
589	if (!Tok.is(K: tok::eod))
590	skipLine(First, End);
591	return std::nullopt;
592	}
593
594	return cleanStringIfNeeded(Tok);
595	}
596
597	StringRef Scanner::lexIdentifier(const char &First, const* char *const End) {
598	std::optional<StringRef> Id = tryLexIdentifierOrSkipLine(First, End);
599	assert(Id && "expected identifier token");
600	return *Id;
601	}
602
603	bool Scanner::isNextIdentifierOrSkipLine(StringRef Id, const char *&First,
604	const char *const End) {
605	if (std::optional<StringRef> FoundId =
606	tryLexIdentifierOrSkipLine(First, End)) {
607	if (*FoundId == Id)
608	return true;
609	skipLine(First, End);
610	}
611	return false;
612	}
613
614	bool Scanner::isNextTokenOrSkipLine(tok::TokenKind K, const char *&First,
615	const char *const End) {
616	const dependency_directives_scan::Token &Tok = lexToken(First, End);
617	if (Tok.is(K))
618	return true;
619	skipLine(First, End);
620	return false;
621	}
622
623	std::optional<StringRef>
624	Scanner::tryLexStringLiteralOrSkipLine(const char *&First,
625	const char *const End) {
626	const dependency_directives_scan::Token &Tok = lexToken(First, End);
627	if (!tok::isStringLiteral(K: Tok.Kind)) {
628	if (!Tok.is(K: tok::eod))
629	skipLine(First, End);
630	return std::nullopt;
631	}
632
633	return cleanStringIfNeeded(Tok);
634	}
635
636	bool Scanner::lexAt(const char &First, const* char *const End) {
637	// Handle "@import".
638
639	// Lex '@'.
640	const dependency_directives_scan::Token &AtTok = lexToken(First, End);
641	assert(AtTok.is(tok::at));
642	(void)AtTok;
643
644	if (!isNextIdentifierOrSkipLine(Id: "import", First, End))
645	return false;
646	return lexModuleDirectiveBody(Kind: decl_at_import, First, End);
647	}
648
649	bool Scanner::lexModule(const char &First, const* char *const End) {
650	StringRef Id = lexIdentifier(First, End);
651	bool Export = false;
652	if (Id == "export") {
653	Export = true;
654	std::optional<StringRef> NextId = tryLexIdentifierOrSkipLine(First, End);
655	if (!NextId)
656	return false;
657	Id = *NextId;
658	}
659
660	if (Id != "module" && Id != "import") {
661	skipLine(First, End);
662	return false;
663	}
664
665	skipWhitespace(First, End);
666
667	// Ignore this as a module directive if the next character can't be part of
668	// an import.
669
670	switch (*First) {
671	case `':'`: {
672	// `module :` is never the start of a valid module declaration.
673	if (Id == "module") {
674	skipLine(First, End);
675	return false;
676	}
677	// `import:(type)name` is a valid ObjC method decl, so check one more token.
678	(void)lexToken(First, End);
679	if (!tryLexIdentifierOrSkipLine(First, End))
680	return false;
681	break;
682	}
683	case `'<'`:
684	case `'"'`:
685	break;
686	default:
687	if (!isAsciiIdentifierContinue(c: *First)) {
688	skipLine(First, End);
689	return false;
690	}
691	}
692
693	TheLexer.seek(Offset: getOffsetAt(CurPtr: First), /IsAtStartOfLine/ false);
694
695	DirectiveKind Kind;
696	if (Id == "module")
697	Kind = Export ? cxx_export_module_decl : cxx_module_decl;
698	else
699	Kind = Export ? cxx_export_import_decl : cxx_import_decl;
700
701	return lexModuleDirectiveBody(Kind, First, End);
702	}
703
704	bool Scanner::lex_Pragma(const char &First, const* char *const End) {
705	if (!isNextTokenOrSkipLine(K: tok::l_paren, First, End))
706	return false;
707
708	std::optional<StringRef> Str = tryLexStringLiteralOrSkipLine(First, End);
709
710	if (!Str \|\| !isNextTokenOrSkipLine(K: tok::r_paren, First, End))
711	return false;
712
713	SmallString<`64`> Buffer(*Str);
714	prepare_PragmaString(StrVal&: Buffer);
715
716	// Use a new scanner instance since the tokens will be inside the allocated
717	// string. We should already have captured all the relevant tokens in the
718	// current scanner.
719	SmallVector<dependency_directives_scan::Token> DiscardTokens;
720	const char *Begin = Buffer.c_str();
721	Scanner PragmaScanner{StringRef(Begin, Buffer.size()), DiscardTokens, Diags,
722	InputSourceLoc};
723
724	PragmaScanner.TheLexer.setParsingPreprocessorDirective(true);
725	if (PragmaScanner.lexPragma(First&: Begin, End: Buffer.end()))
726	return true;
727
728	DirectiveKind K = PragmaScanner.topDirective();
729	if (K == pp_none) {
730	skipLine(First, End);
731	return false;
732	}
733
734	assert(Begin == Buffer.end());
735	pushDirective(Kind: K);
736	return false;
737	}
738
739	bool Scanner::lexPragma(const char &First, const* char *const End) {
740	std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);
741	if (!FoundId)
742	return false;
743
744	StringRef Id = *FoundId;
745	auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
746	.Case(S: "once", Value: pp_pragma_once)
747	.Case(S: "push_macro", Value: pp_pragma_push_macro)
748	.Case(S: "pop_macro", Value: pp_pragma_pop_macro)
749	.Case(S: "include_alias", Value: pp_pragma_include_alias)
750	.Default(Value: pp_none);
751	if (Kind != pp_none) {
752	lexPPDirectiveBody(First, End);
753	pushDirective(Kind);
754	return false;
755	}
756
757	if (Id != "clang") {
758	skipLine(First, End);
759	return false;
760	}
761
762	FoundId = tryLexIdentifierOrSkipLine(First, End);
763	if (!FoundId)
764	return false;
765	Id = *FoundId;
766
767	// #pragma clang system_header
768	if (Id == "system_header") {
769	lexPPDirectiveBody(First, End);
770	pushDirective(Kind: pp_pragma_system_header);
771	return false;
772	}
773
774	if (Id != "module") {
775	skipLine(First, End);
776	return false;
777	}
778
779	// #pragma clang module.
780	if (!isNextIdentifierOrSkipLine(Id: "import", First, End))
781	return false;
782
783	// #pragma clang module import.
784	lexPPDirectiveBody(First, End);
785	pushDirective(Kind: pp_pragma_import);
786	return false;
787	}
788
789	bool Scanner::lexEndif(const char &First, const* char *const End) {
790	// Strip out "#else" if it's empty.
791	if (topDirective() == pp_else)
792	popDirective();
793
794	// If "#ifdef" is empty, strip it and skip the "#endif".
795	//
796	// FIXME: Once/if Clang starts disallowing __has_include in macro expansions,
797	// we can skip empty `#if` and `#elif` blocks as well after scanning for a
798	// literal __has_include in the condition. Even without that rule we could
799	// drop the tokens if we scan for identifiers in the condition and find none.
800	if (topDirective() == pp_ifdef \|\| topDirective() == pp_ifndef) {
801	popDirective();
802	skipLine(First, End);
803	return false;
804	}
805
806	return lexDefault(Kind: pp_endif, First, End);
807	}
808
809	bool Scanner::lexDefault(DirectiveKind Kind, const char *&First,
810	const char *const End) {
811	lexPPDirectiveBody(First, End);
812	pushDirective(Kind);
813	return false;
814	}
815
/// Returns true if a line whose first non-whitespace character is \p First
/// needs a closer look: '#' and '@' introduce directives, 'i', 'e' and 'm'
/// are the characters lexPPLine() hands to the module-directive handler, and
/// '_' may begin `_Pragma`.
static bool isStartOfRelevantLine(char First) {
  switch (First) {
  case '#':
  case '@':
  case 'i':
  case 'e':
  case 'm':
  case '_':
    return true;
  }
  return false;
}
828
829	bool Scanner::lexPPLine(const char &First, const* char *const End) {
830	assert(First != End);
831
832	skipWhitespace(First, End);
833	assert(First <= End);
834	if (First == End)
835	return false;
836
837	if (!isStartOfRelevantLine(First: *First)) {
838	skipLine(First, End);
839	assert(First <= End);
840	return false;
841	}
842
843	LastTokenPtr = First;
844
845	TheLexer.seek(Offset: getOffsetAt(CurPtr: First), /IsAtStartOfLine/ true);
846
847	auto ScEx1 = make_scope_exit(F: [&]() {
848	/// Clear Scanner's CurDirToks before returning, in case we didn't push a
849	/// new directive.
850	CurDirToks.clear();
851	});
852
853	// Handle "@import".
854	if (*First == `'@'`)
855	return lexAt(First, End);
856
857	// Handle module directives for C++20 modules.
858	if (First == `'i'` \|\| First == `'e'` \|\| *First == `'m'`)
859	return lexModule(First, End);
860
861	if (*First == `'_'`) {
862	if (isNextIdentifierOrSkipLine(Id: "_Pragma", First, End))
863	return lex_Pragma(First, End);
864	return false;
865	}
866
867	// Handle preprocessing directives.
868
869	TheLexer.setParsingPreprocessorDirective(true);
870	auto ScEx2 = make_scope_exit(
871	F: [&]() { TheLexer.setParsingPreprocessorDirective(false); });
872
873	// Lex '#'.
874	const dependency_directives_scan::Token &HashTok = lexToken(First, End);
875	if (HashTok.is(K: tok::hashhash)) {
876	// A \p tok::hashhash at this location is passed by the preprocessor to the
877	// parser to interpret, like any other token. So for dependency scanning
878	// skip it like a normal token not affecting the preprocessor.
879	skipLine(First, End);
880	assert(First <= End);
881	return false;
882	}
883	assert(HashTok.is(tok::hash));
884	(void)HashTok;
885
886	std::optional<StringRef> FoundId = tryLexIdentifierOrSkipLine(First, End);
887	if (!FoundId)
888	return false;
889
890	StringRef Id = *FoundId;
891
892	if (Id == "pragma")
893	return lexPragma(First, End);
894
895	auto Kind = llvm::StringSwitch<DirectiveKind>(Id)
896	.Case(S: "include", Value: pp_include)
897	.Case(S: "__include_macros", Value: pp___include_macros)
898	.Case(S: "define", Value: pp_define)
899	.Case(S: "undef", Value: pp_undef)
900	.Case(S: "import", Value: pp_import)
901	.Case(S: "include_next", Value: pp_include_next)
902	.Case(S: "if", Value: pp_if)
903	.Case(S: "ifdef", Value: pp_ifdef)
904	.Case(S: "ifndef", Value: pp_ifndef)
905	.Case(S: "elif", Value: pp_elif)
906	.Case(S: "elifdef", Value: pp_elifdef)
907	.Case(S: "elifndef", Value: pp_elifndef)
908	.Case(S: "else", Value: pp_else)
909	.Case(S: "endif", Value: pp_endif)
910	.Default(Value: pp_none);
911	if (Kind == pp_none) {
912	skipDirective(Name: Id, First, End);
913	return false;
914	}
915
916	if (Kind == pp_endif)
917	return lexEndif(First, End);
918
919	switch (Kind) {
920	case pp_include:
921	case pp___include_macros:
922	case pp_include_next:
923	case pp_import:
924	// Ignore missing filenames in include or import directives.
925	if (lexIncludeFilename(First, End).is(K: tok::eod)) {
926	return false;
927	}
928	break;
929	default:
930	break;
931	}
932
933	// Everything else.
934	return lexDefault(Kind, First, End);
935	}
936
/// Advances \p First past a UTF-8 byte order mark (EF BB BF) if the buffer
/// starts with one; otherwise leaves it untouched.
static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
  if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' &&
      First[2] == '\xbf')
    First += 3;
}
942
943	bool Scanner::scanImpl(const char First, const* char *const End) {
944	skipUTF8ByteOrderMark(First, End);
945	while (First != End)
946	if (lexPPLine(First, End))
947	return true;
948	return false;
949	}
950
951	bool Scanner::scan(SmallVectorImpl<Directive> &Directives) {
952	bool Error = scanImpl(First: Input.begin(), End: Input.end());
953
954	if (!Error) {
955	// Add an EOF on success.
956	if (LastTokenPtr &&
957	(Tokens.empty() \|\| LastTokenPtr > Input.begin() + Tokens.back().Offset))
958	pushDirective(Kind: tokens_present_before_eof);
959	pushDirective(Kind: pp_eof);
960	}
961
962	ArrayRef<dependency_directives_scan::Token> RemainingTokens = Tokens;
963	for (const DirectiveWithTokens &DirWithToks : DirsWithToks) {
964	assert(RemainingTokens.size() >= DirWithToks.NumTokens);
965	Directives.emplace_back(Args: DirWithToks.Kind,
966	Args: RemainingTokens.take_front(N: DirWithToks.NumTokens));
967	RemainingTokens = RemainingTokens.drop_front(N: DirWithToks.NumTokens);
968	}
969	assert(RemainingTokens.empty());
970
971	return Error;
972	}
973
974	bool clang::scanSourceForDependencyDirectives(
975	StringRef Input, SmallVectorImpl<dependency_directives_scan::Token> &Tokens,
976	SmallVectorImpl<Directive> &Directives, DiagnosticsEngine *Diags,
977	SourceLocation InputSourceLoc) {
978	return Scanner (Input, Tokens, Diags, InputSourceLoc).scan(Directives);
979	}
980
981	void clang::printDependencyDirectivesAsSource(
982	StringRef Source,
983	ArrayRef<dependency_directives_scan::Directive> Directives,
984	llvm::raw_ostream &OS) {
985	// Add a space separator where it is convenient for testing purposes.
986	auto needsSpaceSeparator =
987	[](tok::TokenKind Prev,
988	const dependency_directives_scan::Token &Tok) -> bool {
989	if (Prev == Tok.Kind)
990	return !Tok.isOneOf(K1: tok::l_paren, Ks: tok::r_paren, Ks: tok::l_square,
991	Ks: tok::r_square);
992	if (Prev == tok::raw_identifier &&
993	Tok.isOneOf(K1: tok::hash, Ks: tok::numeric_constant, Ks: tok::string_literal,
994	Ks: tok::char_constant, Ks: tok::header_name))
995	return true;
996	if (Prev == tok::r_paren &&
997	Tok.isOneOf(K1: tok::raw_identifier, Ks: tok::hash, Ks: tok::string_literal,
998	Ks: tok::char_constant, Ks: tok::unknown))
999	return true;
1000	if (Prev == tok::comma &&
1001	Tok.isOneOf(K1: tok::l_paren, Ks: tok::string_literal, Ks: tok::less))
1002	return true;
1003	return false;
1004	};
1005
1006	for (const dependency_directives_scan::Directive &Directive : Directives) {
1007	if (Directive.Kind == tokens_present_before_eof)
1008	OS << "<TokBeforeEOF>";
1009	std::optional<tok::TokenKind> PrevTokenKind;
1010	for (const dependency_directives_scan::Token &Tok : Directive.Tokens) {
1011	if (PrevTokenKind && needsSpaceSeparator (*PrevTokenKind, Tok))
1012	OS << `' '`;
1013	PrevTokenKind = Tok.Kind;
1014	OS << Source.slice(Start: Tok.Offset, End: Tok.getEnd());
1015	}
1016	}
1017	}
1018

source code of clang/lib/Lex/DependencyDirectivesScanner.cpp