prescan.cpp source code [flang/lib/Parser/prescan.cpp]

1	//===-- lib/Parser/prescan.cpp --------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "prescan.h"
10	#include "flang/Common/idioms.h"
11	#include "flang/Parser/characters.h"
12	#include "flang/Parser/message.h"
13	#include "flang/Parser/preprocessor.h"
14	#include "flang/Parser/source.h"
15	#include "flang/Parser/token-sequence.h"
16	#include "llvm/Support/raw_ostream.h"
17	#include <cstddef>
18	#include <cstring>
19	#include <utility>
20	#include <vector>
21
22	namespace Fortran::parser {
23
24	using common::LanguageFeature;
25
// Upper bound on Prescanner nesting depth.  Prescan() emits a "too many
// nested INCLUDE/#include files, possibly circular" error and stops when
// prescannerNesting_ exceeds this value.
static constexpr int maxPrescannerNesting{100};
27
28	Prescanner::Prescanner(Messages &messages, CookedSource &cooked,
29	Preprocessor &preprocessor, common::LanguageFeatureControl lfc)
30	: messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor},
31	allSources_{preprocessor_.allSources()}, features_{lfc},
32	backslashFreeFormContinuation_{preprocessor.AnyDefinitions()},
33	encoding_{allSources_.encoding()} {}
34
35	Prescanner::Prescanner(const Prescanner &that, Preprocessor &prepro,
36	bool isNestedInIncludeDirective)
37	: messages_{that.messages_}, cooked_{that.cooked_}, preprocessor_{prepro},
38	allSources_{that.allSources_}, features_{that.features_},
39	preprocessingOnly_{that.preprocessingOnly_},
40	expandIncludeLines_{that.expandIncludeLines_},
41	isNestedInIncludeDirective_{isNestedInIncludeDirective},
42	backslashFreeFormContinuation_{that.backslashFreeFormContinuation_},
43	inFixedForm_{that.inFixedForm_},
44	fixedFormColumnLimit_{that.fixedFormColumnLimit_},
45	encoding_{that.encoding_},
46	prescannerNesting_{that.prescannerNesting_ + `1`},
47	skipLeadingAmpersand_{that.skipLeadingAmpersand_},
48	compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_},
49	compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {}
50
// Returns the number of bytes to skip if p points at a space character,
// or 0 if it does not.  Recognizes the ASCII blank, the one-byte LATIN-1
// non-breaking space (0xA0), and the two-byte UTF-8 non-breaking space
// (0xC2 0xA0).  A tab is not a space here; see IsSpaceOrTab().
// p[1] is only read when p[0] is 0xC2, so a terminated buffer is never
// overrun.
static inline int IsSpace(const char *p) {
  if (*p == ' ') {
    return 1;
  } else if (*p == '\xa0') { // LATIN-1 NBSP non-breaking space
    return 1;
  } else if (p[0] == '\xc2' && p[1] == '\xa0') { // UTF-8 NBSP
    return 2;
  } else {
    return 0;
  }
}
63
64	static inline int IsSpaceOrTab(const char *p) {
65	return *p == `'\t'` ? `1` : IsSpace(p);
66	}
67
// True for the characters accepted here as fixed-form comment markers:
// '!', '*', 'C', and 'c'.
static inline constexpr bool IsFixedFormCommentChar(char ch) {
  return ch == '!' || ch == '*' || ch == 'C' || ch == 'c';
}
71
72	static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) {
73	char *p{dir.GetMutableCharData()};
74	char *limit{p + dir.SizeInChars()};
75	for (; p < limit; ++p) {
76	if (*p != `' '`) {
77	CHECK(IsFixedFormCommentChar(ch: *p));
78	*p = `'!'`;
79	return;
80	}
81	}
82	DIE("compiler directive all blank");
83	}
84
85	void Prescanner::Prescan(ProvenanceRange range) {
86	startProvenance_ = range.start();
87	start_ = allSources_.GetSource(range);
88	CHECK(start_);
89	limit_ = start_ + range.size();
90	nextLine_ = start_;
91	const bool beganInFixedForm{inFixedForm_};
92	if (prescannerNesting_ > maxPrescannerNesting) {
93	Say(GetProvenance(start_),
94	"too many nested INCLUDE/#include files, possibly circular"_err_en_US);
95	return;
96	}
97	while (!IsAtEnd()) {
98	Statement();
99	}
100	if (inFixedForm_ != beganInFixedForm) {
101	std::string dir{"!dir$ "};
102	if (beganInFixedForm) {
103	dir += "fixed";
104	} else {
105	dir += "free";
106	}
107	dir += `'\n'`;
108	TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()};
109	tokens.Emit(cooked_);
110	}
111	}
112
113	void Prescanner::Statement() {
114	TokenSequence tokens;
115	const char *statementStart{nextLine_};
116	LineClassification line{ClassifyLine(statementStart)};
117	switch (line.kind) {
118	case LineClassification::Kind::Comment:
119	nextLine_ += line.payloadOffset; // advance to '!' or newline
120	NextLine();
121	return;
122	case LineClassification::Kind::IncludeLine:
123	FortranInclude(quote: nextLine_ + line.payloadOffset);
124	NextLine();
125	return;
126	case LineClassification::Kind::ConditionalCompilationDirective:
127	case LineClassification::Kind::IncludeDirective:
128	preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
129	afterPreprocessingDirective_ = true;
130	skipLeadingAmpersand_ \|= !inFixedForm_;
131	return;
132	case LineClassification::Kind::PreprocessorDirective:
133	preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
134	afterPreprocessingDirective_ = true;
135	// Don't set skipLeadingAmpersand_
136	return;
137	case LineClassification::Kind::DefinitionDirective:
138	preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
139	// Don't set afterPreprocessingDirective_ or skipLeadingAmpersand_
140	return;
141	case LineClassification::Kind::CompilerDirective: {
142	directiveSentinel_ = line.sentinel;
143	CHECK(InCompilerDirective());
144	BeginStatementAndAdvance();
145	if (inFixedForm_) {
146	CHECK(IsFixedFormCommentChar(ch: *at_));
147	} else {
148	at_ += line.payloadOffset;
149	column_ += line.payloadOffset;
150	CHECK(*at_ == `'!'`);
151	}
152	std::optional<int> condOffset;
153	if (InOpenMPConditionalLine()) {
154	condOffset = `2`;
155	} else if (directiveSentinel_[`0`] == `'@'` && directiveSentinel_[`1`] == `'c'` &&
156	directiveSentinel_[`2`] == `'u'` && directiveSentinel_[`3`] == `'f'` &&
157	directiveSentinel_[`4`] == `'\0'`) {
158	// CUDA conditional compilation line.
159	condOffset = `5`;
160	}
161	if (condOffset && !preprocessingOnly_) {
162	at_ += condOffset, column_ += condOffset;
163	if (auto payload{IsIncludeLine(at_)}) {
164	FortranInclude(quote: at_ + *payload);
165	return;
166	}
167	if (inFixedForm_) {
168	LabelField(tokens);
169	}
170	SkipSpaces();
171	} else {
172	// Compiler directive. Emit normalized sentinel, squash following spaces.
173	// Conditional compilation lines (!$) take this path in -E mode too
174	// so that -fopenmp only has to appear on the later compilation.
175	EmitChar(tokens, `'!'`);
176	++at_, ++column_;
177	for (const char sp{directiveSentinel_}; sp != `'\0'`;
178	++sp, ++at_, ++column_) {
179	EmitChar(tokens, *sp);
180	}
181	if (inFixedForm_) {
182	while (column_ < `6`) {
183	if (*at_ == `'\t'`) {
184	tabInCurrentLine_ = true;
185	++at_;
186	for (; column_ < `7`; ++column_) {
187	EmitChar(tokens, `' '`);
188	}
189	} else if (int spaceBytes{IsSpace(p: at_)}) {
190	EmitChar(tokens, `' '`);
191	at_ += spaceBytes;
192	++column_;
193	} else {
194	if (InOpenMPConditionalLine() && column_ == `3` &&
195	IsDecimalDigit(*at_)) {
196	// subtle: !$ in -E mode can't be immediately followed by a digit
197	EmitChar(tokens, `' '`);
198	}
199	break;
200	}
201	}
202	} else if (int spaceBytes{IsSpaceOrTab(p: at_)}) {
203	EmitChar(tokens, `' '`);
204	at_ += spaceBytes, ++column_;
205	}
206	tokens.CloseToken();
207	SkipSpaces();
208	if (InOpenMPConditionalLine() && inFixedForm_ && !tabInCurrentLine_ &&
209	column_ == `6` && *at_ != `'\n'`) {
210	// !$ 0 - turn '0' into a space
211	// !$ 1 - turn '1' into '&'
212	if (int n{IsSpace(p: at_)}; n \|\| *at_ == `'0'`) {
213	at_ += n ? n : `1`;
214	} else {
215	++at_;
216	EmitChar(tokens, `'&'`);
217	tokens.CloseToken();
218	}
219	++column_;
220	SkipSpaces();
221	}
222	}
223	break;
224	}
225	case LineClassification::Kind::Source: {
226	BeginStatementAndAdvance();
227	bool checkLabelField{false};
228	if (inFixedForm_) {
229	if (features_.IsEnabled(LanguageFeature::OldDebugLines) &&
230	(at_ == `'D'` \|\| at_ == `'d'`)) {
231	NextChar();
232	}
233	checkLabelField = true;
234	} else {
235	if (skipLeadingAmpersand_) {
236	skipLeadingAmpersand_ = false;
237	const char *p{SkipWhiteSpace(at_)};
238	if (p < limit_ && *p == `'&'`) {
239	column_ += ++p - at_;
240	at_ = p;
241	}
242	} else {
243	SkipSpaces();
244	}
245	}
246	// Check for a leading identifier that might be a keyword macro
247	// that will expand to anything indicating a non-source line, like
248	// a comment marker or directive sentinel. If so, disable line
249	// continuation, so that NextToken() won't consume anything from
250	// following lines.
251	if (IsLegalIdentifierStart(*at_)) {
252	// TODO: Only bother with these cases when any keyword macro has
253	// been defined with replacement text that could begin a comment
254	// or directive sentinel.
255	const char *p{at_};
256	while (IsLegalInIdentifier(*++p)) {
257	}
258	CharBlock id{at_, static_cast<std::size_t>(p - at_)};
259	if (preprocessor_.IsNameDefined(id) &&
260	!preprocessor_.IsFunctionLikeDefinition(id)) {
261	checkLabelField = false;
262	TokenSequence toks;
263	toks.Put(id, GetProvenance(at_));
264	if (auto replaced{preprocessor_.MacroReplacement(toks, *this)}) {
265	auto newLineClass{ClassifyLine(*replaced, GetCurrentProvenance())};
266	if (newLineClass.kind ==
267	LineClassification::Kind::CompilerDirective) {
268	directiveSentinel_ = newLineClass.sentinel;
269	disableSourceContinuation_ = false;
270	} else {
271	disableSourceContinuation_ = !replaced->empty() &&
272	newLineClass.kind != LineClassification::Kind::Source;
273	}
274	}
275	}
276	}
277	if (checkLabelField) {
278	LabelField(tokens);
279	}
280	} break;
281	}
282
283	while (NextToken(tokens)) {
284	}
285	if (continuationLines_ > `255`) {
286	if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
287	Say(common::LanguageFeature::MiscSourceExtensions,
288	GetProvenance(statementStart),
289	"%d continuation lines is more than the Fortran standard allows"_port_en_US,
290	continuationLines_);
291	}
292	}
293
294	Provenance newlineProvenance{GetCurrentProvenance()};
295	if (std::optional<TokenSequence> preprocessed{
296	preprocessor_.MacroReplacement(tokens, *this)}) {
297	// Reprocess the preprocessed line.
298	LineClassification ppl{ClassifyLine(*preprocessed, newlineProvenance)};
299	switch (ppl.kind) {
300	case LineClassification::Kind::Comment:
301	break;
302	case LineClassification::Kind::IncludeLine:
303	FortranInclude(quote: preprocessed->TokenAt(`0`).begin() + ppl.payloadOffset);
304	break;
305	case LineClassification::Kind::ConditionalCompilationDirective:
306	case LineClassification::Kind::IncludeDirective:
307	case LineClassification::Kind::DefinitionDirective:
308	case LineClassification::Kind::PreprocessorDirective:
309	if (features_.ShouldWarn(common::UsageWarning::Preprocessing)) {
310	Say(common::UsageWarning::Preprocessing,
311	preprocessed->GetProvenanceRange(),
312	"Preprocessed line resembles a preprocessor directive"_warn_en_US);
313	}
314	CheckAndEmitLine(preprocessed->ToLowerCase(), newlineProvenance);
315	break;
316	case LineClassification::Kind::CompilerDirective:
317	if (preprocessed->HasRedundantBlanks()) {
318	preprocessed->RemoveRedundantBlanks();
319	}
320	while (CompilerDirectiveContinuation(*preprocessed, ppl.sentinel)) {
321	newlineProvenance = GetCurrentProvenance();
322	}
323	NormalizeCompilerDirectiveCommentMarker(*preprocessed);
324	preprocessed->ToLowerCase();
325	SourceFormChange(preprocessed->ToString());
326	CheckAndEmitLine(
327	preprocessed->ClipComment(*this, true / skip first ! /),
328	newlineProvenance);
329	break;
330	case LineClassification::Kind::Source:
331	if (inFixedForm_) {
332	if (!preprocessingOnly_ && preprocessed->HasBlanks()) {
333	preprocessed->RemoveBlanks();
334	}
335	} else {
336	while (SourceLineContinuation(*preprocessed)) {
337	newlineProvenance = GetCurrentProvenance();
338	}
339	if (preprocessed->HasRedundantBlanks()) {
340	preprocessed->RemoveRedundantBlanks();
341	}
342	}
343	CheckAndEmitLine(
344	preprocessed->ToLowerCase().ClipComment(*this), newlineProvenance);
345	break;
346	}
347	} else { // no macro replacement
348	if (line.kind == LineClassification::Kind::CompilerDirective) {
349	while (CompilerDirectiveContinuation(tokens, line.sentinel)) {
350	newlineProvenance = GetCurrentProvenance();
351	}
352	if (preprocessingOnly_ && inFixedForm_ && InOpenMPConditionalLine() &&
353	nextLine_ < limit_) {
354	// In -E mode, when the line after !$ conditional compilation is a
355	// regular fixed form continuation line, append a '&' to the line.
356	const char *p{nextLine_};
357	int col{`1`};
358	while (int n{IsSpace(p)}) {
359	if (*p == `'\t'`) {
360	break;
361	}
362	p += n;
363	++col;
364	}
365	if (col == `6` && p != `'0'` && p != `'\t'` && *p != `'\n'`) {
366	EmitChar(tokens, `'&'`);
367	tokens.CloseToken();
368	}
369	}
370	tokens.ToLowerCase();
371	SourceFormChange(tokens.ToString());
372	} else { // Kind::Source
373	tokens.ToLowerCase();
374	if (inFixedForm_) {
375	EnforceStupidEndStatementRules(tokens);
376	}
377	}
378	CheckAndEmitLine(tokens, newlineProvenance);
379	}
380	directiveSentinel_ = nullptr;
381	}
382
383	void Prescanner::CheckAndEmitLine(
384	TokenSequence &tokens, Provenance newlineProvenance) {
385	tokens.CheckBadFortranCharacters(
386	messages_, *this, disableSourceContinuation_ \|\| preprocessingOnly_);
387	// Parenthesis nesting check does not apply while any #include is
388	// active, nor on the lines before and after a top-level #include,
389	// nor before or after conditional source.
390	// Applications play shenanigans with line continuation before and
391	// after #include'd subprogram argument lists and conditional source.
392	if (!preprocessingOnly_ && !isNestedInIncludeDirective_ && !omitNewline_ &&
393	!afterPreprocessingDirective_ && tokens.BadlyNestedParentheses() &&
394	!preprocessor_.InConditional()) {
395	if (nextLine_ < limit_ && IsPreprocessorDirectiveLine(nextLine_)) {
396	// don't complain
397	} else {
398	tokens.CheckBadParentheses(messages_);
399	}
400	}
401	tokens.Emit(cooked_);
402	if (omitNewline_) {
403	omitNewline_ = false;
404	} else {
405	cooked_.Put(`'\n'`, newlineProvenance);
406	afterPreprocessingDirective_ = false;
407	}
408	}
409
410	TokenSequence Prescanner::TokenizePreprocessorDirective() {
411	CHECK(!IsAtEnd() && !inPreprocessorDirective_);
412	inPreprocessorDirective_ = true;
413	BeginStatementAndAdvance();
414	TokenSequence tokens;
415	while (NextToken(tokens)) {
416	}
417	inPreprocessorDirective_ = false;
418	return tokens;
419	}
420
421	void Prescanner::NextLine() {
422	void vstart{static_cast<void* >(const_cast<char* *>(nextLine_))};
423	void *v{std::memchr(s: vstart, c: `'\n'`, n: limit_ - nextLine_)};
424	if (!v) {
425	nextLine_ = limit_;
426	} else {
427	const char nl{const_cast<const* char >(static_cast<char* *>(v))};
428	nextLine_ = nl + `1`;
429	}
430	}
431
432	void Prescanner::LabelField(TokenSequence &token) {
433	int outCol{`1`};
434	const char *start{at_};
435	std::optional<int> badColumn;
436	for (; *at_ != `'\n'` && column_ <= `6`; ++at_) {
437	if (*at_ == `'\t'`) {
438	++at_;
439	column_ = `7`;
440	break;
441	}
442	if (int n{IsSpace(p: at_)}; n == `0` &&
443	!(at_ == `'0'` && column_ == `6`)) { // '0' in column 6 becomes space*
444	EmitChar(token, *at_);
445	++outCol;
446	if (!badColumn && (column_ == `6` \|\| !IsDecimalDigit(*at_))) {
447	badColumn = column_;
448	}
449	}
450	++column_;
451	}
452	if (badColumn && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) {
453	if ((prescannerNesting_ > `0` && *badColumn == `6` &&
454	cooked_.BufferedBytes() == firstCookedCharacterOffset_) \|\|
455	afterPreprocessingDirective_) {
456	// This is the first source line in #include'd text or conditional
457	// code under #if, or the first source line after such.
458	// If it turns out that the preprocessed text begins with a
459	// fixed form continuation line, the newline at the end
460	// of the latest source line beforehand will be deleted in
461	// CookedSource::Marshal().
462	cooked_.MarkPossibleFixedFormContinuation();
463	} else if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
464	Say(common::UsageWarning::Scanning, GetProvenance(start + *badColumn - `1`),
465	*badColumn == `6`
466	? "Statement should not begin with a continuation line"_warn_en_US
467	: "Character in fixed-form label field must be a digit"_warn_en_US);
468	}
469	token.clear();
470	if (*badColumn < `6`) {
471	at_ = start;
472	column_ = `1`;
473	return;
474	}
475	outCol = `1`;
476	}
477	if (outCol == `1`) { // empty label field
478	// Emit a space so that, if the line is rescanned after preprocessing,
479	// a leading 'C' or 'D' won't be left-justified and then accidentally
480	// misinterpreted as a comment card.
481	EmitChar(token, `' '`);
482	++outCol;
483	}
484	token.CloseToken();
485	SkipToNextSignificantCharacter();
486	if (IsDecimalDigit(*at_)) {
487	if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
488	Say(common::LanguageFeature::MiscSourceExtensions, GetCurrentProvenance(),
489	"Label digit is not in fixed-form label field"_port_en_US);
490	}
491	}
492	}
493
494	// 6.3.3.5: A program unit END statement, or any other statement whose
495	// initial line resembles an END statement, shall not be continued in
496	// fixed form source.
497	void Prescanner::EnforceStupidEndStatementRules(const TokenSequence &tokens) {
498	CharBlock cBlock{tokens.ToCharBlock()};
499	const char *str{cBlock.begin()};
500	std::size_t n{cBlock.size()};
501	if (n < `3`) {
502	return;
503	}
504	std::size_t j{`0`};
505	for (; j < n && (str[j] == `' '` \|\| (str[j] >= `'0'` && str[j] <= `'9'`)); ++j) {
506	}
507	if (j + `3` > n \|\| std::memcmp(s1: str + j, s2: "end", n: `3`) != `0`) {
508	return;
509	}
510	// It starts with END, possibly after a label.
511	auto start{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
512	auto end{allSources_.GetSourcePosition(tokens.GetCharProvenance(n - `1`))};
513	if (!start \|\| !end) {
514	return;
515	}
516	if (&start->sourceFile == &end->sourceFile && start->line == end->line) {
517	return; // no continuation
518	}
519	j += `3`;
520	static const char *const prefixes[]{"program", "subroutine", "function",
521	"blockdata", "module", "submodule", nullptr};
522	bool isPrefix{j == n \|\| !IsLegalInIdentifier(str[j])}; // prefix is END
523	std::size_t endOfPrefix{j - `1`};
524	for (const char *const p{prefixes}; p; ++p) {
525	std::size_t pLen{std::strlen(s: *p)};
526	if (j + pLen <= n && std::memcmp(s1: str + j, s2: *p, n: pLen) == `0`) {
527	isPrefix = true; // END thing as prefix
528	j += pLen;
529	endOfPrefix = j - `1`;
530	for (; j < n && IsLegalInIdentifier(str[j]); ++j) {
531	}
532	break;
533	}
534	}
535	if (isPrefix) {
536	auto range{tokens.GetTokenProvenanceRange(`1`)};
537	if (j == n) { // END or END thing [name]
538	Say(range,
539	"Program unit END statement may not be continued in fixed form source"_err_en_US);
540	} else {
541	auto endOfPrefixPos{
542	allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))};
543	auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
544	if (endOfPrefixPos && next &&
545	&endOfPrefixPos->sourceFile == &start->sourceFile &&
546	endOfPrefixPos->line == start->line &&
547	(&next->sourceFile != &start->sourceFile \|\|
548	next->line != start->line)) {
549	Say(range,
550	"Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US);
551	}
552	}
553	}
554	}
555
556	void Prescanner::SkipToEndOfLine() {
557	while (*at_ != `'\n'`) {
558	++at_, ++column_;
559	}
560	}
561
562	bool Prescanner::MustSkipToEndOfLine() const {
563	if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) {
564	return true; // skip over ignored columns in right margin (73:80)
565	} else if (*at_ == `'!'` && !inCharLiteral_ &&
566	(!inFixedForm_ \|\| tabInCurrentLine_ \|\| column_ != `6`)) {
567	return !IsCompilerDirectiveSentinel(p: at_);
568	} else {
569	return false;
570	}
571	}
572
573	void Prescanner::NextChar() {
574	CHECK(*at_ != `'\n'`);
575	int n{IsSpace(p: at_)};
576	at_ += n ? n : `1`;
577	++column_;
578	while (at_[`0`] == `'\xef'` && at_[`1`] == `'\xbb'` && at_[`2`] == `'\xbf'`) {
579	// UTF-8 byte order mark - treat this file as UTF-8
580	at_ += `3`;
581	encoding_ = Encoding::UTF_8;
582	}
583	SkipToNextSignificantCharacter();
584	}
585
586	// Skip everything that should be ignored until the next significant
587	// character is reached; handles C-style comments in preprocessing
588	// directives, Fortran ! comments, stuff after the right margin in
589	// fixed form, and all forms of line continuation.
590	bool Prescanner::SkipToNextSignificantCharacter() {
591	if (inPreprocessorDirective_) {
592	SkipCComments();
593	return false;
594	} else {
595	auto anyContinuationLine{false};
596	bool mightNeedSpace{false};
597	if (MustSkipToEndOfLine()) {
598	SkipToEndOfLine();
599	} else {
600	mightNeedSpace = *at_ == `'\n'`;
601	}
602	for (; Continuation(mightNeedFixedFormSpace: mightNeedSpace); mightNeedSpace = false) {
603	anyContinuationLine = true;
604	++continuationLines_;
605	if (MustSkipToEndOfLine()) {
606	SkipToEndOfLine();
607	}
608	}
609	if (*at_ == `'\t'`) {
610	tabInCurrentLine_ = true;
611	}
612	return anyContinuationLine;
613	}
614	}
615
616	void Prescanner::SkipCComments() {
617	while (true) {
618	if (IsCComment(p: at_)) {
619	if (const char *after{SkipCComment(at_)}) {
620	column_ += after - at_;
621	// May have skipped over one or more newlines; relocate the start of
622	// the next line.
623	nextLine_ = at_ = after;
624	NextLine();
625	} else {
626	// Don't emit any messages about unclosed C-style comments, because
627	// the sequence / can appear legally in a FORMAT statement. There's*
628	// no ambiguity, since the sequence / cannot appear legally.*
629	break;
630	}
631	} else if (inPreprocessorDirective_ && at_[`0`] == `'\\'` && at_ + `2` < limit_ &&
632	at_[`1`] == `'\n'` && !IsAtEnd()) {
633	BeginSourceLineAndAdvance();
634	} else {
635	break;
636	}
637	}
638	}
639
640	void Prescanner::SkipSpaces() {
641	while (IsSpaceOrTab(p: at_)) {
642	NextChar();
643	}
644	insertASpace_ = false;
645	}
646
647	const char Prescanner::SkipWhiteSpace(const* char *p) {
648	while (int n{IsSpaceOrTab(p)}) {
649	p += n;
650	}
651	return p;
652	}
653
654	const char *Prescanner::SkipWhiteSpaceIncludingEmptyMacros(
655	const char p) const* {
656	while (true) {
657	if (int n{IsSpaceOrTab(p)}) {
658	p += n;
659	} else if (preprocessor_.AnyDefinitions() && IsLegalIdentifierStart(*p)) {
660	// Skip keyword macros with empty definitions
661	const char *q{p + `1`};
662	while (IsLegalInIdentifier(*q)) {
663	++q;
664	}
665	if (preprocessor_.IsNameDefinedEmpty(
666	CharBlock{p, static_cast<std::size_t>(q - p)})) {
667	p = q;
668	} else {
669	break;
670	}
671	} else {
672	break;
673	}
674	}
675	return p;
676	}
677
678	const char Prescanner::SkipWhiteSpaceAndCComments(const* char p) const* {
679	while (true) {
680	if (int n{IsSpaceOrTab(p)}) {
681	p += n;
682	} else if (IsCComment(p)) {
683	if (const char *after{SkipCComment(p)}) {
684	p = after;
685	} else {
686	break;
687	}
688	} else {
689	break;
690	}
691	}
692	return p;
693	}
694
695	const char Prescanner::SkipCComment(const* char p) const* {
696	char star{`' '`}, slash{`' '`};
697	p += `2`;
698	while (star != `'*'` \|\| slash != `'/'`) {
699	if (p >= limit_) {
700	return nullptr; // signifies an unterminated comment
701	}
702	star = slash;
703	slash = *p++;
704	}
705	return p;
706	}
707
708	bool Prescanner::NextToken(TokenSequence &tokens) {
709	CHECK(at_ >= start_ && at_ < limit_);
710	if (InFixedFormSource() && !preprocessingOnly_) {
711	SkipSpaces();
712	} else {
713	if (*at_ == `'/'` && IsCComment(p: at_)) {
714	// Recognize and skip over classic C style /comments/ when
715	// outside a character literal.
716	if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) {
717	Say(LanguageFeature::ClassicCComments, GetCurrentProvenance(),
718	"nonstandard usage: C-style comment"_port_en_US);
719	}
720	SkipCComments();
721	}
722	if (IsSpaceOrTab(p: at_)) {
723	// Compress free-form white space into a single space character.
724	const auto theSpace{at_};
725	char previous{at_ <= start_ ? `' '` : at_[-`1`]};
726	NextChar();
727	SkipSpaces();
728	if (*at_ == `'\n'` && !omitNewline_) {
729	// Discard white space at the end of a line.
730	} else if (!inPreprocessorDirective_ &&
731	(previous == `'('` \|\| at_ == `'('` \|\| at_ == `')'`)) {
732	// Discard white space before/after '(' and before ')', unless in a
733	// preprocessor directive. This helps yield space-free contiguous
734	// names for generic interfaces like OPERATOR( + ) and
735	// READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg).
736	// This has the effect of silently ignoring the illegal spaces in
737	// the array constructor ( /1,2/ ) but that seems benign; it's
738	// hard to avoid that while still removing spaces from OPERATOR( / )
739	// and OPERATOR( // ).
740	} else {
741	// Preserve the squashed white space as a single space character.
742	tokens.PutNextTokenChar(`' '`, GetProvenance(theSpace));
743	tokens.CloseToken();
744	return true;
745	}
746	}
747	}
748	if (insertASpace_) {
749	tokens.PutNextTokenChar(`' '`, spaceProvenance_);
750	insertASpace_ = false;
751	}
752	if (*at_ == `'\n'`) {
753	return false;
754	}
755	const char *start{at_};
756	if (at_ == `'\''` \|\| at_ == `'"'`) {
757	QuotedCharacterLiteral(tokens, start);
758	preventHollerith_ = false;
759	} else if (IsDecimalDigit(*at_)) {
760	int n{`0`}, digits{`0`};
761	static constexpr int maxHollerith{`256` /lines/ * (`132` - `6` /columns/)};
762	do {
763	if (n < maxHollerith) {
764	n = `10` * n + DecimalDigitValue(*at_);
765	}
766	EmitCharAndAdvance(tokens, *at_);
767	++digits;
768	if (InFixedFormSource()) {
769	SkipSpaces();
770	}
771	} while (IsDecimalDigit(*at_));
772	if ((at_ == `'h'` \|\| at_ == `'H'`) && n > `0` && n < maxHollerith &&
773	!preventHollerith_) {
774	Hollerith(tokens, n, start);
775	} else if (*at_ == `'.'`) {
776	while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
777	}
778	HandleExponentAndOrKindSuffix(tokens);
779	} else if (HandleExponentAndOrKindSuffix(tokens)) {
780	} else if (digits == `1` && n == `0` && (at_ == `'x'` \|\| at_ == `'X'`) &&
781	inPreprocessorDirective_) {
782	do {
783	EmitCharAndAdvance(tokens, *at_);
784	} while (IsHexadecimalDigit(*at_));
785	} else if (at_[`0`] == `'_'` && (at_[`1`] == `'\''` \|\| at_[`1`] == `'"'`)) { // 4_"..."
786	EmitCharAndAdvance(tokens, *at_);
787	QuotedCharacterLiteral(tokens, start);
788	} else if (IsLetter(*at_) && !preventHollerith_ &&
789	parenthesisNesting_ > `0` &&
790	!preprocessor_.IsNameDefined(CharBlock{at_, `1`})) {
791	// Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
792	// we don't misrecognize I9HHOLLERITH as an identifier in the next case.
793	EmitCharAndAdvance(tokens, *at_);
794	}
795	preventHollerith_ = false;
796	} else if (*at_ == `'.'`) {
797	char nch{EmitCharAndAdvance(tokens, `'.'`)};
798	if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) {
799	while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
800	}
801	HandleExponentAndOrKindSuffix(tokens);
802	} else if (nch == `'.'` && EmitCharAndAdvance(tokens, `'.'`) == `'.'`) {
803	EmitCharAndAdvance(tokens, `'.'`); // variadic macro definition ellipsis
804	}
805	preventHollerith_ = false;
806	} else if (IsLegalInIdentifier(*at_)) {
807	std::size_t parts{`1`};
808	bool anyDefined{false};
809	bool hadContinuation{false};
810	// Subtlety: When an identifier is split across continuation lines,
811	// its parts are kept as distinct pp-tokens if that macro replacement
812	// should operate on them independently. This trick accommodates the
813	// historic practice of using line continuation for token pasting after
814	// replacement.
815	// In free form, the macro to be replaced must have been preceded
816	// by '&' and followed by either '&' or, if last, the end of a line.
817	// call & call foo& call foo&
818	// &MACRO& OR &MACRO& OR &MACRO
819	// &foo(...) &(...)
820	do {
821	EmitChar(tokens, *at_);
822	++at_, ++column_;
823	hadContinuation = SkipToNextSignificantCharacter();
824	if (hadContinuation && IsLegalIdentifierStart(*at_)) {
825	// Continued identifier
826	tokens.CloseToken();
827	++parts;
828	if (!anyDefined &&
829	(parts > `2` \|\| inFixedForm_ \|\|
830	(start > start_ && start[-`1`] == `'&'`)) &&
831	preprocessor_.IsNameDefined(
832	tokens.TokenAt(tokens.SizeInTokens() - `1`))) {
833	anyDefined = true;
834	}
835	}
836	} while (IsLegalInIdentifier(*at_));
837	if (!anyDefined && parts > `1`) {
838	tokens.CloseToken();
839	char after{*SkipWhiteSpace(p: at_)};
840	anyDefined = (hadContinuation \|\| after == `'\n'` \|\| after == `'&'`) &&
841	preprocessor_.IsNameDefined(
842	tokens.TokenAt(tokens.SizeInTokens() - `1`));
843	tokens.ReopenLastToken();
844	}
845	if (!anyDefined) {
846	// If no part was a defined macro, combine the parts into one so that
847	// the combination itself can be subject to macro replacement.
848	while (parts-- > `1`) {
849	tokens.ReopenLastToken();
850	}
851	}
852	if (InFixedFormSource()) {
853	SkipSpaces();
854	}
855	if ((at_ == `'\''` \|\| at_ == `'"'`) &&
856	tokens.CharAt(tokens.SizeInChars() - `1`) == `'_'`) { // kind_"..."
857	QuotedCharacterLiteral(tokens, start);
858	preventHollerith_ = false;
859	} else {
860	preventHollerith_ = true; // DO 10 H = ...
861	}
862	} else if (at_ == `''`) {
863	if (EmitCharAndAdvance(tokens, `''`) == `''`) {
864	EmitCharAndAdvance(tokens, `'*'`);
865	} else {
866	// Subtle ambiguity:
867	// CHARACTER2H declares H because 2 is a kind specifier
868	// DATAC/N2H / is repeated Hollerith*
869	preventHollerith_ = !slashInCurrentStatement_;
870	}
871	} else {
872	char ch{*at_};
873	if (ch == `'('`) {
874	if (parenthesisNesting_++ == `0`) {
875	isPossibleMacroCall_ = tokens.SizeInTokens() > `0` &&
876	preprocessor_.IsFunctionLikeDefinition(
877	tokens.TokenAt(tokens.SizeInTokens() - `1`));
878	}
879	} else if (ch == `')'` && parenthesisNesting_ > `0`) {
880	--parenthesisNesting_;
881	}
882	char nch{EmitCharAndAdvance(tokens, ch)};
883	preventHollerith_ = false;
884	if ((nch == `'='` &&
885	(ch == `'<'` \|\| ch == `'>'` \|\| ch == `'/'` \|\| ch == `'='` \|\| ch == `'!'`)) \|\|
886	(ch == nch &&
887	(ch == `'/'` \|\| ch == `':'` \|\| ch == `'*'` \|\| ch == `'#'` \|\| ch == `'&'` \|\|
888	ch == `'\|'` \|\| ch == `'<'` \|\| ch == `'>'`)) \|\|
889	(ch == `'='` && nch == `'>'`)) {
890	// token comprises two characters
891	EmitCharAndAdvance(tokens, nch);
892	} else if (ch == `'/'`) {
893	slashInCurrentStatement_ = true;
894	} else if (ch == `';'` && InFixedFormSource()) {
895	SkipSpaces();
896	if (IsDecimalDigit(*at_)) {
897	if (features_.ShouldWarn(
898	common::LanguageFeature::MiscSourceExtensions)) {
899	Say(common::LanguageFeature::MiscSourceExtensions,
900	GetProvenanceRange(at_, at_ + `1`),
901	"Label should be in the label field"_port_en_US);
902	}
903	}
904	}
905	}
906	tokens.CloseToken();
907	return true;
908	}
909
910	bool Prescanner::HandleExponent(TokenSequence &tokens) {
911	if (char ed{ToLowerCaseLetter(*at_)}; ed == `'e'` \|\| ed == `'d'`) {
912	// Do some look-ahead to ensure that this 'e'/'d' is an exponent,
913	// not the start of an identifier that could be a macro.
914	const char *startAt{at_};
915	int startColumn{column_};
916	TokenSequence possible;
917	EmitCharAndAdvance(possible, *at_);
918	if (at_ == `'+'` \|\| at_ == `'-'`) {
919	EmitCharAndAdvance(possible, *at_);
920	}
921	if (IsDecimalDigit(at_)) { // it's an exponent; scan it*
922	while (IsDecimalDigit(*at_)) {
923	EmitCharAndAdvance(possible, *at_);
924	}
925	possible.CloseToken();
926	tokens.AppendRange(possible, `0`); // appends to current token
927	return true;
928	}
929	// Not an exponent; backtrack
930	at_ = startAt;
931	column_ = startColumn;
932	}
933	return false;
934	}
935
936	bool Prescanner::HandleKindSuffix(TokenSequence &tokens) {
937	if (*at_ != `'_'`) {
938	return false;
939	}
940	TokenSequence withUnderscore, separate;
941	EmitChar(withUnderscore, `'_'`);
942	EmitCharAndAdvance(separate, `'_'`);
943	if (IsLegalInIdentifier(*at_)) {
944	separate.CloseToken();
945	EmitChar(withUnderscore, *at_);
946	EmitCharAndAdvance(separate, *at_);
947	while (IsLegalInIdentifier(*at_)) {
948	EmitChar(withUnderscore, *at_);
949	EmitCharAndAdvance(separate, *at_);
950	}
951	}
952	withUnderscore.CloseToken();
953	separate.CloseToken();
954	tokens.CloseToken();
955	if (separate.SizeInTokens() == `2` &&
956	preprocessor_.IsNameDefined(separate.TokenAt(`1`)) &&
957	!preprocessor_.IsNameDefined(withUnderscore.ToCharBlock())) {
958	// "_foo" is not defined, but "foo" is
959	tokens.CopyAll(separate); // '_' "foo"
960	} else {
961	tokens.CopyAll(withUnderscore); // "_foo"
962	}
963	return true;
964	}
965
966	bool Prescanner::HandleExponentAndOrKindSuffix(TokenSequence &tokens) {
967	bool hadExponent{HandleExponent(tokens)};
968	if (HandleKindSuffix(tokens)) {
969	return true;
970	} else {
971	return hadExponent;
972	}
973	}
974
975	void Prescanner::QuotedCharacterLiteral(
976	TokenSequence &tokens, const char *start) {
977	char quote{*at_};
978	const char *end{at_ + `1`};
979	inCharLiteral_ = true;
980	continuationInCharLiteral_ = true;
981	const auto emit{[&](char ch) { EmitChar(tokens, ch); }};
982	const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }};
983	bool isEscaped{false};
984	bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)};
985	while (true) {
986	if (*at_ == `'\\'`) {
987	if (escapesEnabled) {
988	isEscaped = !isEscaped;
989	} else {
990	// The parser always processes escape sequences, so don't confuse it
991	// when escapes are disabled.
992	insert (`'\\'`);
993	}
994	} else {
995	isEscaped = false;
996	}
997	if (*at_ == `'\n'`) {
998	if (inPreprocessorDirective_) {
999	EmitQuotedChar(static_cast<unsigned char>(at_), emit, insert, false*,
1000	Encoding::LATIN_1);
1001	} else if (InCompilerDirective() && preprocessingOnly_) {
1002	// don't complain about -E output of !$, do it in later compilation
1003	} else {
1004	Say(GetProvenanceRange(start, end),
1005	"Incomplete character literal"_err_en_US);
1006	}
1007	break;
1008	}
1009	EmitQuotedChar(static_cast<unsigned char>(at_), emit, insert, false*,
1010	Encoding::LATIN_1);
1011	while (PadOutCharacterLiteral(tokens)) {
1012	}
1013	// Here's a weird edge case. When there's a two or more following
1014	// continuation lines at this point, and the entire significant part of
1015	// the next continuation line is the name of a keyword macro, replace
1016	// it in the character literal with its definition. Example:
1017	// #define FOO foo
1018	// subroutine subr() bind(c, name="my_&
1019	// &FOO&
1020	// &_bar") ...
1021	// produces a binding name of "my_foo_bar".
1022	while (at_[`1`] == `'&'` && nextLine_ < limit_ && !InFixedFormSource()) {
1023	const char *idStart{nextLine_};
1024	if (const char amper{SkipWhiteSpace(p: nextLine_)}; amper == `'&'`) {
1025	idStart = amper + `1`;
1026	}
1027	if (IsLegalIdentifierStart(*idStart)) {
1028	std::size_t idLen{`1`};
1029	for (; IsLegalInIdentifier(idStart[idLen]); ++idLen) {
1030	}
1031	if (idStart[idLen] == `'&'`) {
1032	CharBlock id{idStart, idLen};
1033	if (preprocessor_.IsNameDefined(id)) {
1034	TokenSequence ppTokens;
1035	ppTokens.Put(id, GetProvenance(idStart));
1036	if (auto replaced{
1037	preprocessor_.MacroReplacement(ppTokens, *this)}) {
1038	tokens.CopyAll(*replaced);
1039	at_ = &idStart[idLen - `1`];
1040	NextLine();
1041	continue; // try again on the next line
1042	}
1043	}
1044	}
1045	}
1046	break;
1047	}
1048	end = at_ + `1`;
1049	NextChar();
1050	if (*at_ == quote && !isEscaped) {
1051	// A doubled unescaped quote mark becomes a single instance of that
1052	// quote character in the literal (later). There can be spaces between
1053	// the quotes in fixed form source.
1054	EmitChar(tokens, quote);
1055	inCharLiteral_ = false; // for cases like print , '...'!comment*
1056	NextChar();
1057	if (InFixedFormSource()) {
1058	SkipSpaces();
1059	}
1060	if (*at_ != quote) {
1061	break;
1062	}
1063	inCharLiteral_ = true;
1064	}
1065	}
1066	continuationInCharLiteral_ = false;
1067	inCharLiteral_ = false;
1068	}
1069
1070	void Prescanner::Hollerith(
1071	TokenSequence &tokens, int count, const char *start) {
1072	inCharLiteral_ = true;
1073	CHECK(at_ == `'h'` \|\| at_ == `'H'`);
1074	EmitChar(tokens, `'H'`);
1075	while (count-- > `0`) {
1076	if (PadOutCharacterLiteral(tokens)) {
1077	} else if (*at_ == `'\n'`) {
1078	if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
1079	Say(common::UsageWarning::Scanning, GetProvenanceRange(start, at_),
1080	"Possible truncated Hollerith literal"_warn_en_US);
1081	}
1082	break;
1083	} else {
1084	NextChar();
1085	// Each multi-byte character encoding counts as a single character.
1086	// No escape sequences are recognized.
1087	// Hollerith is always emitted to the cooked character
1088	// stream in UTF-8.
1089	DecodedCharacter decoded{DecodeCharacter(
1090	encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)};
1091	if (decoded.bytes > `0`) {
1092	EncodedCharacter utf8{
1093	EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)};
1094	for (int j{`0`}; j < utf8.bytes; ++j) {
1095	EmitChar(tokens, utf8.buffer[j]);
1096	}
1097	at_ += decoded.bytes - `1`;
1098	} else {
1099	Say(GetProvenanceRange(start, at_),
1100	"Bad character in Hollerith literal"_err_en_US);
1101	break;
1102	}
1103	}
1104	}
1105	if (*at_ != `'\n'`) {
1106	NextChar();
1107	}
1108	inCharLiteral_ = false;
1109	}
1110
1111	// In fixed form, source card images must be processed as if they were at
1112	// least 72 columns wide, at least in character literal contexts.
1113	bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) {
1114	while (inFixedForm_ && !tabInCurrentLine_ && at_[`1`] == `'\n'`) {
1115	if (column_ < fixedFormColumnLimit_) {
1116	tokens.PutNextTokenChar(`' '`, spaceProvenance_);
1117	++column_;
1118	return true;
1119	}
1120	if (!FixedFormContinuation(mightNeedSpace: false /no need to insert space/) \|\|
1121	tabInCurrentLine_) {
1122	return false;
1123	}
1124	CHECK(column_ == `7`);
1125	--at_; // point to column 6 of continuation line
1126	column_ = `6`;
1127	}
1128	return false;
1129	}
1130
1131	static bool IsAtProcess(const char *p) {
1132	static const char pAtProc[]{"process"};
1133	for (std::size_t i{`0`}; i < sizeof pAtProc - `1`; ++i) {
1134	if (ToLowerCaseLetter(*++p) != pAtProc[i])
1135	return false;
1136	}
1137	return true;
1138	}
1139
1140	bool Prescanner::IsFixedFormCommentLine(const char start) const* {
1141	const char *p{start};
1142	// The @process directive must start in column 1.
1143	if (*p == `'@'` && IsAtProcess(p)) {
1144	return true;
1145	}
1146	if (IsFixedFormCommentChar(p) \|\| p == `'%'` \|\| // VAX %list, %eject, &c.
1147	((p == `'D'` \|\| p == `'d'`) &&
1148	!features_.IsEnabled(LanguageFeature::OldDebugLines))) {
1149	return true;
1150	}
1151	bool anyTabs{false};
1152	while (true) {
1153	if (int n{IsSpace(p)}) {
1154	p += n;
1155	} else if (*p == `'\t'`) {
1156	anyTabs = true;
1157	++p;
1158	} else if (*p == `'0'` && !anyTabs && p == start + `5`) {
1159	++p; // 0 in column 6 must treated as a space
1160	} else {
1161	break;
1162	}
1163	}
1164	if (!anyTabs && p >= start + fixedFormColumnLimit_) {
1165	return true;
1166	}
1167	if (*p == `'!'` && !inCharLiteral_ && (anyTabs \|\| p != start + `5`)) {
1168	return true;
1169	}
1170	return *p == `'\n'`;
1171	}
1172
1173	const char Prescanner::IsFreeFormComment(const* char p) const* {
1174	p = SkipWhiteSpaceAndCComments(p);
1175	if (p == `'!'` \|\| p == `'\n'`) {
1176	return p;
1177	} else if (*p == `'@'`) {
1178	return IsAtProcess(p) ? p : nullptr;
1179	} else {
1180	return nullptr;
1181	}
1182	}
1183
1184	std::optional<std::size_t> Prescanner::IsIncludeLine(const char start) const* {
1185	if (!expandIncludeLines_) {
1186	return std::nullopt;
1187	}
1188	const char *p{SkipWhiteSpace(p: start)};
1189	if (*p == `'0'` && inFixedForm_ && p == start + `5`) {
1190	// Accept " 0INCLUDE" in fixed form.
1191	p = SkipWhiteSpace(p: p + `1`);
1192	}
1193	for (const char q{"include"}; q; ++q) {
1194	if (ToLowerCaseLetter(p) != q) {
1195	return std::nullopt;
1196	}
1197	p = SkipWhiteSpace(p: p + `1`);
1198	}
1199	if (IsDecimalDigit(p)) { // accept & ignore a numeric kind prefix*
1200	for (p = SkipWhiteSpace(p: p + `1`); IsDecimalDigit(*p);
1201	p = SkipWhiteSpace(p: p + `1`)) {
1202	}
1203	if (*p != `'_'`) {
1204	return std::nullopt;
1205	}
1206	p = SkipWhiteSpace(p: p + `1`);
1207	}
1208	if (p == `'"'` \|\| p == `'\''`) {
1209	return {p - start};
1210	}
1211	return std::nullopt;
1212	}
1213
1214	void Prescanner::FortranInclude(const char *firstQuote) {
1215	const char *p{firstQuote};
1216	while (p != `'"'` && p != `'\''`) {
1217	++p;
1218	}
1219	char quote{*p};
1220	std::string path;
1221	for (++p; *p != `'\n'`; ++p) {
1222	if (*p == quote) {
1223	if (p[`1`] != quote) {
1224	break;
1225	}
1226	++p;
1227	}
1228	path += *p;
1229	}
1230	if (*p != quote) {
1231	Say(GetProvenanceRange(firstQuote, p),
1232	"malformed path name string"_err_en_US);
1233	return;
1234	}
1235	p = SkipWhiteSpace(p: p + `1`);
1236	if (p != `'\n'` && p != `'!'`) {
1237	const char *garbage{p};
1238	for (; p != `'\n'` && p != `'!'`; ++p) {
1239	}
1240	if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
1241	Say(common::UsageWarning::Scanning, GetProvenanceRange(garbage, p),
1242	"excess characters after path name"_warn_en_US);
1243	}
1244	}
1245	std::string buf;
1246	llvm::raw_string_ostream error{buf};
1247	Provenance provenance{GetProvenance(nextLine_)};
1248	std::optional<std::string> prependPath;
1249	if (const SourceFile * currentFile{allSources_.GetSourceFile(provenance)}) {
1250	prependPath = DirectoryName(currentFile->path());
1251	}
1252	const SourceFile *included{
1253	allSources_.Open(path, error, std::move(prependPath))};
1254	if (!included) {
1255	Say(provenance, "INCLUDE: %s"_err_en_US, buf);
1256	} else if (included->bytes() > `0`) {
1257	ProvenanceRange includeLineRange{
1258	provenance, static_cast<std::size_t>(p - nextLine_)};
1259	ProvenanceRange fileRange{
1260	allSources_.AddIncludedFile(*included, includeLineRange)};
1261	Preprocessor cleanPrepro{allSources_};
1262	if (preprocessor_.IsNameDefined("__FILE__"s)) {
1263	cleanPrepro.DefineStandardMacros(); // __FILE__, __LINE__, &c.
1264	}
1265	if (preprocessor_.IsNameDefined("_CUDA"s)) {
1266	cleanPrepro.Define("_CUDA"s, "1");
1267	}
1268	Prescanner{*this, cleanPrepro, /isNestedInIncludeDirective=/false}
1269	.set_encoding(included->encoding())
1270	.Prescan(fileRange);
1271	}
1272	}
1273
1274	const char Prescanner::IsPreprocessorDirectiveLine(const* char start) const* {
1275	const char *p{start};
1276	while (int n{IsSpace(p)}) {
1277	p += n;
1278	}
1279	if (*p == `'#'`) {
1280	if (inFixedForm_ && p == start + `5`) {
1281	return nullptr;
1282	}
1283	} else {
1284	p = SkipWhiteSpace(p);
1285	if (*p != `'#'`) {
1286	return nullptr;
1287	}
1288	}
1289	return SkipWhiteSpace(p: p + `1`);
1290	}
1291
1292	bool Prescanner::IsNextLinePreprocessorDirective() const {
1293	return IsPreprocessorDirectiveLine(start: nextLine_) != nullptr;
1294	}
1295
1296	bool Prescanner::SkipCommentLine(bool afterAmpersand) {
1297	if (IsAtEnd()) {
1298	if (afterAmpersand && prescannerNesting_ > `0`) {
1299	// A continuation marker at the end of the last line in an
1300	// include file inhibits the newline for that line.
1301	SkipToEndOfLine();
1302	omitNewline_ = true;
1303	}
1304	} else if (inPreprocessorDirective_) {
1305	} else {
1306	auto lineClass{ClassifyLine(nextLine_)};
1307	if (lineClass.kind == LineClassification::Kind::Comment) {
1308	NextLine();
1309	return true;
1310	} else if (lineClass.kind ==
1311	LineClassification::Kind::ConditionalCompilationDirective \|\|
1312	lineClass.kind == LineClassification::Kind::PreprocessorDirective) {
1313	// Allow conditional compilation directives (e.g., #ifdef) to affect
1314	// continuation lines.
1315	// Allow other preprocessor directives, too, except #include
1316	// (when it does not follow '&'), #define, and #undef (because
1317	// they cannot be allowed to affect preceding text on a
1318	// continued line).
1319	preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
1320	return true;
1321	} else if (afterAmpersand &&
1322	(lineClass.kind == LineClassification::Kind::DefinitionDirective \|\|
1323	lineClass.kind == LineClassification::Kind::IncludeDirective \|\|
1324	lineClass.kind == LineClassification::Kind::IncludeLine)) {
1325	SkipToEndOfLine();
1326	omitNewline_ = true;
1327	skipLeadingAmpersand_ = true;
1328	}
1329	}
1330	return false;
1331	}
1332
1333	const char Prescanner::FixedFormContinuationLine(bool* mightNeedSpace) {
1334	if (IsAtEnd()) {
1335	return nullptr;
1336	}
1337	tabInCurrentLine_ = false;
1338	char col1{*nextLine_};
1339	bool canBeNonDirectiveContinuation{
1340	(col1 == `' '` \|\|
1341	((col1 == `'D'` \|\| col1 == `'d'`) &&
1342	features_.IsEnabled(LanguageFeature::OldDebugLines))) &&
1343	nextLine_[`1`] == `' '` && nextLine_[`2`] == `' '` && nextLine_[`3`] == `' '` &&
1344	nextLine_[`4`] == `' '`};
1345	if (InCompilerDirective() &&
1346	!(InOpenMPConditionalLine() && !preprocessingOnly_)) {
1347	// !$ under -E is not continued, but deferred to later compilation
1348	if (IsFixedFormCommentChar(ch: col1) &&
1349	!(InOpenMPConditionalLine() && preprocessingOnly_)) {
1350	int j{`1`};
1351	for (; j < `5`; ++j) {
1352	char ch{directiveSentinel_[j - `1`]};
1353	if (ch == `'\0'`) {
1354	break;
1355	} else if (ch != ToLowerCaseLetter(nextLine_[j])) {
1356	return nullptr;
1357	}
1358	}
1359	for (; j < `5`; ++j) {
1360	if (nextLine_[j] != `' '`) {
1361	return nullptr;
1362	}
1363	}
1364	const char *col6{nextLine_ + `5`};
1365	if (col6 != `'\n'` && col6 != `'0'` && !IsSpaceOrTab(p: col6)) {
1366	if (mightNeedSpace && !IsSpace(p: nextLine_ + `6`)) {
1367	insertASpace_ = true;
1368	}
1369	return nextLine_ + `6`;
1370	}
1371	}
1372	} else { // Normal case: not in a compiler directive.
1373	// !$ conditional compilation lines may be continuations when not
1374	// just preprocessing.
1375	if (!preprocessingOnly_ && IsFixedFormCommentChar(ch: col1) &&
1376	nextLine_[`1`] == `'$'` && nextLine_[`2`] == `' '` && nextLine_[`3`] == `' '` &&
1377	nextLine_[`4`] == `' '` && IsCompilerDirectiveSentinel(&nextLine_[`1`], `1`)) {
1378	if (const char *col6{nextLine_ + `5`};
1379	col6 != `'\n'` && col6 != `'0'` && !IsSpaceOrTab(p: col6)) {
1380	insertASpace_ \|= mightNeedSpace && !IsSpace(p: nextLine_ + `6`);
1381	return nextLine_ + `6`;
1382	} else {
1383	return nullptr;
1384	}
1385	}
1386	if (col1 == `'&'` &&
1387	features_.IsEnabled(
1388	LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
1389	// Extension: '&' as continuation marker
1390	if (features_.ShouldWarn(
1391	LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
1392	Say(LanguageFeature::FixedFormContinuationWithColumn1Ampersand,
1393	GetProvenance(nextLine_), "nonstandard usage"_port_en_US);
1394	}
1395	return nextLine_ + `1`;
1396	}
1397	if (col1 == `'\t'` && nextLine_[`1`] >= `'1'` && nextLine_[`1`] <= `'9'`) {
1398	tabInCurrentLine_ = true;
1399	return nextLine_ + `2`; // VAX extension
1400	}
1401	if (canBeNonDirectiveContinuation) {
1402	const char *col6{nextLine_ + `5`};
1403	if (col6 != `'\n'` && col6 != `'0'` && !IsSpaceOrTab(p: col6)) {
1404	if ((col6 == `'i'` \|\| col6 == `'I'`) && IsIncludeLine(start: nextLine_)) {
1405	// It's an INCLUDE line, not a continuation
1406	} else {
1407	return nextLine_ + `6`;
1408	}
1409	}
1410	}
1411	if (IsImplicitContinuation()) {
1412	return nextLine_;
1413	}
1414	}
1415	return nullptr; // not a continuation line
1416	}
1417
1418	const char Prescanner::FreeFormContinuationLine(bool* ampersand) {
1419	const char *lineStart{nextLine_};
1420	const char *p{lineStart};
1421	if (p >= limit_) {
1422	return nullptr;
1423	}
1424	p = SkipWhiteSpaceIncludingEmptyMacros(p);
1425	if (InCompilerDirective()) {
1426	if (InOpenMPConditionalLine()) {
1427	if (preprocessingOnly_) {
1428	// in -E mode, don't treat !$ as a continuation
1429	return nullptr;
1430	} else if (p[`0`] == `'!'` && p[`1`] == `'$'`) {
1431	// accept but do not require a matching sentinel
1432	if (p[`2`] != `'&'` && !IsSpaceOrTab(p: &p[`2`])) {
1433	return nullptr; // not !$
1434	}
1435	p += `2`;
1436	}
1437	} else if (*p++ == `'!'`) {
1438	for (const char s{directiveSentinel_}; s != `'\0'`; ++p, ++s) {
1439	if (s != ToLowerCaseLetter(p)) {
1440	return nullptr; // not the same directive class
1441	}
1442	}
1443	} else {
1444	return nullptr;
1445	}
1446	p = SkipWhiteSpace(p);
1447	if (*p == `'&'`) {
1448	if (!ampersand) {
1449	insertASpace_ = true;
1450	}
1451	return p + `1`;
1452	} else if (ampersand) {
1453	return p;
1454	} else {
1455	return nullptr;
1456	}
1457	}
1458	if (p[`0`] == `'!'` && p[`1`] == `'$'` && !preprocessingOnly_ &&
1459	features_.IsEnabled(LanguageFeature::OpenMP)) {
1460	// !$ conditional line can be a continuation
1461	p = lineStart = SkipWhiteSpace(p: p + `2`);
1462	}
1463	if (*p == `'&'`) {
1464	return p + `1`;
1465	} else if (p == `'!'` \|\| p == `'\n'` \|\| *p == `'#'`) {
1466	return nullptr;
1467	} else if (ampersand \|\| IsImplicitContinuation()) {
1468	if (continuationInCharLiteral_) {
1469	// 'a'& -> 'a''b' == "a'b"
1470	// 'b'
1471	if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
1472	Say(common::LanguageFeature::MiscSourceExtensions,
1473	GetProvenanceRange(p, p + `1`),
1474	"Character literal continuation line should have been preceded by '&'"_port_en_US);
1475	}
1476	} else if (p > lineStart && IsSpaceOrTab(p: p - `1`)) {
1477	--p;
1478	} else {
1479	insertASpace_ = true;
1480	}
1481	return p;
1482	} else {
1483	return nullptr;
1484	}
1485	}
1486
1487	bool Prescanner::FixedFormContinuation(bool mightNeedSpace) {
1488	// N.B. We accept '&' as a continuation indicator in fixed form, too,
1489	// but not in a character literal.
1490	if (*at_ == `'&'` && inCharLiteral_) {
1491	return false;
1492	}
1493	do {
1494	if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) {
1495	BeginSourceLine(at: cont);
1496	column_ = `7`;
1497	NextLine();
1498	return true;
1499	}
1500	} while (SkipCommentLine(afterAmpersand: false / not after ampersand /));
1501	return false;
1502	}
1503
1504	bool Prescanner::FreeFormContinuation() {
1505	const char *p{at_};
1506	bool ampersand{*p == `'&'`};
1507	if (ampersand) {
1508	p = SkipWhiteSpace(p: p + `1`);
1509	}
1510	if (*p != `'\n'`) {
1511	if (inCharLiteral_) {
1512	return false;
1513	} else if (p == `'!'`) { // & ! comment - ok*
1514	} else if (ampersand && isPossibleMacroCall_ && (p == `','` \|\| p == `')'`)) {
1515	return false; // allow & at end of a macro argument
1516	} else if (ampersand && preprocessingOnly_ && !parenthesisNesting_) {
1517	return false; // allow & at start of line, maybe after !$
1518	} else if (features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) {
1519	Say(LanguageFeature::CruftAfterAmpersand, GetProvenance(p),
1520	"missing ! before comment after &"_warn_en_US);
1521	}
1522	}
1523	do {
1524	if (const char *cont{FreeFormContinuationLine(ampersand)}) {
1525	BeginSourceLine(at: cont);
1526	NextLine();
1527	return true;
1528	}
1529	} while (SkipCommentLine(afterAmpersand: ampersand));
1530	return false;
1531	}
1532
1533	// Implicit line continuation allows a preprocessor macro call with
1534	// arguments to span multiple lines.
1535	bool Prescanner::IsImplicitContinuation() const {
1536	return !inPreprocessorDirective_ && !inCharLiteral_ && isPossibleMacroCall_ &&
1537	parenthesisNesting_ > `0` && !IsAtEnd() &&
1538	ClassifyLine(nextLine_).kind == LineClassification::Kind::Source;
1539	}
1540
1541	bool Prescanner::Continuation(bool mightNeedFixedFormSpace) {
1542	if (disableSourceContinuation_) {
1543	return false;
1544	} else if (at_ == `'\n'` \|\| at_ == `'&'`) {
1545	if (inFixedForm_) {
1546	return FixedFormContinuation(mightNeedSpace: mightNeedFixedFormSpace);
1547	} else {
1548	return FreeFormContinuation();
1549	}
1550	} else if (*at_ == `'\\'` && at_ + `2` == nextLine_ &&
1551	backslashFreeFormContinuation_ && !inFixedForm_ && nextLine_ < limit_) {
1552	// cpp-like handling of \ at end of a free form source line
1553	BeginSourceLine(at: nextLine_);
1554	NextLine();
1555	return true;
1556	} else {
1557	return false;
1558	}
1559	}
1560
1561	std::optional<Prescanner::LineClassification>
1562	Prescanner::IsFixedFormCompilerDirectiveLine(const char start) const* {
1563	const char *p{start};
1564	char col1{*p++};
1565	if (!IsFixedFormCommentChar(ch: col1)) {
1566	return std::nullopt;
1567	}
1568	char sentinel[`5`], *sp{sentinel};
1569	int column{`2`};
1570	for (; column < `6`; ++column) {
1571	if (p == `'\n'` \|\| IsSpaceOrTab(p) \|\| IsDecimalDigit(p)) {
1572	break;
1573	}
1574	sp++ = ToLowerCaseLetter(p++);
1575	}
1576	if (sp == sentinel) {
1577	return std::nullopt;
1578	}
1579	*sp = `'\0'`;
1580	// A fixed form OpenMP conditional compilation sentinel must satisfy the
1581	// following criteria, for initial lines:
1582	// - Columns 3 through 5 must have only white space or numbers.
1583	// - Column 6 must be space or zero.
1584	bool isOpenMPConditional{sp == &sentinel[`1`] && sentinel[`0`] == `'$'`};
1585	bool hadDigit{false};
1586	if (isOpenMPConditional) {
1587	for (; column < `6`; ++column, ++p) {
1588	if (IsDecimalDigit(*p)) {
1589	hadDigit = true;
1590	} else if (!IsSpaceOrTab(p)) {
1591	return std::nullopt;
1592	}
1593	}
1594	}
1595	if (column == `6`) {
1596	if (*p == `'0'`) {
1597	++p;
1598	} else if (int n{IsSpaceOrTab(p)}) {
1599	p += n;
1600	} else if (isOpenMPConditional && preprocessingOnly_ && !hadDigit &&
1601	*p != `'\n'`) {
1602	// In -E mode, "!$ &" is treated as a directive
1603	} else {
1604	// This is a Continuation line, not an initial directive line.
1605	return std::nullopt;
1606	}
1607	}
1608	if (const char *ss{IsCompilerDirectiveSentinel(
1609	sentinel, static_cast<std::size_t>(sp - sentinel))}) {
1610	return {
1611	LineClassification {LineClassification::Kind::CompilerDirective, `0`, ss}};
1612	}
1613	return std::nullopt;
1614	}
1615
1616	std::optional<Prescanner::LineClassification>
1617	Prescanner::IsFreeFormCompilerDirectiveLine(const char start) const* {
1618	if (const char *p{SkipWhiteSpaceIncludingEmptyMacros(p: start)};
1619	p && *p++ == `'!'`) {
1620	if (auto maybePair{IsCompilerDirectiveSentinel(p)}) {
1621	auto offset{static_cast<std::size_t>(p - start - `1`)};
1622	return {LineClassification {LineClassification::Kind::CompilerDirective,
1623	offset, maybePair ->first}};
1624	}
1625	}
1626	return std::nullopt;
1627	}
1628
1629	Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) {
1630	std::uint64_t packed{`0`};
1631	for (char ch : dir) {
1632	packed = (packed << `8`) \| (ToLowerCaseLetter(ch) & `0xff`);
1633	}
1634	compilerDirectiveBloomFilter_.set(position: packed % prime1);
1635	compilerDirectiveBloomFilter_.set(position: packed % prime2);
1636	compilerDirectiveSentinels_.insert(x: dir);
1637	return *this;
1638	}
1639
1640	const char *Prescanner::IsCompilerDirectiveSentinel(
1641	const char sentinel, std::size_t len) const* {
1642	std::uint64_t packed{`0`};
1643	for (std::size_t j{`0`}; j < len; ++j) {
1644	packed = (packed << `8`) \| (sentinel[j] & `0xff`);
1645	}
1646	if (len == `0` \|\| !compilerDirectiveBloomFilter_.test(position: packed % prime1) \|\|
1647	!compilerDirectiveBloomFilter_.test(position: packed % prime2)) {
1648	return nullptr;
1649	}
1650	const auto iter{compilerDirectiveSentinels_.find(x: std::string (sentinel, len))};
1651	return iter == compilerDirectiveSentinels_.end() ? nullptr : iter ->c_str();
1652	}
1653
1654	const char Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const* {
1655	const char *p{token.begin()};
1656	const char *end{p + token.size()};
1657	while (p < end && (p == `' '` \|\| p == `'\n'`)) {
1658	++p;
1659	}
1660	if (p < end && *p == `'!'`) {
1661	++p;
1662	}
1663	while (end > p && (end[-`1`] == `' '` \|\| end[-`1`] == `'\t'`)) {
1664	--end;
1665	}
1666	return end > p && IsCompilerDirectiveSentinel(sentinel: p, len: end - p) ? p : nullptr;
1667	}
1668
1669	std::optional<std::pair<const char , const* char *>>
1670	Prescanner::IsCompilerDirectiveSentinel(const char p) const* {
1671	char sentinel[`8`];
1672	for (std::size_t j{`0`}; j + `1` < sizeof sentinel; ++p, ++j) {
1673	if (int n{IsSpaceOrTab(p)};
1674	n \|\| !(IsLetter(p) \|\| p == `'$'` \|\| *p == `'@'`)) {
1675	if (j > `0`) {
1676	if (j == `1` && sentinel[`0`] == `'$'` && n == `0` && p != `'&'` && p != `'\n'`) {
1677	// Free form OpenMP conditional compilation line sentinels have to
1678	// be immediately followed by a space or &, not a digit
1679	// or anything else. A newline also works for an initial line.
1680	break;
1681	}
1682	sentinel[j] = `'\0'`;
1683	if (*p != `'!'`) {
1684	if (const char *sp{IsCompilerDirectiveSentinel(sentinel, len: j)}) {
1685	return std::make_pair(x&: sp, y&: p);
1686	}
1687	}
1688	}
1689	break;
1690	} else {
1691	sentinel[j] = ToLowerCaseLetter(*p);
1692	}
1693	}
1694	return std::nullopt;
1695	}
1696
1697	constexpr bool IsDirective(const char match, const* char *dir) {
1698	for (; *match; ++match) {
1699	if (match != ToLowerCaseLetter(dir++)) {
1700	return false;
1701	}
1702	}
1703	return true;
1704	}
1705
1706	Prescanner::LineClassification Prescanner::ClassifyLine(
1707	const char start) const* {
1708	if (inFixedForm_) {
1709	if (std::optional<LineClassification> lc{
1710	IsFixedFormCompilerDirectiveLine(start)}) {
1711	return std::move(*lc);
1712	}
1713	if (IsFixedFormCommentLine(start)) {
1714	return {LineClassification::Kind::Comment};
1715	}
1716	} else {
1717	if (std::optional<LineClassification> lc{
1718	IsFreeFormCompilerDirectiveLine(start)}) {
1719	return std::move(*lc);
1720	}
1721	if (const char *bang{IsFreeFormComment(p: start)}) {
1722	return {LineClassification::Kind::Comment,
1723	static_cast<std::size_t>(bang - start)};
1724	}
1725	}
1726	if (std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) {
1727	return {LineClassification::Kind::IncludeLine, *quoteOffset};
1728	}
1729	if (const char *dir{IsPreprocessorDirectiveLine(start)}) {
1730	if (IsDirective(match: "if", dir) \|\| IsDirective(match: "elif", dir) \|\|
1731	IsDirective(match: "else", dir) \|\| IsDirective(match: "endif", dir)) {
1732	return {LineClassification::Kind::ConditionalCompilationDirective};
1733	} else if (IsDirective(match: "include", dir)) {
1734	return {LineClassification::Kind::IncludeDirective};
1735	} else if (IsDirective(match: "define", dir) \|\| IsDirective(match: "undef", dir)) {
1736	return {LineClassification::Kind::DefinitionDirective};
1737	} else {
1738	return {LineClassification::Kind::PreprocessorDirective};
1739	}
1740	}
1741	return {LineClassification::Kind::Source};
1742	}
1743
1744	Prescanner::LineClassification Prescanner::ClassifyLine(
1745	TokenSequence &tokens, Provenance newlineProvenance) const {
1746	// Append a newline temporarily.
1747	tokens.PutNextTokenChar(`'\n'`, newlineProvenance);
1748	tokens.CloseToken();
1749	const char *ppd{tokens.ToCharBlock().begin()};
1750	LineClassification classification{ClassifyLine(start: ppd)};
1751	tokens.pop_back(); // remove the newline
1752	return classification;
1753	}
1754
1755	void Prescanner::SourceFormChange(std::string &&dir) {
1756	if (dir == "!dir$ free") {
1757	inFixedForm_ = false;
1758	} else if (dir == "!dir$ fixed") {
1759	inFixedForm_ = true;
1760	}
1761	}
1762
1763	// Acquire and append compiler directive continuation lines to
1764	// the tokens that constitute a compiler directive, even when those
1765	// directive continuation lines are the result of macro expansion.
1766	// (Not used when neither the original compiler directive line nor
1767	// the directive continuation line result from preprocessing; regular
1768	// line continuation during tokenization handles that normal case.)
1769	bool Prescanner::CompilerDirectiveContinuation(
1770	TokenSequence &tokens, const char *origSentinel) {
1771	if (inFixedForm_ \|\| tokens.empty() \|\|
1772	tokens.TokenAt(tokens.SizeInTokens() - `1`) != "&" \|\|
1773	(preprocessingOnly_ && !parenthesisNesting_)) {
1774	return false;
1775	}
1776	LineClassification followingLine{ClassifyLine(start: nextLine_)};
1777	if (followingLine.kind == LineClassification::Kind::Comment) {
1778	nextLine_ += followingLine.payloadOffset; // advance to '!' or newline
1779	NextLine();
1780	return true;
1781	}
1782	CHECK(origSentinel != nullptr);
1783	directiveSentinel_ = origSentinel; // so InCompilerDirective() is true
1784	const char *nextContinuation{
1785	followingLine.kind == LineClassification::Kind::CompilerDirective
1786	? FreeFormContinuationLine(ampersand: true)
1787	: nullptr};
1788	if (!nextContinuation &&
1789	followingLine.kind != LineClassification::Kind::Source) {
1790	return false;
1791	}
1792	auto origNextLine{nextLine_};
1793	BeginSourceLine(at: nextLine_);
1794	NextLine();
1795	if (nextContinuation) {
1796	// What follows is !DIR$ & xxx; skip over the & so that it
1797	// doesn't cause a spurious continuation.
1798	at_ = nextContinuation;
1799	} else {
1800	// What follows looks like a source line before macro expansion,
1801	// but might become a directive continuation afterwards.
1802	SkipSpaces();
1803	}
1804	TokenSequence followingTokens;
1805	while (NextToken(followingTokens)) {
1806	}
1807	if (auto followingPrepro{
1808	preprocessor_.MacroReplacement(followingTokens, *this)}) {
1809	followingTokens = std::move(*followingPrepro);
1810	}
1811	followingTokens.RemoveRedundantBlanks();
1812	std::size_t startAt{`0`};
1813	std::size_t following{followingTokens.SizeInTokens()};
1814	bool ok{false};
1815	if (nextContinuation) {
1816	ok = true;
1817	} else {
1818	startAt = `2`;
1819	if (startAt < following && followingTokens.TokenAt(`0`) == "!") {
1820	CharBlock sentinel{followingTokens.TokenAt(`1`)};
1821	if (!sentinel.empty() &&
1822	std::memcmp(s1: sentinel.begin(), s2: origSentinel, n: sentinel.size()) == `0`) {
1823	ok = true;
1824	while (
1825	startAt < following && followingTokens.TokenAt(startAt).IsBlank()) {
1826	++startAt;
1827	}
1828	if (startAt < following && followingTokens.TokenAt(startAt) == "&") {
1829	++startAt;
1830	}
1831	}
1832	}
1833	}
1834	if (ok) {
1835	tokens.pop_back(); // delete original '&'
1836	tokens.AppendRange(followingTokens, startAt, following - startAt);
1837	tokens.RemoveRedundantBlanks();
1838	} else {
1839	nextLine_ = origNextLine;
1840	}
1841	return ok;
1842	}
1843
1844	// Similar, but for source line continuation after macro replacement.
1845	bool Prescanner::SourceLineContinuation(TokenSequence &tokens) {
1846	if (!inFixedForm_ && !tokens.empty() &&
1847	tokens.TokenAt(tokens.SizeInTokens() - `1`) == "&") {
1848	LineClassification followingLine{ClassifyLine(start: nextLine_)};
1849	if (followingLine.kind == LineClassification::Kind::Comment) {
1850	nextLine_ += followingLine.payloadOffset; // advance to '!' or newline
1851	NextLine();
1852	return true;
1853	} else if (const char nextContinuation{FreeFormContinuationLine(ampersand: true*)}) {
1854	BeginSourceLine(at: nextLine_);
1855	NextLine();
1856	TokenSequence followingTokens;
1857	at_ = nextContinuation;
1858	while (NextToken(followingTokens)) {
1859	}
1860	if (auto followingPrepro{
1861	preprocessor_.MacroReplacement(followingTokens, *this)}) {
1862	followingTokens = std::move(*followingPrepro);
1863	}
1864	followingTokens.RemoveRedundantBlanks();
1865	tokens.pop_back(); // delete original '&'
1866	tokens.CopyAll(followingTokens);
1867	return true;
1868	}
1869	}
1870	return false;
1871	}
1872	} // namespace Fortran::parser
1873

Provided by KDAB

Definitions

maxPrescannerNesting
Prescanner
Prescanner
IsSpace
IsSpaceOrTab
IsFixedFormCommentChar
NormalizeCompilerDirectiveCommentMarker
Prescan
Statement
CheckAndEmitLine
TokenizePreprocessorDirective
NextLine
LabelField
EnforceStupidEndStatementRules
SkipToEndOfLine
MustSkipToEndOfLine
NextChar
SkipToNextSignificantCharacter
SkipCComments
SkipSpaces
SkipWhiteSpace
SkipWhiteSpaceIncludingEmptyMacros
SkipWhiteSpaceAndCComments
SkipCComment
NextToken
HandleExponent
HandleKindSuffix
HandleExponentAndOrKindSuffix
QuotedCharacterLiteral
Hollerith
PadOutCharacterLiteral
IsAtProcess
IsFixedFormCommentLine
IsFreeFormComment
IsIncludeLine
FortranInclude
IsPreprocessorDirectiveLine
IsNextLinePreprocessorDirective
SkipCommentLine
FixedFormContinuationLine
FreeFormContinuationLine
FixedFormContinuation
FreeFormContinuation
IsImplicitContinuation
Continuation
IsFixedFormCompilerDirectiveLine
IsFreeFormCompilerDirectiveLine
AddCompilerDirectiveSentinel
IsCompilerDirectiveSentinel
IsCompilerDirectiveSentinel
IsCompilerDirectiveSentinel
IsDirective
ClassifyLine
ClassifyLine
SourceFormChange
CompilerDirectiveContinuation

Update your C++ knowledge – Modern C++11/14/17 Training

Find out more

Definitions

source code of flang/lib/Parser/prescan.cpp