1//===-- lib/Parser/prescan.cpp --------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "prescan.h"
10#include "flang/Common/idioms.h"
11#include "flang/Parser/characters.h"
12#include "flang/Parser/message.h"
13#include "flang/Parser/preprocessor.h"
14#include "flang/Parser/source.h"
15#include "flang/Parser/token-sequence.h"
16#include "llvm/Support/raw_ostream.h"
17#include <cstddef>
18#include <cstring>
19#include <utility>
20#include <vector>
21
22namespace Fortran::parser {
23
24using common::LanguageFeature;
25
// Deepest permitted nesting of prescanners.  Prescan() reports an error and
// returns without scanning once prescannerNesting_ exceeds this value.
static constexpr int maxPrescannerNesting{100};
27
28Prescanner::Prescanner(Messages &messages, CookedSource &cooked,
29 Preprocessor &preprocessor, common::LanguageFeatureControl lfc)
30 : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor},
31 allSources_{preprocessor_.allSources()}, features_{lfc},
32 backslashFreeFormContinuation_{preprocessor.AnyDefinitions()},
33 encoding_{allSources_.encoding()} {}
34
35Prescanner::Prescanner(const Prescanner &that, Preprocessor &prepro,
36 bool isNestedInIncludeDirective)
37 : messages_{that.messages_}, cooked_{that.cooked_}, preprocessor_{prepro},
38 allSources_{that.allSources_}, features_{that.features_},
39 preprocessingOnly_{that.preprocessingOnly_},
40 expandIncludeLines_{that.expandIncludeLines_},
41 isNestedInIncludeDirective_{isNestedInIncludeDirective},
42 backslashFreeFormContinuation_{that.backslashFreeFormContinuation_},
43 inFixedForm_{that.inFixedForm_},
44 fixedFormColumnLimit_{that.fixedFormColumnLimit_},
45 encoding_{that.encoding_},
46 prescannerNesting_{that.prescannerNesting_ + 1},
47 skipLeadingAmpersand_{that.skipLeadingAmpersand_},
48 compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_},
49 compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {}
50
// Tests whether p points at a blank.  Returns the number of bytes that the
// blank occupies (1 for an ASCII space or a LATIN-1 NBSP, 2 for a UTF-8
// NBSP), or 0 when p does not point at a blank.  The byte after *p is read
// only when *p is the UTF-8 lead byte 0xC2.
static inline int IsSpace(const char *p) {
  const char first{p[0]};
  if (first == ' ' || first == '\xa0') { // ASCII space, LATIN-1 NBSP
    return 1;
  }
  if (first == '\xc2' && p[1] == '\xa0') { // UTF-8 NBSP
    return 2;
  }
  return 0;
}
63
64static inline int IsSpaceOrTab(const char *p) {
65 return *p == '\t' ? 1 : IsSpace(p);
66}
67
// True for the characters '!', '*', 'C', and 'c'.
static inline constexpr bool IsFixedFormCommentChar(char ch) {
  switch (ch) {
  case '!':
  case '*':
  case 'C':
  case 'c':
    return true;
  default:
    return false;
  }
}
71
72static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) {
73 char *p{dir.GetMutableCharData()};
74 char *limit{p + dir.SizeInChars()};
75 for (; p < limit; ++p) {
76 if (*p != ' ') {
77 CHECK(IsFixedFormCommentChar(ch: *p));
78 *p = '!';
79 return;
80 }
81 }
82 DIE("compiler directive all blank");
83}
84
85void Prescanner::Prescan(ProvenanceRange range) {
86 startProvenance_ = range.start();
87 start_ = allSources_.GetSource(range);
88 CHECK(start_);
89 limit_ = start_ + range.size();
90 nextLine_ = start_;
91 const bool beganInFixedForm{inFixedForm_};
92 if (prescannerNesting_ > maxPrescannerNesting) {
93 Say(GetProvenance(start_),
94 "too many nested INCLUDE/#include files, possibly circular"_err_en_US);
95 return;
96 }
97 while (!IsAtEnd()) {
98 Statement();
99 }
100 if (inFixedForm_ != beganInFixedForm) {
101 std::string dir{"!dir$ "};
102 if (beganInFixedForm) {
103 dir += "fixed";
104 } else {
105 dir += "free";
106 }
107 dir += '\n';
108 TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()};
109 tokens.Emit(cooked_);
110 }
111}
112
113void Prescanner::Statement() {
114 TokenSequence tokens;
115 const char *statementStart{nextLine_};
116 LineClassification line{ClassifyLine(statementStart)};
117 switch (line.kind) {
118 case LineClassification::Kind::Comment:
119 nextLine_ += line.payloadOffset; // advance to '!' or newline
120 NextLine();
121 return;
122 case LineClassification::Kind::IncludeLine:
123 FortranInclude(quote: nextLine_ + line.payloadOffset);
124 NextLine();
125 return;
126 case LineClassification::Kind::ConditionalCompilationDirective:
127 case LineClassification::Kind::IncludeDirective:
128 preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
129 afterPreprocessingDirective_ = true;
130 skipLeadingAmpersand_ |= !inFixedForm_;
131 return;
132 case LineClassification::Kind::PreprocessorDirective:
133 preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
134 afterPreprocessingDirective_ = true;
135 // Don't set skipLeadingAmpersand_
136 return;
137 case LineClassification::Kind::DefinitionDirective:
138 preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
139 // Don't set afterPreprocessingDirective_ or skipLeadingAmpersand_
140 return;
141 case LineClassification::Kind::CompilerDirective: {
142 directiveSentinel_ = line.sentinel;
143 CHECK(InCompilerDirective());
144 BeginStatementAndAdvance();
145 if (inFixedForm_) {
146 CHECK(IsFixedFormCommentChar(ch: *at_));
147 } else {
148 at_ += line.payloadOffset;
149 column_ += line.payloadOffset;
150 CHECK(*at_ == '!');
151 }
152 std::optional<int> condOffset;
153 if (InOpenMPConditionalLine()) {
154 condOffset = 2;
155 } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'c' &&
156 directiveSentinel_[2] == 'u' && directiveSentinel_[3] == 'f' &&
157 directiveSentinel_[4] == '\0') {
158 // CUDA conditional compilation line.
159 condOffset = 5;
160 }
161 if (condOffset && !preprocessingOnly_) {
162 at_ += *condOffset, column_ += *condOffset;
163 if (auto payload{IsIncludeLine(at_)}) {
164 FortranInclude(quote: at_ + *payload);
165 return;
166 }
167 if (inFixedForm_) {
168 LabelField(tokens);
169 }
170 SkipSpaces();
171 } else {
172 // Compiler directive. Emit normalized sentinel, squash following spaces.
173 // Conditional compilation lines (!$) take this path in -E mode too
174 // so that -fopenmp only has to appear on the later compilation.
175 EmitChar(tokens, '!');
176 ++at_, ++column_;
177 for (const char *sp{directiveSentinel_}; *sp != '\0';
178 ++sp, ++at_, ++column_) {
179 EmitChar(tokens, *sp);
180 }
181 if (inFixedForm_) {
182 while (column_ < 6) {
183 if (*at_ == '\t') {
184 tabInCurrentLine_ = true;
185 ++at_;
186 for (; column_ < 7; ++column_) {
187 EmitChar(tokens, ' ');
188 }
189 } else if (int spaceBytes{IsSpace(p: at_)}) {
190 EmitChar(tokens, ' ');
191 at_ += spaceBytes;
192 ++column_;
193 } else {
194 if (InOpenMPConditionalLine() && column_ == 3 &&
195 IsDecimalDigit(*at_)) {
196 // subtle: !$ in -E mode can't be immediately followed by a digit
197 EmitChar(tokens, ' ');
198 }
199 break;
200 }
201 }
202 } else if (int spaceBytes{IsSpaceOrTab(p: at_)}) {
203 EmitChar(tokens, ' ');
204 at_ += spaceBytes, ++column_;
205 }
206 tokens.CloseToken();
207 SkipSpaces();
208 if (InOpenMPConditionalLine() && inFixedForm_ && !tabInCurrentLine_ &&
209 column_ == 6 && *at_ != '\n') {
210 // !$ 0 - turn '0' into a space
211 // !$ 1 - turn '1' into '&'
212 if (int n{IsSpace(p: at_)}; n || *at_ == '0') {
213 at_ += n ? n : 1;
214 } else {
215 ++at_;
216 EmitChar(tokens, '&');
217 tokens.CloseToken();
218 }
219 ++column_;
220 SkipSpaces();
221 }
222 }
223 break;
224 }
225 case LineClassification::Kind::Source: {
226 BeginStatementAndAdvance();
227 bool checkLabelField{false};
228 if (inFixedForm_) {
229 if (features_.IsEnabled(LanguageFeature::OldDebugLines) &&
230 (*at_ == 'D' || *at_ == 'd')) {
231 NextChar();
232 }
233 checkLabelField = true;
234 } else {
235 if (skipLeadingAmpersand_) {
236 skipLeadingAmpersand_ = false;
237 const char *p{SkipWhiteSpace(at_)};
238 if (p < limit_ && *p == '&') {
239 column_ += ++p - at_;
240 at_ = p;
241 }
242 } else {
243 SkipSpaces();
244 }
245 }
246 // Check for a leading identifier that might be a keyword macro
247 // that will expand to anything indicating a non-source line, like
248 // a comment marker or directive sentinel. If so, disable line
249 // continuation, so that NextToken() won't consume anything from
250 // following lines.
251 if (IsLegalIdentifierStart(*at_)) {
252 // TODO: Only bother with these cases when any keyword macro has
253 // been defined with replacement text that could begin a comment
254 // or directive sentinel.
255 const char *p{at_};
256 while (IsLegalInIdentifier(*++p)) {
257 }
258 CharBlock id{at_, static_cast<std::size_t>(p - at_)};
259 if (preprocessor_.IsNameDefined(id) &&
260 !preprocessor_.IsFunctionLikeDefinition(id)) {
261 checkLabelField = false;
262 TokenSequence toks;
263 toks.Put(id, GetProvenance(at_));
264 if (auto replaced{preprocessor_.MacroReplacement(toks, *this)}) {
265 auto newLineClass{ClassifyLine(*replaced, GetCurrentProvenance())};
266 if (newLineClass.kind ==
267 LineClassification::Kind::CompilerDirective) {
268 directiveSentinel_ = newLineClass.sentinel;
269 disableSourceContinuation_ = false;
270 } else {
271 disableSourceContinuation_ = !replaced->empty() &&
272 newLineClass.kind != LineClassification::Kind::Source;
273 }
274 }
275 }
276 }
277 if (checkLabelField) {
278 LabelField(tokens);
279 }
280 } break;
281 }
282
283 while (NextToken(tokens)) {
284 }
285 if (continuationLines_ > 255) {
286 if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
287 Say(common::LanguageFeature::MiscSourceExtensions,
288 GetProvenance(statementStart),
289 "%d continuation lines is more than the Fortran standard allows"_port_en_US,
290 continuationLines_);
291 }
292 }
293
294 Provenance newlineProvenance{GetCurrentProvenance()};
295 if (std::optional<TokenSequence> preprocessed{
296 preprocessor_.MacroReplacement(tokens, *this)}) {
297 // Reprocess the preprocessed line.
298 LineClassification ppl{ClassifyLine(*preprocessed, newlineProvenance)};
299 switch (ppl.kind) {
300 case LineClassification::Kind::Comment:
301 break;
302 case LineClassification::Kind::IncludeLine:
303 FortranInclude(quote: preprocessed->TokenAt(0).begin() + ppl.payloadOffset);
304 break;
305 case LineClassification::Kind::ConditionalCompilationDirective:
306 case LineClassification::Kind::IncludeDirective:
307 case LineClassification::Kind::DefinitionDirective:
308 case LineClassification::Kind::PreprocessorDirective:
309 if (features_.ShouldWarn(common::UsageWarning::Preprocessing)) {
310 Say(common::UsageWarning::Preprocessing,
311 preprocessed->GetProvenanceRange(),
312 "Preprocessed line resembles a preprocessor directive"_warn_en_US);
313 }
314 CheckAndEmitLine(preprocessed->ToLowerCase(), newlineProvenance);
315 break;
316 case LineClassification::Kind::CompilerDirective:
317 if (preprocessed->HasRedundantBlanks()) {
318 preprocessed->RemoveRedundantBlanks();
319 }
320 while (CompilerDirectiveContinuation(*preprocessed, ppl.sentinel)) {
321 newlineProvenance = GetCurrentProvenance();
322 }
323 NormalizeCompilerDirectiveCommentMarker(*preprocessed);
324 preprocessed->ToLowerCase();
325 SourceFormChange(preprocessed->ToString());
326 CheckAndEmitLine(
327 preprocessed->ClipComment(*this, true /* skip first ! */),
328 newlineProvenance);
329 break;
330 case LineClassification::Kind::Source:
331 if (inFixedForm_) {
332 if (!preprocessingOnly_ && preprocessed->HasBlanks()) {
333 preprocessed->RemoveBlanks();
334 }
335 } else {
336 while (SourceLineContinuation(*preprocessed)) {
337 newlineProvenance = GetCurrentProvenance();
338 }
339 if (preprocessed->HasRedundantBlanks()) {
340 preprocessed->RemoveRedundantBlanks();
341 }
342 }
343 CheckAndEmitLine(
344 preprocessed->ToLowerCase().ClipComment(*this), newlineProvenance);
345 break;
346 }
347 } else { // no macro replacement
348 if (line.kind == LineClassification::Kind::CompilerDirective) {
349 while (CompilerDirectiveContinuation(tokens, line.sentinel)) {
350 newlineProvenance = GetCurrentProvenance();
351 }
352 if (preprocessingOnly_ && inFixedForm_ && InOpenMPConditionalLine() &&
353 nextLine_ < limit_) {
354 // In -E mode, when the line after !$ conditional compilation is a
355 // regular fixed form continuation line, append a '&' to the line.
356 const char *p{nextLine_};
357 int col{1};
358 while (int n{IsSpace(p)}) {
359 if (*p == '\t') {
360 break;
361 }
362 p += n;
363 ++col;
364 }
365 if (col == 6 && *p != '0' && *p != '\t' && *p != '\n') {
366 EmitChar(tokens, '&');
367 tokens.CloseToken();
368 }
369 }
370 tokens.ToLowerCase();
371 SourceFormChange(tokens.ToString());
372 } else { // Kind::Source
373 tokens.ToLowerCase();
374 if (inFixedForm_) {
375 EnforceStupidEndStatementRules(tokens);
376 }
377 }
378 CheckAndEmitLine(tokens, newlineProvenance);
379 }
380 directiveSentinel_ = nullptr;
381}
382
383void Prescanner::CheckAndEmitLine(
384 TokenSequence &tokens, Provenance newlineProvenance) {
385 tokens.CheckBadFortranCharacters(
386 messages_, *this, disableSourceContinuation_ || preprocessingOnly_);
387 // Parenthesis nesting check does not apply while any #include is
388 // active, nor on the lines before and after a top-level #include,
389 // nor before or after conditional source.
390 // Applications play shenanigans with line continuation before and
391 // after #include'd subprogram argument lists and conditional source.
392 if (!preprocessingOnly_ && !isNestedInIncludeDirective_ && !omitNewline_ &&
393 !afterPreprocessingDirective_ && tokens.BadlyNestedParentheses() &&
394 !preprocessor_.InConditional()) {
395 if (nextLine_ < limit_ && IsPreprocessorDirectiveLine(nextLine_)) {
396 // don't complain
397 } else {
398 tokens.CheckBadParentheses(messages_);
399 }
400 }
401 tokens.Emit(cooked_);
402 if (omitNewline_) {
403 omitNewline_ = false;
404 } else {
405 cooked_.Put('\n', newlineProvenance);
406 afterPreprocessingDirective_ = false;
407 }
408}
409
410TokenSequence Prescanner::TokenizePreprocessorDirective() {
411 CHECK(!IsAtEnd() && !inPreprocessorDirective_);
412 inPreprocessorDirective_ = true;
413 BeginStatementAndAdvance();
414 TokenSequence tokens;
415 while (NextToken(tokens)) {
416 }
417 inPreprocessorDirective_ = false;
418 return tokens;
419}
420
421void Prescanner::NextLine() {
422 void *vstart{static_cast<void *>(const_cast<char *>(nextLine_))};
423 void *v{std::memchr(s: vstart, c: '\n', n: limit_ - nextLine_)};
424 if (!v) {
425 nextLine_ = limit_;
426 } else {
427 const char *nl{const_cast<const char *>(static_cast<char *>(v))};
428 nextLine_ = nl + 1;
429 }
430}
431
432void Prescanner::LabelField(TokenSequence &token) {
433 int outCol{1};
434 const char *start{at_};
435 std::optional<int> badColumn;
436 for (; *at_ != '\n' && column_ <= 6; ++at_) {
437 if (*at_ == '\t') {
438 ++at_;
439 column_ = 7;
440 break;
441 }
442 if (int n{IsSpace(p: at_)}; n == 0 &&
443 !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space
444 EmitChar(token, *at_);
445 ++outCol;
446 if (!badColumn && (column_ == 6 || !IsDecimalDigit(*at_))) {
447 badColumn = column_;
448 }
449 }
450 ++column_;
451 }
452 if (badColumn && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) {
453 if ((prescannerNesting_ > 0 && *badColumn == 6 &&
454 cooked_.BufferedBytes() == firstCookedCharacterOffset_) ||
455 afterPreprocessingDirective_) {
456 // This is the first source line in #include'd text or conditional
457 // code under #if, or the first source line after such.
458 // If it turns out that the preprocessed text begins with a
459 // fixed form continuation line, the newline at the end
460 // of the latest source line beforehand will be deleted in
461 // CookedSource::Marshal().
462 cooked_.MarkPossibleFixedFormContinuation();
463 } else if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
464 Say(common::UsageWarning::Scanning, GetProvenance(start + *badColumn - 1),
465 *badColumn == 6
466 ? "Statement should not begin with a continuation line"_warn_en_US
467 : "Character in fixed-form label field must be a digit"_warn_en_US);
468 }
469 token.clear();
470 if (*badColumn < 6) {
471 at_ = start;
472 column_ = 1;
473 return;
474 }
475 outCol = 1;
476 }
477 if (outCol == 1) { // empty label field
478 // Emit a space so that, if the line is rescanned after preprocessing,
479 // a leading 'C' or 'D' won't be left-justified and then accidentally
480 // misinterpreted as a comment card.
481 EmitChar(token, ' ');
482 ++outCol;
483 }
484 token.CloseToken();
485 SkipToNextSignificantCharacter();
486 if (IsDecimalDigit(*at_)) {
487 if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
488 Say(common::LanguageFeature::MiscSourceExtensions, GetCurrentProvenance(),
489 "Label digit is not in fixed-form label field"_port_en_US);
490 }
491 }
492}
493
494// 6.3.3.5: A program unit END statement, or any other statement whose
495// initial line resembles an END statement, shall not be continued in
496// fixed form source.
497void Prescanner::EnforceStupidEndStatementRules(const TokenSequence &tokens) {
498 CharBlock cBlock{tokens.ToCharBlock()};
499 const char *str{cBlock.begin()};
500 std::size_t n{cBlock.size()};
501 if (n < 3) {
502 return;
503 }
504 std::size_t j{0};
505 for (; j < n && (str[j] == ' ' || (str[j] >= '0' && str[j] <= '9')); ++j) {
506 }
507 if (j + 3 > n || std::memcmp(s1: str + j, s2: "end", n: 3) != 0) {
508 return;
509 }
510 // It starts with END, possibly after a label.
511 auto start{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
512 auto end{allSources_.GetSourcePosition(tokens.GetCharProvenance(n - 1))};
513 if (!start || !end) {
514 return;
515 }
516 if (&*start->sourceFile == &*end->sourceFile && start->line == end->line) {
517 return; // no continuation
518 }
519 j += 3;
520 static const char *const prefixes[]{"program", "subroutine", "function",
521 "blockdata", "module", "submodule", nullptr};
522 bool isPrefix{j == n || !IsLegalInIdentifier(str[j])}; // prefix is END
523 std::size_t endOfPrefix{j - 1};
524 for (const char *const *p{prefixes}; *p; ++p) {
525 std::size_t pLen{std::strlen(s: *p)};
526 if (j + pLen <= n && std::memcmp(s1: str + j, s2: *p, n: pLen) == 0) {
527 isPrefix = true; // END thing as prefix
528 j += pLen;
529 endOfPrefix = j - 1;
530 for (; j < n && IsLegalInIdentifier(str[j]); ++j) {
531 }
532 break;
533 }
534 }
535 if (isPrefix) {
536 auto range{tokens.GetTokenProvenanceRange(1)};
537 if (j == n) { // END or END thing [name]
538 Say(range,
539 "Program unit END statement may not be continued in fixed form source"_err_en_US);
540 } else {
541 auto endOfPrefixPos{
542 allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))};
543 auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
544 if (endOfPrefixPos && next &&
545 &*endOfPrefixPos->sourceFile == &*start->sourceFile &&
546 endOfPrefixPos->line == start->line &&
547 (&*next->sourceFile != &*start->sourceFile ||
548 next->line != start->line)) {
549 Say(range,
550 "Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US);
551 }
552 }
553 }
554}
555
556void Prescanner::SkipToEndOfLine() {
557 while (*at_ != '\n') {
558 ++at_, ++column_;
559 }
560}
561
562bool Prescanner::MustSkipToEndOfLine() const {
563 if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) {
564 return true; // skip over ignored columns in right margin (73:80)
565 } else if (*at_ == '!' && !inCharLiteral_ &&
566 (!inFixedForm_ || tabInCurrentLine_ || column_ != 6)) {
567 return !IsCompilerDirectiveSentinel(p: at_);
568 } else {
569 return false;
570 }
571}
572
573void Prescanner::NextChar() {
574 CHECK(*at_ != '\n');
575 int n{IsSpace(p: at_)};
576 at_ += n ? n : 1;
577 ++column_;
578 while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') {
579 // UTF-8 byte order mark - treat this file as UTF-8
580 at_ += 3;
581 encoding_ = Encoding::UTF_8;
582 }
583 SkipToNextSignificantCharacter();
584}
585
586// Skip everything that should be ignored until the next significant
587// character is reached; handles C-style comments in preprocessing
588// directives, Fortran ! comments, stuff after the right margin in
589// fixed form, and all forms of line continuation.
590bool Prescanner::SkipToNextSignificantCharacter() {
591 if (inPreprocessorDirective_) {
592 SkipCComments();
593 return false;
594 } else {
595 auto anyContinuationLine{false};
596 bool atNewline{false};
597 if (MustSkipToEndOfLine()) {
598 SkipToEndOfLine();
599 } else {
600 atNewline = *at_ == '\n';
601 }
602 for (; Continuation(mightNeedFixedFormSpace: atNewline); atNewline = false) {
603 anyContinuationLine = true;
604 ++continuationLines_;
605 if (MustSkipToEndOfLine()) {
606 SkipToEndOfLine();
607 }
608 }
609 if (*at_ == '\t') {
610 tabInCurrentLine_ = true;
611 }
612 return anyContinuationLine;
613 }
614}
615
616void Prescanner::SkipCComments() {
617 while (true) {
618 if (IsCComment(p: at_)) {
619 if (const char *after{SkipCComment(at_)}) {
620 column_ += after - at_;
621 // May have skipped over one or more newlines; relocate the start of
622 // the next line.
623 nextLine_ = at_ = after;
624 NextLine();
625 } else {
626 // Don't emit any messages about unclosed C-style comments, because
627 // the sequence /* can appear legally in a FORMAT statement. There's
628 // no ambiguity, since the sequence */ cannot appear legally.
629 break;
630 }
631 } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ &&
632 at_[1] == '\n' && !IsAtEnd()) {
633 BeginSourceLineAndAdvance();
634 } else {
635 break;
636 }
637 }
638}
639
640void Prescanner::SkipSpaces() {
641 while (IsSpaceOrTab(p: at_)) {
642 NextChar();
643 }
644 brokenToken_ = false;
645}
646
647const char *Prescanner::SkipWhiteSpace(const char *p) {
648 while (int n{IsSpaceOrTab(p)}) {
649 p += n;
650 }
651 return p;
652}
653
654const char *Prescanner::SkipWhiteSpaceIncludingEmptyMacros(
655 const char *p) const {
656 while (true) {
657 if (int n{IsSpaceOrTab(p)}) {
658 p += n;
659 } else if (preprocessor_.AnyDefinitions() && IsLegalIdentifierStart(*p)) {
660 // Skip keyword macros with empty definitions
661 const char *q{p + 1};
662 while (IsLegalInIdentifier(*q)) {
663 ++q;
664 }
665 if (preprocessor_.IsNameDefinedEmpty(
666 CharBlock{p, static_cast<std::size_t>(q - p)})) {
667 p = q;
668 } else {
669 break;
670 }
671 } else {
672 break;
673 }
674 }
675 return p;
676}
677
678const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const {
679 while (true) {
680 if (int n{IsSpaceOrTab(p)}) {
681 p += n;
682 } else if (IsCComment(p)) {
683 if (const char *after{SkipCComment(p)}) {
684 p = after;
685 } else {
686 break;
687 }
688 } else {
689 break;
690 }
691 }
692 return p;
693}
694
695const char *Prescanner::SkipCComment(const char *p) const {
696 char star{' '}, slash{' '};
697 p += 2;
698 while (star != '*' || slash != '/') {
699 if (p >= limit_) {
700 return nullptr; // signifies an unterminated comment
701 }
702 star = slash;
703 slash = *p++;
704 }
705 return p;
706}
707
708bool Prescanner::NextToken(TokenSequence &tokens) {
709 CHECK(at_ >= start_ && at_ < limit_);
710 if (InFixedFormSource() && !preprocessingOnly_) {
711 SkipSpaces();
712 } else {
713 if (*at_ == '/' && IsCComment(p: at_)) {
714 // Recognize and skip over classic C style /*comments*/ when
715 // outside a character literal.
716 if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) {
717 Say(LanguageFeature::ClassicCComments, GetCurrentProvenance(),
718 "nonstandard usage: C-style comment"_port_en_US);
719 }
720 SkipCComments();
721 }
722 if (IsSpaceOrTab(p: at_)) {
723 // Compress free-form white space into a single space character.
724 const auto theSpace{at_};
725 char previous{at_ <= start_ ? ' ' : at_[-1]};
726 NextChar();
727 SkipSpaces();
728 if (*at_ == '\n' && !omitNewline_) {
729 // Discard white space at the end of a line.
730 } else if (!inPreprocessorDirective_ &&
731 (previous == '(' || *at_ == '(' || *at_ == ')')) {
732 // Discard white space before/after '(' and before ')', unless in a
733 // preprocessor directive. This helps yield space-free contiguous
734 // names for generic interfaces like OPERATOR( + ) and
735 // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg).
736 // This has the effect of silently ignoring the illegal spaces in
737 // the array constructor ( /1,2/ ) but that seems benign; it's
738 // hard to avoid that while still removing spaces from OPERATOR( / )
739 // and OPERATOR( // ).
740 } else {
741 // Preserve the squashed white space as a single space character.
742 tokens.PutNextTokenChar(' ', GetProvenance(theSpace));
743 tokens.CloseToken();
744 return true;
745 }
746 }
747 }
748 brokenToken_ = false;
749 if (*at_ == '\n') {
750 return false;
751 }
752 const char *start{at_};
753 if (*at_ == '\'' || *at_ == '"') {
754 QuotedCharacterLiteral(tokens, start);
755 preventHollerith_ = false;
756 } else if (IsDecimalDigit(*at_)) {
757 int n{0}, digits{0};
758 static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)};
759 do {
760 if (n < maxHollerith) {
761 n = 10 * n + DecimalDigitValue(*at_);
762 }
763 EmitCharAndAdvance(tokens, *at_);
764 ++digits;
765 if (InFixedFormSource()) {
766 SkipSpaces();
767 }
768 } while (IsDecimalDigit(*at_));
769 if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith &&
770 !preventHollerith_) {
771 Hollerith(tokens, n, start);
772 } else if (*at_ == '.') {
773 while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
774 }
775 HandleExponentAndOrKindSuffix(tokens);
776 } else if (HandleExponentAndOrKindSuffix(tokens)) {
777 } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') &&
778 inPreprocessorDirective_) {
779 do {
780 EmitCharAndAdvance(tokens, *at_);
781 } while (IsHexadecimalDigit(*at_));
782 } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { // 4_"..."
783 EmitCharAndAdvance(tokens, *at_);
784 QuotedCharacterLiteral(tokens, start);
785 } else if (IsLetter(*at_) && !preventHollerith_ &&
786 parenthesisNesting_ > 0 &&
787 !preprocessor_.IsNameDefined(CharBlock{at_, 1})) {
788 // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
789 // we don't misrecognize I9HHOLLERITH as an identifier in the next case.
790 EmitCharAndAdvance(tokens, *at_);
791 }
792 preventHollerith_ = false;
793 } else if (*at_ == '.') {
794 char nch{EmitCharAndAdvance(tokens, '.')};
795 if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) {
796 while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
797 }
798 HandleExponentAndOrKindSuffix(tokens);
799 } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') {
800 EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis
801 }
802 preventHollerith_ = false;
803 } else if (IsLegalInIdentifier(*at_)) {
804 std::size_t parts{1};
805 bool anyDefined{false};
806 bool hadContinuation{false};
807 // Subtlety: When an identifier is split across continuation lines,
808 // its parts are kept as distinct pp-tokens if macro replacement
809 // should operate on them independently. This trick accommodates the
810 // historic practice of using line continuation for token pasting after
811 // replacement.
812 // In free form, the macro to be replaced must have been preceded
813 // by '&' and followed by either '&' or, if last, the end of a line.
814 // call & call foo& call foo&
815 // &MACRO& OR &MACRO& OR &MACRO
816 // &foo(...) &(...)
817 do {
818 EmitChar(tokens, *at_);
819 ++at_, ++column_;
820 hadContinuation = SkipToNextSignificantCharacter();
821 if (hadContinuation && IsLegalIdentifierStart(*at_)) {
822 if (brokenToken_) {
823 break;
824 }
825 // Continued identifier
826 tokens.CloseToken();
827 ++parts;
828 if (!anyDefined &&
829 (parts > 2 || inFixedForm_ ||
830 (start > start_ && start[-1] == '&')) &&
831 preprocessor_.IsNameDefined(
832 tokens.TokenAt(tokens.SizeInTokens() - 1))) {
833 anyDefined = true;
834 }
835 }
836 } while (IsLegalInIdentifier(*at_));
837 if (!anyDefined && parts > 1) {
838 tokens.CloseToken();
839 char after{*SkipWhiteSpace(p: at_)};
840 anyDefined = (hadContinuation || after == '\n' || after == '&') &&
841 preprocessor_.IsNameDefined(
842 tokens.TokenAt(tokens.SizeInTokens() - 1));
843 tokens.ReopenLastToken();
844 }
845 if (!anyDefined) {
846 // If no part was a defined macro, combine the parts into one so that
847 // the combination itself can be subject to macro replacement.
848 while (parts-- > 1) {
849 tokens.ReopenLastToken();
850 }
851 }
852 if (InFixedFormSource()) {
853 SkipSpaces();
854 }
855 if ((*at_ == '\'' || *at_ == '"') &&
856 tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..."
857 QuotedCharacterLiteral(tokens, start);
858 preventHollerith_ = false;
859 } else {
860 preventHollerith_ = true; // DO 10 H = ...
861 }
862 } else if (*at_ == '*') {
863 if (EmitCharAndAdvance(tokens, '*') == '*') {
864 EmitCharAndAdvance(tokens, '*');
865 } else {
866 // Subtle ambiguity:
867 // CHARACTER*2H declares H because *2 is a kind specifier
868 // DATAC/N*2H / is repeated Hollerith
869 preventHollerith_ = !slashInCurrentStatement_;
870 }
871 } else {
872 char ch{*at_};
873 if (ch == '(') {
874 if (parenthesisNesting_++ == 0) {
875 isPossibleMacroCall_ = tokens.SizeInTokens() > 0 &&
876 preprocessor_.IsFunctionLikeDefinition(
877 tokens.TokenAt(tokens.SizeInTokens() - 1));
878 }
879 } else if (ch == ')' && parenthesisNesting_ > 0) {
880 --parenthesisNesting_;
881 }
882 char nch{EmitCharAndAdvance(tokens, ch)};
883 preventHollerith_ = false;
884 if ((nch == '=' &&
885 (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) ||
886 (ch == nch &&
887 (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' ||
888 ch == '|' || ch == '<' || ch == '>')) ||
889 (ch == '=' && nch == '>')) {
890 // token comprises two characters
891 EmitCharAndAdvance(tokens, nch);
892 } else if (ch == '/') {
893 slashInCurrentStatement_ = true;
894 } else if (ch == ';' && InFixedFormSource()) {
895 SkipSpaces();
896 if (IsDecimalDigit(*at_)) {
897 if (features_.ShouldWarn(
898 common::LanguageFeature::MiscSourceExtensions)) {
899 Say(common::LanguageFeature::MiscSourceExtensions,
900 GetProvenanceRange(at_, at_ + 1),
901 "Label should be in the label field"_port_en_US);
902 }
903 }
904 }
905 }
906 tokens.CloseToken();
907 return true;
908}
909
910bool Prescanner::HandleExponent(TokenSequence &tokens) {
911 if (char ed{ToLowerCaseLetter(*at_)}; ed == 'e' || ed == 'd') {
912 // Do some look-ahead to ensure that this 'e'/'d' is an exponent,
913 // not the start of an identifier that could be a macro.
914 const char *startAt{at_};
915 int startColumn{column_};
916 TokenSequence possible;
917 EmitCharAndAdvance(possible, *at_);
918 if (InFixedFormSource()) {
919 SkipSpaces();
920 }
921 if (*at_ == '+' || *at_ == '-') {
922 EmitCharAndAdvance(possible, *at_);
923 if (InFixedFormSource()) {
924 SkipSpaces();
925 }
926 }
927 if (IsDecimalDigit(*at_)) { // it's an exponent; scan it
928 while (IsDecimalDigit(*at_)) {
929 EmitCharAndAdvance(possible, *at_);
930 if (InFixedFormSource()) {
931 SkipSpaces();
932 }
933 }
934 possible.CloseToken();
935 tokens.AppendRange(possible, 0); // appends to current token
936 return true;
937 }
938 // Not an exponent; backtrack
939 at_ = startAt;
940 column_ = startColumn;
941 }
942 return false;
943}
944
945bool Prescanner::HandleKindSuffix(TokenSequence &tokens) {
946 if (*at_ != '_') {
947 return false;
948 }
949 TokenSequence withUnderscore, separate;
950 EmitChar(withUnderscore, '_');
951 EmitCharAndAdvance(separate, '_');
952 if (InFixedFormSource()) {
953 SkipSpaces();
954 }
955 if (IsLegalInIdentifier(*at_)) {
956 separate.CloseToken();
957 EmitChar(withUnderscore, *at_);
958 EmitCharAndAdvance(separate, *at_);
959 if (InFixedFormSource()) {
960 SkipSpaces();
961 }
962 while (IsLegalInIdentifier(*at_)) {
963 EmitChar(withUnderscore, *at_);
964 EmitCharAndAdvance(separate, *at_);
965 if (InFixedFormSource()) {
966 SkipSpaces();
967 }
968 }
969 }
970 withUnderscore.CloseToken();
971 separate.CloseToken();
972 tokens.CloseToken();
973 if (separate.SizeInTokens() == 2 &&
974 preprocessor_.IsNameDefined(separate.TokenAt(1)) &&
975 !preprocessor_.IsNameDefined(withUnderscore.ToCharBlock())) {
976 // "_foo" is not defined, but "foo" is
977 tokens.CopyAll(separate); // '_' "foo"
978 } else {
979 tokens.CopyAll(withUnderscore); // "_foo"
980 }
981 return true;
982}
983
984bool Prescanner::HandleExponentAndOrKindSuffix(TokenSequence &tokens) {
985 bool hadExponent{HandleExponent(tokens)};
986 if (HandleKindSuffix(tokens)) {
987 return true;
988 } else {
989 return hadExponent;
990 }
991}
992
993void Prescanner::QuotedCharacterLiteral(
994 TokenSequence &tokens, const char *start) {
995 char quote{*at_};
996 const char *end{at_ + 1};
997 inCharLiteral_ = true;
998 continuationInCharLiteral_ = true;
999 const auto emit{[&](char ch) { EmitChar(tokens, ch); }};
1000 const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }};
1001 bool isEscaped{false};
1002 bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)};
1003 while (true) {
1004 if (*at_ == '\\') {
1005 if (escapesEnabled) {
1006 isEscaped = !isEscaped;
1007 } else {
1008 // The parser always processes escape sequences, so don't confuse it
1009 // when escapes are disabled.
1010 insert('\\');
1011 }
1012 } else {
1013 isEscaped = false;
1014 }
1015 if (*at_ == '\n') {
1016 if (inPreprocessorDirective_) {
1017 EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false,
1018 Encoding::LATIN_1);
1019 } else if (InCompilerDirective() && preprocessingOnly_) {
1020 // don't complain about -E output of !$, do it in later compilation
1021 } else {
1022 Say(GetProvenanceRange(start, end),
1023 "Incomplete character literal"_err_en_US);
1024 }
1025 break;
1026 }
1027 EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false,
1028 Encoding::LATIN_1);
1029 while (PadOutCharacterLiteral(tokens)) {
1030 }
1031 // Here's a weird edge case. When there's a two or more following
1032 // continuation lines at this point, and the entire significant part of
1033 // the next continuation line is the name of a keyword macro, replace
1034 // it in the character literal with its definition. Example:
1035 // #define FOO foo
1036 // subroutine subr() bind(c, name="my_&
1037 // &FOO&
1038 // &_bar") ...
1039 // produces a binding name of "my_foo_bar".
1040 while (at_[1] == '&' && nextLine_ < limit_ && !InFixedFormSource()) {
1041 const char *idStart{nextLine_};
1042 if (const char *amper{SkipWhiteSpace(p: nextLine_)}; *amper == '&') {
1043 idStart = amper + 1;
1044 }
1045 if (IsLegalIdentifierStart(*idStart)) {
1046 std::size_t idLen{1};
1047 for (; IsLegalInIdentifier(idStart[idLen]); ++idLen) {
1048 }
1049 if (idStart[idLen] == '&') {
1050 CharBlock id{idStart, idLen};
1051 if (preprocessor_.IsNameDefined(id)) {
1052 TokenSequence ppTokens;
1053 ppTokens.Put(id, GetProvenance(idStart));
1054 if (auto replaced{
1055 preprocessor_.MacroReplacement(ppTokens, *this)}) {
1056 tokens.CopyAll(*replaced);
1057 at_ = &idStart[idLen - 1];
1058 NextLine();
1059 continue; // try again on the next line
1060 }
1061 }
1062 }
1063 }
1064 break;
1065 }
1066 end = at_ + 1;
1067 NextChar();
1068 if (*at_ == quote && !isEscaped) {
1069 // A doubled unescaped quote mark becomes a single instance of that
1070 // quote character in the literal (later). There can be spaces between
1071 // the quotes in fixed form source.
1072 EmitChar(tokens, quote);
1073 inCharLiteral_ = false; // for cases like print *, '...'!comment
1074 NextChar();
1075 if (InFixedFormSource()) {
1076 SkipSpaces();
1077 }
1078 if (*at_ != quote) {
1079 break;
1080 }
1081 inCharLiteral_ = true;
1082 }
1083 }
1084 continuationInCharLiteral_ = false;
1085 inCharLiteral_ = false;
1086}
1087
1088void Prescanner::Hollerith(
1089 TokenSequence &tokens, int count, const char *start) {
1090 inCharLiteral_ = true;
1091 CHECK(*at_ == 'h' || *at_ == 'H');
1092 EmitChar(tokens, 'H');
1093 while (count-- > 0) {
1094 if (PadOutCharacterLiteral(tokens)) {
1095 } else if (*at_ == '\n') {
1096 if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
1097 Say(common::UsageWarning::Scanning, GetProvenanceRange(start, at_),
1098 "Possible truncated Hollerith literal"_warn_en_US);
1099 }
1100 break;
1101 } else {
1102 NextChar();
1103 // Each multi-byte character encoding counts as a single character.
1104 // No escape sequences are recognized.
1105 // Hollerith is always emitted to the cooked character
1106 // stream in UTF-8.
1107 DecodedCharacter decoded{DecodeCharacter(
1108 encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)};
1109 if (decoded.bytes > 0) {
1110 EncodedCharacter utf8{
1111 EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)};
1112 for (int j{0}; j < utf8.bytes; ++j) {
1113 EmitChar(tokens, utf8.buffer[j]);
1114 }
1115 at_ += decoded.bytes - 1;
1116 } else {
1117 Say(GetProvenanceRange(start, at_),
1118 "Bad character in Hollerith literal"_err_en_US);
1119 break;
1120 }
1121 }
1122 }
1123 if (*at_ != '\n') {
1124 NextChar();
1125 }
1126 inCharLiteral_ = false;
1127}
1128
1129// In fixed form, source card images must be processed as if they were at
1130// least 72 columns wide, at least in character literal contexts.
1131bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) {
1132 while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') {
1133 if (column_ < fixedFormColumnLimit_) {
1134 tokens.PutNextTokenChar(' ', spaceProvenance_);
1135 ++column_;
1136 return true;
1137 }
1138 if (!FixedFormContinuation(atNewline: false /*no need to insert space*/) ||
1139 tabInCurrentLine_) {
1140 return false;
1141 }
1142 CHECK(column_ == 7);
1143 --at_; // point to column 6 of continuation line
1144 column_ = 6;
1145 }
1146 return false;
1147}
1148
1149static bool IsAtProcess(const char *p) {
1150 static const char pAtProc[]{"process"};
1151 for (std::size_t i{0}; i < sizeof pAtProc - 1; ++i) {
1152 if (ToLowerCaseLetter(*++p) != pAtProc[i])
1153 return false;
1154 }
1155 return true;
1156}
1157
1158bool Prescanner::IsFixedFormCommentLine(const char *start) const {
1159 const char *p{start};
1160 // The @process directive must start in column 1.
1161 if (*p == '@' && IsAtProcess(p)) {
1162 return true;
1163 }
1164 if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c.
1165 ((*p == 'D' || *p == 'd') &&
1166 !features_.IsEnabled(LanguageFeature::OldDebugLines))) {
1167 return true;
1168 }
1169 bool anyTabs{false};
1170 while (true) {
1171 if (int n{IsSpace(p)}) {
1172 p += n;
1173 } else if (*p == '\t') {
1174 anyTabs = true;
1175 ++p;
1176 } else if (*p == '0' && !anyTabs && p == start + 5) {
1177 ++p; // 0 in column 6 must treated as a space
1178 } else {
1179 break;
1180 }
1181 }
1182 if (!anyTabs && p >= start + fixedFormColumnLimit_) {
1183 return true;
1184 }
1185 if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) {
1186 return true;
1187 }
1188 return *p == '\n';
1189}
1190
1191const char *Prescanner::IsFreeFormComment(const char *p) const {
1192 p = SkipWhiteSpaceAndCComments(p);
1193 if (*p == '!' || *p == '\n') {
1194 return p;
1195 } else if (*p == '@') {
1196 return IsAtProcess(p) ? p : nullptr;
1197 } else {
1198 return nullptr;
1199 }
1200}
1201
1202std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const {
1203 if (!expandIncludeLines_) {
1204 return std::nullopt;
1205 }
1206 const char *p{SkipWhiteSpace(p: start)};
1207 if (*p == '0' && inFixedForm_ && p == start + 5) {
1208 // Accept " 0INCLUDE" in fixed form.
1209 p = SkipWhiteSpace(p: p + 1);
1210 }
1211 for (const char *q{"include"}; *q; ++q) {
1212 if (ToLowerCaseLetter(*p) != *q) {
1213 return std::nullopt;
1214 }
1215 p = SkipWhiteSpace(p: p + 1);
1216 }
1217 if (IsDecimalDigit(*p)) { // accept & ignore a numeric kind prefix
1218 for (p = SkipWhiteSpace(p: p + 1); IsDecimalDigit(*p);
1219 p = SkipWhiteSpace(p: p + 1)) {
1220 }
1221 if (*p != '_') {
1222 return std::nullopt;
1223 }
1224 p = SkipWhiteSpace(p: p + 1);
1225 }
1226 if (*p == '"' || *p == '\'') {
1227 return {p - start};
1228 }
1229 return std::nullopt;
1230}
1231
1232void Prescanner::FortranInclude(const char *firstQuote) {
1233 const char *p{firstQuote};
1234 while (*p != '"' && *p != '\'') {
1235 ++p;
1236 }
1237 char quote{*p};
1238 std::string path;
1239 for (++p; *p != '\n'; ++p) {
1240 if (*p == quote) {
1241 if (p[1] != quote) {
1242 break;
1243 }
1244 ++p;
1245 }
1246 path += *p;
1247 }
1248 if (*p != quote) {
1249 Say(GetProvenanceRange(firstQuote, p),
1250 "malformed path name string"_err_en_US);
1251 return;
1252 }
1253 p = SkipWhiteSpace(p: p + 1);
1254 if (*p != '\n' && *p != '!') {
1255 const char *garbage{p};
1256 for (; *p != '\n' && *p != '!'; ++p) {
1257 }
1258 if (features_.ShouldWarn(common::UsageWarning::Scanning)) {
1259 Say(common::UsageWarning::Scanning, GetProvenanceRange(garbage, p),
1260 "excess characters after path name"_warn_en_US);
1261 }
1262 }
1263 std::string buf;
1264 llvm::raw_string_ostream error{buf};
1265 Provenance provenance{GetProvenance(nextLine_)};
1266 std::optional<std::string> prependPath;
1267 if (const SourceFile * currentFile{allSources_.GetSourceFile(provenance)}) {
1268 prependPath = DirectoryName(currentFile->path());
1269 }
1270 const SourceFile *included{
1271 allSources_.Open(path, error, std::move(prependPath))};
1272 if (!included) {
1273 Say(provenance, "INCLUDE: %s"_err_en_US, buf);
1274 } else if (included->bytes() > 0) {
1275 ProvenanceRange includeLineRange{
1276 provenance, static_cast<std::size_t>(p - nextLine_)};
1277 ProvenanceRange fileRange{
1278 allSources_.AddIncludedFile(*included, includeLineRange)};
1279 Preprocessor cleanPrepro{allSources_};
1280 if (preprocessor_.IsNameDefined("__FILE__"s)) {
1281 cleanPrepro.DefineStandardMacros(); // __FILE__, __LINE__, &c.
1282 }
1283 if (preprocessor_.IsNameDefined("_CUDA"s)) {
1284 cleanPrepro.Define("_CUDA"s, "1");
1285 }
1286 Prescanner{*this, cleanPrepro, /*isNestedInIncludeDirective=*/false}
1287 .set_encoding(included->encoding())
1288 .Prescan(fileRange);
1289 }
1290}
1291
1292const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const {
1293 const char *p{start};
1294 while (int n{IsSpace(p)}) {
1295 p += n;
1296 }
1297 if (*p == '#') {
1298 if (inFixedForm_ && p == start + 5) {
1299 return nullptr;
1300 }
1301 } else {
1302 p = SkipWhiteSpace(p);
1303 if (*p != '#') {
1304 return nullptr;
1305 }
1306 }
1307 return SkipWhiteSpace(p: p + 1);
1308}
1309
1310bool Prescanner::IsNextLinePreprocessorDirective() const {
1311 return IsPreprocessorDirectiveLine(start: nextLine_) != nullptr;
1312}
1313
1314bool Prescanner::SkipCommentLine(bool afterAmpersand) {
1315 if (IsAtEnd()) {
1316 if (afterAmpersand && prescannerNesting_ > 0) {
1317 // A continuation marker at the end of the last line in an
1318 // include file inhibits the newline for that line.
1319 SkipToEndOfLine();
1320 omitNewline_ = true;
1321 }
1322 } else if (inPreprocessorDirective_) {
1323 } else {
1324 auto lineClass{ClassifyLine(nextLine_)};
1325 if (lineClass.kind == LineClassification::Kind::Comment) {
1326 NextLine();
1327 return true;
1328 } else if (lineClass.kind ==
1329 LineClassification::Kind::ConditionalCompilationDirective ||
1330 lineClass.kind == LineClassification::Kind::PreprocessorDirective) {
1331 // Allow conditional compilation directives (e.g., #ifdef) to affect
1332 // continuation lines.
1333 // Allow other preprocessor directives, too, except #include
1334 // (when it does not follow '&'), #define, and #undef (because
1335 // they cannot be allowed to affect preceding text on a
1336 // continued line).
1337 preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
1338 return true;
1339 } else if (afterAmpersand &&
1340 (lineClass.kind == LineClassification::Kind::DefinitionDirective ||
1341 lineClass.kind == LineClassification::Kind::IncludeDirective ||
1342 lineClass.kind == LineClassification::Kind::IncludeLine)) {
1343 SkipToEndOfLine();
1344 omitNewline_ = true;
1345 skipLeadingAmpersand_ = true;
1346 }
1347 }
1348 return false;
1349}
1350
1351const char *Prescanner::FixedFormContinuationLine(bool atNewline) {
1352 if (IsAtEnd()) {
1353 return nullptr;
1354 }
1355 tabInCurrentLine_ = false;
1356 char col1{*nextLine_};
1357 bool canBeNonDirectiveContinuation{
1358 (col1 == ' ' ||
1359 ((col1 == 'D' || col1 == 'd') &&
1360 features_.IsEnabled(LanguageFeature::OldDebugLines))) &&
1361 nextLine_[1] == ' ' && nextLine_[2] == ' ' && nextLine_[3] == ' ' &&
1362 nextLine_[4] == ' '};
1363 if (InCompilerDirective() &&
1364 !(InOpenMPConditionalLine() && !preprocessingOnly_)) {
1365 // !$ under -E is not continued, but deferred to later compilation
1366 if (IsFixedFormCommentChar(ch: col1) &&
1367 !(InOpenMPConditionalLine() && preprocessingOnly_)) {
1368 int j{1};
1369 for (; j < 5; ++j) {
1370 char ch{directiveSentinel_[j - 1]};
1371 if (ch == '\0') {
1372 break;
1373 } else if (ch != ToLowerCaseLetter(nextLine_[j])) {
1374 return nullptr;
1375 }
1376 }
1377 for (; j < 5; ++j) {
1378 if (nextLine_[j] != ' ') {
1379 return nullptr;
1380 }
1381 }
1382 const char *col6{nextLine_ + 5};
1383 if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(p: col6)) {
1384 if (atNewline && !IsSpace(p: nextLine_ + 6)) {
1385 brokenToken_ = true;
1386 }
1387 return nextLine_ + 6;
1388 }
1389 }
1390 } else { // Normal case: not in a compiler directive.
1391 // !$ conditional compilation lines may be continuations when not
1392 // just preprocessing.
1393 if (!preprocessingOnly_ && IsFixedFormCommentChar(ch: col1) &&
1394 nextLine_[1] == '$' && nextLine_[2] == ' ' && nextLine_[3] == ' ' &&
1395 nextLine_[4] == ' ' && IsCompilerDirectiveSentinel(&nextLine_[1], 1)) {
1396 if (const char *col6{nextLine_ + 5};
1397 *col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(p: col6)) {
1398 if (atNewline && !IsSpace(p: nextLine_ + 6)) {
1399 brokenToken_ = true;
1400 }
1401 return nextLine_ + 6;
1402 } else {
1403 return nullptr;
1404 }
1405 }
1406 if (col1 == '&' &&
1407 features_.IsEnabled(
1408 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
1409 // Extension: '&' as continuation marker
1410 if (features_.ShouldWarn(
1411 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
1412 Say(LanguageFeature::FixedFormContinuationWithColumn1Ampersand,
1413 GetProvenance(nextLine_), "nonstandard usage"_port_en_US);
1414 }
1415 return nextLine_ + 1;
1416 }
1417 if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') {
1418 tabInCurrentLine_ = true;
1419 return nextLine_ + 2; // VAX extension
1420 }
1421 if (canBeNonDirectiveContinuation) {
1422 const char *col6{nextLine_ + 5};
1423 if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(p: col6)) {
1424 if ((*col6 == 'i' || *col6 == 'I') && IsIncludeLine(start: nextLine_)) {
1425 // It's an INCLUDE line, not a continuation
1426 } else {
1427 return nextLine_ + 6;
1428 }
1429 }
1430 }
1431 if (IsImplicitContinuation()) {
1432 return nextLine_;
1433 }
1434 }
1435 return nullptr; // not a continuation line
1436}
1437
1438const char *Prescanner::FreeFormContinuationLine(bool ampersand) {
1439 const char *lineStart{nextLine_};
1440 const char *p{lineStart};
1441 if (p >= limit_) {
1442 return nullptr;
1443 }
1444 p = SkipWhiteSpaceIncludingEmptyMacros(p);
1445 if (InCompilerDirective()) {
1446 if (InOpenMPConditionalLine()) {
1447 if (preprocessingOnly_) {
1448 // in -E mode, don't treat !$ as a continuation
1449 return nullptr;
1450 } else if (p[0] == '!' && p[1] == '$') {
1451 // accept but do not require a matching sentinel
1452 if (p[2] != '&' && !IsSpaceOrTab(p: &p[2])) {
1453 return nullptr; // not !$
1454 }
1455 p += 2;
1456 }
1457 } else if (*p++ == '!') {
1458 for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) {
1459 if (*s != ToLowerCaseLetter(*p)) {
1460 return nullptr; // not the same directive class
1461 }
1462 }
1463 } else {
1464 return nullptr;
1465 }
1466 p = SkipWhiteSpace(p);
1467 if (*p == '&') {
1468 if (!ampersand) {
1469 brokenToken_ = true;
1470 }
1471 return p + 1;
1472 } else if (ampersand) {
1473 return p;
1474 } else {
1475 return nullptr;
1476 }
1477 }
1478 if (p[0] == '!' && p[1] == '$' && !preprocessingOnly_ &&
1479 features_.IsEnabled(LanguageFeature::OpenMP)) {
1480 // !$ conditional line can be a continuation
1481 p = lineStart = SkipWhiteSpace(p: p + 2);
1482 }
1483 if (*p == '&') {
1484 return p + 1;
1485 } else if (*p == '!' || *p == '\n' || *p == '#') {
1486 return nullptr;
1487 } else if (ampersand || IsImplicitContinuation()) {
1488 if (continuationInCharLiteral_) {
1489 // 'a'& -> 'a''b' == "a'b"
1490 // 'b'
1491 if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
1492 Say(common::LanguageFeature::MiscSourceExtensions,
1493 GetProvenanceRange(p, p + 1),
1494 "Character literal continuation line should have been preceded by '&'"_port_en_US);
1495 }
1496 } else if (p > lineStart && IsSpaceOrTab(p: p - 1)) {
1497 --p;
1498 } else {
1499 brokenToken_ = true;
1500 }
1501 return p;
1502 } else {
1503 return nullptr;
1504 }
1505}
1506
1507bool Prescanner::FixedFormContinuation(bool atNewline) {
1508 // N.B. We accept '&' as a continuation indicator in fixed form, too,
1509 // but not in a character literal.
1510 if (*at_ == '&' && inCharLiteral_) {
1511 return false;
1512 }
1513 do {
1514 if (const char *cont{FixedFormContinuationLine(atNewline)}) {
1515 BeginSourceLine(at: cont);
1516 column_ = 7;
1517 NextLine();
1518 return true;
1519 }
1520 } while (SkipCommentLine(afterAmpersand: false /* not after ampersand */));
1521 return false;
1522}
1523
1524bool Prescanner::FreeFormContinuation() {
1525 const char *p{at_};
1526 bool ampersand{*p == '&'};
1527 if (ampersand) {
1528 p = SkipWhiteSpace(p: p + 1);
1529 }
1530 if (*p != '\n') {
1531 if (inCharLiteral_) {
1532 return false;
1533 } else if (*p == '!') { // & ! comment - ok
1534 } else if (ampersand && isPossibleMacroCall_ && (*p == ',' || *p == ')')) {
1535 return false; // allow & at end of a macro argument
1536 } else if (ampersand && preprocessingOnly_ && !parenthesisNesting_) {
1537 return false; // allow & at start of line, maybe after !$
1538 } else if (features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) {
1539 Say(LanguageFeature::CruftAfterAmpersand, GetProvenance(p),
1540 "missing ! before comment after &"_warn_en_US);
1541 }
1542 }
1543 do {
1544 if (const char *cont{FreeFormContinuationLine(ampersand)}) {
1545 BeginSourceLine(at: cont);
1546 NextLine();
1547 return true;
1548 }
1549 } while (SkipCommentLine(afterAmpersand: ampersand));
1550 return false;
1551}
1552
1553// Implicit line continuation allows a preprocessor macro call with
1554// arguments to span multiple lines.
1555bool Prescanner::IsImplicitContinuation() const {
1556 return !inPreprocessorDirective_ && !inCharLiteral_ && isPossibleMacroCall_ &&
1557 parenthesisNesting_ > 0 && !IsAtEnd() &&
1558 ClassifyLine(nextLine_).kind == LineClassification::Kind::Source;
1559}
1560
1561bool Prescanner::Continuation(bool mightNeedFixedFormSpace) {
1562 if (disableSourceContinuation_) {
1563 return false;
1564 } else if (*at_ == '\n' || *at_ == '&') {
1565 if (inFixedForm_) {
1566 return FixedFormContinuation(atNewline: mightNeedFixedFormSpace);
1567 } else {
1568 return FreeFormContinuation();
1569 }
1570 } else if (*at_ == '\\' && at_ + 2 == nextLine_ &&
1571 backslashFreeFormContinuation_ && !inFixedForm_ && nextLine_ < limit_) {
1572 // cpp-like handling of \ at end of a free form source line
1573 BeginSourceLine(at: nextLine_);
1574 NextLine();
1575 return true;
1576 } else {
1577 return false;
1578 }
1579}
1580
1581std::optional<Prescanner::LineClassification>
1582Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const {
1583 const char *p{start};
1584 char col1{*p++};
1585 if (!IsFixedFormCommentChar(ch: col1)) {
1586 return std::nullopt;
1587 }
1588 char sentinel[5], *sp{sentinel};
1589 int column{2};
1590 for (; column < 6; ++column) {
1591 if (*p == '\n' || IsSpaceOrTab(p) || IsDecimalDigit(*p)) {
1592 break;
1593 }
1594 *sp++ = ToLowerCaseLetter(*p++);
1595 }
1596 if (sp == sentinel) {
1597 return std::nullopt;
1598 }
1599 *sp = '\0';
1600 // A fixed form OpenMP conditional compilation sentinel must satisfy the
1601 // following criteria, for initial lines:
1602 // - Columns 3 through 5 must have only white space or numbers.
1603 // - Column 6 must be space or zero.
1604 bool isOpenMPConditional{sp == &sentinel[1] && sentinel[0] == '$'};
1605 bool hadDigit{false};
1606 if (isOpenMPConditional) {
1607 for (; column < 6; ++column, ++p) {
1608 if (IsDecimalDigit(*p)) {
1609 hadDigit = true;
1610 } else if (!IsSpaceOrTab(p)) {
1611 return std::nullopt;
1612 }
1613 }
1614 }
1615 if (column == 6) {
1616 if (*p == '0') {
1617 ++p;
1618 } else if (int n{IsSpaceOrTab(p)}) {
1619 p += n;
1620 } else if (isOpenMPConditional && preprocessingOnly_ && !hadDigit &&
1621 *p != '\n') {
1622 // In -E mode, "!$ &" is treated as a directive
1623 } else {
1624 // This is a Continuation line, not an initial directive line.
1625 return std::nullopt;
1626 }
1627 }
1628 if (const char *ss{IsCompilerDirectiveSentinel(
1629 sentinel, static_cast<std::size_t>(sp - sentinel))}) {
1630 return {
1631 LineClassification{LineClassification::Kind::CompilerDirective, 0, ss}};
1632 }
1633 return std::nullopt;
1634}
1635
1636std::optional<Prescanner::LineClassification>
1637Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const {
1638 if (const char *p{SkipWhiteSpaceIncludingEmptyMacros(p: start)};
1639 p && *p++ == '!') {
1640 if (auto maybePair{IsCompilerDirectiveSentinel(p)}) {
1641 auto offset{static_cast<std::size_t>(p - start - 1)};
1642 return {LineClassification{LineClassification::Kind::CompilerDirective,
1643 offset, maybePair->first}};
1644 }
1645 }
1646 return std::nullopt;
1647}
1648
1649Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) {
1650 std::uint64_t packed{0};
1651 for (char ch : dir) {
1652 packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff);
1653 }
1654 compilerDirectiveBloomFilter_.set(position: packed % prime1);
1655 compilerDirectiveBloomFilter_.set(position: packed % prime2);
1656 compilerDirectiveSentinels_.insert(x: dir);
1657 return *this;
1658}
1659
1660const char *Prescanner::IsCompilerDirectiveSentinel(
1661 const char *sentinel, std::size_t len) const {
1662 std::uint64_t packed{0};
1663 for (std::size_t j{0}; j < len; ++j) {
1664 packed = (packed << 8) | (sentinel[j] & 0xff);
1665 }
1666 if (len == 0 || !compilerDirectiveBloomFilter_.test(position: packed % prime1) ||
1667 !compilerDirectiveBloomFilter_.test(position: packed % prime2)) {
1668 return nullptr;
1669 }
1670 const auto iter{compilerDirectiveSentinels_.find(x: std::string(sentinel, len))};
1671 return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str();
1672}
1673
1674const char *Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const {
1675 const char *p{token.begin()};
1676 const char *end{p + token.size()};
1677 while (p < end && (*p == ' ' || *p == '\n')) {
1678 ++p;
1679 }
1680 if (p < end && *p == '!') {
1681 ++p;
1682 }
1683 while (end > p && (end[-1] == ' ' || end[-1] == '\t')) {
1684 --end;
1685 }
1686 return end > p && IsCompilerDirectiveSentinel(sentinel: p, len: end - p) ? p : nullptr;
1687}
1688
1689std::optional<std::pair<const char *, const char *>>
1690Prescanner::IsCompilerDirectiveSentinel(const char *p) const {
1691 char sentinel[8];
1692 for (std::size_t j{0}; j + 1 < sizeof sentinel; ++p, ++j) {
1693 if (int n{IsSpaceOrTab(p)};
1694 n || !(IsLetter(*p) || *p == '$' || *p == '@')) {
1695 if (j > 0) {
1696 if (j == 1 && sentinel[0] == '$' && n == 0 && *p != '&' && *p != '\n') {
1697 // Free form OpenMP conditional compilation line sentinels have to
1698 // be immediately followed by a space or &, not a digit
1699 // or anything else. A newline also works for an initial line.
1700 break;
1701 }
1702 sentinel[j] = '\0';
1703 if (*p != '!') {
1704 if (const char *sp{IsCompilerDirectiveSentinel(sentinel, len: j)}) {
1705 return std::make_pair(x&: sp, y&: p);
1706 }
1707 }
1708 }
1709 break;
1710 } else {
1711 sentinel[j] = ToLowerCaseLetter(*p);
1712 }
1713 }
1714 return std::nullopt;
1715}
1716
1717constexpr bool IsDirective(const char *match, const char *dir) {
1718 for (; *match; ++match) {
1719 if (*match != ToLowerCaseLetter(*dir++)) {
1720 return false;
1721 }
1722 }
1723 return true;
1724}
1725
1726Prescanner::LineClassification Prescanner::ClassifyLine(
1727 const char *start) const {
1728 if (inFixedForm_) {
1729 if (std::optional<LineClassification> lc{
1730 IsFixedFormCompilerDirectiveLine(start)}) {
1731 return std::move(*lc);
1732 }
1733 if (IsFixedFormCommentLine(start)) {
1734 return {LineClassification::Kind::Comment};
1735 }
1736 } else {
1737 if (std::optional<LineClassification> lc{
1738 IsFreeFormCompilerDirectiveLine(start)}) {
1739 return std::move(*lc);
1740 }
1741 if (const char *bang{IsFreeFormComment(p: start)}) {
1742 return {LineClassification::Kind::Comment,
1743 static_cast<std::size_t>(bang - start)};
1744 }
1745 }
1746 if (std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) {
1747 return {LineClassification::Kind::IncludeLine, *quoteOffset};
1748 }
1749 if (const char *dir{IsPreprocessorDirectiveLine(start)}) {
1750 if (IsDirective(match: "if", dir) || IsDirective(match: "elif", dir) ||
1751 IsDirective(match: "else", dir) || IsDirective(match: "endif", dir)) {
1752 return {LineClassification::Kind::ConditionalCompilationDirective};
1753 } else if (IsDirective(match: "include", dir)) {
1754 return {LineClassification::Kind::IncludeDirective};
1755 } else if (IsDirective(match: "define", dir) || IsDirective(match: "undef", dir)) {
1756 return {LineClassification::Kind::DefinitionDirective};
1757 } else {
1758 return {LineClassification::Kind::PreprocessorDirective};
1759 }
1760 }
1761 return {LineClassification::Kind::Source};
1762}
1763
1764Prescanner::LineClassification Prescanner::ClassifyLine(
1765 TokenSequence &tokens, Provenance newlineProvenance) const {
1766 // Append a newline temporarily.
1767 tokens.PutNextTokenChar('\n', newlineProvenance);
1768 tokens.CloseToken();
1769 const char *ppd{tokens.ToCharBlock().begin()};
1770 LineClassification classification{ClassifyLine(start: ppd)};
1771 tokens.pop_back(); // remove the newline
1772 return classification;
1773}
1774
1775void Prescanner::SourceFormChange(std::string &&dir) {
1776 if (dir == "!dir$ free") {
1777 inFixedForm_ = false;
1778 } else if (dir == "!dir$ fixed") {
1779 inFixedForm_ = true;
1780 }
1781}
1782
1783// Acquire and append compiler directive continuation lines to
1784// the tokens that constitute a compiler directive, even when those
1785// directive continuation lines are the result of macro expansion.
1786// (Not used when neither the original compiler directive line nor
1787// the directive continuation line result from preprocessing; regular
1788// line continuation during tokenization handles that normal case.)
1789bool Prescanner::CompilerDirectiveContinuation(
1790 TokenSequence &tokens, const char *origSentinel) {
1791 if (inFixedForm_ || tokens.empty() ||
1792 tokens.TokenAt(tokens.SizeInTokens() - 1) != "&" ||
1793 (preprocessingOnly_ && !parenthesisNesting_)) {
1794 return false;
1795 }
1796 LineClassification followingLine{ClassifyLine(start: nextLine_)};
1797 if (followingLine.kind == LineClassification::Kind::Comment) {
1798 nextLine_ += followingLine.payloadOffset; // advance to '!' or newline
1799 NextLine();
1800 return true;
1801 }
1802 CHECK(origSentinel != nullptr);
1803 directiveSentinel_ = origSentinel; // so InCompilerDirective() is true
1804 const char *nextContinuation{
1805 followingLine.kind == LineClassification::Kind::CompilerDirective
1806 ? FreeFormContinuationLine(ampersand: true)
1807 : nullptr};
1808 if (!nextContinuation &&
1809 followingLine.kind != LineClassification::Kind::Source) {
1810 return false;
1811 }
1812 auto origNextLine{nextLine_};
1813 BeginSourceLine(at: nextLine_);
1814 NextLine();
1815 if (nextContinuation) {
1816 // What follows is !DIR$ & xxx; skip over the & so that it
1817 // doesn't cause a spurious continuation.
1818 at_ = nextContinuation;
1819 } else {
1820 // What follows looks like a source line before macro expansion,
1821 // but might become a directive continuation afterwards.
1822 SkipSpaces();
1823 }
1824 TokenSequence followingTokens;
1825 while (NextToken(followingTokens)) {
1826 }
1827 if (auto followingPrepro{
1828 preprocessor_.MacroReplacement(followingTokens, *this)}) {
1829 followingTokens = std::move(*followingPrepro);
1830 }
1831 followingTokens.RemoveRedundantBlanks();
1832 std::size_t startAt{0};
1833 std::size_t following{followingTokens.SizeInTokens()};
1834 bool ok{false};
1835 if (nextContinuation) {
1836 ok = true;
1837 } else {
1838 startAt = 2;
1839 if (startAt < following && followingTokens.TokenAt(0) == "!") {
1840 CharBlock sentinel{followingTokens.TokenAt(1)};
1841 if (!sentinel.empty() &&
1842 std::memcmp(s1: sentinel.begin(), s2: origSentinel, n: sentinel.size()) == 0) {
1843 ok = true;
1844 while (
1845 startAt < following && followingTokens.TokenAt(startAt).IsBlank()) {
1846 ++startAt;
1847 }
1848 if (startAt < following && followingTokens.TokenAt(startAt) == "&") {
1849 ++startAt;
1850 }
1851 }
1852 }
1853 }
1854 if (ok) {
1855 tokens.pop_back(); // delete original '&'
1856 tokens.AppendRange(followingTokens, startAt, following - startAt);
1857 tokens.RemoveRedundantBlanks();
1858 } else {
1859 nextLine_ = origNextLine;
1860 }
1861 return ok;
1862}
1863
1864// Similar, but for source line continuation after macro replacement.
1865bool Prescanner::SourceLineContinuation(TokenSequence &tokens) {
1866 if (!inFixedForm_ && !tokens.empty() &&
1867 tokens.TokenAt(tokens.SizeInTokens() - 1) == "&") {
1868 LineClassification followingLine{ClassifyLine(start: nextLine_)};
1869 if (followingLine.kind == LineClassification::Kind::Comment) {
1870 nextLine_ += followingLine.payloadOffset; // advance to '!' or newline
1871 NextLine();
1872 return true;
1873 } else if (const char *nextContinuation{FreeFormContinuationLine(ampersand: true)}) {
1874 BeginSourceLine(at: nextLine_);
1875 NextLine();
1876 TokenSequence followingTokens;
1877 at_ = nextContinuation;
1878 while (NextToken(followingTokens)) {
1879 }
1880 if (auto followingPrepro{
1881 preprocessor_.MacroReplacement(followingTokens, *this)}) {
1882 followingTokens = std::move(*followingPrepro);
1883 }
1884 followingTokens.RemoveRedundantBlanks();
1885 tokens.pop_back(); // delete original '&'
1886 tokens.CopyAll(followingTokens);
1887 return true;
1888 }
1889 }
1890 return false;
1891}
1892} // namespace Fortran::parser
1893

source code of flang/lib/Parser/prescan.cpp