1//===-- lib/Parser/prescan.cpp --------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "prescan.h"
10#include "flang/Common/idioms.h"
11#include "flang/Parser/characters.h"
12#include "flang/Parser/message.h"
13#include "flang/Parser/preprocessor.h"
14#include "flang/Parser/source.h"
15#include "flang/Parser/token-sequence.h"
16#include "llvm/Support/raw_ostream.h"
17#include <cstddef>
18#include <cstring>
19#include <utility>
20#include <vector>
21
22namespace Fortran::parser {
23
24using common::LanguageFeature;
25
// Upper bound on a prescanner's nesting depth (prescannerNesting_).
// Prescan() reports an error and stops scanning when the depth exceeds it.
static constexpr int maxPrescannerNesting{100};
27
28Prescanner::Prescanner(Messages &messages, CookedSource &cooked,
29 Preprocessor &preprocessor, common::LanguageFeatureControl lfc)
30 : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor},
31 allSources_{preprocessor_.allSources()}, features_{lfc},
32 encoding_{allSources_.encoding()} {}
33
34Prescanner::Prescanner(const Prescanner &that)
35 : messages_{that.messages_}, cooked_{that.cooked_},
36 preprocessor_{that.preprocessor_}, allSources_{that.allSources_},
37 features_{that.features_}, inFixedForm_{that.inFixedForm_},
38 fixedFormColumnLimit_{that.fixedFormColumnLimit_},
39 encoding_{that.encoding_}, prescannerNesting_{that.prescannerNesting_ +
40 1},
41 skipLeadingAmpersand_{that.skipLeadingAmpersand_},
42 compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_},
43 compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {}
44
// True when `ch` is one of the characters ('!', '*', 'C', 'c') that this
// file treats as introducing a fixed-form comment line.
static inline constexpr bool IsFixedFormCommentChar(char ch) {
  switch (ch) {
  case '!':
  case '*':
  case 'C':
  case 'c':
    return true;
  default:
    return false;
  }
}
48
49static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) {
50 char *p{dir.GetMutableCharData()};
51 char *limit{p + dir.SizeInChars()};
52 for (; p < limit; ++p) {
53 if (*p != ' ') {
54 CHECK(IsFixedFormCommentChar(ch: *p));
55 *p = '!';
56 return;
57 }
58 }
59 DIE("compiler directive all blank");
60}
61
62void Prescanner::Prescan(ProvenanceRange range) {
63 startProvenance_ = range.start();
64 start_ = allSources_.GetSource(range);
65 CHECK(start_);
66 limit_ = start_ + range.size();
67 nextLine_ = start_;
68 const bool beganInFixedForm{inFixedForm_};
69 if (prescannerNesting_ > maxPrescannerNesting) {
70 Say(GetProvenance(start_),
71 "too many nested INCLUDE/#include files, possibly circular"_err_en_US);
72 return;
73 }
74 while (!IsAtEnd()) {
75 Statement();
76 }
77 if (inFixedForm_ != beganInFixedForm) {
78 std::string dir{"!dir$ "};
79 if (beganInFixedForm) {
80 dir += "fixed";
81 } else {
82 dir += "free";
83 }
84 dir += '\n';
85 TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()};
86 tokens.Emit(cooked_);
87 }
88}
89
90void Prescanner::Statement() {
91 TokenSequence tokens;
92 const char *statementStart{nextLine_};
93 LineClassification line{ClassifyLine(statementStart)};
94 switch (line.kind) {
95 case LineClassification::Kind::Comment:
96 nextLine_ += line.payloadOffset; // advance to '!' or newline
97 NextLine();
98 return;
99 case LineClassification::Kind::IncludeLine:
100 FortranInclude(quote: nextLine_ + line.payloadOffset);
101 NextLine();
102 return;
103 case LineClassification::Kind::ConditionalCompilationDirective:
104 case LineClassification::Kind::IncludeDirective:
105 case LineClassification::Kind::DefinitionDirective:
106 case LineClassification::Kind::PreprocessorDirective:
107 preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
108 return;
109 case LineClassification::Kind::CompilerDirective: {
110 directiveSentinel_ = line.sentinel;
111 CHECK(InCompilerDirective());
112 BeginStatementAndAdvance();
113 if (inFixedForm_) {
114 CHECK(IsFixedFormCommentChar(ch: *at_));
115 } else {
116 while (*at_ == ' ' || *at_ == '\t') {
117 ++at_, ++column_;
118 }
119 CHECK(*at_ == '!');
120 }
121 std::optional<int> condOffset;
122 if (directiveSentinel_[0] == '$' && directiveSentinel_[1] == '\0') {
123 // OpenMP conditional compilation line.
124 condOffset = 2;
125 } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'c' &&
126 directiveSentinel_[2] == 'u' && directiveSentinel_[3] == 'f' &&
127 directiveSentinel_[4] == '\0') {
128 // CUDA conditional compilation line.
129 condOffset = 5;
130 }
131 if (condOffset) {
132 at_ += *condOffset, column_ += *condOffset;
133 if (auto payload{IsIncludeLine(at_)}) {
134 FortranInclude(quote: at_ + *payload);
135 return;
136 } else if (inFixedForm_) {
137 LabelField(tokens);
138 } else {
139 SkipSpaces();
140 }
141 } else {
142 // Compiler directive. Emit normalized sentinel, squash following spaces.
143 EmitChar(tokens, '!');
144 ++at_, ++column_;
145 for (const char *sp{directiveSentinel_}; *sp != '\0';
146 ++sp, ++at_, ++column_) {
147 EmitChar(tokens, *sp);
148 }
149 if (*at_ == ' ' || *at_ == '\t') {
150 EmitChar(tokens, ' ');
151 while (*at_ == ' ' || *at_ == '\t') {
152 ++at_, ++column_;
153 }
154 }
155 tokens.CloseToken();
156 }
157 break;
158 }
159 case LineClassification::Kind::Source:
160 BeginStatementAndAdvance();
161 if (inFixedForm_) {
162 if (features_.IsEnabled(LanguageFeature::OldDebugLines) &&
163 (*at_ == 'D' || *at_ == 'd')) {
164 NextChar();
165 }
166 LabelField(tokens);
167 } else if (skipLeadingAmpersand_) {
168 skipLeadingAmpersand_ = false;
169 const char *p{SkipWhiteSpace(at_)};
170 if (p < limit_ && *p == '&') {
171 column_ += ++p - at_;
172 at_ = p;
173 }
174 } else {
175 SkipSpaces();
176 }
177 break;
178 }
179
180 while (NextToken(tokens)) {
181 }
182 if (continuationLines_ > 255) {
183 if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
184 Say(GetProvenance(statementStart),
185 "%d continuation lines is more than the Fortran standard allows"_port_en_US,
186 continuationLines_);
187 }
188 }
189
190 Provenance newlineProvenance{GetCurrentProvenance()};
191 if (std::optional<TokenSequence> preprocessed{
192 preprocessor_.MacroReplacement(tokens, *this)}) {
193 // Reprocess the preprocessed line. Append a newline temporarily.
194 preprocessed->PutNextTokenChar('\n', newlineProvenance);
195 preprocessed->CloseToken();
196 const char *ppd{preprocessed->ToCharBlock().begin()};
197 LineClassification ppl{ClassifyLine(ppd)};
198 preprocessed->pop_back(); // remove the newline
199 switch (ppl.kind) {
200 case LineClassification::Kind::Comment:
201 break;
202 case LineClassification::Kind::IncludeLine:
203 FortranInclude(quote: ppd + ppl.payloadOffset);
204 break;
205 case LineClassification::Kind::ConditionalCompilationDirective:
206 case LineClassification::Kind::IncludeDirective:
207 case LineClassification::Kind::DefinitionDirective:
208 case LineClassification::Kind::PreprocessorDirective:
209 Say(preprocessed->GetProvenanceRange(),
210 "Preprocessed line resembles a preprocessor directive"_warn_en_US);
211 preprocessed->ToLowerCase()
212 .CheckBadFortranCharacters(messages_, *this)
213 .CheckBadParentheses(messages_)
214 .Emit(cooked_);
215 break;
216 case LineClassification::Kind::CompilerDirective:
217 if (preprocessed->HasRedundantBlanks()) {
218 preprocessed->RemoveRedundantBlanks();
219 }
220 while (CompilerDirectiveContinuation(*preprocessed, ppl.sentinel)) {
221 newlineProvenance = GetCurrentProvenance();
222 }
223 NormalizeCompilerDirectiveCommentMarker(*preprocessed);
224 preprocessed->ToLowerCase();
225 SourceFormChange(preprocessed->ToString());
226 preprocessed->ClipComment(*this, true /* skip first ! */)
227 .CheckBadFortranCharacters(messages_, *this)
228 .CheckBadParentheses(messages_)
229 .Emit(cooked_);
230 break;
231 case LineClassification::Kind::Source:
232 if (inFixedForm_) {
233 if (preprocessed->HasBlanks(/*after column*/ 6)) {
234 preprocessed->RemoveBlanks(/*after column*/ 6);
235 }
236 } else {
237 while (SourceLineContinuation(*preprocessed)) {
238 newlineProvenance = GetCurrentProvenance();
239 }
240 if (preprocessed->HasRedundantBlanks()) {
241 preprocessed->RemoveRedundantBlanks();
242 }
243 }
244 preprocessed->ToLowerCase()
245 .ClipComment(*this)
246 .CheckBadFortranCharacters(messages_, *this)
247 .CheckBadParentheses(messages_)
248 .Emit(cooked_);
249 break;
250 }
251 } else {
252 if (line.kind == LineClassification::Kind::CompilerDirective) {
253 while (CompilerDirectiveContinuation(tokens, line.sentinel)) {
254 newlineProvenance = GetCurrentProvenance();
255 }
256 tokens.ToLowerCase();
257 SourceFormChange(tokens.ToString());
258 } else { // Kind::Source
259 tokens.ToLowerCase();
260 if (inFixedForm_) {
261 EnforceStupidEndStatementRules(tokens);
262 }
263 }
264 tokens.CheckBadFortranCharacters(messages_, *this)
265 .CheckBadParentheses(messages_)
266 .Emit(cooked_);
267 }
268 if (omitNewline_) {
269 omitNewline_ = false;
270 } else {
271 cooked_.Put('\n', newlineProvenance);
272 }
273 directiveSentinel_ = nullptr;
274}
275
276TokenSequence Prescanner::TokenizePreprocessorDirective() {
277 CHECK(!IsAtEnd() && !inPreprocessorDirective_);
278 inPreprocessorDirective_ = true;
279 BeginStatementAndAdvance();
280 TokenSequence tokens;
281 while (NextToken(tokens)) {
282 }
283 inPreprocessorDirective_ = false;
284 return tokens;
285}
286
287void Prescanner::NextLine() {
288 void *vstart{static_cast<void *>(const_cast<char *>(nextLine_))};
289 void *v{std::memchr(s: vstart, c: '\n', n: limit_ - nextLine_)};
290 if (!v) {
291 nextLine_ = limit_;
292 } else {
293 const char *nl{const_cast<const char *>(static_cast<char *>(v))};
294 nextLine_ = nl + 1;
295 }
296}
297
298void Prescanner::LabelField(TokenSequence &token) {
299 int outCol{1};
300 const char *start{at_};
301 std::optional<int> badColumn;
302 for (; *at_ != '\n' && column_ <= 6; ++at_) {
303 if (*at_ == '\t') {
304 ++at_;
305 column_ = 7;
306 break;
307 }
308 if (*at_ != ' ' &&
309 !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space
310 EmitChar(token, *at_);
311 ++outCol;
312 if (!badColumn && (column_ == 6 || !IsDecimalDigit(*at_))) {
313 badColumn = column_;
314 }
315 }
316 ++column_;
317 }
318 if (badColumn && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) {
319 Say(GetProvenance(start + *badColumn - 1),
320 *badColumn == 6
321 ? "Statement should not begin with a continuation line"_warn_en_US
322 : "Character in fixed-form label field must be a digit"_warn_en_US);
323 token.clear();
324 if (*badColumn < 6) {
325 at_ = start;
326 column_ = 1;
327 return;
328 }
329 outCol = 1;
330 }
331 if (outCol == 1) { // empty label field
332 // Emit a space so that, if the line is rescanned after preprocessing,
333 // a leading 'C' or 'D' won't be left-justified and then accidentally
334 // misinterpreted as a comment card.
335 EmitChar(token, ' ');
336 ++outCol;
337 }
338 token.CloseToken();
339 SkipToNextSignificantCharacter();
340 if (IsDecimalDigit(*at_)) {
341 if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
342 Say(GetCurrentProvenance(),
343 "Label digit is not in fixed-form label field"_port_en_US);
344 }
345 }
346}
347
348// 6.3.3.5: A program unit END statement, or any other statement whose
349// initial line resembles an END statement, shall not be continued in
350// fixed form source.
351void Prescanner::EnforceStupidEndStatementRules(const TokenSequence &tokens) {
352 CharBlock cBlock{tokens.ToCharBlock()};
353 const char *str{cBlock.begin()};
354 std::size_t n{cBlock.size()};
355 if (n < 3) {
356 return;
357 }
358 std::size_t j{0};
359 for (; j < n && (str[j] == ' ' || (str[j] >= '0' && str[j] <= '9')); ++j) {
360 }
361 if (j + 3 > n || std::memcmp(s1: str + j, s2: "end", n: 3) != 0) {
362 return;
363 }
364 // It starts with END, possibly after a label.
365 auto start{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
366 auto end{allSources_.GetSourcePosition(tokens.GetCharProvenance(n - 1))};
367 if (!start || !end) {
368 return;
369 }
370 if (&*start->sourceFile == &*end->sourceFile && start->line == end->line) {
371 return; // no continuation
372 }
373 j += 3;
374 static const char *const prefixes[]{"program", "subroutine", "function",
375 "blockdata", "module", "submodule", nullptr};
376 bool isPrefix{j == n || !IsLegalInIdentifier(str[j])}; // prefix is END
377 std::size_t endOfPrefix{j - 1};
378 for (const char *const *p{prefixes}; *p; ++p) {
379 std::size_t pLen{std::strlen(s: *p)};
380 if (j + pLen <= n && std::memcmp(s1: str + j, s2: *p, n: pLen) == 0) {
381 isPrefix = true; // END thing as prefix
382 j += pLen;
383 endOfPrefix = j - 1;
384 for (; j < n && IsLegalInIdentifier(str[j]); ++j) {
385 }
386 break;
387 }
388 }
389 if (isPrefix) {
390 auto range{tokens.GetTokenProvenanceRange(1)};
391 if (j == n) { // END or END thing [name]
392 Say(range,
393 "Program unit END statement may not be continued in fixed form source"_err_en_US);
394 } else {
395 auto endOfPrefixPos{
396 allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))};
397 auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))};
398 if (endOfPrefixPos && next &&
399 &*endOfPrefixPos->sourceFile == &*start->sourceFile &&
400 endOfPrefixPos->line == start->line &&
401 (&*next->sourceFile != &*start->sourceFile ||
402 next->line != start->line)) {
403 Say(range,
404 "Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US);
405 }
406 }
407 }
408}
409
410void Prescanner::SkipToEndOfLine() {
411 while (*at_ != '\n') {
412 ++at_, ++column_;
413 }
414}
415
416bool Prescanner::MustSkipToEndOfLine() const {
417 if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) {
418 return true; // skip over ignored columns in right margin (73:80)
419 } else if (*at_ == '!' && !inCharLiteral_) {
420 return true; // inline comment goes to end of source line
421 } else {
422 return false;
423 }
424}
425
426void Prescanner::NextChar() {
427 CHECK(*at_ != '\n');
428 ++at_, ++column_;
429 while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') {
430 // UTF-8 byte order mark - treat this file as UTF-8
431 at_ += 3;
432 encoding_ = Encoding::UTF_8;
433 }
434 SkipToNextSignificantCharacter();
435}
436
437// Skip everything that should be ignored until the next significant
438// character is reached; handles C-style comments in preprocessing
439// directives, Fortran ! comments, stuff after the right margin in
440// fixed form, and all forms of line continuation.
441bool Prescanner::SkipToNextSignificantCharacter() {
442 auto anyContinuationLine{false};
443 if (inPreprocessorDirective_) {
444 SkipCComments();
445 } else {
446 bool mightNeedSpace{false};
447 if (MustSkipToEndOfLine()) {
448 SkipToEndOfLine();
449 } else {
450 mightNeedSpace = *at_ == '\n';
451 }
452 for (; Continuation(mightNeedFixedFormSpace: mightNeedSpace); mightNeedSpace = false) {
453 anyContinuationLine = true;
454 ++continuationLines_;
455 if (MustSkipToEndOfLine()) {
456 SkipToEndOfLine();
457 }
458 }
459 if (*at_ == '\t') {
460 tabInCurrentLine_ = true;
461 }
462 }
463 return anyContinuationLine;
464}
465
466void Prescanner::SkipCComments() {
467 while (true) {
468 if (IsCComment(p: at_)) {
469 if (const char *after{SkipCComment(at_)}) {
470 column_ += after - at_;
471 // May have skipped over one or more newlines; relocate the start of
472 // the next line.
473 nextLine_ = at_ = after;
474 NextLine();
475 } else {
476 // Don't emit any messages about unclosed C-style comments, because
477 // the sequence /* can appear legally in a FORMAT statement. There's
478 // no ambiguity, since the sequence */ cannot appear legally.
479 break;
480 }
481 } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ &&
482 at_[1] == '\n' && !IsAtEnd()) {
483 BeginSourceLineAndAdvance();
484 } else {
485 break;
486 }
487 }
488}
489
490void Prescanner::SkipSpaces() {
491 while (*at_ == ' ' || *at_ == '\t') {
492 NextChar();
493 }
494 insertASpace_ = false;
495}
496
497const char *Prescanner::SkipWhiteSpace(const char *p) {
498 while (*p == ' ' || *p == '\t') {
499 ++p;
500 }
501 return p;
502}
503
504const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const {
505 while (true) {
506 if (*p == ' ' || *p == '\t') {
507 ++p;
508 } else if (IsCComment(p)) {
509 if (const char *after{SkipCComment(p)}) {
510 p = after;
511 } else {
512 break;
513 }
514 } else {
515 break;
516 }
517 }
518 return p;
519}
520
521const char *Prescanner::SkipCComment(const char *p) const {
522 char star{' '}, slash{' '};
523 p += 2;
524 while (star != '*' || slash != '/') {
525 if (p >= limit_) {
526 return nullptr; // signifies an unterminated comment
527 }
528 star = slash;
529 slash = *p++;
530 }
531 return p;
532}
533
534bool Prescanner::NextToken(TokenSequence &tokens) {
535 CHECK(at_ >= start_ && at_ < limit_);
536 if (InFixedFormSource()) {
537 SkipSpaces();
538 } else {
539 if (*at_ == '/' && IsCComment(p: at_)) {
540 // Recognize and skip over classic C style /*comments*/ when
541 // outside a character literal.
542 if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) {
543 Say(GetCurrentProvenance(),
544 "nonstandard usage: C-style comment"_port_en_US);
545 }
546 SkipCComments();
547 }
548 if (*at_ == ' ' || *at_ == '\t') {
549 // Compress free-form white space into a single space character.
550 const auto theSpace{at_};
551 char previous{at_ <= start_ ? ' ' : at_[-1]};
552 NextChar();
553 SkipSpaces();
554 if (*at_ == '\n') {
555 // Discard white space at the end of a line.
556 } else if (!inPreprocessorDirective_ &&
557 (previous == '(' || *at_ == '(' || *at_ == ')')) {
558 // Discard white space before/after '(' and before ')', unless in a
559 // preprocessor directive. This helps yield space-free contiguous
560 // names for generic interfaces like OPERATOR( + ) and
561 // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg).
562 // This has the effect of silently ignoring the illegal spaces in
563 // the array constructor ( /1,2/ ) but that seems benign; it's
564 // hard to avoid that while still removing spaces from OPERATOR( / )
565 // and OPERATOR( // ).
566 } else {
567 // Preserve the squashed white space as a single space character.
568 tokens.PutNextTokenChar(' ', GetProvenance(theSpace));
569 tokens.CloseToken();
570 return true;
571 }
572 }
573 }
574 if (insertASpace_) {
575 tokens.PutNextTokenChar(' ', spaceProvenance_);
576 insertASpace_ = false;
577 }
578 if (*at_ == '\n') {
579 return false;
580 }
581 const char *start{at_};
582 if (*at_ == '\'' || *at_ == '"') {
583 QuotedCharacterLiteral(tokens, start);
584 preventHollerith_ = false;
585 } else if (IsDecimalDigit(*at_)) {
586 int n{0}, digits{0};
587 static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)};
588 do {
589 if (n < maxHollerith) {
590 n = 10 * n + DecimalDigitValue(*at_);
591 }
592 EmitCharAndAdvance(tokens, *at_);
593 ++digits;
594 if (InFixedFormSource()) {
595 SkipSpaces();
596 }
597 } while (IsDecimalDigit(*at_));
598 if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith &&
599 !preventHollerith_) {
600 Hollerith(tokens, n, start);
601 } else if (*at_ == '.') {
602 while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
603 }
604 ExponentAndKind(tokens);
605 } else if (ExponentAndKind(tokens)) {
606 } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') &&
607 inPreprocessorDirective_) {
608 do {
609 EmitCharAndAdvance(tokens, *at_);
610 } while (IsHexadecimalDigit(*at_));
611 } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { // 4_"..."
612 EmitCharAndAdvance(tokens, *at_);
613 QuotedCharacterLiteral(tokens, start);
614 } else if (IsLetter(*at_) && !preventHollerith_ &&
615 parenthesisNesting_ > 0) {
616 // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that
617 // we don't misrecognize I9HOLLERITH as an identifier in the next case.
618 EmitCharAndAdvance(tokens, *at_);
619 }
620 preventHollerith_ = false;
621 } else if (*at_ == '.') {
622 char nch{EmitCharAndAdvance(tokens, '.')};
623 if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) {
624 while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) {
625 }
626 ExponentAndKind(tokens);
627 } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') {
628 EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis
629 }
630 preventHollerith_ = false;
631 } else if (IsLegalInIdentifier(*at_)) {
632 int parts{1};
633 const char *afterLast{nullptr};
634 do {
635 EmitChar(tokens, *at_);
636 ++at_, ++column_;
637 afterLast = at_;
638 if (SkipToNextSignificantCharacter() && IsLegalIdentifierStart(*at_)) {
639 tokens.CloseToken();
640 ++parts;
641 }
642 } while (IsLegalInIdentifier(*at_));
643 if (parts >= 3) {
644 // Subtlety: When an identifier is split across three or more continuation
645 // lines (or two continuation lines, immediately preceded or followed
646 // by '&' free form continuation line markers, its parts are kept as
647 // distinct pp-tokens so that macro operates on them independently.
648 // This trick accommodates the historic practice of using line
649 // continuation for token pasting after replacement.
650 } else if (parts == 2) {
651 if ((start > start_ && start[-1] == '&') ||
652 (afterLast < limit_ && (*afterLast == '&' || *afterLast == '\n'))) {
653 // call & call foo& call foo&
654 // &MACRO& OR &MACRO& OR &MACRO
655 // &foo(...) &(...)
656 } else {
657 tokens.ReopenLastToken();
658 }
659 }
660 if (InFixedFormSource()) {
661 SkipSpaces();
662 }
663 if ((*at_ == '\'' || *at_ == '"') &&
664 tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..."
665 QuotedCharacterLiteral(tokens, start);
666 preventHollerith_ = false;
667 } else {
668 preventHollerith_ = true; // DO 10 H = ...
669 }
670 } else if (*at_ == '*') {
671 if (EmitCharAndAdvance(tokens, '*') == '*') {
672 EmitCharAndAdvance(tokens, '*');
673 } else {
674 // Subtle ambiguity:
675 // CHARACTER*2H declares H because *2 is a kind specifier
676 // DATAC/N*2H / is repeated Hollerith
677 preventHollerith_ = !slashInCurrentStatement_;
678 }
679 } else {
680 char ch{*at_};
681 if (ch == '(') {
682 if (parenthesisNesting_++ == 0) {
683 isPossibleMacroCall_ = tokens.SizeInTokens() > 0 &&
684 preprocessor_.IsFunctionLikeDefinition(
685 tokens.TokenAt(tokens.SizeInTokens() - 1));
686 }
687 } else if (ch == ')' && parenthesisNesting_ > 0) {
688 --parenthesisNesting_;
689 }
690 char nch{EmitCharAndAdvance(tokens, ch)};
691 preventHollerith_ = false;
692 if ((nch == '=' &&
693 (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) ||
694 (ch == nch &&
695 (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' ||
696 ch == '|' || ch == '<' || ch == '>')) ||
697 (ch == '=' && nch == '>')) {
698 // token comprises two characters
699 EmitCharAndAdvance(tokens, nch);
700 } else if (ch == '/') {
701 slashInCurrentStatement_ = true;
702 } else if (ch == ';' && InFixedFormSource()) {
703 SkipSpaces();
704 if (IsDecimalDigit(*at_)) {
705 if (features_.ShouldWarn(
706 common::LanguageFeature::MiscSourceExtensions)) {
707 Say(GetProvenanceRange(at_, at_ + 1),
708 "Label should be in the label field"_port_en_US);
709 }
710 }
711 }
712 }
713 tokens.CloseToken();
714 return true;
715}
716
717bool Prescanner::ExponentAndKind(TokenSequence &tokens) {
718 char ed{ToLowerCaseLetter(*at_)};
719 if (ed != 'e' && ed != 'd') {
720 return false;
721 }
722 EmitCharAndAdvance(tokens, ed);
723 if (*at_ == '+' || *at_ == '-') {
724 EmitCharAndAdvance(tokens, *at_);
725 }
726 while (IsDecimalDigit(*at_)) {
727 EmitCharAndAdvance(tokens, *at_);
728 }
729 if (*at_ == '_') {
730 while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) {
731 }
732 }
733 return true;
734}
735
736void Prescanner::QuotedCharacterLiteral(
737 TokenSequence &tokens, const char *start) {
738 char quote{*at_};
739 const char *end{at_ + 1};
740 inCharLiteral_ = true;
741 continuationInCharLiteral_ = true;
742 const auto emit{[&](char ch) { EmitChar(tokens, ch); }};
743 const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }};
744 bool isEscaped{false};
745 bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)};
746 while (true) {
747 if (*at_ == '\\') {
748 if (escapesEnabled) {
749 isEscaped = !isEscaped;
750 } else {
751 // The parser always processes escape sequences, so don't confuse it
752 // when escapes are disabled.
753 insert('\\');
754 }
755 } else {
756 isEscaped = false;
757 }
758 EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false,
759 Encoding::LATIN_1);
760 while (PadOutCharacterLiteral(tokens)) {
761 }
762 if (*at_ == '\n') {
763 if (!inPreprocessorDirective_) {
764 Say(GetProvenanceRange(start, end),
765 "Incomplete character literal"_err_en_US);
766 }
767 break;
768 }
769 end = at_ + 1;
770 NextChar();
771 if (*at_ == quote && !isEscaped) {
772 // A doubled unescaped quote mark becomes a single instance of that
773 // quote character in the literal (later). There can be spaces between
774 // the quotes in fixed form source.
775 EmitChar(tokens, quote);
776 inCharLiteral_ = false; // for cases like print *, '...'!comment
777 NextChar();
778 if (InFixedFormSource()) {
779 SkipSpaces();
780 }
781 if (*at_ != quote) {
782 break;
783 }
784 inCharLiteral_ = true;
785 }
786 }
787 continuationInCharLiteral_ = false;
788 inCharLiteral_ = false;
789}
790
791void Prescanner::Hollerith(
792 TokenSequence &tokens, int count, const char *start) {
793 inCharLiteral_ = true;
794 CHECK(*at_ == 'h' || *at_ == 'H');
795 EmitChar(tokens, 'H');
796 while (count-- > 0) {
797 if (PadOutCharacterLiteral(tokens)) {
798 } else if (*at_ == '\n') {
799 Say(GetProvenanceRange(start, at_),
800 "Possible truncated Hollerith literal"_warn_en_US);
801 break;
802 } else {
803 NextChar();
804 // Each multi-byte character encoding counts as a single character.
805 // No escape sequences are recognized.
806 // Hollerith is always emitted to the cooked character
807 // stream in UTF-8.
808 DecodedCharacter decoded{DecodeCharacter(
809 encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)};
810 if (decoded.bytes > 0) {
811 EncodedCharacter utf8{
812 EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)};
813 for (int j{0}; j < utf8.bytes; ++j) {
814 EmitChar(tokens, utf8.buffer[j]);
815 }
816 at_ += decoded.bytes - 1;
817 } else {
818 Say(GetProvenanceRange(start, at_),
819 "Bad character in Hollerith literal"_err_en_US);
820 break;
821 }
822 }
823 }
824 if (*at_ != '\n') {
825 NextChar();
826 }
827 inCharLiteral_ = false;
828}
829
830// In fixed form, source card images must be processed as if they were at
831// least 72 columns wide, at least in character literal contexts.
832bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) {
833 while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') {
834 if (column_ < fixedFormColumnLimit_) {
835 tokens.PutNextTokenChar(' ', spaceProvenance_);
836 ++column_;
837 return true;
838 }
839 if (!FixedFormContinuation(mightNeedSpace: false /*no need to insert space*/) ||
840 tabInCurrentLine_) {
841 return false;
842 }
843 CHECK(column_ == 7);
844 --at_; // point to column 6 of continuation line
845 column_ = 6;
846 }
847 return false;
848}
849
850static bool IsAtProcess(const char *p) {
851 static const char pAtProc[]{"process"};
852 for (std::size_t i{0}; i < sizeof pAtProc - 1; ++i) {
853 if (ToLowerCaseLetter(*++p) != pAtProc[i])
854 return false;
855 }
856 return true;
857}
858
859bool Prescanner::IsFixedFormCommentLine(const char *start) const {
860 const char *p{start};
861
862 // The @process directive must start in column 1.
863 if (*p == '@' && IsAtProcess(p)) {
864 return true;
865 }
866
867 if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c.
868 ((*p == 'D' || *p == 'd') &&
869 !features_.IsEnabled(LanguageFeature::OldDebugLines))) {
870 return true;
871 }
872 bool anyTabs{false};
873 while (true) {
874 if (*p == ' ') {
875 ++p;
876 } else if (*p == '\t') {
877 anyTabs = true;
878 ++p;
879 } else if (*p == '0' && !anyTabs && p == start + 5) {
880 ++p; // 0 in column 6 must treated as a space
881 } else {
882 break;
883 }
884 }
885 if (!anyTabs && p >= start + fixedFormColumnLimit_) {
886 return true;
887 }
888 if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) {
889 return true;
890 }
891 return *p == '\n';
892}
893
894const char *Prescanner::IsFreeFormComment(const char *p) const {
895 p = SkipWhiteSpaceAndCComments(p);
896 if (*p == '!' || *p == '\n') {
897 return p;
898 } else if (*p == '@') {
899 return IsAtProcess(p) ? p : nullptr;
900 } else {
901 return nullptr;
902 }
903}
904
905std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const {
906 const char *p{SkipWhiteSpace(p: start)};
907 if (*p == '0' && inFixedForm_ && p == start + 5) {
908 // Accept " 0INCLUDE" in fixed form.
909 p = SkipWhiteSpace(p: p + 1);
910 }
911 for (const char *q{"include"}; *q; ++q) {
912 if (ToLowerCaseLetter(*p) != *q) {
913 return std::nullopt;
914 }
915 p = SkipWhiteSpace(p: p + 1);
916 }
917 if (IsDecimalDigit(*p)) { // accept & ignore a numeric kind prefix
918 for (p = SkipWhiteSpace(p: p + 1); IsDecimalDigit(*p);
919 p = SkipWhiteSpace(p: p + 1)) {
920 }
921 if (*p != '_') {
922 return std::nullopt;
923 }
924 p = SkipWhiteSpace(p: p + 1);
925 }
926 if (*p == '"' || *p == '\'') {
927 return {p - start};
928 }
929 return std::nullopt;
930}
931
932void Prescanner::FortranInclude(const char *firstQuote) {
933 const char *p{firstQuote};
934 while (*p != '"' && *p != '\'') {
935 ++p;
936 }
937 char quote{*p};
938 std::string path;
939 for (++p; *p != '\n'; ++p) {
940 if (*p == quote) {
941 if (p[1] != quote) {
942 break;
943 }
944 ++p;
945 }
946 path += *p;
947 }
948 if (*p != quote) {
949 Say(GetProvenanceRange(firstQuote, p),
950 "malformed path name string"_err_en_US);
951 return;
952 }
953 p = SkipWhiteSpace(p: p + 1);
954 if (*p != '\n' && *p != '!') {
955 const char *garbage{p};
956 for (; *p != '\n' && *p != '!'; ++p) {
957 }
958 Say(GetProvenanceRange(garbage, p),
959 "excess characters after path name"_warn_en_US);
960 }
961 std::string buf;
962 llvm::raw_string_ostream error{buf};
963 Provenance provenance{GetProvenance(nextLine_)};
964 std::optional<std::string> prependPath;
965 if (const SourceFile * currentFile{allSources_.GetSourceFile(provenance)}) {
966 prependPath = DirectoryName(currentFile->path());
967 }
968 const SourceFile *included{
969 allSources_.Open(path, error, std::move(prependPath))};
970 if (!included) {
971 Say(provenance, "INCLUDE: %s"_err_en_US, error.str());
972 } else if (included->bytes() > 0) {
973 ProvenanceRange includeLineRange{
974 provenance, static_cast<std::size_t>(p - nextLine_)};
975 ProvenanceRange fileRange{
976 allSources_.AddIncludedFile(*included, includeLineRange)};
977 Prescanner{*this}.set_encoding(included->encoding()).Prescan(fileRange);
978 }
979}
980
981const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const {
982 const char *p{start};
983 for (; *p == ' '; ++p) {
984 }
985 if (*p == '#') {
986 if (inFixedForm_ && p == start + 5) {
987 return nullptr;
988 }
989 } else {
990 p = SkipWhiteSpace(p);
991 if (*p != '#') {
992 return nullptr;
993 }
994 }
995 return SkipWhiteSpace(p: p + 1);
996}
997
998bool Prescanner::IsNextLinePreprocessorDirective() const {
999 return IsPreprocessorDirectiveLine(start: nextLine_) != nullptr;
1000}
1001
1002bool Prescanner::SkipCommentLine(bool afterAmpersand) {
1003 if (IsAtEnd()) {
1004 if (afterAmpersand && prescannerNesting_ > 0) {
1005 // A continuation marker at the end of the last line in an
1006 // include file inhibits the newline for that line.
1007 SkipToEndOfLine();
1008 omitNewline_ = true;
1009 }
1010 return false;
1011 }
1012 auto lineClass{ClassifyLine(nextLine_)};
1013 if (lineClass.kind == LineClassification::Kind::Comment) {
1014 NextLine();
1015 return true;
1016 } else if (inPreprocessorDirective_) {
1017 return false;
1018 } else if (lineClass.kind ==
1019 LineClassification::Kind::ConditionalCompilationDirective ||
1020 lineClass.kind == LineClassification::Kind::PreprocessorDirective) {
1021 // Allow conditional compilation directives (e.g., #ifdef) to affect
1022 // continuation lines.
1023 // Allow other preprocessor directives, too, except #include
1024 // (when it does not follow '&'), #define, and #undef (because
1025 // they cannot be allowed to affect preceding text on a
1026 // continued line).
1027 preprocessor_.Directive(TokenizePreprocessorDirective(), *this);
1028 return true;
1029 } else if (afterAmpersand &&
1030 (lineClass.kind == LineClassification::Kind::IncludeDirective ||
1031 lineClass.kind == LineClassification::Kind::IncludeLine)) {
1032 SkipToEndOfLine();
1033 omitNewline_ = true;
1034 skipLeadingAmpersand_ = true;
1035 return false;
1036 } else {
1037 return false;
1038 }
1039}
1040
1041const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) {
1042 if (IsAtEnd()) {
1043 return nullptr;
1044 }
1045 tabInCurrentLine_ = false;
1046 char col1{*nextLine_};
1047 if (IsFixedFormCommentChar(ch: col1)) {
1048 int j{1};
1049 if (InCompilerDirective()) {
1050 // Must be a continued compiler directive.
1051 for (; j < 5; ++j) {
1052 char ch{directiveSentinel_[j - 1]};
1053 if (ch == '\0') {
1054 break;
1055 }
1056 if (ch != ToLowerCaseLetter(nextLine_[j])) {
1057 return nullptr;
1058 }
1059 }
1060 } else if (features_.IsEnabled(LanguageFeature::OpenMP)) {
1061 // Fixed Source Form Conditional Compilation Sentinels.
1062 if (nextLine_[1] != '$') {
1063 return nullptr;
1064 }
1065 j++;
1066 } else {
1067 return nullptr;
1068 }
1069 for (; j < 5; ++j) {
1070 if (nextLine_[j] != ' ') {
1071 return nullptr;
1072 }
1073 }
1074 char col6{nextLine_[5]};
1075 if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') {
1076 if (nextLine_[6] != ' ' && mightNeedSpace) {
1077 insertASpace_ = true;
1078 }
1079 return nextLine_ + 6;
1080 }
1081 return nullptr;
1082 } else {
1083 // Normal case: not in a compiler directive.
1084 if (col1 == '&' &&
1085 features_.IsEnabled(
1086 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
1087 // Extension: '&' as continuation marker
1088 if (features_.ShouldWarn(
1089 LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) {
1090 Say(GetProvenance(nextLine_), "nonstandard usage"_port_en_US);
1091 }
1092 return nextLine_ + 1;
1093 }
1094 if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') {
1095 tabInCurrentLine_ = true;
1096 return nextLine_ + 2; // VAX extension
1097 }
1098 if ((col1 == ' ' ||
1099 ((col1 == 'D' || col1 == 'd') &&
1100 features_.IsEnabled(LanguageFeature::OldDebugLines))) &&
1101 nextLine_[1] == ' ' && nextLine_[2] == ' ' && nextLine_[3] == ' ' &&
1102 nextLine_[4] == ' ') {
1103 char col6{nextLine_[5]};
1104 if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') {
1105 if ((col6 == 'i' || col6 == 'I') && IsIncludeLine(start: nextLine_)) {
1106 // It's An INCLUDE line, not a continuation
1107 } else {
1108 return nextLine_ + 6;
1109 }
1110 }
1111 }
1112 if (IsImplicitContinuation()) {
1113 return nextLine_;
1114 }
1115 }
1116 return nullptr; // not a continuation line
1117}
1118
1119const char *Prescanner::FreeFormContinuationLine(bool ampersand) {
1120 const char *p{nextLine_};
1121 if (p >= limit_) {
1122 return nullptr;
1123 }
1124 p = SkipWhiteSpace(p);
1125 if (InCompilerDirective()) {
1126 if (*p++ != '!') {
1127 return nullptr;
1128 }
1129 for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) {
1130 if (*s != ToLowerCaseLetter(*p)) {
1131 return nullptr;
1132 }
1133 }
1134 p = SkipWhiteSpace(p);
1135 if (*p == '&') {
1136 if (!ampersand) {
1137 insertASpace_ = true;
1138 }
1139 return p + 1;
1140 } else if (ampersand) {
1141 return p;
1142 } else {
1143 return nullptr;
1144 }
1145 } else {
1146 if (*p == '&') {
1147 return p + 1;
1148 } else if (*p == '!' || *p == '\n' || *p == '#') {
1149 return nullptr;
1150 } else if (ampersand || IsImplicitContinuation()) {
1151 if (continuationInCharLiteral_) {
1152 // 'a'& -> 'a''b' == "a'b"
1153 // 'b'
1154 if (features_.ShouldWarn(
1155 common::LanguageFeature::MiscSourceExtensions)) {
1156 Say(GetProvenanceRange(p, p + 1),
1157 "Character literal continuation line should have been preceded by '&'"_port_en_US);
1158 }
1159 } else if (p > nextLine_) {
1160 --p;
1161 } else {
1162 insertASpace_ = true;
1163 }
1164 return p;
1165 } else {
1166 return nullptr;
1167 }
1168 }
1169}
1170
1171bool Prescanner::FixedFormContinuation(bool mightNeedSpace) {
1172 // N.B. We accept '&' as a continuation indicator in fixed form, too,
1173 // but not in a character literal.
1174 if (*at_ == '&' && inCharLiteral_) {
1175 return false;
1176 }
1177 do {
1178 if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) {
1179 BeginSourceLine(at: cont);
1180 column_ = 7;
1181 NextLine();
1182 return true;
1183 }
1184 } while (SkipCommentLine(afterAmpersand: false /* not after ampersand */));
1185 return false;
1186}
1187
1188bool Prescanner::FreeFormContinuation() {
1189 const char *p{at_};
1190 bool ampersand{*p == '&'};
1191 if (ampersand) {
1192 p = SkipWhiteSpace(p: p + 1);
1193 }
1194 if (*p != '\n') {
1195 if (inCharLiteral_) {
1196 return false;
1197 } else if (*p == '!') { // & ! comment - ok
1198 } else if (ampersand && isPossibleMacroCall_ && (*p == ',' || *p == ')')) {
1199 return false; // allow & at end of a macro argument
1200 } else if (features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) {
1201 Say(GetProvenance(p), "missing ! before comment after &"_warn_en_US);
1202 }
1203 }
1204 do {
1205 if (const char *cont{FreeFormContinuationLine(ampersand)}) {
1206 BeginSourceLine(at: cont);
1207 NextLine();
1208 return true;
1209 }
1210 } while (SkipCommentLine(afterAmpersand: ampersand));
1211 return false;
1212}
1213
1214// Implicit line continuation allows a preprocessor macro call with
1215// arguments to span multiple lines.
1216bool Prescanner::IsImplicitContinuation() const {
1217 return !inPreprocessorDirective_ && !inCharLiteral_ && isPossibleMacroCall_ &&
1218 parenthesisNesting_ > 0 && !IsAtEnd() &&
1219 ClassifyLine(nextLine_).kind == LineClassification::Kind::Source;
1220}
1221
1222bool Prescanner::Continuation(bool mightNeedFixedFormSpace) {
1223 if (*at_ == '\n' || *at_ == '&') {
1224 if (inFixedForm_) {
1225 return FixedFormContinuation(mightNeedSpace: mightNeedFixedFormSpace);
1226 } else {
1227 return FreeFormContinuation();
1228 }
1229 } else {
1230 return false;
1231 }
1232}
1233
1234std::optional<Prescanner::LineClassification>
1235Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const {
1236 const char *p{start};
1237 char col1{*p++};
1238 if (!IsFixedFormCommentChar(ch: col1)) {
1239 return std::nullopt;
1240 }
1241 char sentinel[5], *sp{sentinel};
1242 int column{2};
1243 for (; column < 6; ++column, ++p) {
1244 if (*p == ' ' || *p == '\n' || *p == '\t') {
1245 break;
1246 }
1247 if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) {
1248 // OpenMP conditional compilation line: leave the label alone
1249 break;
1250 }
1251 *sp++ = ToLowerCaseLetter(*p);
1252 }
1253 if (column == 6) {
1254 if (*p == ' ' || *p == '\t' || *p == '0') {
1255 ++p;
1256 } else {
1257 // This is a Continuation line, not an initial directive line.
1258 return std::nullopt;
1259 }
1260 }
1261 if (sp == sentinel) {
1262 return std::nullopt;
1263 }
1264 *sp = '\0';
1265 if (const char *ss{IsCompilerDirectiveSentinel(
1266 sentinel, static_cast<std::size_t>(sp - sentinel))}) {
1267 std::size_t payloadOffset = p - start;
1268 return {LineClassification{
1269 LineClassification::Kind::CompilerDirective, payloadOffset, ss}};
1270 }
1271 return std::nullopt;
1272}
1273
1274std::optional<Prescanner::LineClassification>
1275Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const {
1276 char sentinel[8];
1277 const char *p{SkipWhiteSpace(p: start)};
1278 if (*p++ != '!') {
1279 return std::nullopt;
1280 }
1281 for (std::size_t j{0}; j + 1 < sizeof sentinel; ++p, ++j) {
1282 if (*p == '\n') {
1283 break;
1284 }
1285 if (*p == ' ' || *p == '\t' || *p == '&') {
1286 if (j == 0) {
1287 break;
1288 }
1289 sentinel[j] = '\0';
1290 p = SkipWhiteSpace(p: p + 1);
1291 if (*p == '!') {
1292 break;
1293 }
1294 if (const char *sp{IsCompilerDirectiveSentinel(sentinel, j)}) {
1295 std::size_t offset = p - start;
1296 return {LineClassification{
1297 LineClassification::Kind::CompilerDirective, offset, sp}};
1298 }
1299 break;
1300 }
1301 sentinel[j] = ToLowerCaseLetter(*p);
1302 }
1303 return std::nullopt;
1304}
1305
1306Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) {
1307 std::uint64_t packed{0};
1308 for (char ch : dir) {
1309 packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff);
1310 }
1311 compilerDirectiveBloomFilter_.set(position: packed % prime1);
1312 compilerDirectiveBloomFilter_.set(position: packed % prime2);
1313 compilerDirectiveSentinels_.insert(x: dir);
1314 return *this;
1315}
1316
1317const char *Prescanner::IsCompilerDirectiveSentinel(
1318 const char *sentinel, std::size_t len) const {
1319 std::uint64_t packed{0};
1320 for (std::size_t j{0}; j < len; ++j) {
1321 packed = (packed << 8) | (sentinel[j] & 0xff);
1322 }
1323 if (len == 0 || !compilerDirectiveBloomFilter_.test(position: packed % prime1) ||
1324 !compilerDirectiveBloomFilter_.test(position: packed % prime2)) {
1325 return nullptr;
1326 }
1327 const auto iter{compilerDirectiveSentinels_.find(x: std::string(sentinel, len))};
1328 return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str();
1329}
1330
1331const char *Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const {
1332 const char *p{token.begin()};
1333 const char *end{p + token.size()};
1334 while (p < end && (*p == ' ' || *p == '\n')) {
1335 ++p;
1336 }
1337 if (p < end && *p == '!') {
1338 ++p;
1339 }
1340 while (end > p && (end[-1] == ' ' || end[-1] == '\t')) {
1341 --end;
1342 }
1343 return end > p && IsCompilerDirectiveSentinel(sentinel: p, len: end - p) ? p : nullptr;
1344}
1345
1346constexpr bool IsDirective(const char *match, const char *dir) {
1347 for (; *match; ++match) {
1348 if (*match != ToLowerCaseLetter(*dir++)) {
1349 return false;
1350 }
1351 }
1352 return true;
1353}
1354
1355Prescanner::LineClassification Prescanner::ClassifyLine(
1356 const char *start) const {
1357 if (inFixedForm_) {
1358 if (std::optional<LineClassification> lc{
1359 IsFixedFormCompilerDirectiveLine(start)}) {
1360 return std::move(*lc);
1361 }
1362 if (IsFixedFormCommentLine(start)) {
1363 return {LineClassification::Kind::Comment};
1364 }
1365 } else {
1366 if (std::optional<LineClassification> lc{
1367 IsFreeFormCompilerDirectiveLine(start)}) {
1368 return std::move(*lc);
1369 }
1370 if (const char *bang{IsFreeFormComment(p: start)}) {
1371 return {LineClassification::Kind::Comment,
1372 static_cast<std::size_t>(bang - start)};
1373 }
1374 }
1375 if (std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) {
1376 return {LineClassification::Kind::IncludeLine, *quoteOffset};
1377 }
1378 if (const char *dir{IsPreprocessorDirectiveLine(start)}) {
1379 if (IsDirective(match: "if", dir) || IsDirective(match: "elif", dir) ||
1380 IsDirective(match: "else", dir) || IsDirective(match: "endif", dir)) {
1381 return {LineClassification::Kind::ConditionalCompilationDirective};
1382 } else if (IsDirective(match: "include", dir)) {
1383 return {LineClassification::Kind::IncludeDirective};
1384 } else if (IsDirective(match: "define", dir) || IsDirective(match: "undef", dir)) {
1385 return {LineClassification::Kind::DefinitionDirective};
1386 } else {
1387 return {LineClassification::Kind::PreprocessorDirective};
1388 }
1389 }
1390 return {LineClassification::Kind::Source};
1391}
1392
1393void Prescanner::SourceFormChange(std::string &&dir) {
1394 if (dir == "!dir$ free") {
1395 inFixedForm_ = false;
1396 } else if (dir == "!dir$ fixed") {
1397 inFixedForm_ = true;
1398 }
1399}
1400
1401// Acquire and append compiler directive continuation lines to
1402// the tokens that constitute a compiler directive, even when those
1403// directive continuation lines are the result of macro expansion.
1404// (Not used when neither the original compiler directive line nor
1405// the directive continuation line result from preprocessing; regular
1406// line continuation during tokenization handles that normal case.)
1407bool Prescanner::CompilerDirectiveContinuation(
1408 TokenSequence &tokens, const char *origSentinel) {
1409 if (inFixedForm_ || tokens.empty() ||
1410 tokens.TokenAt(tokens.SizeInTokens() - 1) != "&") {
1411 return false;
1412 }
1413 LineClassification followingLine{ClassifyLine(start: nextLine_)};
1414 if (followingLine.kind == LineClassification::Kind::Comment) {
1415 nextLine_ += followingLine.payloadOffset; // advance to '!' or newline
1416 NextLine();
1417 return true;
1418 }
1419 CHECK(origSentinel != nullptr);
1420 directiveSentinel_ = origSentinel; // so IsDirective() is true
1421 const char *nextContinuation{
1422 followingLine.kind == LineClassification::Kind::CompilerDirective
1423 ? FreeFormContinuationLine(ampersand: true)
1424 : nullptr};
1425 if (!nextContinuation &&
1426 followingLine.kind != LineClassification::Kind::Source) {
1427 return false;
1428 }
1429 auto origNextLine{nextLine_};
1430 BeginSourceLine(at: nextLine_);
1431 NextLine();
1432 TokenSequence followingTokens;
1433 if (nextContinuation) {
1434 // What follows is !DIR$ & xxx; skip over the & so that it
1435 // doesn't cause a spurious continuation.
1436 at_ = nextContinuation;
1437 } else {
1438 // What follows looks like a source line before macro expansion,
1439 // but might become a directive continuation afterwards.
1440 SkipSpaces();
1441 }
1442 while (NextToken(followingTokens)) {
1443 }
1444 if (auto followingPrepro{
1445 preprocessor_.MacroReplacement(followingTokens, *this)}) {
1446 followingTokens = std::move(*followingPrepro);
1447 }
1448 followingTokens.RemoveRedundantBlanks();
1449 std::size_t startAt{0};
1450 std::size_t keep{followingTokens.SizeInTokens()};
1451 bool ok{false};
1452 if (nextContinuation) {
1453 ok = true;
1454 } else {
1455 if (keep >= 3 && followingTokens.TokenAt(0) == "!" &&
1456 followingTokens.TokenAt(2) == "&") {
1457 CharBlock sentinel{followingTokens.TokenAt(1)};
1458 if (!sentinel.empty() &&
1459 std::memcmp(s1: sentinel.begin(), s2: origSentinel, n: sentinel.size()) == 0) {
1460 startAt = 3;
1461 keep -= 3;
1462 ok = true;
1463 }
1464 }
1465 }
1466 if (ok) {
1467 tokens.pop_back(); // delete original '&'
1468 tokens.Put(followingTokens, startAt, keep);
1469 } else {
1470 nextLine_ = origNextLine;
1471 }
1472 return ok;
1473}
1474
1475// Similar, but for source line continuation after macro replacement.
1476bool Prescanner::SourceLineContinuation(TokenSequence &tokens) {
1477 if (!inFixedForm_ && !tokens.empty() &&
1478 tokens.TokenAt(tokens.SizeInTokens() - 1) == "&") {
1479 LineClassification followingLine{ClassifyLine(start: nextLine_)};
1480 if (followingLine.kind == LineClassification::Kind::Comment) {
1481 nextLine_ += followingLine.payloadOffset; // advance to '!' or newline
1482 NextLine();
1483 return true;
1484 } else if (const char *nextContinuation{FreeFormContinuationLine(ampersand: true)}) {
1485 BeginSourceLine(at: nextLine_);
1486 NextLine();
1487 TokenSequence followingTokens;
1488 at_ = nextContinuation;
1489 while (NextToken(followingTokens)) {
1490 }
1491 if (auto followingPrepro{
1492 preprocessor_.MacroReplacement(followingTokens, *this)}) {
1493 followingTokens = std::move(*followingPrepro);
1494 }
1495 followingTokens.RemoveRedundantBlanks();
1496 tokens.pop_back(); // delete original '&'
1497 tokens.Put(followingTokens);
1498 return true;
1499 }
1500 }
1501 return false;
1502}
1503} // namespace Fortran::parser
1504

source code of flang/lib/Parser/prescan.cpp