//===-- lib/Parser/prescan.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8 | |
9 | #include "prescan.h" |
10 | #include "flang/Common/idioms.h" |
11 | #include "flang/Parser/characters.h" |
12 | #include "flang/Parser/message.h" |
13 | #include "flang/Parser/preprocessor.h" |
14 | #include "flang/Parser/source.h" |
15 | #include "flang/Parser/token-sequence.h" |
16 | #include "llvm/Support/raw_ostream.h" |
17 | #include <cstddef> |
18 | #include <cstring> |
19 | #include <utility> |
20 | #include <vector> |
21 | |
22 | namespace Fortran::parser { |
23 | |
24 | using common::LanguageFeature; |
25 | |
// Largest permitted value of Prescanner::prescannerNesting_.  A nested
// prescanner is constructed with its parent's value plus one, and Prescan()
// reports "too many nested INCLUDE/#include files" once this is exceeded.
static constexpr int maxPrescannerNesting{100};
27 | |
// Constructs a prescanner from its collaborators.  The source collection is
// obtained from the preprocessor and the encoding from that collection;
// backslashFreeFormContinuation_ is initialized from
// preprocessor.AnyDefinitions() as of construction time.
Prescanner::Prescanner(Messages &messages, CookedSource &cooked,
    Preprocessor &preprocessor, common::LanguageFeatureControl lfc)
    : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor},
      allSources_{preprocessor_.allSources()}, features_{lfc},
      backslashFreeFormContinuation_{preprocessor.AnyDefinitions()},
      encoding_{allSources_.encoding()} {}
34 | |
// Constructs a nested prescanner from an existing one.  The message sink,
// cooked stream, source collection, features, and the option/state members
// listed below are taken from "that"; the preprocessor is "prepro";
// isNestedInIncludeDirective_ is set from the argument; and the nesting
// depth is one greater than that of "that".
Prescanner::Prescanner(const Prescanner &that, Preprocessor &prepro,
    bool isNestedInIncludeDirective)
    : messages_{that.messages_}, cooked_{that.cooked_}, preprocessor_{prepro},
      allSources_{that.allSources_}, features_{that.features_},
      preprocessingOnly_{that.preprocessingOnly_},
      expandIncludeLines_{that.expandIncludeLines_},
      isNestedInIncludeDirective_{isNestedInIncludeDirective},
      backslashFreeFormContinuation_{that.backslashFreeFormContinuation_},
      inFixedForm_{that.inFixedForm_},
      fixedFormColumnLimit_{that.fixedFormColumnLimit_},
      encoding_{that.encoding_},
      prescannerNesting_{that.prescannerNesting_ + 1},
      skipLeadingAmpersand_{that.skipLeadingAmpersand_},
      compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_},
      compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {}
50 | |
// Tests whether p points at a blank and returns the number of bytes that
// the blank occupies: 1 for an ASCII space or a LATIN-1 non-breaking space,
// 2 for a UTF-8 non-breaking space, and 0 when p is not at a blank.
static inline int IsSpace(const char *p) {
  const char lead{p[0]};
  if (lead == ' ' || lead == '\xa0') { // ASCII space or LATIN-1 NBSP
    return 1;
  }
  // UTF-8 NBSP is the byte pair C2 A0; the second byte is examined only
  // after the first one has matched.
  return lead == '\xc2' && p[1] == '\xa0' ? 2 : 0;
}
63 | |
64 | static inline int IsSpaceOrTab(const char *p) { |
65 | return *p == '\t' ? 1 : IsSpace(p); |
66 | } |
67 | |
// True for the characters that this file accepts as a fixed-form comment
// marker: '!', '*', 'C', and 'c'.
static inline constexpr bool IsFixedFormCommentChar(char ch) {
  switch (ch) {
  case '!':
  case '*':
  case 'C':
  case 'c':
    return true;
  default:
    return false;
  }
}
71 | |
72 | static void NormalizeCompilerDirectiveCommentMarker(TokenSequence &dir) { |
73 | char *p{dir.GetMutableCharData()}; |
74 | char *limit{p + dir.SizeInChars()}; |
75 | for (; p < limit; ++p) { |
76 | if (*p != ' ') { |
77 | CHECK(IsFixedFormCommentChar(ch: *p)); |
78 | *p = '!'; |
79 | return; |
80 | } |
81 | } |
82 | DIE("compiler directive all blank"); |
83 | } |
84 | |
85 | void Prescanner::Prescan(ProvenanceRange range) { |
86 | startProvenance_ = range.start(); |
87 | start_ = allSources_.GetSource(range); |
88 | CHECK(start_); |
89 | limit_ = start_ + range.size(); |
90 | nextLine_ = start_; |
91 | const bool beganInFixedForm{inFixedForm_}; |
92 | if (prescannerNesting_ > maxPrescannerNesting) { |
93 | Say(GetProvenance(start_), |
94 | "too many nested INCLUDE/#include files, possibly circular"_err_en_US); |
95 | return; |
96 | } |
97 | while (!IsAtEnd()) { |
98 | Statement(); |
99 | } |
100 | if (inFixedForm_ != beganInFixedForm) { |
101 | std::string dir{"!dir$ "}; |
102 | if (beganInFixedForm) { |
103 | dir += "fixed"; |
104 | } else { |
105 | dir += "free"; |
106 | } |
107 | dir += '\n'; |
108 | TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()}; |
109 | tokens.Emit(cooked_); |
110 | } |
111 | } |
112 | |
113 | void Prescanner::Statement() { |
114 | TokenSequence tokens; |
115 | const char *statementStart{nextLine_}; |
116 | LineClassification line{ClassifyLine(statementStart)}; |
117 | switch (line.kind) { |
118 | case LineClassification::Kind::Comment: |
119 | nextLine_ += line.payloadOffset; // advance to '!' or newline |
120 | NextLine(); |
121 | return; |
122 | case LineClassification::Kind::IncludeLine: |
123 | FortranInclude(quote: nextLine_ + line.payloadOffset); |
124 | NextLine(); |
125 | return; |
126 | case LineClassification::Kind::ConditionalCompilationDirective: |
127 | case LineClassification::Kind::IncludeDirective: |
128 | preprocessor_.Directive(TokenizePreprocessorDirective(), *this); |
129 | afterPreprocessingDirective_ = true; |
130 | skipLeadingAmpersand_ |= !inFixedForm_; |
131 | return; |
132 | case LineClassification::Kind::PreprocessorDirective: |
133 | preprocessor_.Directive(TokenizePreprocessorDirective(), *this); |
134 | afterPreprocessingDirective_ = true; |
135 | // Don't set skipLeadingAmpersand_ |
136 | return; |
137 | case LineClassification::Kind::DefinitionDirective: |
138 | preprocessor_.Directive(TokenizePreprocessorDirective(), *this); |
139 | // Don't set afterPreprocessingDirective_ or skipLeadingAmpersand_ |
140 | return; |
141 | case LineClassification::Kind::CompilerDirective: { |
142 | directiveSentinel_ = line.sentinel; |
143 | CHECK(InCompilerDirective()); |
144 | BeginStatementAndAdvance(); |
145 | if (inFixedForm_) { |
146 | CHECK(IsFixedFormCommentChar(ch: *at_)); |
147 | } else { |
148 | at_ += line.payloadOffset; |
149 | column_ += line.payloadOffset; |
150 | CHECK(*at_ == '!'); |
151 | } |
152 | std::optional<int> condOffset; |
153 | if (InOpenMPConditionalLine()) { |
154 | condOffset = 2; |
155 | } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'c' && |
156 | directiveSentinel_[2] == 'u' && directiveSentinel_[3] == 'f' && |
157 | directiveSentinel_[4] == '\0') { |
158 | // CUDA conditional compilation line. |
159 | condOffset = 5; |
160 | } |
161 | if (condOffset && !preprocessingOnly_) { |
162 | at_ += *condOffset, column_ += *condOffset; |
163 | if (auto payload{IsIncludeLine(at_)}) { |
164 | FortranInclude(quote: at_ + *payload); |
165 | return; |
166 | } |
167 | if (inFixedForm_) { |
168 | LabelField(tokens); |
169 | } |
170 | SkipSpaces(); |
171 | } else { |
172 | // Compiler directive. Emit normalized sentinel, squash following spaces. |
173 | // Conditional compilation lines (!$) take this path in -E mode too |
174 | // so that -fopenmp only has to appear on the later compilation. |
175 | EmitChar(tokens, '!'); |
176 | ++at_, ++column_; |
177 | for (const char *sp{directiveSentinel_}; *sp != '\0'; |
178 | ++sp, ++at_, ++column_) { |
179 | EmitChar(tokens, *sp); |
180 | } |
181 | if (inFixedForm_) { |
182 | while (column_ < 6) { |
183 | if (*at_ == '\t') { |
184 | tabInCurrentLine_ = true; |
185 | ++at_; |
186 | for (; column_ < 7; ++column_) { |
187 | EmitChar(tokens, ' '); |
188 | } |
189 | } else if (int spaceBytes{IsSpace(p: at_)}) { |
190 | EmitChar(tokens, ' '); |
191 | at_ += spaceBytes; |
192 | ++column_; |
193 | } else { |
194 | if (InOpenMPConditionalLine() && column_ == 3 && |
195 | IsDecimalDigit(*at_)) { |
196 | // subtle: !$ in -E mode can't be immediately followed by a digit |
197 | EmitChar(tokens, ' '); |
198 | } |
199 | break; |
200 | } |
201 | } |
202 | } else if (int spaceBytes{IsSpaceOrTab(p: at_)}) { |
203 | EmitChar(tokens, ' '); |
204 | at_ += spaceBytes, ++column_; |
205 | } |
206 | tokens.CloseToken(); |
207 | SkipSpaces(); |
208 | if (InOpenMPConditionalLine() && inFixedForm_ && !tabInCurrentLine_ && |
209 | column_ == 6 && *at_ != '\n') { |
210 | // !$ 0 - turn '0' into a space |
211 | // !$ 1 - turn '1' into '&' |
212 | if (int n{IsSpace(p: at_)}; n || *at_ == '0') { |
213 | at_ += n ? n : 1; |
214 | } else { |
215 | ++at_; |
216 | EmitChar(tokens, '&'); |
217 | tokens.CloseToken(); |
218 | } |
219 | ++column_; |
220 | SkipSpaces(); |
221 | } |
222 | } |
223 | break; |
224 | } |
225 | case LineClassification::Kind::Source: { |
226 | BeginStatementAndAdvance(); |
227 | bool checkLabelField{false}; |
228 | if (inFixedForm_) { |
229 | if (features_.IsEnabled(LanguageFeature::OldDebugLines) && |
230 | (*at_ == 'D' || *at_ == 'd')) { |
231 | NextChar(); |
232 | } |
233 | checkLabelField = true; |
234 | } else { |
235 | if (skipLeadingAmpersand_) { |
236 | skipLeadingAmpersand_ = false; |
237 | const char *p{SkipWhiteSpace(at_)}; |
238 | if (p < limit_ && *p == '&') { |
239 | column_ += ++p - at_; |
240 | at_ = p; |
241 | } |
242 | } else { |
243 | SkipSpaces(); |
244 | } |
245 | } |
246 | // Check for a leading identifier that might be a keyword macro |
247 | // that will expand to anything indicating a non-source line, like |
248 | // a comment marker or directive sentinel. If so, disable line |
249 | // continuation, so that NextToken() won't consume anything from |
250 | // following lines. |
251 | if (IsLegalIdentifierStart(*at_)) { |
252 | // TODO: Only bother with these cases when any keyword macro has |
253 | // been defined with replacement text that could begin a comment |
254 | // or directive sentinel. |
255 | const char *p{at_}; |
256 | while (IsLegalInIdentifier(*++p)) { |
257 | } |
258 | CharBlock id{at_, static_cast<std::size_t>(p - at_)}; |
259 | if (preprocessor_.IsNameDefined(id) && |
260 | !preprocessor_.IsFunctionLikeDefinition(id)) { |
261 | checkLabelField = false; |
262 | TokenSequence toks; |
263 | toks.Put(id, GetProvenance(at_)); |
264 | if (auto replaced{preprocessor_.MacroReplacement(toks, *this)}) { |
265 | auto newLineClass{ClassifyLine(*replaced, GetCurrentProvenance())}; |
266 | if (newLineClass.kind == |
267 | LineClassification::Kind::CompilerDirective) { |
268 | directiveSentinel_ = newLineClass.sentinel; |
269 | disableSourceContinuation_ = false; |
270 | } else { |
271 | disableSourceContinuation_ = !replaced->empty() && |
272 | newLineClass.kind != LineClassification::Kind::Source; |
273 | } |
274 | } |
275 | } |
276 | } |
277 | if (checkLabelField) { |
278 | LabelField(tokens); |
279 | } |
280 | } break; |
281 | } |
282 | |
283 | while (NextToken(tokens)) { |
284 | } |
285 | if (continuationLines_ > 255) { |
286 | if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) { |
287 | Say(common::LanguageFeature::MiscSourceExtensions, |
288 | GetProvenance(statementStart), |
289 | "%d continuation lines is more than the Fortran standard allows"_port_en_US, |
290 | continuationLines_); |
291 | } |
292 | } |
293 | |
294 | Provenance newlineProvenance{GetCurrentProvenance()}; |
295 | if (std::optional<TokenSequence> preprocessed{ |
296 | preprocessor_.MacroReplacement(tokens, *this)}) { |
297 | // Reprocess the preprocessed line. |
298 | LineClassification ppl{ClassifyLine(*preprocessed, newlineProvenance)}; |
299 | switch (ppl.kind) { |
300 | case LineClassification::Kind::Comment: |
301 | break; |
302 | case LineClassification::Kind::IncludeLine: |
303 | FortranInclude(quote: preprocessed->TokenAt(0).begin() + ppl.payloadOffset); |
304 | break; |
305 | case LineClassification::Kind::ConditionalCompilationDirective: |
306 | case LineClassification::Kind::IncludeDirective: |
307 | case LineClassification::Kind::DefinitionDirective: |
308 | case LineClassification::Kind::PreprocessorDirective: |
309 | if (features_.ShouldWarn(common::UsageWarning::Preprocessing)) { |
310 | Say(common::UsageWarning::Preprocessing, |
311 | preprocessed->GetProvenanceRange(), |
312 | "Preprocessed line resembles a preprocessor directive"_warn_en_US); |
313 | } |
314 | CheckAndEmitLine(preprocessed->ToLowerCase(), newlineProvenance); |
315 | break; |
316 | case LineClassification::Kind::CompilerDirective: |
317 | if (preprocessed->HasRedundantBlanks()) { |
318 | preprocessed->RemoveRedundantBlanks(); |
319 | } |
320 | while (CompilerDirectiveContinuation(*preprocessed, ppl.sentinel)) { |
321 | newlineProvenance = GetCurrentProvenance(); |
322 | } |
323 | NormalizeCompilerDirectiveCommentMarker(*preprocessed); |
324 | preprocessed->ToLowerCase(); |
325 | SourceFormChange(preprocessed->ToString()); |
326 | CheckAndEmitLine( |
327 | preprocessed->ClipComment(*this, true /* skip first ! */), |
328 | newlineProvenance); |
329 | break; |
330 | case LineClassification::Kind::Source: |
331 | if (inFixedForm_) { |
332 | if (!preprocessingOnly_ && preprocessed->HasBlanks()) { |
333 | preprocessed->RemoveBlanks(); |
334 | } |
335 | } else { |
336 | while (SourceLineContinuation(*preprocessed)) { |
337 | newlineProvenance = GetCurrentProvenance(); |
338 | } |
339 | if (preprocessed->HasRedundantBlanks()) { |
340 | preprocessed->RemoveRedundantBlanks(); |
341 | } |
342 | } |
343 | CheckAndEmitLine( |
344 | preprocessed->ToLowerCase().ClipComment(*this), newlineProvenance); |
345 | break; |
346 | } |
347 | } else { // no macro replacement |
348 | if (line.kind == LineClassification::Kind::CompilerDirective) { |
349 | while (CompilerDirectiveContinuation(tokens, line.sentinel)) { |
350 | newlineProvenance = GetCurrentProvenance(); |
351 | } |
352 | if (preprocessingOnly_ && inFixedForm_ && InOpenMPConditionalLine() && |
353 | nextLine_ < limit_) { |
354 | // In -E mode, when the line after !$ conditional compilation is a |
355 | // regular fixed form continuation line, append a '&' to the line. |
356 | const char *p{nextLine_}; |
357 | int col{1}; |
358 | while (int n{IsSpace(p)}) { |
359 | if (*p == '\t') { |
360 | break; |
361 | } |
362 | p += n; |
363 | ++col; |
364 | } |
365 | if (col == 6 && *p != '0' && *p != '\t' && *p != '\n') { |
366 | EmitChar(tokens, '&'); |
367 | tokens.CloseToken(); |
368 | } |
369 | } |
370 | tokens.ToLowerCase(); |
371 | SourceFormChange(tokens.ToString()); |
372 | } else { // Kind::Source |
373 | tokens.ToLowerCase(); |
374 | if (inFixedForm_) { |
375 | EnforceStupidEndStatementRules(tokens); |
376 | } |
377 | } |
378 | CheckAndEmitLine(tokens, newlineProvenance); |
379 | } |
380 | directiveSentinel_ = nullptr; |
381 | } |
382 | |
383 | void Prescanner::CheckAndEmitLine( |
384 | TokenSequence &tokens, Provenance newlineProvenance) { |
385 | tokens.CheckBadFortranCharacters( |
386 | messages_, *this, disableSourceContinuation_ || preprocessingOnly_); |
387 | // Parenthesis nesting check does not apply while any #include is |
388 | // active, nor on the lines before and after a top-level #include, |
389 | // nor before or after conditional source. |
390 | // Applications play shenanigans with line continuation before and |
391 | // after #include'd subprogram argument lists and conditional source. |
392 | if (!preprocessingOnly_ && !isNestedInIncludeDirective_ && !omitNewline_ && |
393 | !afterPreprocessingDirective_ && tokens.BadlyNestedParentheses() && |
394 | !preprocessor_.InConditional()) { |
395 | if (nextLine_ < limit_ && IsPreprocessorDirectiveLine(nextLine_)) { |
396 | // don't complain |
397 | } else { |
398 | tokens.CheckBadParentheses(messages_); |
399 | } |
400 | } |
401 | tokens.Emit(cooked_); |
402 | if (omitNewline_) { |
403 | omitNewline_ = false; |
404 | } else { |
405 | cooked_.Put('\n', newlineProvenance); |
406 | afterPreprocessingDirective_ = false; |
407 | } |
408 | } |
409 | |
410 | TokenSequence Prescanner::TokenizePreprocessorDirective() { |
411 | CHECK(!IsAtEnd() && !inPreprocessorDirective_); |
412 | inPreprocessorDirective_ = true; |
413 | BeginStatementAndAdvance(); |
414 | TokenSequence tokens; |
415 | while (NextToken(tokens)) { |
416 | } |
417 | inPreprocessorDirective_ = false; |
418 | return tokens; |
419 | } |
420 | |
421 | void Prescanner::NextLine() { |
422 | void *vstart{static_cast<void *>(const_cast<char *>(nextLine_))}; |
423 | void *v{std::memchr(s: vstart, c: '\n', n: limit_ - nextLine_)}; |
424 | if (!v) { |
425 | nextLine_ = limit_; |
426 | } else { |
427 | const char *nl{const_cast<const char *>(static_cast<char *>(v))}; |
428 | nextLine_ = nl + 1; |
429 | } |
430 | } |
431 | |
432 | void Prescanner::LabelField(TokenSequence &token) { |
433 | int outCol{1}; |
434 | const char *start{at_}; |
435 | std::optional<int> badColumn; |
436 | for (; *at_ != '\n' && column_ <= 6; ++at_) { |
437 | if (*at_ == '\t') { |
438 | ++at_; |
439 | column_ = 7; |
440 | break; |
441 | } |
442 | if (int n{IsSpace(p: at_)}; n == 0 && |
443 | !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space |
444 | EmitChar(token, *at_); |
445 | ++outCol; |
446 | if (!badColumn && (column_ == 6 || !IsDecimalDigit(*at_))) { |
447 | badColumn = column_; |
448 | } |
449 | } |
450 | ++column_; |
451 | } |
452 | if (badColumn && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) { |
453 | if ((prescannerNesting_ > 0 && *badColumn == 6 && |
454 | cooked_.BufferedBytes() == firstCookedCharacterOffset_) || |
455 | afterPreprocessingDirective_) { |
456 | // This is the first source line in #include'd text or conditional |
457 | // code under #if, or the first source line after such. |
458 | // If it turns out that the preprocessed text begins with a |
459 | // fixed form continuation line, the newline at the end |
460 | // of the latest source line beforehand will be deleted in |
461 | // CookedSource::Marshal(). |
462 | cooked_.MarkPossibleFixedFormContinuation(); |
463 | } else if (features_.ShouldWarn(common::UsageWarning::Scanning)) { |
464 | Say(common::UsageWarning::Scanning, GetProvenance(start + *badColumn - 1), |
465 | *badColumn == 6 |
466 | ? "Statement should not begin with a continuation line"_warn_en_US |
467 | : "Character in fixed-form label field must be a digit"_warn_en_US); |
468 | } |
469 | token.clear(); |
470 | if (*badColumn < 6) { |
471 | at_ = start; |
472 | column_ = 1; |
473 | return; |
474 | } |
475 | outCol = 1; |
476 | } |
477 | if (outCol == 1) { // empty label field |
478 | // Emit a space so that, if the line is rescanned after preprocessing, |
479 | // a leading 'C' or 'D' won't be left-justified and then accidentally |
480 | // misinterpreted as a comment card. |
481 | EmitChar(token, ' '); |
482 | ++outCol; |
483 | } |
484 | token.CloseToken(); |
485 | SkipToNextSignificantCharacter(); |
486 | if (IsDecimalDigit(*at_)) { |
487 | if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) { |
488 | Say(common::LanguageFeature::MiscSourceExtensions, GetCurrentProvenance(), |
489 | "Label digit is not in fixed-form label field"_port_en_US); |
490 | } |
491 | } |
492 | } |
493 | |
494 | // 6.3.3.5: A program unit END statement, or any other statement whose |
495 | // initial line resembles an END statement, shall not be continued in |
496 | // fixed form source. |
497 | void Prescanner::EnforceStupidEndStatementRules(const TokenSequence &tokens) { |
498 | CharBlock cBlock{tokens.ToCharBlock()}; |
499 | const char *str{cBlock.begin()}; |
500 | std::size_t n{cBlock.size()}; |
501 | if (n < 3) { |
502 | return; |
503 | } |
504 | std::size_t j{0}; |
505 | for (; j < n && (str[j] == ' ' || (str[j] >= '0' && str[j] <= '9')); ++j) { |
506 | } |
507 | if (j + 3 > n || std::memcmp(s1: str + j, s2: "end", n: 3) != 0) { |
508 | return; |
509 | } |
510 | // It starts with END, possibly after a label. |
511 | auto start{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))}; |
512 | auto end{allSources_.GetSourcePosition(tokens.GetCharProvenance(n - 1))}; |
513 | if (!start || !end) { |
514 | return; |
515 | } |
516 | if (&*start->sourceFile == &*end->sourceFile && start->line == end->line) { |
517 | return; // no continuation |
518 | } |
519 | j += 3; |
520 | static const char *const prefixes[]{"program", "subroutine", "function", |
521 | "blockdata", "module", "submodule", nullptr}; |
522 | bool isPrefix{j == n || !IsLegalInIdentifier(str[j])}; // prefix is END |
523 | std::size_t endOfPrefix{j - 1}; |
524 | for (const char *const *p{prefixes}; *p; ++p) { |
525 | std::size_t pLen{std::strlen(s: *p)}; |
526 | if (j + pLen <= n && std::memcmp(s1: str + j, s2: *p, n: pLen) == 0) { |
527 | isPrefix = true; // END thing as prefix |
528 | j += pLen; |
529 | endOfPrefix = j - 1; |
530 | for (; j < n && IsLegalInIdentifier(str[j]); ++j) { |
531 | } |
532 | break; |
533 | } |
534 | } |
535 | if (isPrefix) { |
536 | auto range{tokens.GetTokenProvenanceRange(1)}; |
537 | if (j == n) { // END or END thing [name] |
538 | Say(range, |
539 | "Program unit END statement may not be continued in fixed form source"_err_en_US); |
540 | } else { |
541 | auto endOfPrefixPos{ |
542 | allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))}; |
543 | auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))}; |
544 | if (endOfPrefixPos && next && |
545 | &*endOfPrefixPos->sourceFile == &*start->sourceFile && |
546 | endOfPrefixPos->line == start->line && |
547 | (&*next->sourceFile != &*start->sourceFile || |
548 | next->line != start->line)) { |
549 | Say(range, |
550 | "Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US); |
551 | } |
552 | } |
553 | } |
554 | } |
555 | |
556 | void Prescanner::SkipToEndOfLine() { |
557 | while (*at_ != '\n') { |
558 | ++at_, ++column_; |
559 | } |
560 | } |
561 | |
562 | bool Prescanner::MustSkipToEndOfLine() const { |
563 | if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) { |
564 | return true; // skip over ignored columns in right margin (73:80) |
565 | } else if (*at_ == '!' && !inCharLiteral_ && |
566 | (!inFixedForm_ || tabInCurrentLine_ || column_ != 6)) { |
567 | return !IsCompilerDirectiveSentinel(p: at_); |
568 | } else { |
569 | return false; |
570 | } |
571 | } |
572 | |
573 | void Prescanner::NextChar() { |
574 | CHECK(*at_ != '\n'); |
575 | int n{IsSpace(p: at_)}; |
576 | at_ += n ? n : 1; |
577 | ++column_; |
578 | while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') { |
579 | // UTF-8 byte order mark - treat this file as UTF-8 |
580 | at_ += 3; |
581 | encoding_ = Encoding::UTF_8; |
582 | } |
583 | SkipToNextSignificantCharacter(); |
584 | } |
585 | |
586 | // Skip everything that should be ignored until the next significant |
587 | // character is reached; handles C-style comments in preprocessing |
588 | // directives, Fortran ! comments, stuff after the right margin in |
589 | // fixed form, and all forms of line continuation. |
590 | bool Prescanner::SkipToNextSignificantCharacter() { |
591 | if (inPreprocessorDirective_) { |
592 | SkipCComments(); |
593 | return false; |
594 | } else { |
595 | auto anyContinuationLine{false}; |
596 | bool mightNeedSpace{false}; |
597 | if (MustSkipToEndOfLine()) { |
598 | SkipToEndOfLine(); |
599 | } else { |
600 | mightNeedSpace = *at_ == '\n'; |
601 | } |
602 | for (; Continuation(mightNeedFixedFormSpace: mightNeedSpace); mightNeedSpace = false) { |
603 | anyContinuationLine = true; |
604 | ++continuationLines_; |
605 | if (MustSkipToEndOfLine()) { |
606 | SkipToEndOfLine(); |
607 | } |
608 | } |
609 | if (*at_ == '\t') { |
610 | tabInCurrentLine_ = true; |
611 | } |
612 | return anyContinuationLine; |
613 | } |
614 | } |
615 | |
616 | void Prescanner::SkipCComments() { |
617 | while (true) { |
618 | if (IsCComment(p: at_)) { |
619 | if (const char *after{SkipCComment(at_)}) { |
620 | column_ += after - at_; |
621 | // May have skipped over one or more newlines; relocate the start of |
622 | // the next line. |
623 | nextLine_ = at_ = after; |
624 | NextLine(); |
625 | } else { |
626 | // Don't emit any messages about unclosed C-style comments, because |
627 | // the sequence /* can appear legally in a FORMAT statement. There's |
628 | // no ambiguity, since the sequence */ cannot appear legally. |
629 | break; |
630 | } |
631 | } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ && |
632 | at_[1] == '\n' && !IsAtEnd()) { |
633 | BeginSourceLineAndAdvance(); |
634 | } else { |
635 | break; |
636 | } |
637 | } |
638 | } |
639 | |
640 | void Prescanner::SkipSpaces() { |
641 | while (IsSpaceOrTab(p: at_)) { |
642 | NextChar(); |
643 | } |
644 | insertASpace_ = false; |
645 | } |
646 | |
647 | const char *Prescanner::SkipWhiteSpace(const char *p) { |
648 | while (int n{IsSpaceOrTab(p)}) { |
649 | p += n; |
650 | } |
651 | return p; |
652 | } |
653 | |
654 | const char *Prescanner::SkipWhiteSpaceIncludingEmptyMacros( |
655 | const char *p) const { |
656 | while (true) { |
657 | if (int n{IsSpaceOrTab(p)}) { |
658 | p += n; |
659 | } else if (preprocessor_.AnyDefinitions() && IsLegalIdentifierStart(*p)) { |
660 | // Skip keyword macros with empty definitions |
661 | const char *q{p + 1}; |
662 | while (IsLegalInIdentifier(*q)) { |
663 | ++q; |
664 | } |
665 | if (preprocessor_.IsNameDefinedEmpty( |
666 | CharBlock{p, static_cast<std::size_t>(q - p)})) { |
667 | p = q; |
668 | } else { |
669 | break; |
670 | } |
671 | } else { |
672 | break; |
673 | } |
674 | } |
675 | return p; |
676 | } |
677 | |
678 | const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const { |
679 | while (true) { |
680 | if (int n{IsSpaceOrTab(p)}) { |
681 | p += n; |
682 | } else if (IsCComment(p)) { |
683 | if (const char *after{SkipCComment(p)}) { |
684 | p = after; |
685 | } else { |
686 | break; |
687 | } |
688 | } else { |
689 | break; |
690 | } |
691 | } |
692 | return p; |
693 | } |
694 | |
695 | const char *Prescanner::SkipCComment(const char *p) const { |
696 | char star{' '}, slash{' '}; |
697 | p += 2; |
698 | while (star != '*' || slash != '/') { |
699 | if (p >= limit_) { |
700 | return nullptr; // signifies an unterminated comment |
701 | } |
702 | star = slash; |
703 | slash = *p++; |
704 | } |
705 | return p; |
706 | } |
707 | |
708 | bool Prescanner::NextToken(TokenSequence &tokens) { |
709 | CHECK(at_ >= start_ && at_ < limit_); |
710 | if (InFixedFormSource() && !preprocessingOnly_) { |
711 | SkipSpaces(); |
712 | } else { |
713 | if (*at_ == '/' && IsCComment(p: at_)) { |
714 | // Recognize and skip over classic C style /*comments*/ when |
715 | // outside a character literal. |
716 | if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) { |
717 | Say(LanguageFeature::ClassicCComments, GetCurrentProvenance(), |
718 | "nonstandard usage: C-style comment"_port_en_US); |
719 | } |
720 | SkipCComments(); |
721 | } |
722 | if (IsSpaceOrTab(p: at_)) { |
723 | // Compress free-form white space into a single space character. |
724 | const auto theSpace{at_}; |
725 | char previous{at_ <= start_ ? ' ' : at_[-1]}; |
726 | NextChar(); |
727 | SkipSpaces(); |
728 | if (*at_ == '\n' && !omitNewline_) { |
729 | // Discard white space at the end of a line. |
730 | } else if (!inPreprocessorDirective_ && |
731 | (previous == '(' || *at_ == '(' || *at_ == ')')) { |
732 | // Discard white space before/after '(' and before ')', unless in a |
733 | // preprocessor directive. This helps yield space-free contiguous |
734 | // names for generic interfaces like OPERATOR( + ) and |
735 | // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg). |
736 | // This has the effect of silently ignoring the illegal spaces in |
737 | // the array constructor ( /1,2/ ) but that seems benign; it's |
738 | // hard to avoid that while still removing spaces from OPERATOR( / ) |
739 | // and OPERATOR( // ). |
740 | } else { |
741 | // Preserve the squashed white space as a single space character. |
742 | tokens.PutNextTokenChar(' ', GetProvenance(theSpace)); |
743 | tokens.CloseToken(); |
744 | return true; |
745 | } |
746 | } |
747 | } |
748 | if (insertASpace_) { |
749 | tokens.PutNextTokenChar(' ', spaceProvenance_); |
750 | insertASpace_ = false; |
751 | } |
752 | if (*at_ == '\n') { |
753 | return false; |
754 | } |
755 | const char *start{at_}; |
756 | if (*at_ == '\'' || *at_ == '"') { |
757 | QuotedCharacterLiteral(tokens, start); |
758 | preventHollerith_ = false; |
759 | } else if (IsDecimalDigit(*at_)) { |
760 | int n{0}, digits{0}; |
761 | static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)}; |
762 | do { |
763 | if (n < maxHollerith) { |
764 | n = 10 * n + DecimalDigitValue(*at_); |
765 | } |
766 | EmitCharAndAdvance(tokens, *at_); |
767 | ++digits; |
768 | if (InFixedFormSource()) { |
769 | SkipSpaces(); |
770 | } |
771 | } while (IsDecimalDigit(*at_)); |
772 | if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith && |
773 | !preventHollerith_) { |
774 | Hollerith(tokens, n, start); |
775 | } else if (*at_ == '.') { |
776 | while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { |
777 | } |
778 | HandleExponentAndOrKindSuffix(tokens); |
779 | } else if (HandleExponentAndOrKindSuffix(tokens)) { |
780 | } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') && |
781 | inPreprocessorDirective_) { |
782 | do { |
783 | EmitCharAndAdvance(tokens, *at_); |
784 | } while (IsHexadecimalDigit(*at_)); |
785 | } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { // 4_"..." |
786 | EmitCharAndAdvance(tokens, *at_); |
787 | QuotedCharacterLiteral(tokens, start); |
788 | } else if (IsLetter(*at_) && !preventHollerith_ && |
789 | parenthesisNesting_ > 0 && |
790 | !preprocessor_.IsNameDefined(CharBlock{at_, 1})) { |
791 | // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that |
792 | // we don't misrecognize I9HHOLLERITH as an identifier in the next case. |
793 | EmitCharAndAdvance(tokens, *at_); |
794 | } |
795 | preventHollerith_ = false; |
796 | } else if (*at_ == '.') { |
797 | char nch{EmitCharAndAdvance(tokens, '.')}; |
798 | if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) { |
799 | while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { |
800 | } |
801 | HandleExponentAndOrKindSuffix(tokens); |
802 | } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') { |
803 | EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis |
804 | } |
805 | preventHollerith_ = false; |
806 | } else if (IsLegalInIdentifier(*at_)) { |
807 | std::size_t parts{1}; |
808 | bool anyDefined{false}; |
809 | bool hadContinuation{false}; |
810 | // Subtlety: When an identifier is split across continuation lines, |
811 | // its parts are kept as distinct pp-tokens if that macro replacement |
812 | // should operate on them independently. This trick accommodates the |
813 | // historic practice of using line continuation for token pasting after |
814 | // replacement. |
815 | // In free form, the macro to be replaced must have been preceded |
816 | // by '&' and followed by either '&' or, if last, the end of a line. |
817 | // call & call foo& call foo& |
818 | // &MACRO& OR &MACRO& OR &MACRO |
819 | // &foo(...) &(...) |
820 | do { |
821 | EmitChar(tokens, *at_); |
822 | ++at_, ++column_; |
823 | hadContinuation = SkipToNextSignificantCharacter(); |
824 | if (hadContinuation && IsLegalIdentifierStart(*at_)) { |
825 | // Continued identifier |
826 | tokens.CloseToken(); |
827 | ++parts; |
828 | if (!anyDefined && |
829 | (parts > 2 || inFixedForm_ || |
830 | (start > start_ && start[-1] == '&')) && |
831 | preprocessor_.IsNameDefined( |
832 | tokens.TokenAt(tokens.SizeInTokens() - 1))) { |
833 | anyDefined = true; |
834 | } |
835 | } |
836 | } while (IsLegalInIdentifier(*at_)); |
837 | if (!anyDefined && parts > 1) { |
838 | tokens.CloseToken(); |
839 | char after{*SkipWhiteSpace(p: at_)}; |
840 | anyDefined = (hadContinuation || after == '\n' || after == '&') && |
841 | preprocessor_.IsNameDefined( |
842 | tokens.TokenAt(tokens.SizeInTokens() - 1)); |
843 | tokens.ReopenLastToken(); |
844 | } |
845 | if (!anyDefined) { |
846 | // If no part was a defined macro, combine the parts into one so that |
847 | // the combination itself can be subject to macro replacement. |
848 | while (parts-- > 1) { |
849 | tokens.ReopenLastToken(); |
850 | } |
851 | } |
852 | if (InFixedFormSource()) { |
853 | SkipSpaces(); |
854 | } |
855 | if ((*at_ == '\'' || *at_ == '"') && |
856 | tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..." |
857 | QuotedCharacterLiteral(tokens, start); |
858 | preventHollerith_ = false; |
859 | } else { |
860 | preventHollerith_ = true; // DO 10 H = ... |
861 | } |
862 | } else if (*at_ == '*') { |
863 | if (EmitCharAndAdvance(tokens, '*') == '*') { |
864 | EmitCharAndAdvance(tokens, '*'); |
865 | } else { |
866 | // Subtle ambiguity: |
867 | // CHARACTER*2H declares H because *2 is a kind specifier |
868 | // DATAC/N*2H / is repeated Hollerith |
869 | preventHollerith_ = !slashInCurrentStatement_; |
870 | } |
871 | } else { |
872 | char ch{*at_}; |
873 | if (ch == '(') { |
874 | if (parenthesisNesting_++ == 0) { |
875 | isPossibleMacroCall_ = tokens.SizeInTokens() > 0 && |
876 | preprocessor_.IsFunctionLikeDefinition( |
877 | tokens.TokenAt(tokens.SizeInTokens() - 1)); |
878 | } |
879 | } else if (ch == ')' && parenthesisNesting_ > 0) { |
880 | --parenthesisNesting_; |
881 | } |
882 | char nch{EmitCharAndAdvance(tokens, ch)}; |
883 | preventHollerith_ = false; |
884 | if ((nch == '=' && |
885 | (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) || |
886 | (ch == nch && |
887 | (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' || |
888 | ch == '|' || ch == '<' || ch == '>')) || |
889 | (ch == '=' && nch == '>')) { |
890 | // token comprises two characters |
891 | EmitCharAndAdvance(tokens, nch); |
892 | } else if (ch == '/') { |
893 | slashInCurrentStatement_ = true; |
894 | } else if (ch == ';' && InFixedFormSource()) { |
895 | SkipSpaces(); |
896 | if (IsDecimalDigit(*at_)) { |
897 | if (features_.ShouldWarn( |
898 | common::LanguageFeature::MiscSourceExtensions)) { |
899 | Say(common::LanguageFeature::MiscSourceExtensions, |
900 | GetProvenanceRange(at_, at_ + 1), |
901 | "Label should be in the label field"_port_en_US); |
902 | } |
903 | } |
904 | } |
905 | } |
906 | tokens.CloseToken(); |
907 | return true; |
908 | } |
909 | |
910 | bool Prescanner::HandleExponent(TokenSequence &tokens) { |
911 | if (char ed{ToLowerCaseLetter(*at_)}; ed == 'e' || ed == 'd') { |
912 | // Do some look-ahead to ensure that this 'e'/'d' is an exponent, |
913 | // not the start of an identifier that could be a macro. |
914 | const char *startAt{at_}; |
915 | int startColumn{column_}; |
916 | TokenSequence possible; |
917 | EmitCharAndAdvance(possible, *at_); |
918 | if (*at_ == '+' || *at_ == '-') { |
919 | EmitCharAndAdvance(possible, *at_); |
920 | } |
921 | if (IsDecimalDigit(*at_)) { // it's an exponent; scan it |
922 | while (IsDecimalDigit(*at_)) { |
923 | EmitCharAndAdvance(possible, *at_); |
924 | } |
925 | possible.CloseToken(); |
926 | tokens.AppendRange(possible, 0); // appends to current token |
927 | return true; |
928 | } |
929 | // Not an exponent; backtrack |
930 | at_ = startAt; |
931 | column_ = startColumn; |
932 | } |
933 | return false; |
934 | } |
935 | |
936 | bool Prescanner::HandleKindSuffix(TokenSequence &tokens) { |
937 | if (*at_ != '_') { |
938 | return false; |
939 | } |
940 | TokenSequence withUnderscore, separate; |
941 | EmitChar(withUnderscore, '_'); |
942 | EmitCharAndAdvance(separate, '_'); |
943 | if (IsLegalInIdentifier(*at_)) { |
944 | separate.CloseToken(); |
945 | EmitChar(withUnderscore, *at_); |
946 | EmitCharAndAdvance(separate, *at_); |
947 | while (IsLegalInIdentifier(*at_)) { |
948 | EmitChar(withUnderscore, *at_); |
949 | EmitCharAndAdvance(separate, *at_); |
950 | } |
951 | } |
952 | withUnderscore.CloseToken(); |
953 | separate.CloseToken(); |
954 | tokens.CloseToken(); |
955 | if (separate.SizeInTokens() == 2 && |
956 | preprocessor_.IsNameDefined(separate.TokenAt(1)) && |
957 | !preprocessor_.IsNameDefined(withUnderscore.ToCharBlock())) { |
958 | // "_foo" is not defined, but "foo" is |
959 | tokens.CopyAll(separate); // '_' "foo" |
960 | } else { |
961 | tokens.CopyAll(withUnderscore); // "_foo" |
962 | } |
963 | return true; |
964 | } |
965 | |
966 | bool Prescanner::HandleExponentAndOrKindSuffix(TokenSequence &tokens) { |
967 | bool hadExponent{HandleExponent(tokens)}; |
968 | if (HandleKindSuffix(tokens)) { |
969 | return true; |
970 | } else { |
971 | return hadExponent; |
972 | } |
973 | } |
974 | |
975 | void Prescanner::QuotedCharacterLiteral( |
976 | TokenSequence &tokens, const char *start) { |
977 | char quote{*at_}; |
978 | const char *end{at_ + 1}; |
979 | inCharLiteral_ = true; |
980 | continuationInCharLiteral_ = true; |
981 | const auto emit{[&](char ch) { EmitChar(tokens, ch); }}; |
982 | const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }}; |
983 | bool isEscaped{false}; |
984 | bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)}; |
985 | while (true) { |
986 | if (*at_ == '\\') { |
987 | if (escapesEnabled) { |
988 | isEscaped = !isEscaped; |
989 | } else { |
990 | // The parser always processes escape sequences, so don't confuse it |
991 | // when escapes are disabled. |
992 | insert('\\'); |
993 | } |
994 | } else { |
995 | isEscaped = false; |
996 | } |
997 | if (*at_ == '\n') { |
998 | if (inPreprocessorDirective_) { |
999 | EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false, |
1000 | Encoding::LATIN_1); |
1001 | } else if (InCompilerDirective() && preprocessingOnly_) { |
1002 | // don't complain about -E output of !$, do it in later compilation |
1003 | } else { |
1004 | Say(GetProvenanceRange(start, end), |
1005 | "Incomplete character literal"_err_en_US); |
1006 | } |
1007 | break; |
1008 | } |
1009 | EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false, |
1010 | Encoding::LATIN_1); |
1011 | while (PadOutCharacterLiteral(tokens)) { |
1012 | } |
1013 | // Here's a weird edge case. When there's a two or more following |
1014 | // continuation lines at this point, and the entire significant part of |
1015 | // the next continuation line is the name of a keyword macro, replace |
1016 | // it in the character literal with its definition. Example: |
1017 | // #define FOO foo |
1018 | // subroutine subr() bind(c, name="my_& |
1019 | // &FOO& |
1020 | // &_bar") ... |
1021 | // produces a binding name of "my_foo_bar". |
1022 | while (at_[1] == '&' && nextLine_ < limit_ && !InFixedFormSource()) { |
1023 | const char *idStart{nextLine_}; |
1024 | if (const char *amper{SkipWhiteSpace(p: nextLine_)}; *amper == '&') { |
1025 | idStart = amper + 1; |
1026 | } |
1027 | if (IsLegalIdentifierStart(*idStart)) { |
1028 | std::size_t idLen{1}; |
1029 | for (; IsLegalInIdentifier(idStart[idLen]); ++idLen) { |
1030 | } |
1031 | if (idStart[idLen] == '&') { |
1032 | CharBlock id{idStart, idLen}; |
1033 | if (preprocessor_.IsNameDefined(id)) { |
1034 | TokenSequence ppTokens; |
1035 | ppTokens.Put(id, GetProvenance(idStart)); |
1036 | if (auto replaced{ |
1037 | preprocessor_.MacroReplacement(ppTokens, *this)}) { |
1038 | tokens.CopyAll(*replaced); |
1039 | at_ = &idStart[idLen - 1]; |
1040 | NextLine(); |
1041 | continue; // try again on the next line |
1042 | } |
1043 | } |
1044 | } |
1045 | } |
1046 | break; |
1047 | } |
1048 | end = at_ + 1; |
1049 | NextChar(); |
1050 | if (*at_ == quote && !isEscaped) { |
1051 | // A doubled unescaped quote mark becomes a single instance of that |
1052 | // quote character in the literal (later). There can be spaces between |
1053 | // the quotes in fixed form source. |
1054 | EmitChar(tokens, quote); |
1055 | inCharLiteral_ = false; // for cases like print *, '...'!comment |
1056 | NextChar(); |
1057 | if (InFixedFormSource()) { |
1058 | SkipSpaces(); |
1059 | } |
1060 | if (*at_ != quote) { |
1061 | break; |
1062 | } |
1063 | inCharLiteral_ = true; |
1064 | } |
1065 | } |
1066 | continuationInCharLiteral_ = false; |
1067 | inCharLiteral_ = false; |
1068 | } |
1069 | |
1070 | void Prescanner::Hollerith( |
1071 | TokenSequence &tokens, int count, const char *start) { |
1072 | inCharLiteral_ = true; |
1073 | CHECK(*at_ == 'h' || *at_ == 'H'); |
1074 | EmitChar(tokens, 'H'); |
1075 | while (count-- > 0) { |
1076 | if (PadOutCharacterLiteral(tokens)) { |
1077 | } else if (*at_ == '\n') { |
1078 | if (features_.ShouldWarn(common::UsageWarning::Scanning)) { |
1079 | Say(common::UsageWarning::Scanning, GetProvenanceRange(start, at_), |
1080 | "Possible truncated Hollerith literal"_warn_en_US); |
1081 | } |
1082 | break; |
1083 | } else { |
1084 | NextChar(); |
1085 | // Each multi-byte character encoding counts as a single character. |
1086 | // No escape sequences are recognized. |
1087 | // Hollerith is always emitted to the cooked character |
1088 | // stream in UTF-8. |
1089 | DecodedCharacter decoded{DecodeCharacter( |
1090 | encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)}; |
1091 | if (decoded.bytes > 0) { |
1092 | EncodedCharacter utf8{ |
1093 | EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)}; |
1094 | for (int j{0}; j < utf8.bytes; ++j) { |
1095 | EmitChar(tokens, utf8.buffer[j]); |
1096 | } |
1097 | at_ += decoded.bytes - 1; |
1098 | } else { |
1099 | Say(GetProvenanceRange(start, at_), |
1100 | "Bad character in Hollerith literal"_err_en_US); |
1101 | break; |
1102 | } |
1103 | } |
1104 | } |
1105 | if (*at_ != '\n') { |
1106 | NextChar(); |
1107 | } |
1108 | inCharLiteral_ = false; |
1109 | } |
1110 | |
1111 | // In fixed form, source card images must be processed as if they were at |
1112 | // least 72 columns wide, at least in character literal contexts. |
1113 | bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) { |
1114 | while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') { |
1115 | if (column_ < fixedFormColumnLimit_) { |
1116 | tokens.PutNextTokenChar(' ', spaceProvenance_); |
1117 | ++column_; |
1118 | return true; |
1119 | } |
1120 | if (!FixedFormContinuation(mightNeedSpace: false /*no need to insert space*/) || |
1121 | tabInCurrentLine_) { |
1122 | return false; |
1123 | } |
1124 | CHECK(column_ == 7); |
1125 | --at_; // point to column 6 of continuation line |
1126 | column_ = 6; |
1127 | } |
1128 | return false; |
1129 | } |
1130 | |
1131 | static bool IsAtProcess(const char *p) { |
1132 | static const char pAtProc[]{"process"}; |
1133 | for (std::size_t i{0}; i < sizeof pAtProc - 1; ++i) { |
1134 | if (ToLowerCaseLetter(*++p) != pAtProc[i]) |
1135 | return false; |
1136 | } |
1137 | return true; |
1138 | } |
1139 | |
1140 | bool Prescanner::IsFixedFormCommentLine(const char *start) const { |
1141 | const char *p{start}; |
1142 | // The @process directive must start in column 1. |
1143 | if (*p == '@' && IsAtProcess(p)) { |
1144 | return true; |
1145 | } |
1146 | if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c. |
1147 | ((*p == 'D' || *p == 'd') && |
1148 | !features_.IsEnabled(LanguageFeature::OldDebugLines))) { |
1149 | return true; |
1150 | } |
1151 | bool anyTabs{false}; |
1152 | while (true) { |
1153 | if (int n{IsSpace(p)}) { |
1154 | p += n; |
1155 | } else if (*p == '\t') { |
1156 | anyTabs = true; |
1157 | ++p; |
1158 | } else if (*p == '0' && !anyTabs && p == start + 5) { |
1159 | ++p; // 0 in column 6 must treated as a space |
1160 | } else { |
1161 | break; |
1162 | } |
1163 | } |
1164 | if (!anyTabs && p >= start + fixedFormColumnLimit_) { |
1165 | return true; |
1166 | } |
1167 | if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) { |
1168 | return true; |
1169 | } |
1170 | return *p == '\n'; |
1171 | } |
1172 | |
1173 | const char *Prescanner::IsFreeFormComment(const char *p) const { |
1174 | p = SkipWhiteSpaceAndCComments(p); |
1175 | if (*p == '!' || *p == '\n') { |
1176 | return p; |
1177 | } else if (*p == '@') { |
1178 | return IsAtProcess(p) ? p : nullptr; |
1179 | } else { |
1180 | return nullptr; |
1181 | } |
1182 | } |
1183 | |
1184 | std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const { |
1185 | if (!expandIncludeLines_) { |
1186 | return std::nullopt; |
1187 | } |
1188 | const char *p{SkipWhiteSpace(p: start)}; |
1189 | if (*p == '0' && inFixedForm_ && p == start + 5) { |
1190 | // Accept " 0INCLUDE" in fixed form. |
1191 | p = SkipWhiteSpace(p: p + 1); |
1192 | } |
1193 | for (const char *q{"include"}; *q; ++q) { |
1194 | if (ToLowerCaseLetter(*p) != *q) { |
1195 | return std::nullopt; |
1196 | } |
1197 | p = SkipWhiteSpace(p: p + 1); |
1198 | } |
1199 | if (IsDecimalDigit(*p)) { // accept & ignore a numeric kind prefix |
1200 | for (p = SkipWhiteSpace(p: p + 1); IsDecimalDigit(*p); |
1201 | p = SkipWhiteSpace(p: p + 1)) { |
1202 | } |
1203 | if (*p != '_') { |
1204 | return std::nullopt; |
1205 | } |
1206 | p = SkipWhiteSpace(p: p + 1); |
1207 | } |
1208 | if (*p == '"' || *p == '\'') { |
1209 | return {p - start}; |
1210 | } |
1211 | return std::nullopt; |
1212 | } |
1213 | |
1214 | void Prescanner::FortranInclude(const char *firstQuote) { |
1215 | const char *p{firstQuote}; |
1216 | while (*p != '"' && *p != '\'') { |
1217 | ++p; |
1218 | } |
1219 | char quote{*p}; |
1220 | std::string path; |
1221 | for (++p; *p != '\n'; ++p) { |
1222 | if (*p == quote) { |
1223 | if (p[1] != quote) { |
1224 | break; |
1225 | } |
1226 | ++p; |
1227 | } |
1228 | path += *p; |
1229 | } |
1230 | if (*p != quote) { |
1231 | Say(GetProvenanceRange(firstQuote, p), |
1232 | "malformed path name string"_err_en_US); |
1233 | return; |
1234 | } |
1235 | p = SkipWhiteSpace(p: p + 1); |
1236 | if (*p != '\n' && *p != '!') { |
1237 | const char *garbage{p}; |
1238 | for (; *p != '\n' && *p != '!'; ++p) { |
1239 | } |
1240 | if (features_.ShouldWarn(common::UsageWarning::Scanning)) { |
1241 | Say(common::UsageWarning::Scanning, GetProvenanceRange(garbage, p), |
1242 | "excess characters after path name"_warn_en_US); |
1243 | } |
1244 | } |
1245 | std::string buf; |
1246 | llvm::raw_string_ostream error{buf}; |
1247 | Provenance provenance{GetProvenance(nextLine_)}; |
1248 | std::optional<std::string> prependPath; |
1249 | if (const SourceFile * currentFile{allSources_.GetSourceFile(provenance)}) { |
1250 | prependPath = DirectoryName(currentFile->path()); |
1251 | } |
1252 | const SourceFile *included{ |
1253 | allSources_.Open(path, error, std::move(prependPath))}; |
1254 | if (!included) { |
1255 | Say(provenance, "INCLUDE: %s"_err_en_US, buf); |
1256 | } else if (included->bytes() > 0) { |
1257 | ProvenanceRange includeLineRange{ |
1258 | provenance, static_cast<std::size_t>(p - nextLine_)}; |
1259 | ProvenanceRange fileRange{ |
1260 | allSources_.AddIncludedFile(*included, includeLineRange)}; |
1261 | Preprocessor cleanPrepro{allSources_}; |
1262 | if (preprocessor_.IsNameDefined("__FILE__"s)) { |
1263 | cleanPrepro.DefineStandardMacros(); // __FILE__, __LINE__, &c. |
1264 | } |
1265 | if (preprocessor_.IsNameDefined("_CUDA"s)) { |
1266 | cleanPrepro.Define("_CUDA"s, "1"); |
1267 | } |
1268 | Prescanner{*this, cleanPrepro, /*isNestedInIncludeDirective=*/false} |
1269 | .set_encoding(included->encoding()) |
1270 | .Prescan(fileRange); |
1271 | } |
1272 | } |
1273 | |
1274 | const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const { |
1275 | const char *p{start}; |
1276 | while (int n{IsSpace(p)}) { |
1277 | p += n; |
1278 | } |
1279 | if (*p == '#') { |
1280 | if (inFixedForm_ && p == start + 5) { |
1281 | return nullptr; |
1282 | } |
1283 | } else { |
1284 | p = SkipWhiteSpace(p); |
1285 | if (*p != '#') { |
1286 | return nullptr; |
1287 | } |
1288 | } |
1289 | return SkipWhiteSpace(p: p + 1); |
1290 | } |
1291 | |
1292 | bool Prescanner::IsNextLinePreprocessorDirective() const { |
1293 | return IsPreprocessorDirectiveLine(start: nextLine_) != nullptr; |
1294 | } |
1295 | |
1296 | bool Prescanner::SkipCommentLine(bool afterAmpersand) { |
1297 | if (IsAtEnd()) { |
1298 | if (afterAmpersand && prescannerNesting_ > 0) { |
1299 | // A continuation marker at the end of the last line in an |
1300 | // include file inhibits the newline for that line. |
1301 | SkipToEndOfLine(); |
1302 | omitNewline_ = true; |
1303 | } |
1304 | } else if (inPreprocessorDirective_) { |
1305 | } else { |
1306 | auto lineClass{ClassifyLine(nextLine_)}; |
1307 | if (lineClass.kind == LineClassification::Kind::Comment) { |
1308 | NextLine(); |
1309 | return true; |
1310 | } else if (lineClass.kind == |
1311 | LineClassification::Kind::ConditionalCompilationDirective || |
1312 | lineClass.kind == LineClassification::Kind::PreprocessorDirective) { |
1313 | // Allow conditional compilation directives (e.g., #ifdef) to affect |
1314 | // continuation lines. |
1315 | // Allow other preprocessor directives, too, except #include |
1316 | // (when it does not follow '&'), #define, and #undef (because |
1317 | // they cannot be allowed to affect preceding text on a |
1318 | // continued line). |
1319 | preprocessor_.Directive(TokenizePreprocessorDirective(), *this); |
1320 | return true; |
1321 | } else if (afterAmpersand && |
1322 | (lineClass.kind == LineClassification::Kind::DefinitionDirective || |
1323 | lineClass.kind == LineClassification::Kind::IncludeDirective || |
1324 | lineClass.kind == LineClassification::Kind::IncludeLine)) { |
1325 | SkipToEndOfLine(); |
1326 | omitNewline_ = true; |
1327 | skipLeadingAmpersand_ = true; |
1328 | } |
1329 | } |
1330 | return false; |
1331 | } |
1332 | |
1333 | const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) { |
1334 | if (IsAtEnd()) { |
1335 | return nullptr; |
1336 | } |
1337 | tabInCurrentLine_ = false; |
1338 | char col1{*nextLine_}; |
1339 | bool canBeNonDirectiveContinuation{ |
1340 | (col1 == ' ' || |
1341 | ((col1 == 'D' || col1 == 'd') && |
1342 | features_.IsEnabled(LanguageFeature::OldDebugLines))) && |
1343 | nextLine_[1] == ' ' && nextLine_[2] == ' ' && nextLine_[3] == ' ' && |
1344 | nextLine_[4] == ' '}; |
1345 | if (InCompilerDirective() && |
1346 | !(InOpenMPConditionalLine() && !preprocessingOnly_)) { |
1347 | // !$ under -E is not continued, but deferred to later compilation |
1348 | if (IsFixedFormCommentChar(ch: col1) && |
1349 | !(InOpenMPConditionalLine() && preprocessingOnly_)) { |
1350 | int j{1}; |
1351 | for (; j < 5; ++j) { |
1352 | char ch{directiveSentinel_[j - 1]}; |
1353 | if (ch == '\0') { |
1354 | break; |
1355 | } else if (ch != ToLowerCaseLetter(nextLine_[j])) { |
1356 | return nullptr; |
1357 | } |
1358 | } |
1359 | for (; j < 5; ++j) { |
1360 | if (nextLine_[j] != ' ') { |
1361 | return nullptr; |
1362 | } |
1363 | } |
1364 | const char *col6{nextLine_ + 5}; |
1365 | if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(p: col6)) { |
1366 | if (mightNeedSpace && !IsSpace(p: nextLine_ + 6)) { |
1367 | insertASpace_ = true; |
1368 | } |
1369 | return nextLine_ + 6; |
1370 | } |
1371 | } |
1372 | } else { // Normal case: not in a compiler directive. |
1373 | // !$ conditional compilation lines may be continuations when not |
1374 | // just preprocessing. |
1375 | if (!preprocessingOnly_ && IsFixedFormCommentChar(ch: col1) && |
1376 | nextLine_[1] == '$' && nextLine_[2] == ' ' && nextLine_[3] == ' ' && |
1377 | nextLine_[4] == ' ' && IsCompilerDirectiveSentinel(&nextLine_[1], 1)) { |
1378 | if (const char *col6{nextLine_ + 5}; |
1379 | *col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(p: col6)) { |
1380 | insertASpace_ |= mightNeedSpace && !IsSpace(p: nextLine_ + 6); |
1381 | return nextLine_ + 6; |
1382 | } else { |
1383 | return nullptr; |
1384 | } |
1385 | } |
1386 | if (col1 == '&' && |
1387 | features_.IsEnabled( |
1388 | LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { |
1389 | // Extension: '&' as continuation marker |
1390 | if (features_.ShouldWarn( |
1391 | LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { |
1392 | Say(LanguageFeature::FixedFormContinuationWithColumn1Ampersand, |
1393 | GetProvenance(nextLine_), "nonstandard usage"_port_en_US); |
1394 | } |
1395 | return nextLine_ + 1; |
1396 | } |
1397 | if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') { |
1398 | tabInCurrentLine_ = true; |
1399 | return nextLine_ + 2; // VAX extension |
1400 | } |
1401 | if (canBeNonDirectiveContinuation) { |
1402 | const char *col6{nextLine_ + 5}; |
1403 | if (*col6 != '\n' && *col6 != '0' && !IsSpaceOrTab(p: col6)) { |
1404 | if ((*col6 == 'i' || *col6 == 'I') && IsIncludeLine(start: nextLine_)) { |
1405 | // It's an INCLUDE line, not a continuation |
1406 | } else { |
1407 | return nextLine_ + 6; |
1408 | } |
1409 | } |
1410 | } |
1411 | if (IsImplicitContinuation()) { |
1412 | return nextLine_; |
1413 | } |
1414 | } |
1415 | return nullptr; // not a continuation line |
1416 | } |
1417 | |
1418 | const char *Prescanner::FreeFormContinuationLine(bool ampersand) { |
1419 | const char *lineStart{nextLine_}; |
1420 | const char *p{lineStart}; |
1421 | if (p >= limit_) { |
1422 | return nullptr; |
1423 | } |
1424 | p = SkipWhiteSpaceIncludingEmptyMacros(p); |
1425 | if (InCompilerDirective()) { |
1426 | if (InOpenMPConditionalLine()) { |
1427 | if (preprocessingOnly_) { |
1428 | // in -E mode, don't treat !$ as a continuation |
1429 | return nullptr; |
1430 | } else if (p[0] == '!' && p[1] == '$') { |
1431 | // accept but do not require a matching sentinel |
1432 | if (p[2] != '&' && !IsSpaceOrTab(p: &p[2])) { |
1433 | return nullptr; // not !$ |
1434 | } |
1435 | p += 2; |
1436 | } |
1437 | } else if (*p++ == '!') { |
1438 | for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) { |
1439 | if (*s != ToLowerCaseLetter(*p)) { |
1440 | return nullptr; // not the same directive class |
1441 | } |
1442 | } |
1443 | } else { |
1444 | return nullptr; |
1445 | } |
1446 | p = SkipWhiteSpace(p); |
1447 | if (*p == '&') { |
1448 | if (!ampersand) { |
1449 | insertASpace_ = true; |
1450 | } |
1451 | return p + 1; |
1452 | } else if (ampersand) { |
1453 | return p; |
1454 | } else { |
1455 | return nullptr; |
1456 | } |
1457 | } |
1458 | if (p[0] == '!' && p[1] == '$' && !preprocessingOnly_ && |
1459 | features_.IsEnabled(LanguageFeature::OpenMP)) { |
1460 | // !$ conditional line can be a continuation |
1461 | p = lineStart = SkipWhiteSpace(p: p + 2); |
1462 | } |
1463 | if (*p == '&') { |
1464 | return p + 1; |
1465 | } else if (*p == '!' || *p == '\n' || *p == '#') { |
1466 | return nullptr; |
1467 | } else if (ampersand || IsImplicitContinuation()) { |
1468 | if (continuationInCharLiteral_) { |
1469 | // 'a'& -> 'a''b' == "a'b" |
1470 | // 'b' |
1471 | if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) { |
1472 | Say(common::LanguageFeature::MiscSourceExtensions, |
1473 | GetProvenanceRange(p, p + 1), |
1474 | "Character literal continuation line should have been preceded by '&'"_port_en_US); |
1475 | } |
1476 | } else if (p > lineStart && IsSpaceOrTab(p: p - 1)) { |
1477 | --p; |
1478 | } else { |
1479 | insertASpace_ = true; |
1480 | } |
1481 | return p; |
1482 | } else { |
1483 | return nullptr; |
1484 | } |
1485 | } |
1486 | |
1487 | bool Prescanner::FixedFormContinuation(bool mightNeedSpace) { |
1488 | // N.B. We accept '&' as a continuation indicator in fixed form, too, |
1489 | // but not in a character literal. |
1490 | if (*at_ == '&' && inCharLiteral_) { |
1491 | return false; |
1492 | } |
1493 | do { |
1494 | if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) { |
1495 | BeginSourceLine(at: cont); |
1496 | column_ = 7; |
1497 | NextLine(); |
1498 | return true; |
1499 | } |
1500 | } while (SkipCommentLine(afterAmpersand: false /* not after ampersand */)); |
1501 | return false; |
1502 | } |
1503 | |
1504 | bool Prescanner::FreeFormContinuation() { |
1505 | const char *p{at_}; |
1506 | bool ampersand{*p == '&'}; |
1507 | if (ampersand) { |
1508 | p = SkipWhiteSpace(p: p + 1); |
1509 | } |
1510 | if (*p != '\n') { |
1511 | if (inCharLiteral_) { |
1512 | return false; |
1513 | } else if (*p == '!') { // & ! comment - ok |
1514 | } else if (ampersand && isPossibleMacroCall_ && (*p == ',' || *p == ')')) { |
1515 | return false; // allow & at end of a macro argument |
1516 | } else if (ampersand && preprocessingOnly_ && !parenthesisNesting_) { |
1517 | return false; // allow & at start of line, maybe after !$ |
1518 | } else if (features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) { |
1519 | Say(LanguageFeature::CruftAfterAmpersand, GetProvenance(p), |
1520 | "missing ! before comment after &"_warn_en_US); |
1521 | } |
1522 | } |
1523 | do { |
1524 | if (const char *cont{FreeFormContinuationLine(ampersand)}) { |
1525 | BeginSourceLine(at: cont); |
1526 | NextLine(); |
1527 | return true; |
1528 | } |
1529 | } while (SkipCommentLine(afterAmpersand: ampersand)); |
1530 | return false; |
1531 | } |
1532 | |
1533 | // Implicit line continuation allows a preprocessor macro call with |
1534 | // arguments to span multiple lines. |
1535 | bool Prescanner::IsImplicitContinuation() const { |
1536 | return !inPreprocessorDirective_ && !inCharLiteral_ && isPossibleMacroCall_ && |
1537 | parenthesisNesting_ > 0 && !IsAtEnd() && |
1538 | ClassifyLine(nextLine_).kind == LineClassification::Kind::Source; |
1539 | } |
1540 | |
1541 | bool Prescanner::Continuation(bool mightNeedFixedFormSpace) { |
1542 | if (disableSourceContinuation_) { |
1543 | return false; |
1544 | } else if (*at_ == '\n' || *at_ == '&') { |
1545 | if (inFixedForm_) { |
1546 | return FixedFormContinuation(mightNeedSpace: mightNeedFixedFormSpace); |
1547 | } else { |
1548 | return FreeFormContinuation(); |
1549 | } |
1550 | } else if (*at_ == '\\' && at_ + 2 == nextLine_ && |
1551 | backslashFreeFormContinuation_ && !inFixedForm_ && nextLine_ < limit_) { |
1552 | // cpp-like handling of \ at end of a free form source line |
1553 | BeginSourceLine(at: nextLine_); |
1554 | NextLine(); |
1555 | return true; |
1556 | } else { |
1557 | return false; |
1558 | } |
1559 | } |
1560 | |
1561 | std::optional<Prescanner::LineClassification> |
1562 | Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const { |
1563 | const char *p{start}; |
1564 | char col1{*p++}; |
1565 | if (!IsFixedFormCommentChar(ch: col1)) { |
1566 | return std::nullopt; |
1567 | } |
1568 | char sentinel[5], *sp{sentinel}; |
1569 | int column{2}; |
1570 | for (; column < 6; ++column) { |
1571 | if (*p == '\n' || IsSpaceOrTab(p) || IsDecimalDigit(*p)) { |
1572 | break; |
1573 | } |
1574 | *sp++ = ToLowerCaseLetter(*p++); |
1575 | } |
1576 | if (sp == sentinel) { |
1577 | return std::nullopt; |
1578 | } |
1579 | *sp = '\0'; |
1580 | // A fixed form OpenMP conditional compilation sentinel must satisfy the |
1581 | // following criteria, for initial lines: |
1582 | // - Columns 3 through 5 must have only white space or numbers. |
1583 | // - Column 6 must be space or zero. |
1584 | bool isOpenMPConditional{sp == &sentinel[1] && sentinel[0] == '$'}; |
1585 | bool hadDigit{false}; |
1586 | if (isOpenMPConditional) { |
1587 | for (; column < 6; ++column, ++p) { |
1588 | if (IsDecimalDigit(*p)) { |
1589 | hadDigit = true; |
1590 | } else if (!IsSpaceOrTab(p)) { |
1591 | return std::nullopt; |
1592 | } |
1593 | } |
1594 | } |
1595 | if (column == 6) { |
1596 | if (*p == '0') { |
1597 | ++p; |
1598 | } else if (int n{IsSpaceOrTab(p)}) { |
1599 | p += n; |
1600 | } else if (isOpenMPConditional && preprocessingOnly_ && !hadDigit && |
1601 | *p != '\n') { |
1602 | // In -E mode, "!$ &" is treated as a directive |
1603 | } else { |
1604 | // This is a Continuation line, not an initial directive line. |
1605 | return std::nullopt; |
1606 | } |
1607 | } |
1608 | if (const char *ss{IsCompilerDirectiveSentinel( |
1609 | sentinel, static_cast<std::size_t>(sp - sentinel))}) { |
1610 | return { |
1611 | LineClassification{LineClassification::Kind::CompilerDirective, 0, ss}}; |
1612 | } |
1613 | return std::nullopt; |
1614 | } |
1615 | |
1616 | std::optional<Prescanner::LineClassification> |
1617 | Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const { |
1618 | if (const char *p{SkipWhiteSpaceIncludingEmptyMacros(p: start)}; |
1619 | p && *p++ == '!') { |
1620 | if (auto maybePair{IsCompilerDirectiveSentinel(p)}) { |
1621 | auto offset{static_cast<std::size_t>(p - start - 1)}; |
1622 | return {LineClassification{LineClassification::Kind::CompilerDirective, |
1623 | offset, maybePair->first}}; |
1624 | } |
1625 | } |
1626 | return std::nullopt; |
1627 | } |
1628 | |
1629 | Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) { |
1630 | std::uint64_t packed{0}; |
1631 | for (char ch : dir) { |
1632 | packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff); |
1633 | } |
1634 | compilerDirectiveBloomFilter_.set(position: packed % prime1); |
1635 | compilerDirectiveBloomFilter_.set(position: packed % prime2); |
1636 | compilerDirectiveSentinels_.insert(x: dir); |
1637 | return *this; |
1638 | } |
1639 | |
1640 | const char *Prescanner::IsCompilerDirectiveSentinel( |
1641 | const char *sentinel, std::size_t len) const { |
1642 | std::uint64_t packed{0}; |
1643 | for (std::size_t j{0}; j < len; ++j) { |
1644 | packed = (packed << 8) | (sentinel[j] & 0xff); |
1645 | } |
1646 | if (len == 0 || !compilerDirectiveBloomFilter_.test(position: packed % prime1) || |
1647 | !compilerDirectiveBloomFilter_.test(position: packed % prime2)) { |
1648 | return nullptr; |
1649 | } |
1650 | const auto iter{compilerDirectiveSentinels_.find(x: std::string(sentinel, len))}; |
1651 | return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str(); |
1652 | } |
1653 | |
1654 | const char *Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const { |
1655 | const char *p{token.begin()}; |
1656 | const char *end{p + token.size()}; |
1657 | while (p < end && (*p == ' ' || *p == '\n')) { |
1658 | ++p; |
1659 | } |
1660 | if (p < end && *p == '!') { |
1661 | ++p; |
1662 | } |
1663 | while (end > p && (end[-1] == ' ' || end[-1] == '\t')) { |
1664 | --end; |
1665 | } |
1666 | return end > p && IsCompilerDirectiveSentinel(sentinel: p, len: end - p) ? p : nullptr; |
1667 | } |
1668 | |
1669 | std::optional<std::pair<const char *, const char *>> |
1670 | Prescanner::IsCompilerDirectiveSentinel(const char *p) const { |
1671 | char sentinel[8]; |
1672 | for (std::size_t j{0}; j + 1 < sizeof sentinel; ++p, ++j) { |
1673 | if (int n{IsSpaceOrTab(p)}; |
1674 | n || !(IsLetter(*p) || *p == '$' || *p == '@')) { |
1675 | if (j > 0) { |
1676 | if (j == 1 && sentinel[0] == '$' && n == 0 && *p != '&' && *p != '\n') { |
1677 | // Free form OpenMP conditional compilation line sentinels have to |
1678 | // be immediately followed by a space or &, not a digit |
1679 | // or anything else. A newline also works for an initial line. |
1680 | break; |
1681 | } |
1682 | sentinel[j] = '\0'; |
1683 | if (*p != '!') { |
1684 | if (const char *sp{IsCompilerDirectiveSentinel(sentinel, len: j)}) { |
1685 | return std::make_pair(x&: sp, y&: p); |
1686 | } |
1687 | } |
1688 | } |
1689 | break; |
1690 | } else { |
1691 | sentinel[j] = ToLowerCaseLetter(*p); |
1692 | } |
1693 | } |
1694 | return std::nullopt; |
1695 | } |
1696 | |
1697 | constexpr bool IsDirective(const char *match, const char *dir) { |
1698 | for (; *match; ++match) { |
1699 | if (*match != ToLowerCaseLetter(*dir++)) { |
1700 | return false; |
1701 | } |
1702 | } |
1703 | return true; |
1704 | } |
1705 | |
1706 | Prescanner::LineClassification Prescanner::ClassifyLine( |
1707 | const char *start) const { |
1708 | if (inFixedForm_) { |
1709 | if (std::optional<LineClassification> lc{ |
1710 | IsFixedFormCompilerDirectiveLine(start)}) { |
1711 | return std::move(*lc); |
1712 | } |
1713 | if (IsFixedFormCommentLine(start)) { |
1714 | return {LineClassification::Kind::Comment}; |
1715 | } |
1716 | } else { |
1717 | if (std::optional<LineClassification> lc{ |
1718 | IsFreeFormCompilerDirectiveLine(start)}) { |
1719 | return std::move(*lc); |
1720 | } |
1721 | if (const char *bang{IsFreeFormComment(p: start)}) { |
1722 | return {LineClassification::Kind::Comment, |
1723 | static_cast<std::size_t>(bang - start)}; |
1724 | } |
1725 | } |
1726 | if (std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) { |
1727 | return {LineClassification::Kind::IncludeLine, *quoteOffset}; |
1728 | } |
1729 | if (const char *dir{IsPreprocessorDirectiveLine(start)}) { |
1730 | if (IsDirective(match: "if", dir) || IsDirective(match: "elif", dir) || |
1731 | IsDirective(match: "else", dir) || IsDirective(match: "endif", dir)) { |
1732 | return {LineClassification::Kind::ConditionalCompilationDirective}; |
1733 | } else if (IsDirective(match: "include", dir)) { |
1734 | return {LineClassification::Kind::IncludeDirective}; |
1735 | } else if (IsDirective(match: "define", dir) || IsDirective(match: "undef", dir)) { |
1736 | return {LineClassification::Kind::DefinitionDirective}; |
1737 | } else { |
1738 | return {LineClassification::Kind::PreprocessorDirective}; |
1739 | } |
1740 | } |
1741 | return {LineClassification::Kind::Source}; |
1742 | } |
1743 | |
1744 | Prescanner::LineClassification Prescanner::ClassifyLine( |
1745 | TokenSequence &tokens, Provenance newlineProvenance) const { |
1746 | // Append a newline temporarily. |
1747 | tokens.PutNextTokenChar('\n', newlineProvenance); |
1748 | tokens.CloseToken(); |
1749 | const char *ppd{tokens.ToCharBlock().begin()}; |
1750 | LineClassification classification{ClassifyLine(start: ppd)}; |
1751 | tokens.pop_back(); // remove the newline |
1752 | return classification; |
1753 | } |
1754 | |
1755 | void Prescanner::SourceFormChange(std::string &&dir) { |
1756 | if (dir == "!dir$ free") { |
1757 | inFixedForm_ = false; |
1758 | } else if (dir == "!dir$ fixed") { |
1759 | inFixedForm_ = true; |
1760 | } |
1761 | } |
1762 | |
1763 | // Acquire and append compiler directive continuation lines to |
1764 | // the tokens that constitute a compiler directive, even when those |
1765 | // directive continuation lines are the result of macro expansion. |
1766 | // (Not used when neither the original compiler directive line nor |
1767 | // the directive continuation line result from preprocessing; regular |
1768 | // line continuation during tokenization handles that normal case.) |
1769 | bool Prescanner::CompilerDirectiveContinuation( |
1770 | TokenSequence &tokens, const char *origSentinel) { |
1771 | if (inFixedForm_ || tokens.empty() || |
1772 | tokens.TokenAt(tokens.SizeInTokens() - 1) != "&"|| |
1773 | (preprocessingOnly_ && !parenthesisNesting_)) { |
1774 | return false; |
1775 | } |
1776 | LineClassification followingLine{ClassifyLine(start: nextLine_)}; |
1777 | if (followingLine.kind == LineClassification::Kind::Comment) { |
1778 | nextLine_ += followingLine.payloadOffset; // advance to '!' or newline |
1779 | NextLine(); |
1780 | return true; |
1781 | } |
1782 | CHECK(origSentinel != nullptr); |
1783 | directiveSentinel_ = origSentinel; // so InCompilerDirective() is true |
1784 | const char *nextContinuation{ |
1785 | followingLine.kind == LineClassification::Kind::CompilerDirective |
1786 | ? FreeFormContinuationLine(ampersand: true) |
1787 | : nullptr}; |
1788 | if (!nextContinuation && |
1789 | followingLine.kind != LineClassification::Kind::Source) { |
1790 | return false; |
1791 | } |
1792 | auto origNextLine{nextLine_}; |
1793 | BeginSourceLine(at: nextLine_); |
1794 | NextLine(); |
1795 | if (nextContinuation) { |
1796 | // What follows is !DIR$ & xxx; skip over the & so that it |
1797 | // doesn't cause a spurious continuation. |
1798 | at_ = nextContinuation; |
1799 | } else { |
1800 | // What follows looks like a source line before macro expansion, |
1801 | // but might become a directive continuation afterwards. |
1802 | SkipSpaces(); |
1803 | } |
1804 | TokenSequence followingTokens; |
1805 | while (NextToken(followingTokens)) { |
1806 | } |
1807 | if (auto followingPrepro{ |
1808 | preprocessor_.MacroReplacement(followingTokens, *this)}) { |
1809 | followingTokens = std::move(*followingPrepro); |
1810 | } |
1811 | followingTokens.RemoveRedundantBlanks(); |
1812 | std::size_t startAt{0}; |
1813 | std::size_t following{followingTokens.SizeInTokens()}; |
1814 | bool ok{false}; |
1815 | if (nextContinuation) { |
1816 | ok = true; |
1817 | } else { |
1818 | startAt = 2; |
1819 | if (startAt < following && followingTokens.TokenAt(0) == "!") { |
1820 | CharBlock sentinel{followingTokens.TokenAt(1)}; |
1821 | if (!sentinel.empty() && |
1822 | std::memcmp(s1: sentinel.begin(), s2: origSentinel, n: sentinel.size()) == 0) { |
1823 | ok = true; |
1824 | while ( |
1825 | startAt < following && followingTokens.TokenAt(startAt).IsBlank()) { |
1826 | ++startAt; |
1827 | } |
1828 | if (startAt < following && followingTokens.TokenAt(startAt) == "&") { |
1829 | ++startAt; |
1830 | } |
1831 | } |
1832 | } |
1833 | } |
1834 | if (ok) { |
1835 | tokens.pop_back(); // delete original '&' |
1836 | tokens.AppendRange(followingTokens, startAt, following - startAt); |
1837 | tokens.RemoveRedundantBlanks(); |
1838 | } else { |
1839 | nextLine_ = origNextLine; |
1840 | } |
1841 | return ok; |
1842 | } |
1843 | |
1844 | // Similar, but for source line continuation after macro replacement. |
1845 | bool Prescanner::SourceLineContinuation(TokenSequence &tokens) { |
1846 | if (!inFixedForm_ && !tokens.empty() && |
1847 | tokens.TokenAt(tokens.SizeInTokens() - 1) == "&") { |
1848 | LineClassification followingLine{ClassifyLine(start: nextLine_)}; |
1849 | if (followingLine.kind == LineClassification::Kind::Comment) { |
1850 | nextLine_ += followingLine.payloadOffset; // advance to '!' or newline |
1851 | NextLine(); |
1852 | return true; |
1853 | } else if (const char *nextContinuation{FreeFormContinuationLine(ampersand: true)}) { |
1854 | BeginSourceLine(at: nextLine_); |
1855 | NextLine(); |
1856 | TokenSequence followingTokens; |
1857 | at_ = nextContinuation; |
1858 | while (NextToken(followingTokens)) { |
1859 | } |
1860 | if (auto followingPrepro{ |
1861 | preprocessor_.MacroReplacement(followingTokens, *this)}) { |
1862 | followingTokens = std::move(*followingPrepro); |
1863 | } |
1864 | followingTokens.RemoveRedundantBlanks(); |
1865 | tokens.pop_back(); // delete original '&' |
1866 | tokens.CopyAll(followingTokens); |
1867 | return true; |
1868 | } |
1869 | } |
1870 | return false; |
1871 | } |
1872 | } // namespace Fortran::parser |
1873 |
Definitions
- maxPrescannerNesting
- Prescanner
- Prescanner
- IsSpace
- IsSpaceOrTab
- IsFixedFormCommentChar
- NormalizeCompilerDirectiveCommentMarker
- Prescan
- Statement
- CheckAndEmitLine
- TokenizePreprocessorDirective
- NextLine
- LabelField
- EnforceStupidEndStatementRules
- SkipToEndOfLine
- MustSkipToEndOfLine
- NextChar
- SkipToNextSignificantCharacter
- SkipCComments
- SkipSpaces
- SkipWhiteSpace
- SkipWhiteSpaceIncludingEmptyMacros
- SkipWhiteSpaceAndCComments
- SkipCComment
- NextToken
- HandleExponent
- HandleKindSuffix
- HandleExponentAndOrKindSuffix
- QuotedCharacterLiteral
- Hollerith
- PadOutCharacterLiteral
- IsAtProcess
- IsFixedFormCommentLine
- IsFreeFormComment
- IsIncludeLine
- FortranInclude
- IsPreprocessorDirectiveLine
- IsNextLinePreprocessorDirective
- SkipCommentLine
- FixedFormContinuationLine
- FreeFormContinuationLine
- FixedFormContinuation
- FreeFormContinuation
- IsImplicitContinuation
- Continuation
- IsFixedFormCompilerDirectiveLine
- IsFreeFormCompilerDirectiveLine
- AddCompilerDirectiveSentinel
- IsCompilerDirectiveSentinel
- IsCompilerDirectiveSentinel
- IsCompilerDirectiveSentinel
- IsDirective
- ClassifyLine
- ClassifyLine
- SourceFormChange
- CompilerDirectiveContinuation
Improve your Profiling and Debugging skills
Find out more