1 | //===-- lib/Parser/prescan.cpp --------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "prescan.h" |
10 | #include "flang/Common/idioms.h" |
11 | #include "flang/Parser/characters.h" |
12 | #include "flang/Parser/message.h" |
13 | #include "flang/Parser/preprocessor.h" |
14 | #include "flang/Parser/source.h" |
15 | #include "flang/Parser/token-sequence.h" |
16 | #include "llvm/Support/raw_ostream.h" |
17 | #include <cstddef> |
18 | #include <cstring> |
19 | #include <utility> |
20 | #include <vector> |
21 | |
22 | namespace Fortran::parser { |
23 | |
24 | using common::LanguageFeature; |
25 | |
// Upper bound on prescannerNesting_.  Prescan() emits a "too many nested
// INCLUDE/#include files" error and returns without scanning once the
// nesting depth exceeds this value.
static constexpr int maxPrescannerNesting{100};
27 | |
// Constructs a prescanner bound to the given message sink, cooked source
// output, and preprocessor.  The AllSources reference is taken from the
// preprocessor, the enabled language features are copied from `lfc`, and
// the initial character encoding is taken from AllSources.
// NOTE(review): allSources_ is initialized from the preprocessor_ member
// (not the parameter), so this relies on the member declaration order in
// prescan.h -- confirm.
Prescanner::Prescanner(Messages &messages, CookedSource &cooked,
    Preprocessor &preprocessor, common::LanguageFeatureControl lfc)
    : messages_{messages}, cooked_{cooked}, preprocessor_{preprocessor},
      allSources_{preprocessor_.allSources()}, features_{lfc},
      encoding_{allSources_.encoding()} {}
33 | |
// Copy constructor.  Shares the same messages, cooked source, preprocessor
// and AllSources as `that`, copies its source-form settings, encoding and
// compiler directive sentinel tables, and records a nesting depth one
// greater than that of `that`.  Members not listed here keep their in-class
// defaults.
// NOTE(review): presumably used to scan nested INCLUDE/#include files (the
// nesting-limit diagnostic in Prescan() mentions them) -- confirm at callers.
Prescanner::Prescanner(const Prescanner &that)
    : messages_{that.messages_}, cooked_{that.cooked_},
      preprocessor_{that.preprocessor_}, allSources_{that.allSources_},
      features_{that.features_}, inFixedForm_{that.inFixedForm_},
      fixedFormColumnLimit_{that.fixedFormColumnLimit_},
      encoding_{that.encoding_}, prescannerNesting_{that.prescannerNesting_ +
                                     1},
      skipLeadingAmpersand_{that.skipLeadingAmpersand_},
      compilerDirectiveBloomFilter_{that.compilerDirectiveBloomFilter_},
      compilerDirectiveSentinels_{that.compilerDirectiveSentinels_} {}
44 | |
// Returns true when `ch` is one of the characters that marks a comment
// line when it appears first on a fixed-form line: '!', '*', 'C', or 'c'.
static inline constexpr bool IsFixedFormCommentChar(char ch) {
  return ch == '!' || ch == '*' || ch == 'C' || ch == 'c';
}
48 | |
49 | static void (TokenSequence &dir) { |
50 | char *p{dir.GetMutableCharData()}; |
51 | char *limit{p + dir.SizeInChars()}; |
52 | for (; p < limit; ++p) { |
53 | if (*p != ' ') { |
54 | CHECK(IsFixedFormCommentChar(ch: *p)); |
55 | *p = '!'; |
56 | return; |
57 | } |
58 | } |
59 | DIE("compiler directive all blank" ); |
60 | } |
61 | |
62 | void Prescanner::Prescan(ProvenanceRange range) { |
63 | startProvenance_ = range.start(); |
64 | start_ = allSources_.GetSource(range); |
65 | CHECK(start_); |
66 | limit_ = start_ + range.size(); |
67 | nextLine_ = start_; |
68 | const bool beganInFixedForm{inFixedForm_}; |
69 | if (prescannerNesting_ > maxPrescannerNesting) { |
70 | Say(GetProvenance(start_), |
71 | "too many nested INCLUDE/#include files, possibly circular"_err_en_US ); |
72 | return; |
73 | } |
74 | while (!IsAtEnd()) { |
75 | Statement(); |
76 | } |
77 | if (inFixedForm_ != beganInFixedForm) { |
78 | std::string dir{"!dir$ " }; |
79 | if (beganInFixedForm) { |
80 | dir += "fixed" ; |
81 | } else { |
82 | dir += "free" ; |
83 | } |
84 | dir += '\n'; |
85 | TokenSequence tokens{dir, allSources_.AddCompilerInsertion(dir).start()}; |
86 | tokens.Emit(cooked_); |
87 | } |
88 | } |
89 | |
90 | void Prescanner::Statement() { |
91 | TokenSequence tokens; |
92 | const char *statementStart{nextLine_}; |
93 | LineClassification line{ClassifyLine(statementStart)}; |
94 | switch (line.kind) { |
95 | case LineClassification::Kind::Comment: |
96 | nextLine_ += line.payloadOffset; // advance to '!' or newline |
97 | NextLine(); |
98 | return; |
99 | case LineClassification::Kind::IncludeLine: |
100 | FortranInclude(quote: nextLine_ + line.payloadOffset); |
101 | NextLine(); |
102 | return; |
103 | case LineClassification::Kind::ConditionalCompilationDirective: |
104 | case LineClassification::Kind::IncludeDirective: |
105 | case LineClassification::Kind::DefinitionDirective: |
106 | case LineClassification::Kind::PreprocessorDirective: |
107 | preprocessor_.Directive(TokenizePreprocessorDirective(), *this); |
108 | return; |
109 | case LineClassification::Kind::CompilerDirective: { |
110 | directiveSentinel_ = line.sentinel; |
111 | CHECK(InCompilerDirective()); |
112 | BeginStatementAndAdvance(); |
113 | if (inFixedForm_) { |
114 | CHECK(IsFixedFormCommentChar(ch: *at_)); |
115 | } else { |
116 | while (*at_ == ' ' || *at_ == '\t') { |
117 | ++at_, ++column_; |
118 | } |
119 | CHECK(*at_ == '!'); |
120 | } |
121 | std::optional<int> condOffset; |
122 | if (directiveSentinel_[0] == '$' && directiveSentinel_[1] == '\0') { |
123 | // OpenMP conditional compilation line. |
124 | condOffset = 2; |
125 | } else if (directiveSentinel_[0] == '@' && directiveSentinel_[1] == 'c' && |
126 | directiveSentinel_[2] == 'u' && directiveSentinel_[3] == 'f' && |
127 | directiveSentinel_[4] == '\0') { |
128 | // CUDA conditional compilation line. |
129 | condOffset = 5; |
130 | } |
131 | if (condOffset) { |
132 | at_ += *condOffset, column_ += *condOffset; |
133 | if (auto payload{IsIncludeLine(at_)}) { |
134 | FortranInclude(quote: at_ + *payload); |
135 | return; |
136 | } else if (inFixedForm_) { |
137 | LabelField(tokens); |
138 | } else { |
139 | SkipSpaces(); |
140 | } |
141 | } else { |
142 | // Compiler directive. Emit normalized sentinel, squash following spaces. |
143 | EmitChar(tokens, '!'); |
144 | ++at_, ++column_; |
145 | for (const char *sp{directiveSentinel_}; *sp != '\0'; |
146 | ++sp, ++at_, ++column_) { |
147 | EmitChar(tokens, *sp); |
148 | } |
149 | if (*at_ == ' ' || *at_ == '\t') { |
150 | EmitChar(tokens, ' '); |
151 | while (*at_ == ' ' || *at_ == '\t') { |
152 | ++at_, ++column_; |
153 | } |
154 | } |
155 | tokens.CloseToken(); |
156 | } |
157 | break; |
158 | } |
159 | case LineClassification::Kind::Source: |
160 | BeginStatementAndAdvance(); |
161 | if (inFixedForm_) { |
162 | if (features_.IsEnabled(LanguageFeature::OldDebugLines) && |
163 | (*at_ == 'D' || *at_ == 'd')) { |
164 | NextChar(); |
165 | } |
166 | LabelField(tokens); |
167 | } else if (skipLeadingAmpersand_) { |
168 | skipLeadingAmpersand_ = false; |
169 | const char *p{SkipWhiteSpace(at_)}; |
170 | if (p < limit_ && *p == '&') { |
171 | column_ += ++p - at_; |
172 | at_ = p; |
173 | } |
174 | } else { |
175 | SkipSpaces(); |
176 | } |
177 | break; |
178 | } |
179 | |
180 | while (NextToken(tokens)) { |
181 | } |
182 | if (continuationLines_ > 255) { |
183 | if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) { |
184 | Say(GetProvenance(statementStart), |
185 | "%d continuation lines is more than the Fortran standard allows"_port_en_US , |
186 | continuationLines_); |
187 | } |
188 | } |
189 | |
190 | Provenance newlineProvenance{GetCurrentProvenance()}; |
191 | if (std::optional<TokenSequence> preprocessed{ |
192 | preprocessor_.MacroReplacement(tokens, *this)}) { |
193 | // Reprocess the preprocessed line. Append a newline temporarily. |
194 | preprocessed->PutNextTokenChar('\n', newlineProvenance); |
195 | preprocessed->CloseToken(); |
196 | const char *ppd{preprocessed->ToCharBlock().begin()}; |
197 | LineClassification ppl{ClassifyLine(ppd)}; |
198 | preprocessed->pop_back(); // remove the newline |
199 | switch (ppl.kind) { |
200 | case LineClassification::Kind::Comment: |
201 | break; |
202 | case LineClassification::Kind::IncludeLine: |
203 | FortranInclude(quote: ppd + ppl.payloadOffset); |
204 | break; |
205 | case LineClassification::Kind::ConditionalCompilationDirective: |
206 | case LineClassification::Kind::IncludeDirective: |
207 | case LineClassification::Kind::DefinitionDirective: |
208 | case LineClassification::Kind::PreprocessorDirective: |
209 | Say(preprocessed->GetProvenanceRange(), |
210 | "Preprocessed line resembles a preprocessor directive"_warn_en_US ); |
211 | preprocessed->ToLowerCase() |
212 | .CheckBadFortranCharacters(messages_, *this) |
213 | .CheckBadParentheses(messages_) |
214 | .Emit(cooked_); |
215 | break; |
216 | case LineClassification::Kind::CompilerDirective: |
217 | if (preprocessed->HasRedundantBlanks()) { |
218 | preprocessed->RemoveRedundantBlanks(); |
219 | } |
220 | while (CompilerDirectiveContinuation(*preprocessed, ppl.sentinel)) { |
221 | newlineProvenance = GetCurrentProvenance(); |
222 | } |
223 | NormalizeCompilerDirectiveCommentMarker(*preprocessed); |
224 | preprocessed->ToLowerCase(); |
225 | SourceFormChange(preprocessed->ToString()); |
226 | preprocessed->ClipComment(*this, true /* skip first ! */) |
227 | .CheckBadFortranCharacters(messages_, *this) |
228 | .CheckBadParentheses(messages_) |
229 | .Emit(cooked_); |
230 | break; |
231 | case LineClassification::Kind::Source: |
232 | if (inFixedForm_) { |
233 | if (preprocessed->HasBlanks(/*after column*/ 6)) { |
234 | preprocessed->RemoveBlanks(/*after column*/ 6); |
235 | } |
236 | } else { |
237 | while (SourceLineContinuation(*preprocessed)) { |
238 | newlineProvenance = GetCurrentProvenance(); |
239 | } |
240 | if (preprocessed->HasRedundantBlanks()) { |
241 | preprocessed->RemoveRedundantBlanks(); |
242 | } |
243 | } |
244 | preprocessed->ToLowerCase() |
245 | .ClipComment(*this) |
246 | .CheckBadFortranCharacters(messages_, *this) |
247 | .CheckBadParentheses(messages_) |
248 | .Emit(cooked_); |
249 | break; |
250 | } |
251 | } else { |
252 | if (line.kind == LineClassification::Kind::CompilerDirective) { |
253 | while (CompilerDirectiveContinuation(tokens, line.sentinel)) { |
254 | newlineProvenance = GetCurrentProvenance(); |
255 | } |
256 | tokens.ToLowerCase(); |
257 | SourceFormChange(tokens.ToString()); |
258 | } else { // Kind::Source |
259 | tokens.ToLowerCase(); |
260 | if (inFixedForm_) { |
261 | EnforceStupidEndStatementRules(tokens); |
262 | } |
263 | } |
264 | tokens.CheckBadFortranCharacters(messages_, *this) |
265 | .CheckBadParentheses(messages_) |
266 | .Emit(cooked_); |
267 | } |
268 | if (omitNewline_) { |
269 | omitNewline_ = false; |
270 | } else { |
271 | cooked_.Put('\n', newlineProvenance); |
272 | } |
273 | directiveSentinel_ = nullptr; |
274 | } |
275 | |
276 | TokenSequence Prescanner::TokenizePreprocessorDirective() { |
277 | CHECK(!IsAtEnd() && !inPreprocessorDirective_); |
278 | inPreprocessorDirective_ = true; |
279 | BeginStatementAndAdvance(); |
280 | TokenSequence tokens; |
281 | while (NextToken(tokens)) { |
282 | } |
283 | inPreprocessorDirective_ = false; |
284 | return tokens; |
285 | } |
286 | |
287 | void Prescanner::NextLine() { |
288 | void *vstart{static_cast<void *>(const_cast<char *>(nextLine_))}; |
289 | void *v{std::memchr(s: vstart, c: '\n', n: limit_ - nextLine_)}; |
290 | if (!v) { |
291 | nextLine_ = limit_; |
292 | } else { |
293 | const char *nl{const_cast<const char *>(static_cast<char *>(v))}; |
294 | nextLine_ = nl + 1; |
295 | } |
296 | } |
297 | |
// Scans the label field (columns 1-6) of a fixed-form source line, emitting
// its non-blank characters into `token` and leaving at_/column_ positioned
// at the next significant character after the field.
//  - A tab ends the label field immediately and sets the column to 7.
//  - Blanks, and a '0' in column 6, are not emitted.
//  - The first emitted character that is either in column 6 or not a decimal
//    digit is remembered as the "bad column".  Unless the characters emitted
//    so far name a defined macro, a warning is issued and the emitted label
//    is discarded; when the bad column is before column 6, scanning is also
//    rewound to the start of the line and the function returns early.
//  - When nothing was emitted, a single space is emitted in its place.
void Prescanner::LabelField(TokenSequence &token) {
  int outCol{1}; // 1 + number of characters emitted so far
  const char *start{at_};
  std::optional<int> badColumn; // first column with a suspect character
  for (; *at_ != '\n' && column_ <= 6; ++at_) {
    if (*at_ == '\t') {
      // A tab terminates the label field.
      ++at_;
      column_ = 7;
      break;
    }
    if (*at_ != ' ' &&
        !(*at_ == '0' && column_ == 6)) { // '0' in column 6 becomes space
      EmitChar(token, *at_);
      ++outCol;
      if (!badColumn && (column_ == 6 || !IsDecimalDigit(*at_))) {
        badColumn = column_;
      }
    }
    ++column_;
  }
  if (badColumn && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) {
    Say(GetProvenance(start + *badColumn - 1),
        *badColumn == 6
            ? "Statement should not begin with a continuation line"_warn_en_US
            : "Character in fixed-form label field must be a digit"_warn_en_US);
    token.clear();
    if (*badColumn < 6) {
      // Rewind so that the line is scanned again from its first column.
      at_ = start;
      column_ = 1;
      return;
    }
    outCol = 1; // treat the label field as empty below
  }
  if (outCol == 1) { // empty label field
    // Emit a space so that, if the line is rescanned after preprocessing,
    // a leading 'C' or 'D' won't be left-justified and then accidentally
    // misinterpreted as a comment card.
    EmitChar(token, ' ');
    ++outCol;
  }
  token.CloseToken();
  SkipToNextSignificantCharacter();
  if (IsDecimalDigit(*at_)) {
    // A digit follows the label field.
    if (features_.ShouldWarn(common::LanguageFeature::MiscSourceExtensions)) {
      Say(GetCurrentProvenance(),
          "Label digit is not in fixed-form label field"_port_en_US);
    }
  }
}
347 | |
348 | // 6.3.3.5: A program unit END statement, or any other statement whose |
349 | // initial line resembles an END statement, shall not be continued in |
350 | // fixed form source. |
351 | void Prescanner::EnforceStupidEndStatementRules(const TokenSequence &tokens) { |
352 | CharBlock cBlock{tokens.ToCharBlock()}; |
353 | const char *str{cBlock.begin()}; |
354 | std::size_t n{cBlock.size()}; |
355 | if (n < 3) { |
356 | return; |
357 | } |
358 | std::size_t j{0}; |
359 | for (; j < n && (str[j] == ' ' || (str[j] >= '0' && str[j] <= '9')); ++j) { |
360 | } |
361 | if (j + 3 > n || std::memcmp(s1: str + j, s2: "end" , n: 3) != 0) { |
362 | return; |
363 | } |
364 | // It starts with END, possibly after a label. |
365 | auto start{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))}; |
366 | auto end{allSources_.GetSourcePosition(tokens.GetCharProvenance(n - 1))}; |
367 | if (!start || !end) { |
368 | return; |
369 | } |
370 | if (&*start->sourceFile == &*end->sourceFile && start->line == end->line) { |
371 | return; // no continuation |
372 | } |
373 | j += 3; |
374 | static const char *const prefixes[]{"program" , "subroutine" , "function" , |
375 | "blockdata" , "module" , "submodule" , nullptr}; |
376 | bool isPrefix{j == n || !IsLegalInIdentifier(str[j])}; // prefix is END |
377 | std::size_t endOfPrefix{j - 1}; |
378 | for (const char *const *p{prefixes}; *p; ++p) { |
379 | std::size_t pLen{std::strlen(s: *p)}; |
380 | if (j + pLen <= n && std::memcmp(s1: str + j, s2: *p, n: pLen) == 0) { |
381 | isPrefix = true; // END thing as prefix |
382 | j += pLen; |
383 | endOfPrefix = j - 1; |
384 | for (; j < n && IsLegalInIdentifier(str[j]); ++j) { |
385 | } |
386 | break; |
387 | } |
388 | } |
389 | if (isPrefix) { |
390 | auto range{tokens.GetTokenProvenanceRange(1)}; |
391 | if (j == n) { // END or END thing [name] |
392 | Say(range, |
393 | "Program unit END statement may not be continued in fixed form source"_err_en_US ); |
394 | } else { |
395 | auto endOfPrefixPos{ |
396 | allSources_.GetSourcePosition(tokens.GetCharProvenance(endOfPrefix))}; |
397 | auto next{allSources_.GetSourcePosition(tokens.GetCharProvenance(j))}; |
398 | if (endOfPrefixPos && next && |
399 | &*endOfPrefixPos->sourceFile == &*start->sourceFile && |
400 | endOfPrefixPos->line == start->line && |
401 | (&*next->sourceFile != &*start->sourceFile || |
402 | next->line != start->line)) { |
403 | Say(range, |
404 | "Initial line of continued statement must not appear to be a program unit END in fixed form source"_err_en_US ); |
405 | } |
406 | } |
407 | } |
408 | } |
409 | |
410 | void Prescanner::SkipToEndOfLine() { |
411 | while (*at_ != '\n') { |
412 | ++at_, ++column_; |
413 | } |
414 | } |
415 | |
416 | bool Prescanner::MustSkipToEndOfLine() const { |
417 | if (inFixedForm_ && column_ > fixedFormColumnLimit_ && !tabInCurrentLine_) { |
418 | return true; // skip over ignored columns in right margin (73:80) |
419 | } else if (*at_ == '!' && !inCharLiteral_) { |
420 | return true; // inline comment goes to end of source line |
421 | } else { |
422 | return false; |
423 | } |
424 | } |
425 | |
426 | void Prescanner::NextChar() { |
427 | CHECK(*at_ != '\n'); |
428 | ++at_, ++column_; |
429 | while (at_[0] == '\xef' && at_[1] == '\xbb' && at_[2] == '\xbf') { |
430 | // UTF-8 byte order mark - treat this file as UTF-8 |
431 | at_ += 3; |
432 | encoding_ = Encoding::UTF_8; |
433 | } |
434 | SkipToNextSignificantCharacter(); |
435 | } |
436 | |
437 | // Skip everything that should be ignored until the next significant |
438 | // character is reached; handles C-style comments in preprocessing |
439 | // directives, Fortran ! comments, stuff after the right margin in |
440 | // fixed form, and all forms of line continuation. |
441 | bool Prescanner::SkipToNextSignificantCharacter() { |
442 | auto anyContinuationLine{false}; |
443 | if (inPreprocessorDirective_) { |
444 | SkipCComments(); |
445 | } else { |
446 | bool mightNeedSpace{false}; |
447 | if (MustSkipToEndOfLine()) { |
448 | SkipToEndOfLine(); |
449 | } else { |
450 | mightNeedSpace = *at_ == '\n'; |
451 | } |
452 | for (; Continuation(mightNeedFixedFormSpace: mightNeedSpace); mightNeedSpace = false) { |
453 | anyContinuationLine = true; |
454 | ++continuationLines_; |
455 | if (MustSkipToEndOfLine()) { |
456 | SkipToEndOfLine(); |
457 | } |
458 | } |
459 | if (*at_ == '\t') { |
460 | tabInCurrentLine_ = true; |
461 | } |
462 | } |
463 | return anyContinuationLine; |
464 | } |
465 | |
466 | void Prescanner::() { |
467 | while (true) { |
468 | if (IsCComment(p: at_)) { |
469 | if (const char *after{SkipCComment(at_)}) { |
470 | column_ += after - at_; |
471 | // May have skipped over one or more newlines; relocate the start of |
472 | // the next line. |
473 | nextLine_ = at_ = after; |
474 | NextLine(); |
475 | } else { |
476 | // Don't emit any messages about unclosed C-style comments, because |
477 | // the sequence /* can appear legally in a FORMAT statement. There's |
478 | // no ambiguity, since the sequence */ cannot appear legally. |
479 | break; |
480 | } |
481 | } else if (inPreprocessorDirective_ && at_[0] == '\\' && at_ + 2 < limit_ && |
482 | at_[1] == '\n' && !IsAtEnd()) { |
483 | BeginSourceLineAndAdvance(); |
484 | } else { |
485 | break; |
486 | } |
487 | } |
488 | } |
489 | |
490 | void Prescanner::SkipSpaces() { |
491 | while (*at_ == ' ' || *at_ == '\t') { |
492 | NextChar(); |
493 | } |
494 | insertASpace_ = false; |
495 | } |
496 | |
497 | const char *Prescanner::SkipWhiteSpace(const char *p) { |
498 | while (*p == ' ' || *p == '\t') { |
499 | ++p; |
500 | } |
501 | return p; |
502 | } |
503 | |
504 | const char *Prescanner::SkipWhiteSpaceAndCComments(const char *p) const { |
505 | while (true) { |
506 | if (*p == ' ' || *p == '\t') { |
507 | ++p; |
508 | } else if (IsCComment(p)) { |
509 | if (const char *after{SkipCComment(p)}) { |
510 | p = after; |
511 | } else { |
512 | break; |
513 | } |
514 | } else { |
515 | break; |
516 | } |
517 | } |
518 | return p; |
519 | } |
520 | |
521 | const char *Prescanner::(const char *p) const { |
522 | char star{' '}, slash{' '}; |
523 | p += 2; |
524 | while (star != '*' || slash != '/') { |
525 | if (p >= limit_) { |
526 | return nullptr; // signifies an unterminated comment |
527 | } |
528 | star = slash; |
529 | slash = *p++; |
530 | } |
531 | return p; |
532 | } |
533 | |
534 | bool Prescanner::NextToken(TokenSequence &tokens) { |
535 | CHECK(at_ >= start_ && at_ < limit_); |
536 | if (InFixedFormSource()) { |
537 | SkipSpaces(); |
538 | } else { |
539 | if (*at_ == '/' && IsCComment(p: at_)) { |
540 | // Recognize and skip over classic C style /*comments*/ when |
541 | // outside a character literal. |
542 | if (features_.ShouldWarn(LanguageFeature::ClassicCComments)) { |
543 | Say(GetCurrentProvenance(), |
544 | "nonstandard usage: C-style comment"_port_en_US ); |
545 | } |
546 | SkipCComments(); |
547 | } |
548 | if (*at_ == ' ' || *at_ == '\t') { |
549 | // Compress free-form white space into a single space character. |
550 | const auto theSpace{at_}; |
551 | char previous{at_ <= start_ ? ' ' : at_[-1]}; |
552 | NextChar(); |
553 | SkipSpaces(); |
554 | if (*at_ == '\n') { |
555 | // Discard white space at the end of a line. |
556 | } else if (!inPreprocessorDirective_ && |
557 | (previous == '(' || *at_ == '(' || *at_ == ')')) { |
558 | // Discard white space before/after '(' and before ')', unless in a |
559 | // preprocessor directive. This helps yield space-free contiguous |
560 | // names for generic interfaces like OPERATOR( + ) and |
561 | // READ ( UNFORMATTED ), without misinterpreting #define f (notAnArg). |
562 | // This has the effect of silently ignoring the illegal spaces in |
563 | // the array constructor ( /1,2/ ) but that seems benign; it's |
564 | // hard to avoid that while still removing spaces from OPERATOR( / ) |
565 | // and OPERATOR( // ). |
566 | } else { |
567 | // Preserve the squashed white space as a single space character. |
568 | tokens.PutNextTokenChar(' ', GetProvenance(theSpace)); |
569 | tokens.CloseToken(); |
570 | return true; |
571 | } |
572 | } |
573 | } |
574 | if (insertASpace_) { |
575 | tokens.PutNextTokenChar(' ', spaceProvenance_); |
576 | insertASpace_ = false; |
577 | } |
578 | if (*at_ == '\n') { |
579 | return false; |
580 | } |
581 | const char *start{at_}; |
582 | if (*at_ == '\'' || *at_ == '"') { |
583 | QuotedCharacterLiteral(tokens, start); |
584 | preventHollerith_ = false; |
585 | } else if (IsDecimalDigit(*at_)) { |
586 | int n{0}, digits{0}; |
587 | static constexpr int maxHollerith{256 /*lines*/ * (132 - 6 /*columns*/)}; |
588 | do { |
589 | if (n < maxHollerith) { |
590 | n = 10 * n + DecimalDigitValue(*at_); |
591 | } |
592 | EmitCharAndAdvance(tokens, *at_); |
593 | ++digits; |
594 | if (InFixedFormSource()) { |
595 | SkipSpaces(); |
596 | } |
597 | } while (IsDecimalDigit(*at_)); |
598 | if ((*at_ == 'h' || *at_ == 'H') && n > 0 && n < maxHollerith && |
599 | !preventHollerith_) { |
600 | Hollerith(tokens, n, start); |
601 | } else if (*at_ == '.') { |
602 | while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { |
603 | } |
604 | ExponentAndKind(tokens); |
605 | } else if (ExponentAndKind(tokens)) { |
606 | } else if (digits == 1 && n == 0 && (*at_ == 'x' || *at_ == 'X') && |
607 | inPreprocessorDirective_) { |
608 | do { |
609 | EmitCharAndAdvance(tokens, *at_); |
610 | } while (IsHexadecimalDigit(*at_)); |
611 | } else if (at_[0] == '_' && (at_[1] == '\'' || at_[1] == '"')) { // 4_"..." |
612 | EmitCharAndAdvance(tokens, *at_); |
613 | QuotedCharacterLiteral(tokens, start); |
614 | } else if (IsLetter(*at_) && !preventHollerith_ && |
615 | parenthesisNesting_ > 0) { |
616 | // Handles FORMAT(3I9HHOLLERITH) by skipping over the first I so that |
617 | // we don't misrecognize I9HOLLERITH as an identifier in the next case. |
618 | EmitCharAndAdvance(tokens, *at_); |
619 | } |
620 | preventHollerith_ = false; |
621 | } else if (*at_ == '.') { |
622 | char nch{EmitCharAndAdvance(tokens, '.')}; |
623 | if (!inPreprocessorDirective_ && IsDecimalDigit(nch)) { |
624 | while (IsDecimalDigit(EmitCharAndAdvance(tokens, *at_))) { |
625 | } |
626 | ExponentAndKind(tokens); |
627 | } else if (nch == '.' && EmitCharAndAdvance(tokens, '.') == '.') { |
628 | EmitCharAndAdvance(tokens, '.'); // variadic macro definition ellipsis |
629 | } |
630 | preventHollerith_ = false; |
631 | } else if (IsLegalInIdentifier(*at_)) { |
632 | int parts{1}; |
633 | const char *afterLast{nullptr}; |
634 | do { |
635 | EmitChar(tokens, *at_); |
636 | ++at_, ++column_; |
637 | afterLast = at_; |
638 | if (SkipToNextSignificantCharacter() && IsLegalIdentifierStart(*at_)) { |
639 | tokens.CloseToken(); |
640 | ++parts; |
641 | } |
642 | } while (IsLegalInIdentifier(*at_)); |
643 | if (parts >= 3) { |
644 | // Subtlety: When an identifier is split across three or more continuation |
645 | // lines (or two continuation lines, immediately preceded or followed |
646 | // by '&' free form continuation line markers, its parts are kept as |
647 | // distinct pp-tokens so that macro operates on them independently. |
648 | // This trick accommodates the historic practice of using line |
649 | // continuation for token pasting after replacement. |
650 | } else if (parts == 2) { |
651 | if ((start > start_ && start[-1] == '&') || |
652 | (afterLast < limit_ && (*afterLast == '&' || *afterLast == '\n'))) { |
653 | // call & call foo& call foo& |
654 | // &MACRO& OR &MACRO& OR &MACRO |
655 | // &foo(...) &(...) |
656 | } else { |
657 | tokens.ReopenLastToken(); |
658 | } |
659 | } |
660 | if (InFixedFormSource()) { |
661 | SkipSpaces(); |
662 | } |
663 | if ((*at_ == '\'' || *at_ == '"') && |
664 | tokens.CharAt(tokens.SizeInChars() - 1) == '_') { // kind_"..." |
665 | QuotedCharacterLiteral(tokens, start); |
666 | preventHollerith_ = false; |
667 | } else { |
668 | preventHollerith_ = true; // DO 10 H = ... |
669 | } |
670 | } else if (*at_ == '*') { |
671 | if (EmitCharAndAdvance(tokens, '*') == '*') { |
672 | EmitCharAndAdvance(tokens, '*'); |
673 | } else { |
674 | // Subtle ambiguity: |
675 | // CHARACTER*2H declares H because *2 is a kind specifier |
676 | // DATAC/N*2H / is repeated Hollerith |
677 | preventHollerith_ = !slashInCurrentStatement_; |
678 | } |
679 | } else { |
680 | char ch{*at_}; |
681 | if (ch == '(') { |
682 | if (parenthesisNesting_++ == 0) { |
683 | isPossibleMacroCall_ = tokens.SizeInTokens() > 0 && |
684 | preprocessor_.IsFunctionLikeDefinition( |
685 | tokens.TokenAt(tokens.SizeInTokens() - 1)); |
686 | } |
687 | } else if (ch == ')' && parenthesisNesting_ > 0) { |
688 | --parenthesisNesting_; |
689 | } |
690 | char nch{EmitCharAndAdvance(tokens, ch)}; |
691 | preventHollerith_ = false; |
692 | if ((nch == '=' && |
693 | (ch == '<' || ch == '>' || ch == '/' || ch == '=' || ch == '!')) || |
694 | (ch == nch && |
695 | (ch == '/' || ch == ':' || ch == '*' || ch == '#' || ch == '&' || |
696 | ch == '|' || ch == '<' || ch == '>')) || |
697 | (ch == '=' && nch == '>')) { |
698 | // token comprises two characters |
699 | EmitCharAndAdvance(tokens, nch); |
700 | } else if (ch == '/') { |
701 | slashInCurrentStatement_ = true; |
702 | } else if (ch == ';' && InFixedFormSource()) { |
703 | SkipSpaces(); |
704 | if (IsDecimalDigit(*at_)) { |
705 | if (features_.ShouldWarn( |
706 | common::LanguageFeature::MiscSourceExtensions)) { |
707 | Say(GetProvenanceRange(at_, at_ + 1), |
708 | "Label should be in the label field"_port_en_US ); |
709 | } |
710 | } |
711 | } |
712 | } |
713 | tokens.CloseToken(); |
714 | return true; |
715 | } |
716 | |
717 | bool Prescanner::ExponentAndKind(TokenSequence &tokens) { |
718 | char ed{ToLowerCaseLetter(*at_)}; |
719 | if (ed != 'e' && ed != 'd') { |
720 | return false; |
721 | } |
722 | EmitCharAndAdvance(tokens, ed); |
723 | if (*at_ == '+' || *at_ == '-') { |
724 | EmitCharAndAdvance(tokens, *at_); |
725 | } |
726 | while (IsDecimalDigit(*at_)) { |
727 | EmitCharAndAdvance(tokens, *at_); |
728 | } |
729 | if (*at_ == '_') { |
730 | while (IsLegalInIdentifier(EmitCharAndAdvance(tokens, *at_))) { |
731 | } |
732 | } |
733 | return true; |
734 | } |
735 | |
// Scans a character literal delimited by the quote character at at_ and
// appends its characters to `tokens`.  `start` is the beginning of the
// token containing the literal (it may precede at_ when the literal has a
// kind prefix) and is used only for the provenance of the "incomplete
// literal" error.  inCharLiteral_ and continuationInCharLiteral_ are set
// while the body of the literal is being scanned and cleared on exit.
void Prescanner::QuotedCharacterLiteral(
    TokenSequence &tokens, const char *start) {
  char quote{*at_};
  const char *end{at_ + 1};
  inCharLiteral_ = true;
  continuationInCharLiteral_ = true;
  const auto emit{[&](char ch) { EmitChar(tokens, ch); }};
  const auto insert{[&](char ch) { EmitInsertedChar(tokens, ch); }};
  bool isEscaped{false}; // true right after an unescaped backslash
  bool escapesEnabled{features_.IsEnabled(LanguageFeature::BackslashEscapes)};
  while (true) {
    if (*at_ == '\\') {
      if (escapesEnabled) {
        isEscaped = !isEscaped;
      } else {
        // The parser always processes escape sequences, so don't confuse it
        // when escapes are disabled.
        insert('\\');
      }
    } else {
      isEscaped = false;
    }
    EmitQuotedChar(static_cast<unsigned char>(*at_), emit, insert, false,
        Encoding::LATIN_1);
    while (PadOutCharacterLiteral(tokens)) {
    }
    if (*at_ == '\n') {
      // The line ended before the closing quote was seen.
      if (!inPreprocessorDirective_) {
        Say(GetProvenanceRange(start, end),
            "Incomplete character literal"_err_en_US);
      }
      break;
    }
    end = at_ + 1;
    NextChar();
    if (*at_ == quote && !isEscaped) {
      // A doubled unescaped quote mark becomes a single instance of that
      // quote character in the literal (later).  There can be spaces between
      // the quotes in fixed form source.
      EmitChar(tokens, quote);
      inCharLiteral_ = false; // for cases like print *, '...'!comment
      NextChar();
      if (InFixedFormSource()) {
        SkipSpaces();
      }
      if (*at_ != quote) {
        break; // that was the closing quote
      }
      inCharLiteral_ = true; // doubled quote: the literal continues
    }
  }
  continuationInCharLiteral_ = false;
  inCharLiteral_ = false;
}
790 | |
// Scans the body of a Hollerith literal.  at_ must point at the 'h' or 'H'
// that follows the count; an upper-case 'H' is emitted, followed by up to
// `count` characters re-encoded as UTF-8.  `start` is the beginning of the
// token and is used only for diagnostic provenance.  Scanning stops early,
// with a message, at the end of the line or at a character that cannot be
// decoded.  inCharLiteral_ is set for the duration of the scan.
void Prescanner::Hollerith(
    TokenSequence &tokens, int count, const char *start) {
  inCharLiteral_ = true;
  CHECK(*at_ == 'h' || *at_ == 'H');
  EmitChar(tokens, 'H');
  while (count-- > 0) {
    if (PadOutCharacterLiteral(tokens)) {
      // A padding blank was emitted; it counts as one character.
    } else if (*at_ == '\n') {
      Say(GetProvenanceRange(start, at_),
          "Possible truncated Hollerith literal"_warn_en_US);
      break;
    } else {
      NextChar();
      // Each multi-byte character encoding counts as a single character.
      // No escape sequences are recognized.
      // Hollerith is always emitted to the cooked character
      // stream in UTF-8.
      DecodedCharacter decoded{DecodeCharacter(
          encoding_, at_, static_cast<std::size_t>(limit_ - at_), false)};
      if (decoded.bytes > 0) {
        EncodedCharacter utf8{
            EncodeCharacter<Encoding::UTF_8>(decoded.codepoint)};
        for (int j{0}; j < utf8.bytes; ++j) {
          EmitChar(tokens, utf8.buffer[j]);
        }
        // Leave at_ on the last byte of the decoded character.
        at_ += decoded.bytes - 1;
      } else {
        Say(GetProvenanceRange(start, at_),
            "Bad character in Hollerith literal"_err_en_US);
        break;
      }
    }
  }
  if (*at_ != '\n') {
    NextChar();
  }
  inCharLiteral_ = false;
}
829 | |
830 | // In fixed form, source card images must be processed as if they were at |
831 | // least 72 columns wide, at least in character literal contexts. |
832 | bool Prescanner::PadOutCharacterLiteral(TokenSequence &tokens) { |
833 | while (inFixedForm_ && !tabInCurrentLine_ && at_[1] == '\n') { |
834 | if (column_ < fixedFormColumnLimit_) { |
835 | tokens.PutNextTokenChar(' ', spaceProvenance_); |
836 | ++column_; |
837 | return true; |
838 | } |
839 | if (!FixedFormContinuation(mightNeedSpace: false /*no need to insert space*/) || |
840 | tabInCurrentLine_) { |
841 | return false; |
842 | } |
843 | CHECK(column_ == 7); |
844 | --at_; // point to column 6 of continuation line |
845 | column_ = 6; |
846 | } |
847 | return false; |
848 | } |
849 | |
850 | static bool IsAtProcess(const char *p) { |
851 | static const char pAtProc[]{"process" }; |
852 | for (std::size_t i{0}; i < sizeof pAtProc - 1; ++i) { |
853 | if (ToLowerCaseLetter(*++p) != pAtProc[i]) |
854 | return false; |
855 | } |
856 | return true; |
857 | } |
858 | |
859 | bool Prescanner::(const char *start) const { |
860 | const char *p{start}; |
861 | |
862 | // The @process directive must start in column 1. |
863 | if (*p == '@' && IsAtProcess(p)) { |
864 | return true; |
865 | } |
866 | |
867 | if (IsFixedFormCommentChar(*p) || *p == '%' || // VAX %list, %eject, &c. |
868 | ((*p == 'D' || *p == 'd') && |
869 | !features_.IsEnabled(LanguageFeature::OldDebugLines))) { |
870 | return true; |
871 | } |
872 | bool anyTabs{false}; |
873 | while (true) { |
874 | if (*p == ' ') { |
875 | ++p; |
876 | } else if (*p == '\t') { |
877 | anyTabs = true; |
878 | ++p; |
879 | } else if (*p == '0' && !anyTabs && p == start + 5) { |
880 | ++p; // 0 in column 6 must treated as a space |
881 | } else { |
882 | break; |
883 | } |
884 | } |
885 | if (!anyTabs && p >= start + fixedFormColumnLimit_) { |
886 | return true; |
887 | } |
888 | if (*p == '!' && !inCharLiteral_ && (anyTabs || p != start + 5)) { |
889 | return true; |
890 | } |
891 | return *p == '\n'; |
892 | } |
893 | |
894 | const char *Prescanner::(const char *p) const { |
895 | p = SkipWhiteSpaceAndCComments(p); |
896 | if (*p == '!' || *p == '\n') { |
897 | return p; |
898 | } else if (*p == '@') { |
899 | return IsAtProcess(p) ? p : nullptr; |
900 | } else { |
901 | return nullptr; |
902 | } |
903 | } |
904 | |
905 | std::optional<std::size_t> Prescanner::IsIncludeLine(const char *start) const { |
906 | const char *p{SkipWhiteSpace(p: start)}; |
907 | if (*p == '0' && inFixedForm_ && p == start + 5) { |
908 | // Accept " 0INCLUDE" in fixed form. |
909 | p = SkipWhiteSpace(p: p + 1); |
910 | } |
911 | for (const char *q{"include" }; *q; ++q) { |
912 | if (ToLowerCaseLetter(*p) != *q) { |
913 | return std::nullopt; |
914 | } |
915 | p = SkipWhiteSpace(p: p + 1); |
916 | } |
917 | if (IsDecimalDigit(*p)) { // accept & ignore a numeric kind prefix |
918 | for (p = SkipWhiteSpace(p: p + 1); IsDecimalDigit(*p); |
919 | p = SkipWhiteSpace(p: p + 1)) { |
920 | } |
921 | if (*p != '_') { |
922 | return std::nullopt; |
923 | } |
924 | p = SkipWhiteSpace(p: p + 1); |
925 | } |
926 | if (*p == '"' || *p == '\'') { |
927 | return {p - start}; |
928 | } |
929 | return std::nullopt; |
930 | } |
931 | |
932 | void Prescanner::FortranInclude(const char *firstQuote) { |
933 | const char *p{firstQuote}; |
934 | while (*p != '"' && *p != '\'') { |
935 | ++p; |
936 | } |
937 | char quote{*p}; |
938 | std::string path; |
939 | for (++p; *p != '\n'; ++p) { |
940 | if (*p == quote) { |
941 | if (p[1] != quote) { |
942 | break; |
943 | } |
944 | ++p; |
945 | } |
946 | path += *p; |
947 | } |
948 | if (*p != quote) { |
949 | Say(GetProvenanceRange(firstQuote, p), |
950 | "malformed path name string"_err_en_US ); |
951 | return; |
952 | } |
953 | p = SkipWhiteSpace(p: p + 1); |
954 | if (*p != '\n' && *p != '!') { |
955 | const char *garbage{p}; |
956 | for (; *p != '\n' && *p != '!'; ++p) { |
957 | } |
958 | Say(GetProvenanceRange(garbage, p), |
959 | "excess characters after path name"_warn_en_US ); |
960 | } |
961 | std::string buf; |
962 | llvm::raw_string_ostream error{buf}; |
963 | Provenance provenance{GetProvenance(nextLine_)}; |
964 | std::optional<std::string> prependPath; |
965 | if (const SourceFile * currentFile{allSources_.GetSourceFile(provenance)}) { |
966 | prependPath = DirectoryName(currentFile->path()); |
967 | } |
968 | const SourceFile *included{ |
969 | allSources_.Open(path, error, std::move(prependPath))}; |
970 | if (!included) { |
971 | Say(provenance, "INCLUDE: %s"_err_en_US , error.str()); |
972 | } else if (included->bytes() > 0) { |
973 | ProvenanceRange includeLineRange{ |
974 | provenance, static_cast<std::size_t>(p - nextLine_)}; |
975 | ProvenanceRange fileRange{ |
976 | allSources_.AddIncludedFile(*included, includeLineRange)}; |
977 | Prescanner{*this}.set_encoding(included->encoding()).Prescan(fileRange); |
978 | } |
979 | } |
980 | |
981 | const char *Prescanner::IsPreprocessorDirectiveLine(const char *start) const { |
982 | const char *p{start}; |
983 | for (; *p == ' '; ++p) { |
984 | } |
985 | if (*p == '#') { |
986 | if (inFixedForm_ && p == start + 5) { |
987 | return nullptr; |
988 | } |
989 | } else { |
990 | p = SkipWhiteSpace(p); |
991 | if (*p != '#') { |
992 | return nullptr; |
993 | } |
994 | } |
995 | return SkipWhiteSpace(p: p + 1); |
996 | } |
997 | |
998 | bool Prescanner::IsNextLinePreprocessorDirective() const { |
999 | return IsPreprocessorDirectiveLine(start: nextLine_) != nullptr; |
1000 | } |
1001 | |
1002 | bool Prescanner::(bool afterAmpersand) { |
1003 | if (IsAtEnd()) { |
1004 | if (afterAmpersand && prescannerNesting_ > 0) { |
1005 | // A continuation marker at the end of the last line in an |
1006 | // include file inhibits the newline for that line. |
1007 | SkipToEndOfLine(); |
1008 | omitNewline_ = true; |
1009 | } |
1010 | return false; |
1011 | } |
1012 | auto lineClass{ClassifyLine(nextLine_)}; |
1013 | if (lineClass.kind == LineClassification::Kind::Comment) { |
1014 | NextLine(); |
1015 | return true; |
1016 | } else if (inPreprocessorDirective_) { |
1017 | return false; |
1018 | } else if (lineClass.kind == |
1019 | LineClassification::Kind::ConditionalCompilationDirective || |
1020 | lineClass.kind == LineClassification::Kind::PreprocessorDirective) { |
1021 | // Allow conditional compilation directives (e.g., #ifdef) to affect |
1022 | // continuation lines. |
1023 | // Allow other preprocessor directives, too, except #include |
1024 | // (when it does not follow '&'), #define, and #undef (because |
1025 | // they cannot be allowed to affect preceding text on a |
1026 | // continued line). |
1027 | preprocessor_.Directive(TokenizePreprocessorDirective(), *this); |
1028 | return true; |
1029 | } else if (afterAmpersand && |
1030 | (lineClass.kind == LineClassification::Kind::IncludeDirective || |
1031 | lineClass.kind == LineClassification::Kind::IncludeLine)) { |
1032 | SkipToEndOfLine(); |
1033 | omitNewline_ = true; |
1034 | skipLeadingAmpersand_ = true; |
1035 | return false; |
1036 | } else { |
1037 | return false; |
1038 | } |
1039 | } |
1040 | |
1041 | const char *Prescanner::FixedFormContinuationLine(bool mightNeedSpace) { |
1042 | if (IsAtEnd()) { |
1043 | return nullptr; |
1044 | } |
1045 | tabInCurrentLine_ = false; |
1046 | char col1{*nextLine_}; |
1047 | if (IsFixedFormCommentChar(ch: col1)) { |
1048 | int j{1}; |
1049 | if (InCompilerDirective()) { |
1050 | // Must be a continued compiler directive. |
1051 | for (; j < 5; ++j) { |
1052 | char ch{directiveSentinel_[j - 1]}; |
1053 | if (ch == '\0') { |
1054 | break; |
1055 | } |
1056 | if (ch != ToLowerCaseLetter(nextLine_[j])) { |
1057 | return nullptr; |
1058 | } |
1059 | } |
1060 | } else if (features_.IsEnabled(LanguageFeature::OpenMP)) { |
1061 | // Fixed Source Form Conditional Compilation Sentinels. |
1062 | if (nextLine_[1] != '$') { |
1063 | return nullptr; |
1064 | } |
1065 | j++; |
1066 | } else { |
1067 | return nullptr; |
1068 | } |
1069 | for (; j < 5; ++j) { |
1070 | if (nextLine_[j] != ' ') { |
1071 | return nullptr; |
1072 | } |
1073 | } |
1074 | char col6{nextLine_[5]}; |
1075 | if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') { |
1076 | if (nextLine_[6] != ' ' && mightNeedSpace) { |
1077 | insertASpace_ = true; |
1078 | } |
1079 | return nextLine_ + 6; |
1080 | } |
1081 | return nullptr; |
1082 | } else { |
1083 | // Normal case: not in a compiler directive. |
1084 | if (col1 == '&' && |
1085 | features_.IsEnabled( |
1086 | LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { |
1087 | // Extension: '&' as continuation marker |
1088 | if (features_.ShouldWarn( |
1089 | LanguageFeature::FixedFormContinuationWithColumn1Ampersand)) { |
1090 | Say(GetProvenance(nextLine_), "nonstandard usage"_port_en_US ); |
1091 | } |
1092 | return nextLine_ + 1; |
1093 | } |
1094 | if (col1 == '\t' && nextLine_[1] >= '1' && nextLine_[1] <= '9') { |
1095 | tabInCurrentLine_ = true; |
1096 | return nextLine_ + 2; // VAX extension |
1097 | } |
1098 | if ((col1 == ' ' || |
1099 | ((col1 == 'D' || col1 == 'd') && |
1100 | features_.IsEnabled(LanguageFeature::OldDebugLines))) && |
1101 | nextLine_[1] == ' ' && nextLine_[2] == ' ' && nextLine_[3] == ' ' && |
1102 | nextLine_[4] == ' ') { |
1103 | char col6{nextLine_[5]}; |
1104 | if (col6 != '\n' && col6 != '\t' && col6 != ' ' && col6 != '0') { |
1105 | if ((col6 == 'i' || col6 == 'I') && IsIncludeLine(start: nextLine_)) { |
1106 | // It's An INCLUDE line, not a continuation |
1107 | } else { |
1108 | return nextLine_ + 6; |
1109 | } |
1110 | } |
1111 | } |
1112 | if (IsImplicitContinuation()) { |
1113 | return nextLine_; |
1114 | } |
1115 | } |
1116 | return nullptr; // not a continuation line |
1117 | } |
1118 | |
1119 | const char *Prescanner::FreeFormContinuationLine(bool ampersand) { |
1120 | const char *p{nextLine_}; |
1121 | if (p >= limit_) { |
1122 | return nullptr; |
1123 | } |
1124 | p = SkipWhiteSpace(p); |
1125 | if (InCompilerDirective()) { |
1126 | if (*p++ != '!') { |
1127 | return nullptr; |
1128 | } |
1129 | for (const char *s{directiveSentinel_}; *s != '\0'; ++p, ++s) { |
1130 | if (*s != ToLowerCaseLetter(*p)) { |
1131 | return nullptr; |
1132 | } |
1133 | } |
1134 | p = SkipWhiteSpace(p); |
1135 | if (*p == '&') { |
1136 | if (!ampersand) { |
1137 | insertASpace_ = true; |
1138 | } |
1139 | return p + 1; |
1140 | } else if (ampersand) { |
1141 | return p; |
1142 | } else { |
1143 | return nullptr; |
1144 | } |
1145 | } else { |
1146 | if (*p == '&') { |
1147 | return p + 1; |
1148 | } else if (*p == '!' || *p == '\n' || *p == '#') { |
1149 | return nullptr; |
1150 | } else if (ampersand || IsImplicitContinuation()) { |
1151 | if (continuationInCharLiteral_) { |
1152 | // 'a'& -> 'a''b' == "a'b" |
1153 | // 'b' |
1154 | if (features_.ShouldWarn( |
1155 | common::LanguageFeature::MiscSourceExtensions)) { |
1156 | Say(GetProvenanceRange(p, p + 1), |
1157 | "Character literal continuation line should have been preceded by '&'"_port_en_US ); |
1158 | } |
1159 | } else if (p > nextLine_) { |
1160 | --p; |
1161 | } else { |
1162 | insertASpace_ = true; |
1163 | } |
1164 | return p; |
1165 | } else { |
1166 | return nullptr; |
1167 | } |
1168 | } |
1169 | } |
1170 | |
1171 | bool Prescanner::FixedFormContinuation(bool mightNeedSpace) { |
1172 | // N.B. We accept '&' as a continuation indicator in fixed form, too, |
1173 | // but not in a character literal. |
1174 | if (*at_ == '&' && inCharLiteral_) { |
1175 | return false; |
1176 | } |
1177 | do { |
1178 | if (const char *cont{FixedFormContinuationLine(mightNeedSpace)}) { |
1179 | BeginSourceLine(at: cont); |
1180 | column_ = 7; |
1181 | NextLine(); |
1182 | return true; |
1183 | } |
1184 | } while (SkipCommentLine(afterAmpersand: false /* not after ampersand */)); |
1185 | return false; |
1186 | } |
1187 | |
1188 | bool Prescanner::FreeFormContinuation() { |
1189 | const char *p{at_}; |
1190 | bool ampersand{*p == '&'}; |
1191 | if (ampersand) { |
1192 | p = SkipWhiteSpace(p: p + 1); |
1193 | } |
1194 | if (*p != '\n') { |
1195 | if (inCharLiteral_) { |
1196 | return false; |
1197 | } else if (*p == '!') { // & ! comment - ok |
1198 | } else if (ampersand && isPossibleMacroCall_ && (*p == ',' || *p == ')')) { |
1199 | return false; // allow & at end of a macro argument |
1200 | } else if (features_.ShouldWarn(LanguageFeature::CruftAfterAmpersand)) { |
1201 | Say(GetProvenance(p), "missing ! before comment after &"_warn_en_US ); |
1202 | } |
1203 | } |
1204 | do { |
1205 | if (const char *cont{FreeFormContinuationLine(ampersand)}) { |
1206 | BeginSourceLine(at: cont); |
1207 | NextLine(); |
1208 | return true; |
1209 | } |
1210 | } while (SkipCommentLine(afterAmpersand: ampersand)); |
1211 | return false; |
1212 | } |
1213 | |
1214 | // Implicit line continuation allows a preprocessor macro call with |
1215 | // arguments to span multiple lines. |
1216 | bool Prescanner::IsImplicitContinuation() const { |
1217 | return !inPreprocessorDirective_ && !inCharLiteral_ && isPossibleMacroCall_ && |
1218 | parenthesisNesting_ > 0 && !IsAtEnd() && |
1219 | ClassifyLine(nextLine_).kind == LineClassification::Kind::Source; |
1220 | } |
1221 | |
1222 | bool Prescanner::Continuation(bool mightNeedFixedFormSpace) { |
1223 | if (*at_ == '\n' || *at_ == '&') { |
1224 | if (inFixedForm_) { |
1225 | return FixedFormContinuation(mightNeedSpace: mightNeedFixedFormSpace); |
1226 | } else { |
1227 | return FreeFormContinuation(); |
1228 | } |
1229 | } else { |
1230 | return false; |
1231 | } |
1232 | } |
1233 | |
1234 | std::optional<Prescanner::LineClassification> |
1235 | Prescanner::IsFixedFormCompilerDirectiveLine(const char *start) const { |
1236 | const char *p{start}; |
1237 | char col1{*p++}; |
1238 | if (!IsFixedFormCommentChar(ch: col1)) { |
1239 | return std::nullopt; |
1240 | } |
1241 | char sentinel[5], *sp{sentinel}; |
1242 | int column{2}; |
1243 | for (; column < 6; ++column, ++p) { |
1244 | if (*p == ' ' || *p == '\n' || *p == '\t') { |
1245 | break; |
1246 | } |
1247 | if (sp == sentinel + 1 && sentinel[0] == '$' && IsDecimalDigit(*p)) { |
1248 | // OpenMP conditional compilation line: leave the label alone |
1249 | break; |
1250 | } |
1251 | *sp++ = ToLowerCaseLetter(*p); |
1252 | } |
1253 | if (column == 6) { |
1254 | if (*p == ' ' || *p == '\t' || *p == '0') { |
1255 | ++p; |
1256 | } else { |
1257 | // This is a Continuation line, not an initial directive line. |
1258 | return std::nullopt; |
1259 | } |
1260 | } |
1261 | if (sp == sentinel) { |
1262 | return std::nullopt; |
1263 | } |
1264 | *sp = '\0'; |
1265 | if (const char *ss{IsCompilerDirectiveSentinel( |
1266 | sentinel, static_cast<std::size_t>(sp - sentinel))}) { |
1267 | std::size_t payloadOffset = p - start; |
1268 | return {LineClassification{ |
1269 | LineClassification::Kind::CompilerDirective, payloadOffset, ss}}; |
1270 | } |
1271 | return std::nullopt; |
1272 | } |
1273 | |
1274 | std::optional<Prescanner::LineClassification> |
1275 | Prescanner::IsFreeFormCompilerDirectiveLine(const char *start) const { |
1276 | char sentinel[8]; |
1277 | const char *p{SkipWhiteSpace(p: start)}; |
1278 | if (*p++ != '!') { |
1279 | return std::nullopt; |
1280 | } |
1281 | for (std::size_t j{0}; j + 1 < sizeof sentinel; ++p, ++j) { |
1282 | if (*p == '\n') { |
1283 | break; |
1284 | } |
1285 | if (*p == ' ' || *p == '\t' || *p == '&') { |
1286 | if (j == 0) { |
1287 | break; |
1288 | } |
1289 | sentinel[j] = '\0'; |
1290 | p = SkipWhiteSpace(p: p + 1); |
1291 | if (*p == '!') { |
1292 | break; |
1293 | } |
1294 | if (const char *sp{IsCompilerDirectiveSentinel(sentinel, j)}) { |
1295 | std::size_t offset = p - start; |
1296 | return {LineClassification{ |
1297 | LineClassification::Kind::CompilerDirective, offset, sp}}; |
1298 | } |
1299 | break; |
1300 | } |
1301 | sentinel[j] = ToLowerCaseLetter(*p); |
1302 | } |
1303 | return std::nullopt; |
1304 | } |
1305 | |
1306 | Prescanner &Prescanner::AddCompilerDirectiveSentinel(const std::string &dir) { |
1307 | std::uint64_t packed{0}; |
1308 | for (char ch : dir) { |
1309 | packed = (packed << 8) | (ToLowerCaseLetter(ch) & 0xff); |
1310 | } |
1311 | compilerDirectiveBloomFilter_.set(position: packed % prime1); |
1312 | compilerDirectiveBloomFilter_.set(position: packed % prime2); |
1313 | compilerDirectiveSentinels_.insert(x: dir); |
1314 | return *this; |
1315 | } |
1316 | |
1317 | const char *Prescanner::IsCompilerDirectiveSentinel( |
1318 | const char *sentinel, std::size_t len) const { |
1319 | std::uint64_t packed{0}; |
1320 | for (std::size_t j{0}; j < len; ++j) { |
1321 | packed = (packed << 8) | (sentinel[j] & 0xff); |
1322 | } |
1323 | if (len == 0 || !compilerDirectiveBloomFilter_.test(position: packed % prime1) || |
1324 | !compilerDirectiveBloomFilter_.test(position: packed % prime2)) { |
1325 | return nullptr; |
1326 | } |
1327 | const auto iter{compilerDirectiveSentinels_.find(x: std::string(sentinel, len))}; |
1328 | return iter == compilerDirectiveSentinels_.end() ? nullptr : iter->c_str(); |
1329 | } |
1330 | |
1331 | const char *Prescanner::IsCompilerDirectiveSentinel(CharBlock token) const { |
1332 | const char *p{token.begin()}; |
1333 | const char *end{p + token.size()}; |
1334 | while (p < end && (*p == ' ' || *p == '\n')) { |
1335 | ++p; |
1336 | } |
1337 | if (p < end && *p == '!') { |
1338 | ++p; |
1339 | } |
1340 | while (end > p && (end[-1] == ' ' || end[-1] == '\t')) { |
1341 | --end; |
1342 | } |
1343 | return end > p && IsCompilerDirectiveSentinel(sentinel: p, len: end - p) ? p : nullptr; |
1344 | } |
1345 | |
1346 | constexpr bool IsDirective(const char *match, const char *dir) { |
1347 | for (; *match; ++match) { |
1348 | if (*match != ToLowerCaseLetter(*dir++)) { |
1349 | return false; |
1350 | } |
1351 | } |
1352 | return true; |
1353 | } |
1354 | |
1355 | Prescanner::LineClassification Prescanner::ClassifyLine( |
1356 | const char *start) const { |
1357 | if (inFixedForm_) { |
1358 | if (std::optional<LineClassification> lc{ |
1359 | IsFixedFormCompilerDirectiveLine(start)}) { |
1360 | return std::move(*lc); |
1361 | } |
1362 | if (IsFixedFormCommentLine(start)) { |
1363 | return {LineClassification::Kind::Comment}; |
1364 | } |
1365 | } else { |
1366 | if (std::optional<LineClassification> lc{ |
1367 | IsFreeFormCompilerDirectiveLine(start)}) { |
1368 | return std::move(*lc); |
1369 | } |
1370 | if (const char *bang{IsFreeFormComment(p: start)}) { |
1371 | return {LineClassification::Kind::Comment, |
1372 | static_cast<std::size_t>(bang - start)}; |
1373 | } |
1374 | } |
1375 | if (std::optional<std::size_t> quoteOffset{IsIncludeLine(start)}) { |
1376 | return {LineClassification::Kind::IncludeLine, *quoteOffset}; |
1377 | } |
1378 | if (const char *dir{IsPreprocessorDirectiveLine(start)}) { |
1379 | if (IsDirective(match: "if" , dir) || IsDirective(match: "elif" , dir) || |
1380 | IsDirective(match: "else" , dir) || IsDirective(match: "endif" , dir)) { |
1381 | return {LineClassification::Kind::ConditionalCompilationDirective}; |
1382 | } else if (IsDirective(match: "include" , dir)) { |
1383 | return {LineClassification::Kind::IncludeDirective}; |
1384 | } else if (IsDirective(match: "define" , dir) || IsDirective(match: "undef" , dir)) { |
1385 | return {LineClassification::Kind::DefinitionDirective}; |
1386 | } else { |
1387 | return {LineClassification::Kind::PreprocessorDirective}; |
1388 | } |
1389 | } |
1390 | return {LineClassification::Kind::Source}; |
1391 | } |
1392 | |
1393 | void Prescanner::SourceFormChange(std::string &&dir) { |
1394 | if (dir == "!dir$ free" ) { |
1395 | inFixedForm_ = false; |
1396 | } else if (dir == "!dir$ fixed" ) { |
1397 | inFixedForm_ = true; |
1398 | } |
1399 | } |
1400 | |
1401 | // Acquire and append compiler directive continuation lines to |
1402 | // the tokens that constitute a compiler directive, even when those |
1403 | // directive continuation lines are the result of macro expansion. |
1404 | // (Not used when neither the original compiler directive line nor |
1405 | // the directive continuation line result from preprocessing; regular |
1406 | // line continuation during tokenization handles that normal case.) |
1407 | bool Prescanner::CompilerDirectiveContinuation( |
1408 | TokenSequence &tokens, const char *origSentinel) { |
1409 | if (inFixedForm_ || tokens.empty() || |
1410 | tokens.TokenAt(tokens.SizeInTokens() - 1) != "&" ) { |
1411 | return false; |
1412 | } |
1413 | LineClassification followingLine{ClassifyLine(start: nextLine_)}; |
1414 | if (followingLine.kind == LineClassification::Kind::Comment) { |
1415 | nextLine_ += followingLine.payloadOffset; // advance to '!' or newline |
1416 | NextLine(); |
1417 | return true; |
1418 | } |
1419 | CHECK(origSentinel != nullptr); |
1420 | directiveSentinel_ = origSentinel; // so IsDirective() is true |
1421 | const char *nextContinuation{ |
1422 | followingLine.kind == LineClassification::Kind::CompilerDirective |
1423 | ? FreeFormContinuationLine(ampersand: true) |
1424 | : nullptr}; |
1425 | if (!nextContinuation && |
1426 | followingLine.kind != LineClassification::Kind::Source) { |
1427 | return false; |
1428 | } |
1429 | auto origNextLine{nextLine_}; |
1430 | BeginSourceLine(at: nextLine_); |
1431 | NextLine(); |
1432 | TokenSequence followingTokens; |
1433 | if (nextContinuation) { |
1434 | // What follows is !DIR$ & xxx; skip over the & so that it |
1435 | // doesn't cause a spurious continuation. |
1436 | at_ = nextContinuation; |
1437 | } else { |
1438 | // What follows looks like a source line before macro expansion, |
1439 | // but might become a directive continuation afterwards. |
1440 | SkipSpaces(); |
1441 | } |
1442 | while (NextToken(followingTokens)) { |
1443 | } |
1444 | if (auto followingPrepro{ |
1445 | preprocessor_.MacroReplacement(followingTokens, *this)}) { |
1446 | followingTokens = std::move(*followingPrepro); |
1447 | } |
1448 | followingTokens.RemoveRedundantBlanks(); |
1449 | std::size_t startAt{0}; |
1450 | std::size_t keep{followingTokens.SizeInTokens()}; |
1451 | bool ok{false}; |
1452 | if (nextContinuation) { |
1453 | ok = true; |
1454 | } else { |
1455 | if (keep >= 3 && followingTokens.TokenAt(0) == "!" && |
1456 | followingTokens.TokenAt(2) == "&" ) { |
1457 | CharBlock sentinel{followingTokens.TokenAt(1)}; |
1458 | if (!sentinel.empty() && |
1459 | std::memcmp(s1: sentinel.begin(), s2: origSentinel, n: sentinel.size()) == 0) { |
1460 | startAt = 3; |
1461 | keep -= 3; |
1462 | ok = true; |
1463 | } |
1464 | } |
1465 | } |
1466 | if (ok) { |
1467 | tokens.pop_back(); // delete original '&' |
1468 | tokens.Put(followingTokens, startAt, keep); |
1469 | } else { |
1470 | nextLine_ = origNextLine; |
1471 | } |
1472 | return ok; |
1473 | } |
1474 | |
1475 | // Similar, but for source line continuation after macro replacement. |
1476 | bool Prescanner::SourceLineContinuation(TokenSequence &tokens) { |
1477 | if (!inFixedForm_ && !tokens.empty() && |
1478 | tokens.TokenAt(tokens.SizeInTokens() - 1) == "&" ) { |
1479 | LineClassification followingLine{ClassifyLine(start: nextLine_)}; |
1480 | if (followingLine.kind == LineClassification::Kind::Comment) { |
1481 | nextLine_ += followingLine.payloadOffset; // advance to '!' or newline |
1482 | NextLine(); |
1483 | return true; |
1484 | } else if (const char *nextContinuation{FreeFormContinuationLine(ampersand: true)}) { |
1485 | BeginSourceLine(at: nextLine_); |
1486 | NextLine(); |
1487 | TokenSequence followingTokens; |
1488 | at_ = nextContinuation; |
1489 | while (NextToken(followingTokens)) { |
1490 | } |
1491 | if (auto followingPrepro{ |
1492 | preprocessor_.MacroReplacement(followingTokens, *this)}) { |
1493 | followingTokens = std::move(*followingPrepro); |
1494 | } |
1495 | followingTokens.RemoveRedundantBlanks(); |
1496 | tokens.pop_back(); // delete original '&' |
1497 | tokens.Put(followingTokens); |
1498 | return true; |
1499 | } |
1500 | } |
1501 | return false; |
1502 | } |
1503 | } // namespace Fortran::parser |
1504 | |