1 | //===-- lib/Parser/prescan.h ------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef FORTRAN_PARSER_PRESCAN_H_ |
10 | #define FORTRAN_PARSER_PRESCAN_H_ |
11 | |
12 | // Defines a fast Fortran source prescanning phase that implements some |
13 | // character-level features of the language that can be inefficient to |
14 | // support directly in a backtracking parser. This phase handles Fortran |
15 | // line continuation, comment removal, card image margins, padding out |
16 | // fixed form character literals on truncated card images, file |
17 | // inclusion, and driving the Fortran source preprocessor. |
18 | |
19 | #include "flang/Common/Fortran-features.h" |
20 | #include "flang/Parser/characters.h" |
21 | #include "flang/Parser/message.h" |
22 | #include "flang/Parser/provenance.h" |
23 | #include "flang/Parser/token-sequence.h" |
24 | #include <bitset> |
25 | #include <optional> |
26 | #include <string> |
27 | #include <unordered_set> |
28 | |
29 | namespace Fortran::parser { |
30 | |
31 | class Messages; |
32 | class Preprocessor; |
33 | |
34 | class Prescanner { |
35 | public: |
36 | Prescanner(Messages &, CookedSource &, Preprocessor &, |
37 | common::LanguageFeatureControl); |
38 | Prescanner(const Prescanner &); |
39 | |
40 | const AllSources &allSources() const { return allSources_; } |
41 | AllSources &allSources() { return allSources_; } |
42 | const Messages &messages() const { return messages_; } |
43 | Messages &messages() { return messages_; } |
44 | const Preprocessor &preprocessor() const { return preprocessor_; } |
45 | Preprocessor &preprocessor() { return preprocessor_; } |
46 | |
47 | Prescanner &set_fixedForm(bool yes) { |
48 | inFixedForm_ = yes; |
49 | return *this; |
50 | } |
51 | Prescanner &set_encoding(Encoding code) { |
52 | encoding_ = code; |
53 | return *this; |
54 | } |
55 | Prescanner &set_fixedFormColumnLimit(int limit) { |
56 | fixedFormColumnLimit_ = limit; |
57 | return *this; |
58 | } |
59 | |
60 | Prescanner &AddCompilerDirectiveSentinel(const std::string &); |
61 | |
62 | void Prescan(ProvenanceRange); |
63 | void Statement(); |
64 | void NextLine(); |
65 | |
66 | // Callbacks for use by Preprocessor. |
67 | bool IsAtEnd() const { return nextLine_ >= limit_; } |
68 | bool IsNextLinePreprocessorDirective() const; |
69 | TokenSequence TokenizePreprocessorDirective(); |
70 | Provenance GetCurrentProvenance() const { return GetProvenance(at_); } |
71 | |
72 | const char *IsCompilerDirectiveSentinel(const char *, std::size_t) const; |
73 | const char *IsCompilerDirectiveSentinel(CharBlock) const; |
74 | |
75 | template <typename... A> Message &Say(A &&...a) { |
76 | return messages_.Say(std::forward<A>(a)...); |
77 | } |
78 | |
79 | private: |
80 | struct LineClassification { |
81 | enum class Kind { |
82 | , |
83 | ConditionalCompilationDirective, |
84 | IncludeDirective, // #include |
85 | DefinitionDirective, // #define & #undef |
86 | PreprocessorDirective, |
87 | IncludeLine, // Fortran INCLUDE |
88 | CompilerDirective, |
89 | Source |
90 | }; |
91 | LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr) |
92 | : kind{k}, payloadOffset{po}, sentinel{s} {} |
93 | LineClassification(LineClassification &&) = default; |
94 | Kind kind; |
95 | std::size_t payloadOffset; // byte offset of content |
96 | const char *sentinel; // if it's a compiler directive |
97 | }; |
98 | |
99 | void BeginSourceLine(const char *at) { |
100 | at_ = at; |
101 | column_ = 1; |
102 | tabInCurrentLine_ = false; |
103 | } |
104 | |
105 | void BeginSourceLineAndAdvance() { |
106 | BeginSourceLine(at: nextLine_); |
107 | NextLine(); |
108 | } |
109 | |
110 | void BeginStatementAndAdvance() { |
111 | BeginSourceLineAndAdvance(); |
112 | slashInCurrentStatement_ = false; |
113 | preventHollerith_ = false; |
114 | parenthesisNesting_ = 0; |
115 | continuationLines_ = 0; |
116 | isPossibleMacroCall_ = false; |
117 | } |
118 | |
119 | Provenance GetProvenance(const char *sourceChar) const { |
120 | return startProvenance_ + (sourceChar - start_); |
121 | } |
122 | |
123 | ProvenanceRange GetProvenanceRange( |
124 | const char *first, const char *afterLast) const { |
125 | std::size_t bytes = afterLast - first; |
126 | return {startProvenance_ + (first - start_), bytes}; |
127 | } |
128 | |
129 | void EmitChar(TokenSequence &tokens, char ch) { |
130 | tokens.PutNextTokenChar(ch, GetCurrentProvenance()); |
131 | } |
132 | |
133 | void EmitInsertedChar(TokenSequence &tokens, char ch) { |
134 | Provenance provenance{allSources_.CompilerInsertionProvenance(ch)}; |
135 | tokens.PutNextTokenChar(ch, provenance); |
136 | } |
137 | |
138 | char EmitCharAndAdvance(TokenSequence &tokens, char ch) { |
139 | EmitChar(tokens, ch); |
140 | NextChar(); |
141 | return *at_; |
142 | } |
143 | |
144 | bool InCompilerDirective() const { return directiveSentinel_ != nullptr; } |
145 | bool InFixedFormSource() const { |
146 | return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective(); |
147 | } |
148 | |
149 | bool (const char *p) const { |
150 | return p[0] == '/' && p[1] == '*' && |
151 | (inPreprocessorDirective_ || |
152 | (!inCharLiteral_ && |
153 | features_.IsEnabled( |
154 | common::LanguageFeature::ClassicCComments))); |
155 | } |
156 | |
157 | void LabelField(TokenSequence &); |
158 | void EnforceStupidEndStatementRules(const TokenSequence &); |
159 | void SkipToEndOfLine(); |
160 | bool MustSkipToEndOfLine() const; |
161 | void NextChar(); |
162 | // True when input flowed to a continuation line |
163 | bool SkipToNextSignificantCharacter(); |
164 | void (); |
165 | void SkipSpaces(); |
166 | static const char *SkipWhiteSpace(const char *); |
167 | const char *SkipWhiteSpaceAndCComments(const char *) const; |
168 | const char *(const char *) const; |
169 | bool NextToken(TokenSequence &); |
170 | bool ExponentAndKind(TokenSequence &); |
171 | void QuotedCharacterLiteral(TokenSequence &, const char *start); |
172 | void Hollerith(TokenSequence &, int count, const char *start); |
173 | bool PadOutCharacterLiteral(TokenSequence &); |
174 | bool (bool afterAmpersand); |
175 | bool (const char *) const; |
176 | const char *(const char *) const; |
177 | std::optional<std::size_t> IsIncludeLine(const char *) const; |
178 | void FortranInclude(const char *quote); |
179 | const char *IsPreprocessorDirectiveLine(const char *) const; |
180 | const char *FixedFormContinuationLine(bool mightNeedSpace); |
181 | const char *FreeFormContinuationLine(bool ampersand); |
182 | bool IsImplicitContinuation() const; |
183 | bool FixedFormContinuation(bool mightNeedSpace); |
184 | bool FreeFormContinuation(); |
185 | bool Continuation(bool mightNeedFixedFormSpace); |
186 | std::optional<LineClassification> IsFixedFormCompilerDirectiveLine( |
187 | const char *) const; |
188 | std::optional<LineClassification> IsFreeFormCompilerDirectiveLine( |
189 | const char *) const; |
190 | LineClassification ClassifyLine(const char *) const; |
191 | void SourceFormChange(std::string &&); |
192 | bool CompilerDirectiveContinuation(TokenSequence &, const char *sentinel); |
193 | bool SourceLineContinuation(TokenSequence &); |
194 | |
195 | Messages &messages_; |
196 | CookedSource &cooked_; |
197 | Preprocessor &preprocessor_; |
198 | AllSources &allSources_; |
199 | common::LanguageFeatureControl features_; |
200 | bool inFixedForm_{false}; |
201 | int fixedFormColumnLimit_{72}; |
202 | Encoding encoding_{Encoding::UTF_8}; |
203 | int parenthesisNesting_{0}; |
204 | int prescannerNesting_{0}; |
205 | int continuationLines_{0}; |
206 | bool isPossibleMacroCall_{false}; |
207 | |
208 | Provenance startProvenance_; |
209 | const char *start_{nullptr}; // beginning of current source file content |
210 | const char *limit_{nullptr}; // first address after end of current source |
211 | const char *nextLine_{nullptr}; // next line to process; <= limit_ |
212 | const char *directiveSentinel_{nullptr}; // current compiler directive |
213 | |
214 | // These data members are state for processing the source line containing |
215 | // "at_", which goes to up to the newline character before "nextLine_". |
216 | const char *at_{nullptr}; // next character to process; < nextLine_ |
217 | int column_{1}; // card image column position of next character |
218 | bool tabInCurrentLine_{false}; |
219 | bool slashInCurrentStatement_{false}; |
220 | bool preventHollerith_{false}; // CHARACTER*4HIMOM not Hollerith |
221 | bool inCharLiteral_{false}; |
222 | bool continuationInCharLiteral_{false}; |
223 | bool inPreprocessorDirective_{false}; |
224 | |
225 | // In some edge cases of compiler directive continuation lines, it |
226 | // is necessary to treat the line break as a space character by |
227 | // setting this flag, which is cleared by EmitChar(). |
228 | bool insertASpace_{false}; |
229 | |
230 | // When a free form continuation marker (&) appears at the end of a line |
231 | // before a INCLUDE or #include, we delete it and omit the newline, so |
232 | // that the first line of the included file is truly a continuation of |
233 | // the line before. Also used when the & appears at the end of the last |
234 | // line in an include file. |
235 | bool omitNewline_{false}; |
236 | bool skipLeadingAmpersand_{false}; |
237 | |
238 | const Provenance spaceProvenance_{ |
239 | allSources_.CompilerInsertionProvenance(' ')}; |
240 | const Provenance backslashProvenance_{ |
241 | allSources_.CompilerInsertionProvenance('\\')}; |
242 | |
243 | // To avoid probing the set of active compiler directive sentinel strings |
244 | // on every comment line, they're checked first with a cheap Bloom filter. |
245 | static const int prime1{1019}, prime2{1021}; |
246 | std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes |
247 | std::unordered_set<std::string> compilerDirectiveSentinels_; |
248 | }; |
249 | } // namespace Fortran::parser |
250 | #endif // FORTRAN_PARSER_PRESCAN_H_ |
251 | |