1 | //===-- lib/Parser/prescan.h ------------------------------------*- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef FORTRAN_PARSER_PRESCAN_H_ |
10 | #define FORTRAN_PARSER_PRESCAN_H_ |
11 | |
12 | // Defines a fast Fortran source prescanning phase that implements some |
13 | // character-level features of the language that can be inefficient to |
14 | // support directly in a backtracking parser. This phase handles Fortran |
15 | // line continuation, comment removal, card image margins, padding out |
16 | // fixed form character literals on truncated card images, file |
17 | // inclusion, and driving the Fortran source preprocessor. |
18 | |
19 | #include "flang/Parser/characters.h" |
20 | #include "flang/Parser/message.h" |
21 | #include "flang/Parser/provenance.h" |
22 | #include "flang/Parser/token-sequence.h" |
23 | #include "flang/Support/Fortran-features.h" |
24 | #include <bitset> |
25 | #include <optional> |
26 | #include <string> |
27 | #include <unordered_set> |
28 | |
29 | namespace Fortran::parser { |
30 | |
31 | class Messages; |
32 | class Preprocessor; |
33 | |
34 | class Prescanner { |
35 | public: |
36 | Prescanner(Messages &, CookedSource &, Preprocessor &, |
37 | common::LanguageFeatureControl); |
38 | Prescanner( |
39 | const Prescanner &, Preprocessor &, bool isNestedInIncludeDirective); |
40 | Prescanner(const Prescanner &) = delete; |
41 | Prescanner(Prescanner &&) = delete; |
42 | |
43 | const AllSources &allSources() const { return allSources_; } |
44 | AllSources &allSources() { return allSources_; } |
45 | const Messages &messages() const { return messages_; } |
46 | Messages &messages() { return messages_; } |
47 | const Preprocessor &preprocessor() const { return preprocessor_; } |
48 | Preprocessor &preprocessor() { return preprocessor_; } |
49 | common::LanguageFeatureControl &features() { return features_; } |
50 | |
51 | Prescanner &set_preprocessingOnly(bool yes) { |
52 | preprocessingOnly_ = yes; |
53 | return *this; |
54 | } |
55 | Prescanner &set_expandIncludeLines(bool yes) { |
56 | expandIncludeLines_ = yes; |
57 | return *this; |
58 | } |
59 | Prescanner &set_fixedForm(bool yes) { |
60 | inFixedForm_ = yes; |
61 | return *this; |
62 | } |
63 | Prescanner &set_encoding(Encoding code) { |
64 | encoding_ = code; |
65 | return *this; |
66 | } |
67 | Prescanner &set_fixedFormColumnLimit(int limit) { |
68 | fixedFormColumnLimit_ = limit; |
69 | return *this; |
70 | } |
71 | |
72 | Prescanner &AddCompilerDirectiveSentinel(const std::string &); |
73 | |
74 | void Prescan(ProvenanceRange); |
75 | void Statement(); |
76 | void NextLine(); |
77 | |
78 | // Callbacks for use by Preprocessor. |
79 | bool IsAtEnd() const { return nextLine_ >= limit_; } |
80 | bool IsNextLinePreprocessorDirective() const; |
81 | TokenSequence TokenizePreprocessorDirective(); |
82 | Provenance GetCurrentProvenance() const { return GetProvenance(at_); } |
83 | |
84 | const char *IsCompilerDirectiveSentinel(const char *, std::size_t) const; |
85 | const char *IsCompilerDirectiveSentinel(CharBlock) const; |
86 | // 'first' is the sentinel, 'second' is beginning of payload |
87 | std::optional<std::pair<const char *, const char *>> |
88 | IsCompilerDirectiveSentinel(const char *p) const; |
89 | |
90 | template <typename... A> Message &Say(A &&...a) { |
91 | return messages_.Say(std::forward<A>(a)...); |
92 | } |
93 | |
94 | private: |
95 | struct LineClassification { |
96 | enum class Kind { |
97 | Comment, |
98 | ConditionalCompilationDirective, |
99 | IncludeDirective, // #include |
100 | DefinitionDirective, // #define & #undef |
101 | PreprocessorDirective, |
102 | IncludeLine, // Fortran INCLUDE |
103 | CompilerDirective, |
104 | Source |
105 | }; |
106 | LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr) |
107 | : kind{k}, payloadOffset{po}, sentinel{s} {} |
108 | LineClassification(LineClassification &&) = default; |
109 | LineClassification &operator=(LineClassification &&) = default; |
110 | Kind kind; |
111 | std::size_t payloadOffset; // byte offset of content |
112 | const char *sentinel; // if it's a compiler directive |
113 | }; |
114 | |
115 | void BeginSourceLine(const char *at) { |
116 | at_ = at; |
117 | column_ = 1; |
118 | tabInCurrentLine_ = false; |
119 | } |
120 | |
121 | void BeginSourceLineAndAdvance() { |
122 | BeginSourceLine(at: nextLine_); |
123 | NextLine(); |
124 | } |
125 | |
126 | void BeginStatementAndAdvance() { |
127 | BeginSourceLineAndAdvance(); |
128 | slashInCurrentStatement_ = false; |
129 | preventHollerith_ = false; |
130 | parenthesisNesting_ = 0; |
131 | continuationLines_ = 0; |
132 | isPossibleMacroCall_ = false; |
133 | disableSourceContinuation_ = false; |
134 | } |
135 | |
136 | Provenance GetProvenance(const char *sourceChar) const { |
137 | return startProvenance_ + (sourceChar - start_); |
138 | } |
139 | |
140 | ProvenanceRange GetProvenanceRange( |
141 | const char *first, const char *afterLast) const { |
142 | std::size_t bytes = afterLast - first; |
143 | return {startProvenance_ + (first - start_), bytes}; |
144 | } |
145 | |
146 | void EmitChar(TokenSequence &tokens, char ch) { |
147 | tokens.PutNextTokenChar(ch, GetCurrentProvenance()); |
148 | } |
149 | |
150 | void EmitInsertedChar(TokenSequence &tokens, char ch) { |
151 | Provenance provenance{allSources_.CompilerInsertionProvenance(ch)}; |
152 | tokens.PutNextTokenChar(ch, provenance); |
153 | } |
154 | |
155 | char EmitCharAndAdvance(TokenSequence &tokens, char ch) { |
156 | EmitChar(tokens, ch); |
157 | NextChar(); |
158 | return *at_; |
159 | } |
160 | |
161 | bool InCompilerDirective() const { return directiveSentinel_ != nullptr; } |
162 | bool InOpenMPConditionalLine() const { |
163 | return directiveSentinel_ && directiveSentinel_[0] == '$' && |
164 | !directiveSentinel_[1]; |
165 | ; |
166 | } |
167 | bool InFixedFormSource() const { |
168 | return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective(); |
169 | } |
170 | |
171 | bool IsCComment(const char *p) const { |
172 | return p[0] == '/' && p[1] == '*' && |
173 | (inPreprocessorDirective_ || |
174 | (!inCharLiteral_ && |
175 | features_.IsEnabled( |
176 | common::LanguageFeature::ClassicCComments))); |
177 | } |
178 | |
179 | void CheckAndEmitLine(TokenSequence &, Provenance newlineProvenance); |
180 | void LabelField(TokenSequence &); |
181 | void EnforceStupidEndStatementRules(const TokenSequence &); |
182 | void SkipToEndOfLine(); |
183 | bool MustSkipToEndOfLine() const; |
184 | void NextChar(); |
185 | // True when input flowed to a continuation line |
186 | bool SkipToNextSignificantCharacter(); |
187 | void SkipCComments(); |
188 | void SkipSpaces(); |
189 | static const char *SkipWhiteSpace(const char *); |
190 | const char *SkipWhiteSpaceIncludingEmptyMacros(const char *) const; |
191 | const char *SkipWhiteSpaceAndCComments(const char *) const; |
192 | const char *SkipCComment(const char *) const; |
193 | bool NextToken(TokenSequence &); |
194 | bool HandleExponent(TokenSequence &); |
195 | bool HandleKindSuffix(TokenSequence &); |
196 | bool HandleExponentAndOrKindSuffix(TokenSequence &); |
197 | void QuotedCharacterLiteral(TokenSequence &, const char *start); |
198 | void Hollerith(TokenSequence &, int count, const char *start); |
199 | bool PadOutCharacterLiteral(TokenSequence &); |
200 | bool SkipCommentLine(bool afterAmpersand); |
201 | bool IsFixedFormCommentLine(const char *) const; |
202 | const char *IsFreeFormComment(const char *) const; |
203 | std::optional<std::size_t> IsIncludeLine(const char *) const; |
204 | void FortranInclude(const char *quote); |
205 | const char *IsPreprocessorDirectiveLine(const char *) const; |
206 | const char *FixedFormContinuationLine(bool mightNeedSpace); |
207 | const char *FreeFormContinuationLine(bool ampersand); |
208 | bool IsImplicitContinuation() const; |
209 | bool FixedFormContinuation(bool mightNeedSpace); |
210 | bool FreeFormContinuation(); |
211 | bool Continuation(bool mightNeedFixedFormSpace); |
212 | std::optional<LineClassification> IsFixedFormCompilerDirectiveLine( |
213 | const char *) const; |
214 | std::optional<LineClassification> IsFreeFormCompilerDirectiveLine( |
215 | const char *) const; |
216 | LineClassification ClassifyLine(const char *) const; |
217 | LineClassification ClassifyLine( |
218 | TokenSequence &, Provenance newlineProvenance) const; |
219 | void SourceFormChange(std::string &&); |
220 | bool CompilerDirectiveContinuation(TokenSequence &, const char *sentinel); |
221 | bool SourceLineContinuation(TokenSequence &); |
222 | |
223 | Messages &messages_; |
224 | CookedSource &cooked_; |
225 | Preprocessor &preprocessor_; |
226 | AllSources &allSources_; |
227 | common::LanguageFeatureControl features_; |
228 | bool preprocessingOnly_{false}; |
229 | bool expandIncludeLines_{true}; |
230 | bool isNestedInIncludeDirective_{false}; |
231 | bool backslashFreeFormContinuation_{false}; |
232 | bool inFixedForm_{false}; |
233 | int fixedFormColumnLimit_{72}; |
234 | Encoding encoding_{Encoding::UTF_8}; |
235 | int parenthesisNesting_{0}; |
236 | int prescannerNesting_{0}; |
237 | int continuationLines_{0}; |
238 | bool isPossibleMacroCall_{false}; |
239 | bool afterPreprocessingDirective_{false}; |
240 | bool disableSourceContinuation_{false}; |
241 | |
242 | Provenance startProvenance_; |
243 | const char *start_{nullptr}; // beginning of current source file content |
244 | const char *limit_{nullptr}; // first address after end of current source |
245 | const char *nextLine_{nullptr}; // next line to process; <= limit_ |
246 | const char *directiveSentinel_{nullptr}; // current compiler directive |
247 | |
248 | // These data members are state for processing the source line containing |
249 | // "at_", which goes to up to the newline character before "nextLine_". |
250 | const char *at_{nullptr}; // next character to process; < nextLine_ |
251 | int column_{1}; // card image column position of next character |
252 | bool tabInCurrentLine_{false}; |
253 | bool slashInCurrentStatement_{false}; |
254 | bool preventHollerith_{false}; // CHARACTER*4HIMOM not Hollerith |
255 | bool inCharLiteral_{false}; |
256 | bool continuationInCharLiteral_{false}; |
257 | bool inPreprocessorDirective_{false}; |
258 | |
259 | // In some edge cases of compiler directive continuation lines, it |
260 | // is necessary to treat the line break as a space character by |
261 | // setting this flag, which is cleared by EmitChar(). |
262 | bool insertASpace_{false}; |
263 | |
264 | // When a free form continuation marker (&) appears at the end of a line |
265 | // before a INCLUDE or #include, we delete it and omit the newline, so |
266 | // that the first line of the included file is truly a continuation of |
267 | // the line before. Also used when the & appears at the end of the last |
268 | // line in an include file. |
269 | bool omitNewline_{false}; |
270 | bool skipLeadingAmpersand_{false}; |
271 | |
272 | const std::size_t firstCookedCharacterOffset_{cooked_.BufferedBytes()}; |
273 | |
274 | const Provenance spaceProvenance_{ |
275 | allSources_.CompilerInsertionProvenance(' ')}; |
276 | const Provenance backslashProvenance_{ |
277 | allSources_.CompilerInsertionProvenance('\\')}; |
278 | |
279 | // To avoid probing the set of active compiler directive sentinel strings |
280 | // on every comment line, they're checked first with a cheap Bloom filter. |
281 | static const int prime1{1019}, prime2{1021}; |
282 | std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes |
283 | std::unordered_set<std::string> compilerDirectiveSentinels_; |
284 | }; |
285 | } // namespace Fortran::parser |
286 | #endif // FORTRAN_PARSER_PRESCAN_H_ |
287 |
Definitions
- Prescanner
- Prescanner
- Prescanner
- allSources
- allSources
- messages
- messages
- preprocessor
- preprocessor
- features
- set_preprocessingOnly
- set_expandIncludeLines
- set_fixedForm
- set_encoding
- set_fixedFormColumnLimit
- IsAtEnd
- GetCurrentProvenance
- Say
- LineClassification
- Kind
- LineClassification
- LineClassification
- operator=
- BeginSourceLine
- BeginSourceLineAndAdvance
- BeginStatementAndAdvance
- GetProvenance
- GetProvenanceRange
- EmitChar
- EmitInsertedChar
- EmitCharAndAdvance
- InCompilerDirective
- InOpenMPConditionalLine
- InFixedFormSource
Learn to use CMake with our Intro Training
Find out more