1//===-- lib/Parser/token-sequence.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "flang/Parser/token-sequence.h"
10
11#include "prescan.h"
12#include "flang/Parser/characters.h"
13#include "flang/Parser/message.h"
14#include "llvm/Support/raw_ostream.h"
15
16namespace Fortran::parser {
17
// Move assignment: discard this sequence's current contents, then take
// over "that"'s storage via swap.  After the call, "that" is left in the
// cleared (empty but valid) state.
TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
  clear();
  swap(that);
  return *this;
}
23
24void TokenSequence::clear() {
25 start_.clear();
26 nextStart_ = 0;
27 char_.clear();
28 provenances_.clear();
29}
30
31void TokenSequence::pop_back() {
32 CHECK(!start_.empty());
33 CHECK(nextStart_ > start_.back());
34 std::size_t bytes{nextStart_ - start_.back()};
35 nextStart_ = start_.back();
36 start_.pop_back();
37 char_.resize(nextStart_);
38 provenances_.RemoveLastBytes(bytes);
39}
40
41void TokenSequence::shrink_to_fit() {
42 start_.shrink_to_fit();
43 char_.shrink_to_fit();
44 provenances_.shrink_to_fit();
45}
46
47void TokenSequence::swap(TokenSequence &that) {
48 start_.swap(that.start_);
49 std::swap(nextStart_, that.nextStart_);
50 char_.swap(that.char_);
51 provenances_.swap(that.provenances_);
52}
53
54std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
55 std::size_t tokens{start_.size()};
56 for (; at < tokens; ++at) {
57 if (!TokenAt(at).IsBlank()) {
58 return at;
59 }
60 }
61 return tokens; // even if at > tokens
62}
63
64std::optional<std::size_t> TokenSequence::SkipBlanksBackwards(
65 std::size_t at) const {
66 while (at-- > 0) {
67 if (!TokenAt(at).IsBlank()) {
68 return at;
69 }
70 }
71 return std::nullopt;
72}
73
74// C-style /*comments*/ are removed from preprocessing directive
75// token sequences by the prescanner, but not C++ or Fortran
76// free-form line-ending comments (//... and !...) because
77// ignoring them is directive-specific.
78bool TokenSequence::IsAnythingLeft(std::size_t at) const {
79 std::size_t tokens{start_.size()};
80 for (; at < tokens; ++at) {
81 auto tok{TokenAt(at)};
82 const char *end{tok.end()};
83 for (const char *p{tok.begin()}; p < end; ++p) {
84 switch (*p) {
85 case '/':
86 return p + 1 >= end || p[1] != '/';
87 case '!':
88 return false;
89 case ' ':
90 break;
91 default:
92 return true;
93 }
94 }
95 }
96 return false;
97}
98
99void TokenSequence::CopyAll(const TokenSequence &that) {
100 if (nextStart_ < char_.size()) {
101 start_.push_back(nextStart_);
102 }
103 int offset = char_.size();
104 for (int st : that.start_) {
105 start_.push_back(st + offset);
106 }
107 char_.insert(char_.end(), that.char_.begin(), that.char_.end());
108 nextStart_ = char_.size();
109 provenances_.Put(that.provenances_);
110}
111
112void TokenSequence::CopyWithProvenance(
113 const TokenSequence &that, ProvenanceRange range) {
114 std::size_t offset{0};
115 std::size_t tokens{that.SizeInTokens()};
116 for (std::size_t j{0}; j < tokens; ++j) {
117 CharBlock tok{that.TokenAt(j)};
118 Put(tok, range.OffsetMember(offset));
119 offset += tok.size();
120 }
121 CHECK(offset == range.size());
122}
123
// Appends "tokens" tokens of "that", starting at token index "at",
// preserving their original provenance byte-for-byte.  A mapped
// provenance range is cached and reused until its bytes are exhausted,
// so Map() is only called when the provenance changes.
void TokenSequence::AppendRange(
    const TokenSequence &that, std::size_t at, std::size_t tokens) {
  ProvenanceRange provenance;
  std::size_t offset{0};
  for (; tokens-- > 0; ++at) {
    CharBlock tok{that.TokenAt(at)};
    std::size_t tokBytes{tok.size()};
    for (std::size_t j{0}; j < tokBytes; ++j) {
      if (offset == provenance.size()) {
        // Cached range used up (or first byte): map a fresh range.
        provenance = that.provenances_.Map(that.start_[at] + j);
        offset = 0;
      }
      PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
    }
    CloseToken();
  }
}
141
142void TokenSequence::Put(
143 const char *s, std::size_t bytes, Provenance provenance) {
144 for (std::size_t j{0}; j < bytes; ++j) {
145 PutNextTokenChar(s[j], provenance + j);
146 }
147 CloseToken();
148}
149
150void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
151 // Avoid t[0] if t is empty: it would create a reference to nullptr,
152 // which is UB.
153 const char *addr{t.size() ? &t[0] : nullptr};
154 Put(addr, t.size(), provenance);
155}
156
// Emits the contents of a std::string as one token.
void TokenSequence::Put(const std::string &s, Provenance provenance) {
  Put(s.data(), s.size(), provenance);
}
160
// Emits the text accumulated in a raw_string_ostream as one token.
void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
  Put(ss.str(), provenance);
}
164
// Lowercases letters in place, token by token, while leaving the quoted
// part of character literals and Hollerith data untouched.  Each token
// is classified by inspecting its first and last non-blank characters.
TokenSequence &TokenSequence::ToLowerCase() {
  std::size_t tokens{start_.size()};
  std::size_t chars{char_.size()};
  std::size_t atToken{0};
  for (std::size_t j{0}; j < chars;) {
    // The current token occupies the byte range [j, nextStart).
    std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
    char *p{&char_[j]};
    char const *limit{char_.data() + nextStart};
    const char *lastChar{limit - 1};
    j = nextStart;
    // Skip leading whitespaces
    while (p < limit - 1 && *p == ' ') {
      ++p;
    }
    // Find last non-whitespace char
    while (lastChar > p + 1 && *lastChar == ' ') {
      --lastChar;
    }
    if (IsDecimalDigit(*p)) {
      // Token starts with digits: integer, Hollerith, kind-prefixed
      // literal, or a real literal with an exponent.
      while (p < limit && IsDecimalDigit(*p)) {
        ++p;
      }
      if (p >= limit) {
        // all digits: nothing to lowercase
      } else if (*p == 'h' || *p == 'H') {
        // Hollerith
        *p = 'h';
      } else if (*p == '_' && p + 1 < limit && (p[1] == '"' || p[1] == '\'')) {
        // kind-prefixed character literal (e.g., 1_"ABC")
      } else {
        // exponent
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else if (*lastChar == '\'' || *lastChar == '"') {
      if (*p == *lastChar) {
        // Character literal without prefix
      } else if (p[1] == *lastChar) {
        // BOZX-prefixed constant
        // NOTE(review): reads p[1]; presumably a quote-terminated token
        // always has at least two chars here — confirm.
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      } else {
        // Literal with kind-param prefix name (e.g., K_"ABC").
        // Lowercase only the prefix, up to the opening quote.
        for (; *p != *lastChar; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else {
      // Ordinary token: lowercase it all.
      for (; p < limit; ++p) {
        *p = ToLowerCaseLetter(*p);
      }
    }
  }
  return *this;
}
221
222bool TokenSequence::HasBlanks(std::size_t firstChar) const {
223 std::size_t tokens{SizeInTokens()};
224 for (std::size_t j{0}; j < tokens; ++j) {
225 if (start_[j] >= firstChar && TokenAt(j).IsBlank()) {
226 return true;
227 }
228 }
229 return false;
230}
231
232bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const {
233 std::size_t tokens{SizeInTokens()};
234 bool lastWasBlank{false};
235 for (std::size_t j{0}; j < tokens; ++j) {
236 bool isBlank{TokenAt(j).IsBlank()};
237 if (isBlank && lastWasBlank && start_[j] >= firstChar) {
238 return true;
239 }
240 lastWasBlank = isBlank;
241 }
242 return false;
243}
244
245TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) {
246 std::size_t tokens{SizeInTokens()};
247 TokenSequence result;
248 for (std::size_t j{0}; j < tokens; ++j) {
249 if (!TokenAt(j).IsBlank() || start_[j] < firstChar) {
250 result.AppendRange(*this, j);
251 }
252 }
253 swap(result);
254 return *this;
255}
256
257TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) {
258 std::size_t tokens{SizeInTokens()};
259 TokenSequence result;
260 bool lastWasBlank{false};
261 for (std::size_t j{0}; j < tokens; ++j) {
262 bool isBlank{TokenAt(j).IsBlank()};
263 if (!isBlank || !lastWasBlank || start_[j] < firstChar) {
264 result.AppendRange(*this, j);
265 }
266 lastWasBlank = isBlank;
267 }
268 swap(result);
269 return *this;
270}
271
// Truncates the sequence at a trailing !-comment, unless that "comment"
// is really a compiler directive sentinel (e.g. "!dir$"), which is kept.
// When skipFirst is set, the first comment found is also retained.
TokenSequence &TokenSequence::ClipComment(
    const Prescanner &prescanner, bool skipFirst) {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock tok{TokenAt(j)};
    if (std::size_t blanks{tok.CountLeadingBlanks()};
        blanks < tok.size() && tok[blanks] == '!') {
      // Retain active compiler directive sentinels (e.g. "!dir$")
      // Coalesce immediately adjacent tokens so a sentinel that was
      // split across tokens can be recognized as one unit.
      for (std::size_t k{j + 1}; k < tokens && tok.size() <= blanks + 5; ++k) {
        if (tok.begin() + tok.size() == TokenAt(k).begin()) {
          tok.ExtendToCover(TokenAt(k));
        } else {
          break;
        }
      }
      bool isSentinel{false};
      if (tok.size() > blanks + 5) {
        isSentinel = prescanner.IsCompilerDirectiveSentinel(&tok[blanks + 1])
            .has_value();
      }
      if (isSentinel) {
        // Keep the directive; continue scanning.
      } else if (skipFirst) {
        skipFirst = false;
      } else {
        // Truncate just before the comment.
        // NOTE(review): AppendRange copies j-1 tokens (indices 0..j-2),
        // i.e. the token immediately preceding '!' is dropped too —
        // presumably that token is a blank; confirm against callers.
        TokenSequence result;
        if (j > 0) {
          result.AppendRange(*this, 0, j - 1);
        }
        swap(result);
        return *this;
      }
    }
  }
  return *this;
}
307
308void TokenSequence::Emit(CookedSource &cooked) const {
309 if (auto n{char_.size()}) {
310 cooked.Put(&char_[0], n);
311 cooked.PutProvenanceMappings(provenances_);
312 }
313}
314
315llvm::raw_ostream &TokenSequence::Dump(llvm::raw_ostream &o) const {
316 o << "TokenSequence has " << char_.size() << " chars; nextStart_ "
317 << nextStart_ << '\n';
318 for (std::size_t j{0}; j < start_.size(); ++j) {
319 o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString()
320 << "'\n";
321 }
322 provenances_.Dump(o << "provenances_:\n");
323 return o;
324}
325
326Provenance TokenSequence::GetCharProvenance(std::size_t offset) const {
327 ProvenanceRange range{provenances_.Map(offset)};
328 return range.start();
329}
330
331Provenance TokenSequence::GetTokenProvenance(
332 std::size_t token, std::size_t offset) const {
333 return GetCharProvenance(start_[token] + offset);
334}
335
336ProvenanceRange TokenSequence::GetTokenProvenanceRange(
337 std::size_t token, std::size_t offset) const {
338 ProvenanceRange range{provenances_.Map(start_[token] + offset)};
339 return range.Prefix(TokenBytes(token) - offset);
340}
341
342ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
343 std::size_t token, std::size_t tokens) const {
344 if (tokens == 0) {
345 return {};
346 }
347 ProvenanceRange range{provenances_.Map(start_[token])};
348 while (--tokens > 0 &&
349 range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) {
350 }
351 return range;
352}
353
// Provenance range of the entire sequence (all tokens).
ProvenanceRange TokenSequence::GetProvenanceRange() const {
  return GetIntervalProvenanceRange(0, start_.size());
}
357
// Reports an error for each token whose first non-blank character is
// not valid in Fortran source, excusing compiler directive sentinels
// and (in preprocessing-only mode) '!' comments and '&' continuations.
const TokenSequence &TokenSequence::CheckBadFortranCharacters(
    Messages &messages, const Prescanner &prescanner,
    bool preprocessingOnly) const {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{TokenAt(j)};
    char ch{token.FirstNonBlank()};
    if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) {
      if (ch == '!') {
        if (prescanner.IsCompilerDirectiveSentinel(token)) {
          // The whole token is a directive sentinel: allowed.
          continue;
        } else if (j + 1 < tokens &&
            prescanner.IsCompilerDirectiveSentinel(
                TokenAt(j + 1))) { // !dir$, &c.
          // '!' followed by a sentinel token: skip both.
          ++j;
          continue;
        } else if (preprocessingOnly) {
          // Comments are harmless when only preprocessing.
          continue;
        }
      } else if (ch == '&' && preprocessingOnly) {
        // Free-form continuation is tolerated when only preprocessing.
        continue;
      }
      if (ch < ' ' || ch >= '\x7f') {
        // Unprintable: show the byte value in hex.
        messages.Say(GetTokenProvenanceRange(j),
            "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
      } else {
        messages.Say(GetTokenProvenanceRange(j),
            "bad character ('%c') in Fortran token"_err_en_US, ch);
      }
    }
  }
  return *this;
}
391
392bool TokenSequence::BadlyNestedParentheses() const {
393 int nesting{0};
394 std::size_t tokens{SizeInTokens()};
395 for (std::size_t j{0}; j < tokens; ++j) {
396 CharBlock token{TokenAt(j)};
397 char ch{token.OnlyNonBlank()};
398 if (ch == '(') {
399 ++nesting;
400 } else if (ch == ')') {
401 if (nesting-- == 0) {
402 break;
403 }
404 }
405 }
406 return nesting != 0;
407}
408
409const TokenSequence &TokenSequence::CheckBadParentheses(
410 Messages &messages) const {
411 if (BadlyNestedParentheses()) {
412 // There's an error; diagnose it
413 std::size_t tokens{SizeInTokens()};
414 std::vector<std::size_t> stack;
415 for (std::size_t j{0}; j < tokens; ++j) {
416 CharBlock token{TokenAt(j)};
417 char ch{token.OnlyNonBlank()};
418 if (ch == '(') {
419 stack.push_back(j);
420 } else if (ch == ')') {
421 if (stack.empty()) {
422 messages.Say(GetTokenProvenanceRange(j), "Unmatched ')'"_err_en_US);
423 return *this;
424 }
425 stack.pop_back();
426 }
427 }
428 CHECK(!stack.empty());
429 messages.Say(
430 GetTokenProvenanceRange(stack.back()), "Unmatched '('"_err_en_US);
431 }
432 return *this;
433}
434} // namespace Fortran::parser
// (Removed trailing web-viewer residue — "Provided by KDAB", privacy-policy
// links, and pasted line numbers — that was not part of the original
// flang/lib/Parser/token-sequence.cpp source.)