1//===-- lib/Parser/token-sequence.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "flang/Parser/token-sequence.h"
10
11#include "prescan.h"
12#include "flang/Parser/characters.h"
13#include "flang/Parser/message.h"
14#include "llvm/Support/raw_ostream.h"
15
16namespace Fortran::parser {
17
TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
  // Discard current contents, then take "that"'s state via swap;
  // "that" is left holding the just-cleared (empty) state.
  clear();
  swap(that);
  return *this;
}
23
24void TokenSequence::clear() {
25 start_.clear();
26 nextStart_ = 0;
27 char_.clear();
28 provenances_.clear();
29}
30
31void TokenSequence::pop_back() {
32 CHECK(!start_.empty());
33 // If the last token is empty then `nextStart_ == start_.back()`.
34 CHECK(nextStart_ >= start_.back());
35 std::size_t bytes{nextStart_ - start_.back()};
36 nextStart_ = start_.back();
37 start_.pop_back();
38 char_.resize(nextStart_);
39 provenances_.RemoveLastBytes(bytes);
40}
41
42void TokenSequence::shrink_to_fit() {
43 start_.shrink_to_fit();
44 char_.shrink_to_fit();
45 provenances_.shrink_to_fit();
46}
47
48void TokenSequence::swap(TokenSequence &that) {
49 start_.swap(that.start_);
50 std::swap(nextStart_, that.nextStart_);
51 char_.swap(that.char_);
52 provenances_.swap(that.provenances_);
53}
54
55std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
56 std::size_t tokens{start_.size()};
57 for (; at < tokens; ++at) {
58 if (!TokenAt(at).IsBlank()) {
59 return at;
60 }
61 }
62 return tokens; // even if at > tokens
63}
64
65std::optional<std::size_t> TokenSequence::SkipBlanksBackwards(
66 std::size_t at) const {
67 while (at-- > 0) {
68 if (!TokenAt(at).IsBlank()) {
69 return at;
70 }
71 }
72 return std::nullopt;
73}
74
75// C-style /*comments*/ are removed from preprocessing directive
76// token sequences by the prescanner, but not C++ or Fortran
77// free-form line-ending comments (//... and !...) because
78// ignoring them is directive-specific.
79bool TokenSequence::IsAnythingLeft(std::size_t at) const {
80 std::size_t tokens{start_.size()};
81 for (; at < tokens; ++at) {
82 auto tok{TokenAt(at)};
83 const char *end{tok.end()};
84 for (const char *p{tok.begin()}; p < end; ++p) {
85 switch (*p) {
86 case '/':
87 return p + 1 >= end || p[1] != '/';
88 case '!':
89 return false;
90 case ' ':
91 break;
92 default:
93 return true;
94 }
95 }
96 }
97 return false;
98}
99
100void TokenSequence::CopyAll(const TokenSequence &that) {
101 if (nextStart_ < char_.size()) {
102 start_.push_back(nextStart_);
103 }
104 int offset = char_.size();
105 for (int st : that.start_) {
106 start_.push_back(st + offset);
107 }
108 char_.insert(char_.end(), that.char_.begin(), that.char_.end());
109 nextStart_ = char_.size();
110 provenances_.Put(that.provenances_);
111}
112
113void TokenSequence::CopyWithProvenance(
114 const TokenSequence &that, ProvenanceRange range) {
115 std::size_t offset{0};
116 std::size_t tokens{that.SizeInTokens()};
117 for (std::size_t j{0}; j < tokens; ++j) {
118 CharBlock tok{that.TokenAt(j)};
119 Put(tok, range.OffsetMember(offset));
120 offset += tok.size();
121 }
122 CHECK(offset == range.size());
123}
124
// Append "tokens" tokens from "that", starting at token index "at",
// reconstructing each character's provenance from "that"'s map.
void TokenSequence::AppendRange(
    const TokenSequence &that, std::size_t at, std::size_t tokens) {
  ProvenanceRange provenance;
  std::size_t offset{0};
  for (; tokens-- > 0; ++at) {
    CharBlock tok{that.TokenAt(at)};
    std::size_t tokBytes{tok.size()};
    for (std::size_t j{0}; j < tokBytes; ++j) {
      // When the current contiguous provenance range is exhausted, map
      // this character's position to the next range in "that".
      if (offset == provenance.size()) {
        provenance = that.provenances_.Map(that.start_[at] + j);
        offset = 0;
      }
      PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
    }
    CloseToken();
  }
}
142
143void TokenSequence::Put(
144 const char *s, std::size_t bytes, Provenance provenance) {
145 for (std::size_t j{0}; j < bytes; ++j) {
146 PutNextTokenChar(s[j], provenance + j);
147 }
148 CloseToken();
149}
150
151void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
152 // Avoid t[0] if t is empty: it would create a reference to nullptr,
153 // which is UB.
154 const char *addr{t.size() ? &t[0] : nullptr};
155 Put(addr, t.size(), provenance);
156}
157
158void TokenSequence::Put(const std::string &s, Provenance provenance) {
159 Put(s.data(), s.size(), provenance);
160}
161
162void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
163 Put(ss.str(), provenance);
164}
165
// Fold letters to lower case in place, token by token, while leaving
// the bodies of character/Hollerith literals alone.
TokenSequence &TokenSequence::ToLowerCase() {
  std::size_t tokens{start_.size()};
  std::size_t chars{char_.size()};
  std::size_t atToken{0};
  for (std::size_t j{0}; j < chars;) {
    // [j, nextStart) delimits the characters of the current token.
    std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
    char *p{&char_[j]};
    char const *limit{char_.data() + nextStart};
    const char *lastChar{limit - 1};
    j = nextStart;
    // Skip leading whitespaces
    while (p < limit - 1 && *p == ' ') {
      ++p;
    }
    // Find last non-whitespace char
    while (lastChar > p + 1 && *lastChar == ' ') {
      --lastChar;
    }
    if (IsDecimalDigit(*p)) {
      // Token begins with digits: numeric literal, Hollerith constant,
      // or kind-prefixed character literal.
      while (p < limit && IsDecimalDigit(*p)) {
        ++p;
      }
      if (p >= limit) {
        // All digits: nothing to fold.
      } else if (*p == 'h' || *p == 'H') {
        // Hollerith
        *p = 'h';
      } else if (*p == '_' && p + 1 < limit && (p[1] == '"' || p[1] == '\'')) {
        // kind-prefixed character literal (e.g., 1_"ABC")
      } else {
        // exponent
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else if (*lastChar == '\'' || *lastChar == '"') {
      // Token ends with a quote: some form of character literal.
      if (*p == *lastChar) {
        // Character literal without prefix
      } else if (p[1] == *lastChar) {
        // BOZX-prefixed constant
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      } else {
        // Literal with kind-param prefix name (e.g., K_"ABC").
        for (; *p != *lastChar; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else {
      // Ordinary token (name, keyword, operator): fold all of it.
      for (; p < limit; ++p) {
        *p = ToLowerCaseLetter(*p);
      }
    }
  }
  return *this;
}
222
223bool TokenSequence::HasBlanks(std::size_t firstChar) const {
224 std::size_t tokens{SizeInTokens()};
225 for (std::size_t j{0}; j < tokens; ++j) {
226 if (start_[j] >= firstChar && TokenAt(j).IsBlank()) {
227 return true;
228 }
229 }
230 return false;
231}
232
233bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const {
234 std::size_t tokens{SizeInTokens()};
235 bool lastWasBlank{false};
236 for (std::size_t j{0}; j < tokens; ++j) {
237 bool isBlank{TokenAt(j).IsBlank()};
238 if (isBlank && lastWasBlank && start_[j] >= firstChar) {
239 return true;
240 }
241 lastWasBlank = isBlank;
242 }
243 return false;
244}
245
246TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) {
247 std::size_t tokens{SizeInTokens()};
248 TokenSequence result;
249 for (std::size_t j{0}; j < tokens; ++j) {
250 if (!TokenAt(j).IsBlank() || start_[j] < firstChar) {
251 result.AppendRange(*this, j);
252 }
253 }
254 swap(result);
255 return *this;
256}
257
258TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) {
259 std::size_t tokens{SizeInTokens()};
260 TokenSequence result;
261 bool lastWasBlank{false};
262 for (std::size_t j{0}; j < tokens; ++j) {
263 bool isBlank{TokenAt(j).IsBlank()};
264 if (!isBlank || !lastWasBlank || start_[j] < firstChar) {
265 result.AppendRange(*this, j);
266 }
267 lastWasBlank = isBlank;
268 }
269 swap(result);
270 return *this;
271}
272
// Truncate the sequence at a trailing '!' comment, unless the comment
// is an active compiler directive sentinel (e.g. "!dir$").  When
// skipFirst is set, the first '!' comment encountered is retained.
TokenSequence &TokenSequence::ClipComment(
    const Prescanner &prescanner, bool skipFirst) {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock tok{TokenAt(j)};
    if (std::size_t blanks{tok.CountLeadingBlanks()};
        blanks < tok.size() && tok[blanks] == '!') {
      // Retain active compiler directive sentinels (e.g. "!dir$")
      // Extend tok over immediately adjacent tokens until it is long
      // enough to hold '!' plus a candidate sentinel name.
      for (std::size_t k{j + 1}; k < tokens && tok.size() <= blanks + 5; ++k) {
        if (tok.begin() + tok.size() == TokenAt(k).begin()) {
          tok.ExtendToCover(TokenAt(k));
        } else {
          break;
        }
      }
      bool isSentinel{false};
      if (tok.size() > blanks + 5) {
        isSentinel = prescanner.IsCompilerDirectiveSentinel(&tok[blanks + 1])
                         .has_value();
      }
      if (isSentinel) {
        // Keep the directive; continue scanning.
      } else if (skipFirst) {
        skipFirst = false;
      } else {
        // NOTE(review): AppendRange(…, 0, j - 1) copies j-1 tokens
        // (indices 0..j-2), so the token immediately preceding the '!'
        // is dropped along with the comment — presumably trailing
        // blank(s); confirm that is the intended behavior.
        TokenSequence result;
        if (j > 0) {
          result.AppendRange(*this, 0, j - 1);
        }
        swap(result);
        return *this;
      }
    }
  }
  return *this;
}
308
309void TokenSequence::Emit(CookedSource &cooked) const {
310 if (auto n{char_.size()}) {
311 cooked.Put(&char_[0], n);
312 cooked.PutProvenanceMappings(provenances_);
313 }
314}
315
316llvm::raw_ostream &TokenSequence::Dump(llvm::raw_ostream &o) const {
317 o << "TokenSequence has " << char_.size() << " chars; nextStart_ "
318 << nextStart_ << '\n';
319 for (std::size_t j{0}; j < start_.size(); ++j) {
320 o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString()
321 << "'\n";
322 }
323 provenances_.Dump(o << "provenances_:\n");
324 return o;
325}
326
327Provenance TokenSequence::GetCharProvenance(std::size_t offset) const {
328 ProvenanceRange range{provenances_.Map(offset)};
329 return range.start();
330}
331
332Provenance TokenSequence::GetTokenProvenance(
333 std::size_t token, std::size_t offset) const {
334 return GetCharProvenance(start_[token] + offset);
335}
336
337ProvenanceRange TokenSequence::GetTokenProvenanceRange(
338 std::size_t token, std::size_t offset) const {
339 ProvenanceRange range{provenances_.Map(start_[token] + offset)};
340 return range.Prefix(TokenBytes(token) - offset);
341}
342
// Merge the provenances of "tokens" consecutive tokens starting at
// "token" into one range, stopping early at the first token whose
// provenance does not directly follow the range accumulated so far.
ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
    std::size_t token, std::size_t tokens) const {
  if (tokens == 0) {
    return {};
  }
  ProvenanceRange range{provenances_.Map(start_[token])};
  // Note: ++token in the condition advances past each annexed token;
  // the loop body is intentionally empty.
  while (--tokens > 0 &&
      range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) {
  }
  return range;
}
354
355ProvenanceRange TokenSequence::GetProvenanceRange() const {
356 return GetIntervalProvenanceRange(0, start_.size());
357}
358
// Emit an error for each token that begins with a character that is
// not valid in Fortran, allowing for compiler directive sentinels and,
// in preprocessing-only mode, '!' comments and '&' continuations.
const TokenSequence &TokenSequence::CheckBadFortranCharacters(
    Messages &messages, const Prescanner &prescanner,
    bool preprocessingOnly) const {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{TokenAt(j)};
    char ch{token.FirstNonBlank()};
    if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) {
      if (ch == '!') {
        if (prescanner.IsCompilerDirectiveSentinel(token)) {
          // Whole token is a directive sentinel: not an error.
          continue;
        } else if (j + 1 < tokens &&
            prescanner.IsCompilerDirectiveSentinel(
                TokenAt(j + 1))) { // !dir$, &c.
          // '!' followed by a sentinel token: skip both tokens.
          ++j;
          continue;
        } else if (preprocessingOnly) {
          continue;
        }
      } else if (ch == '&' && preprocessingOnly) {
        // Free-form continuation marker is acceptable here.
        continue;
      }
      if (ch < ' ' || ch >= '\x7f') {
        // Unprintable character: report it as a hexadecimal code.
        messages.Say(GetTokenProvenanceRange(j),
            "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
      } else {
        messages.Say(GetTokenProvenanceRange(j),
            "bad character ('%c') in Fortran token"_err_en_US, ch);
      }
    }
  }
  return *this;
}
392
393bool TokenSequence::BadlyNestedParentheses() const {
394 int nesting{0};
395 std::size_t tokens{SizeInTokens()};
396 for (std::size_t j{0}; j < tokens; ++j) {
397 CharBlock token{TokenAt(j)};
398 char ch{token.OnlyNonBlank()};
399 if (ch == '(') {
400 ++nesting;
401 } else if (ch == ')') {
402 if (nesting-- == 0) {
403 break;
404 }
405 }
406 }
407 return nesting != 0;
408}
409
410const TokenSequence &TokenSequence::CheckBadParentheses(
411 Messages &messages) const {
412 if (BadlyNestedParentheses()) {
413 // There's an error; diagnose it
414 std::size_t tokens{SizeInTokens()};
415 std::vector<std::size_t> stack;
416 for (std::size_t j{0}; j < tokens; ++j) {
417 CharBlock token{TokenAt(j)};
418 char ch{token.OnlyNonBlank()};
419 if (ch == '(') {
420 stack.push_back(j);
421 } else if (ch == ')') {
422 if (stack.empty()) {
423 messages.Say(GetTokenProvenanceRange(j), "Unmatched ')'"_err_en_US);
424 return *this;
425 }
426 stack.pop_back();
427 }
428 }
429 CHECK(!stack.empty());
430 messages.Say(
431 GetTokenProvenanceRange(stack.back()), "Unmatched '('"_err_en_US);
432 }
433 return *this;
434}
435} // namespace Fortran::parser
436

source code of flang/lib/Parser/token-sequence.cpp