1//===-- lib/Parser/token-sequence.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "flang/Parser/token-sequence.h"
10
11#include "prescan.h"
12#include "flang/Parser/characters.h"
13#include "flang/Parser/message.h"
14#include "llvm/Support/raw_ostream.h"
15
16namespace Fortran::parser {
17
// Move assignment: discards this sequence's current contents, then steals
// "that"'s storage via swap; "that" is left in the cleared (empty) state.
TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
  clear();
  swap(that);
  return *this;
}
23
24void TokenSequence::clear() {
25 start_.clear();
26 nextStart_ = 0;
27 char_.clear();
28 provenances_.clear();
29}
30
// Removes the final token, discarding its characters and the matching
// number of provenance bytes.
void TokenSequence::pop_back() {
  CHECK(!start_.empty());
  CHECK(nextStart_ > start_.back());
  // Number of characters belonging to the last token.
  std::size_t bytes{nextStart_ - start_.back()};
  nextStart_ = start_.back();
  start_.pop_back();
  char_.resize(nextStart_);
  provenances_.RemoveLastBytes(bytes);
}
40
41void TokenSequence::shrink_to_fit() {
42 start_.shrink_to_fit();
43 char_.shrink_to_fit();
44 provenances_.shrink_to_fit();
45}
46
47void TokenSequence::swap(TokenSequence &that) {
48 start_.swap(that.start_);
49 std::swap(nextStart_, that.nextStart_);
50 char_.swap(that.char_);
51 provenances_.swap(that.provenances_);
52}
53
54std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
55 std::size_t tokens{start_.size()};
56 for (; at < tokens; ++at) {
57 if (!TokenAt(at).IsBlank()) {
58 return at;
59 }
60 }
61 return tokens; // even if at > tokens
62}
63
64// C-style /*comments*/ are removed from preprocessing directive
65// token sequences by the prescanner, but not C++ or Fortran
66// free-form line-ending comments (//... and !...) because
67// ignoring them is directive-specific.
68bool TokenSequence::IsAnythingLeft(std::size_t at) const {
69 std::size_t tokens{start_.size()};
70 for (; at < tokens; ++at) {
71 auto tok{TokenAt(at)};
72 const char *end{tok.end()};
73 for (const char *p{tok.begin()}; p < end; ++p) {
74 switch (*p) {
75 case '/':
76 return p + 1 >= end || p[1] != '/';
77 case '!':
78 return false;
79 case ' ':
80 break;
81 default:
82 return true;
83 }
84 }
85 }
86 return false;
87}
88
89void TokenSequence::Put(const TokenSequence &that) {
90 if (nextStart_ < char_.size()) {
91 start_.push_back(nextStart_);
92 }
93 int offset = char_.size();
94 for (int st : that.start_) {
95 start_.push_back(st + offset);
96 }
97 char_.insert(char_.end(), that.char_.begin(), that.char_.end());
98 nextStart_ = char_.size();
99 provenances_.Put(that.provenances_);
100}
101
102void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
103 std::size_t offset{0};
104 std::size_t tokens{that.SizeInTokens()};
105 for (std::size_t j{0}; j < tokens; ++j) {
106 CharBlock tok{that.TokenAt(j)};
107 Put(tok, range.OffsetMember(offset));
108 offset += tok.size();
109 }
110 CHECK(offset == range.size());
111}
112
// Appends "tokens" tokens of "that", starting at token index "at",
// copying each character together with the provenance recorded for it
// in "that".
void TokenSequence::Put(
    const TokenSequence &that, std::size_t at, std::size_t tokens) {
  ProvenanceRange provenance;
  std::size_t offset{0};
  for (; tokens-- > 0; ++at) {
    CharBlock tok{that.TokenAt(at)};
    std::size_t tokBytes{tok.size()};
    for (std::size_t j{0}; j < tokBytes; ++j) {
      // Map a fresh contiguous provenance range whenever the current
      // one is exhausted; characters within one mapped range are
      // consecutive members of it.
      if (offset == provenance.size()) {
        provenance = that.provenances_.Map(that.start_[at] + j);
        offset = 0;
      }
      PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
    }
    CloseToken();
  }
}
130
131void TokenSequence::Put(
132 const char *s, std::size_t bytes, Provenance provenance) {
133 for (std::size_t j{0}; j < bytes; ++j) {
134 PutNextTokenChar(s[j], provenance + j);
135 }
136 CloseToken();
137}
138
139void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
140 // Avoid t[0] if t is empty: it would create a reference to nullptr,
141 // which is UB.
142 const char *addr{t.size() ? &t[0] : nullptr};
143 Put(addr, t.size(), provenance);
144}
145
146void TokenSequence::Put(const std::string &s, Provenance provenance) {
147 Put(s.data(), s.size(), provenance);
148}
149
// Appends the accumulated contents of a raw_string_ostream as one token.
void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
  Put(ss.str(), provenance);
}
153
// Lowers the case of each token in place, while preserving the case of
// the quoted payload of character literals and of Hollerith constants.
TokenSequence &TokenSequence::ToLowerCase() {
  std::size_t tokens{start_.size()};
  std::size_t chars{char_.size()};
  std::size_t atToken{0};
  for (std::size_t j{0}; j < chars;) {
    // [j, nextStart) are the bytes of the current token.
    std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
    char *p{&char_[j]};
    char const *limit{char_.data() + nextStart};
    const char *lastChar{limit - 1};
    j = nextStart;
    // Skip leading whitespaces
    while (p < limit - 1 && *p == ' ') {
      ++p;
    }
    // Find last non-whitespace char
    while (lastChar > p + 1 && *lastChar == ' ') {
      --lastChar;
    }
    if (IsDecimalDigit(*p)) {
      // Token begins with digits: integer/real literal, Hollerith, or
      // kind-prefixed character literal.
      while (p < limit && IsDecimalDigit(*p)) {
        ++p;
      }
      if (p >= limit) {
        // all digits -- nothing to lower
      } else if (*p == 'h' || *p == 'H') {
        // Hollerith
        *p = 'h';
      } else if (*p == '_') {
        // kind-prefixed character literal (e.g., 1_"ABC")
      } else {
        // exponent
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else if (*lastChar == '\'' || *lastChar == '"') {
      // Token ends with a quote: some form of character literal.
      if (*p == *lastChar) {
        // Character literal without prefix
      } else if (p[1] == *lastChar) {
        // BOZX-prefixed constant
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      } else {
        // Literal with kind-param prefix name (e.g., K_"ABC").
        // Lower only the prefix; stop at the opening quote.
        for (; *p != *lastChar; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else {
      // Ordinary token: lower it wholesale.
      for (; p < limit; ++p) {
        *p = ToLowerCaseLetter(*p);
      }
    }
  }
  return *this;
}
210
211bool TokenSequence::HasBlanks(std::size_t firstChar) const {
212 std::size_t tokens{SizeInTokens()};
213 for (std::size_t j{0}; j < tokens; ++j) {
214 if (start_[j] >= firstChar && TokenAt(j).IsBlank()) {
215 return true;
216 }
217 }
218 return false;
219}
220
221bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const {
222 std::size_t tokens{SizeInTokens()};
223 bool lastWasBlank{false};
224 for (std::size_t j{0}; j < tokens; ++j) {
225 bool isBlank{TokenAt(j).IsBlank()};
226 if (isBlank && lastWasBlank && start_[j] >= firstChar) {
227 return true;
228 }
229 lastWasBlank = isBlank;
230 }
231 return false;
232}
233
234TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) {
235 std::size_t tokens{SizeInTokens()};
236 TokenSequence result;
237 for (std::size_t j{0}; j < tokens; ++j) {
238 if (!TokenAt(j).IsBlank() || start_[j] < firstChar) {
239 result.Put(*this, j);
240 }
241 }
242 swap(result);
243 return *this;
244}
245
246TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) {
247 std::size_t tokens{SizeInTokens()};
248 TokenSequence result;
249 bool lastWasBlank{false};
250 for (std::size_t j{0}; j < tokens; ++j) {
251 bool isBlank{TokenAt(j).IsBlank()};
252 if (!isBlank || !lastWasBlank || start_[j] < firstChar) {
253 result.Put(*this, j);
254 }
255 lastWasBlank = isBlank;
256 }
257 swap(result);
258 return *this;
259}
260
// Truncates the sequence at a "!" comment, unless the comment is a
// compiler directive sentinel (e.g. "!dir$").  When skipFirst is set,
// the first comment encountered is retained.
TokenSequence &TokenSequence::ClipComment(
    const Prescanner &prescanner, bool skipFirst) {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock tok{TokenAt(j)};
    if (std::size_t blanks{tok.CountLeadingBlanks()};
        blanks < tok.size() && tok[blanks] == '!') {
      // Retain active compiler directive sentinels (e.g. "!dir$")
      // Glue immediately-adjacent tokens onto tok so a five-character
      // sentinel that was split across tokens can still be recognized.
      for (std::size_t k{j + 1}; k < tokens && tok.size() < blanks + 5; ++k) {
        if (tok.begin() + tok.size() == TokenAt(k).begin()) {
          tok.ExtendToCover(TokenAt(k));
        } else {
          break;
        }
      }
      bool isSentinel{false};
      if (tok.size() == blanks + 5) {
        // Lower-case the four characters after '!' and test them.
        char sentinel[4];
        for (int k{0}; k < 4; ++k) {
          sentinel[k] = ToLowerCaseLetter(tok[blanks + k + 1]);
        }
        isSentinel = prescanner.IsCompilerDirectiveSentinel(sentinel, 4);
      }
      if (isSentinel) {
        // Keep directive sentinels in the sequence.
      } else if (skipFirst) {
        skipFirst = false;
      } else {
        // Truncate just before the comment token.
        // NOTE(review): Put(*this, 0, j - 1) copies only the first j-1
        // tokens, dropping the token immediately preceding the comment;
        // if all preceding tokens were meant to be kept the count would
        // be "j" -- confirm intent against callers before changing.
        TokenSequence result;
        if (j > 0) {
          result.Put(*this, 0, j - 1);
        }
        swap(result);
        return *this;
      }
    }
  }
  return *this;
}
299
300void TokenSequence::Emit(CookedSource &cooked) const {
301 if (auto n{char_.size()}) {
302 cooked.Put(&char_[0], n);
303 cooked.PutProvenanceMappings(provenances_);
304 }
305}
306
307llvm::raw_ostream &TokenSequence::Dump(llvm::raw_ostream &o) const {
308 o << "TokenSequence has " << char_.size() << " chars; nextStart_ "
309 << nextStart_ << '\n';
310 for (std::size_t j{0}; j < start_.size(); ++j) {
311 o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString()
312 << "'\n";
313 }
314 return o;
315}
316
317Provenance TokenSequence::GetCharProvenance(std::size_t offset) const {
318 ProvenanceRange range{provenances_.Map(offset)};
319 return range.start();
320}
321
322Provenance TokenSequence::GetTokenProvenance(
323 std::size_t token, std::size_t offset) const {
324 return GetCharProvenance(start_[token] + offset);
325}
326
327ProvenanceRange TokenSequence::GetTokenProvenanceRange(
328 std::size_t token, std::size_t offset) const {
329 ProvenanceRange range{provenances_.Map(start_[token] + offset)};
330 return range.Prefix(TokenBytes(token) - offset);
331}
332
333ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
334 std::size_t token, std::size_t tokens) const {
335 if (tokens == 0) {
336 return {};
337 }
338 ProvenanceRange range{provenances_.Map(start_[token])};
339 while (--tokens > 0 &&
340 range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) {
341 }
342 return range;
343}
344
// Returns the (contiguous-prefix) provenance range of the whole sequence.
ProvenanceRange TokenSequence::GetProvenanceRange() const {
  return GetIntervalProvenanceRange(0, start_.size());
}
348
// Emits an error for each token whose first non-blank character is not
// valid in Fortran source, except for compiler directive sentinels.
const TokenSequence &TokenSequence::CheckBadFortranCharacters(
    Messages &messages, const Prescanner &prescanner) const {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{TokenAt(j)};
    char ch{token.FirstNonBlank()};
    if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) {
      if (ch == '!') {
        if (prescanner.IsCompilerDirectiveSentinel(token)) {
          continue; // the token itself is a directive sentinel
        } else if (j + 1 < tokens &&
            prescanner.IsCompilerDirectiveSentinel(
                TokenAt(j + 1))) { // !dir$, &c.
          ++j; // also skip the sentinel token that follows the '!'
          continue;
        }
      }
      if (ch < ' ' || ch >= '\x7f') {
        // Non-printable character: report it as a hex byte value.
        messages.Say(GetTokenProvenanceRange(j),
            "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
      } else {
        messages.Say(GetTokenProvenanceRange(j),
            "bad character ('%c') in Fortran token"_err_en_US, ch);
      }
    }
  }
  return *this;
}
377
// Checks that parentheses balance across the sequence, emitting an error
// that points at the offending parenthesis when they do not.
const TokenSequence &TokenSequence::CheckBadParentheses(
    Messages &messages) const {
  // First, a quick pass with no allocation for the common case
  int nesting{0};
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{TokenAt(j)};
    char ch{token.OnlyNonBlank()};
    if (ch == '(') {
      ++nesting;
    } else if (ch == ')') {
      // An unmatched ')' drives nesting negative; stop scanning.
      if (nesting-- == 0) {
        break;
      }
    }
  }
  if (nesting != 0) {
    // There's an error; diagnose it
    // Second pass tracks the positions of open parentheses so that the
    // message can point at the exact unmatched one.
    std::vector<std::size_t> stack;
    for (std::size_t j{0}; j < tokens; ++j) {
      CharBlock token{TokenAt(j)};
      char ch{token.OnlyNonBlank()};
      if (ch == '(') {
        stack.push_back(j);
      } else if (ch == ')') {
        if (stack.empty()) {
          messages.Say(GetTokenProvenanceRange(j), "Unmatched ')'"_err_en_US);
          return *this;
        }
        stack.pop_back();
      }
    }
    // nesting > 0 here, so at least one '(' must remain unmatched.
    CHECK(!stack.empty());
    messages.Say(
        GetTokenProvenanceRange(stack.back()), "Unmatched '('"_err_en_US);
  }
  return *this;
}
416} // namespace Fortran::parser
417

// source code of flang/lib/Parser/token-sequence.cpp