| 1 | //===-- lib/Parser/token-sequence.cpp -------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "flang/Parser/token-sequence.h" |
| 10 | |
| 11 | #include "prescan.h" |
| 12 | #include "flang/Parser/characters.h" |
| 13 | #include "flang/Parser/message.h" |
| 14 | #include "llvm/Support/raw_ostream.h" |
| 15 | |
| 16 | namespace Fortran::parser { |
| 17 | |
// Move assignment: discard this sequence's current contents, then take
// over 'that' by swapping state into the now-empty *this.  (A self-move
// therefore leaves the sequence empty, which is valid-but-unspecified.)
TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
  clear();
  swap(that);
  return *this;
}
| 23 | |
// Reset to an empty sequence.  Per the vector::clear contract, the
// underlying capacity is not necessarily released (see shrink_to_fit).
void TokenSequence::clear() {
  start_.clear(); // character offsets at which each token begins
  nextStart_ = 0; // offset at which the next token's characters begin
  char_.clear(); // the token characters themselves
  provenances_.clear(); // origin mapping for each character
}
| 30 | |
| 31 | void TokenSequence::pop_back() { |
| 32 | CHECK(!start_.empty()); |
| 33 | // If the last token is empty then `nextStart_ == start_.back()`. |
| 34 | CHECK(nextStart_ >= start_.back()); |
| 35 | std::size_t bytes{nextStart_ - start_.back()}; |
| 36 | nextStart_ = start_.back(); |
| 37 | start_.pop_back(); |
| 38 | char_.resize(nextStart_); |
| 39 | provenances_.RemoveLastBytes(bytes); |
| 40 | } |
| 41 | |
// Ask each underlying container to release any excess capacity.
void TokenSequence::shrink_to_fit() {
  start_.shrink_to_fit();
  char_.shrink_to_fit();
  provenances_.shrink_to_fit();
}
| 47 | |
// Exchange all state with 'that'; used by move assignment and by the
// rebuild-then-swap editing pattern elsewhere in this file.
void TokenSequence::swap(TokenSequence &that) {
  start_.swap(that.start_);
  std::swap(nextStart_, that.nextStart_);
  char_.swap(that.char_);
  provenances_.swap(that.provenances_);
}
| 54 | |
| 55 | std::size_t TokenSequence::SkipBlanks(std::size_t at) const { |
| 56 | std::size_t tokens{start_.size()}; |
| 57 | for (; at < tokens; ++at) { |
| 58 | if (!TokenAt(at).IsBlank()) { |
| 59 | return at; |
| 60 | } |
| 61 | } |
| 62 | return tokens; // even if at > tokens |
| 63 | } |
| 64 | |
| 65 | std::optional<std::size_t> TokenSequence::SkipBlanksBackwards( |
| 66 | std::size_t at) const { |
| 67 | while (at-- > 0) { |
| 68 | if (!TokenAt(at).IsBlank()) { |
| 69 | return at; |
| 70 | } |
| 71 | } |
| 72 | return std::nullopt; |
| 73 | } |
| 74 | |
| 75 | // C-style /*comments*/ are removed from preprocessing directive |
| 76 | // token sequences by the prescanner, but not C++ or Fortran |
| 77 | // free-form line-ending comments (//... and !...) because |
| 78 | // ignoring them is directive-specific. |
| 79 | bool TokenSequence::IsAnythingLeft(std::size_t at) const { |
| 80 | std::size_t tokens{start_.size()}; |
| 81 | for (; at < tokens; ++at) { |
| 82 | auto tok{TokenAt(at)}; |
| 83 | const char *end{tok.end()}; |
| 84 | for (const char *p{tok.begin()}; p < end; ++p) { |
| 85 | switch (*p) { |
| 86 | case '/': |
| 87 | return p + 1 >= end || p[1] != '/'; |
| 88 | case '!': |
| 89 | return false; |
| 90 | case ' ': |
| 91 | break; |
| 92 | default: |
| 93 | return true; |
| 94 | } |
| 95 | } |
| 96 | } |
| 97 | return false; |
| 98 | } |
| 99 | |
| 100 | void TokenSequence::CopyAll(const TokenSequence &that) { |
| 101 | if (nextStart_ < char_.size()) { |
| 102 | start_.push_back(nextStart_); |
| 103 | } |
| 104 | int offset = char_.size(); |
| 105 | for (int st : that.start_) { |
| 106 | start_.push_back(st + offset); |
| 107 | } |
| 108 | char_.insert(char_.end(), that.char_.begin(), that.char_.end()); |
| 109 | nextStart_ = char_.size(); |
| 110 | provenances_.Put(that.provenances_); |
| 111 | } |
| 112 | |
| 113 | void TokenSequence::CopyWithProvenance( |
| 114 | const TokenSequence &that, ProvenanceRange range) { |
| 115 | std::size_t offset{0}; |
| 116 | std::size_t tokens{that.SizeInTokens()}; |
| 117 | for (std::size_t j{0}; j < tokens; ++j) { |
| 118 | CharBlock tok{that.TokenAt(j)}; |
| 119 | Put(tok, range.OffsetMember(offset)); |
| 120 | offset += tok.size(); |
| 121 | } |
| 122 | CHECK(offset == range.size()); |
| 123 | } |
| 124 | |
// Append 'tokens' tokens of 'that', starting at token index 'at',
// copying each character together with its original provenance.
void TokenSequence::AppendRange(
    const TokenSequence &that, std::size_t at, std::size_t tokens) {
  ProvenanceRange provenance;
  std::size_t offset{0};
  for (; tokens-- > 0; ++at) {
    CharBlock tok{that.TokenAt(at)};
    std::size_t tokBytes{tok.size()};
    for (std::size_t j{0}; j < tokBytes; ++j) {
      // 'provenance' caches one contiguous interval of the source map;
      // refill it whenever the cached interval has been fully consumed.
      if (offset == provenance.size()) {
        provenance = that.provenances_.Map(that.start_[at] + j);
        offset = 0;
      }
      PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
    }
    CloseToken(); // preserve the original token boundaries
  }
}
| 142 | |
| 143 | void TokenSequence::Put( |
| 144 | const char *s, std::size_t bytes, Provenance provenance) { |
| 145 | for (std::size_t j{0}; j < bytes; ++j) { |
| 146 | PutNextTokenChar(s[j], provenance + j); |
| 147 | } |
| 148 | CloseToken(); |
| 149 | } |
| 150 | |
| 151 | void TokenSequence::Put(const CharBlock &t, Provenance provenance) { |
| 152 | // Avoid t[0] if t is empty: it would create a reference to nullptr, |
| 153 | // which is UB. |
| 154 | const char *addr{t.size() ? &t[0] : nullptr}; |
| 155 | Put(addr, t.size(), provenance); |
| 156 | } |
| 157 | |
// Append the contents of a std::string as one token.
void TokenSequence::Put(const std::string &s, Provenance provenance) {
  Put(s.data(), s.size(), provenance);
}
| 161 | |
// Append the text accumulated in a raw_string_ostream as one token.
void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
  Put(ss.str(), provenance);
}
| 165 | |
// Lower-case the sequence in place, token by token, while preserving the
// case of character and Hollerith literal payloads.
TokenSequence &TokenSequence::ToLowerCase() {
  std::size_t tokens{start_.size()};
  std::size_t chars{char_.size()};
  std::size_t atToken{0};
  for (std::size_t j{0}; j < chars;) {
    // [j, nextStart) delimits the current token's characters.
    std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
    char *p{&char_[j]};
    char const *limit{char_.data() + nextStart};
    const char *lastChar{limit - 1};
    j = nextStart;
    // Skip leading whitespaces
    while (p < limit - 1 && *p == ' ') {
      ++p;
    }
    // Find last non-whitespace char
    while (lastChar > p + 1 && *lastChar == ' ') {
      --lastChar;
    }
    if (IsDecimalDigit(*p)) {
      // Token begins with digits: integer/real literal, Hollerith, or a
      // numeric-kind-prefixed character literal.
      while (p < limit && IsDecimalDigit(*p)) {
        ++p;
      }
      if (p >= limit) {
        // all digits: nothing to lower-case
      } else if (*p == 'h' || *p == 'H') {
        // Hollerith
        *p = 'h';
      } else if (*p == '_' && p + 1 < limit && (p[1] == '"' || p[1] == '\'')) {
        // kind-prefixed character literal (e.g., 1_"ABC")
      } else {
        // exponent
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else if (*lastChar == '\'' || *lastChar == '"') {
      // Token ends with a quote: some form of character/BOZ literal.
      if (*p == *lastChar) {
        // Character literal without prefix
      } else if (p[1] == *lastChar) {
        // BOZX-prefixed constant
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      } else {
        // Literal with kind-param prefix name (e.g., K_"ABC").
        // Lower-case only the prefix, stopping at the opening quote.
        for (; *p != *lastChar; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else {
      // Ordinary token (identifier, keyword, operator): lower it all.
      for (; p < limit; ++p) {
        *p = ToLowerCaseLetter(*p);
      }
    }
  }
  return *this;
}
| 222 | |
| 223 | bool TokenSequence::HasBlanks(std::size_t firstChar) const { |
| 224 | std::size_t tokens{SizeInTokens()}; |
| 225 | for (std::size_t j{0}; j < tokens; ++j) { |
| 226 | if (start_[j] >= firstChar && TokenAt(j).IsBlank()) { |
| 227 | return true; |
| 228 | } |
| 229 | } |
| 230 | return false; |
| 231 | } |
| 232 | |
| 233 | bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const { |
| 234 | std::size_t tokens{SizeInTokens()}; |
| 235 | bool lastWasBlank{false}; |
| 236 | for (std::size_t j{0}; j < tokens; ++j) { |
| 237 | bool isBlank{TokenAt(j).IsBlank()}; |
| 238 | if (isBlank && lastWasBlank && start_[j] >= firstChar) { |
| 239 | return true; |
| 240 | } |
| 241 | lastWasBlank = isBlank; |
| 242 | } |
| 243 | return false; |
| 244 | } |
| 245 | |
| 246 | TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) { |
| 247 | std::size_t tokens{SizeInTokens()}; |
| 248 | TokenSequence result; |
| 249 | for (std::size_t j{0}; j < tokens; ++j) { |
| 250 | if (!TokenAt(j).IsBlank() || start_[j] < firstChar) { |
| 251 | result.AppendRange(*this, j); |
| 252 | } |
| 253 | } |
| 254 | swap(result); |
| 255 | return *this; |
| 256 | } |
| 257 | |
| 258 | TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) { |
| 259 | std::size_t tokens{SizeInTokens()}; |
| 260 | TokenSequence result; |
| 261 | bool lastWasBlank{false}; |
| 262 | for (std::size_t j{0}; j < tokens; ++j) { |
| 263 | bool isBlank{TokenAt(j).IsBlank()}; |
| 264 | if (!isBlank || !lastWasBlank || start_[j] < firstChar) { |
| 265 | result.AppendRange(*this, j); |
| 266 | } |
| 267 | lastWasBlank = isBlank; |
| 268 | } |
| 269 | swap(result); |
| 270 | return *this; |
| 271 | } |
| 272 | |
// Truncate the sequence at a '!' comment, unless the comment is an
// active compiler directive sentinel (e.g. "!dir$").  When 'skipFirst'
// is set, the first non-sentinel comment encountered is retained.
TokenSequence &TokenSequence::ClipComment(
    const Prescanner &prescanner, bool skipFirst) {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock tok{TokenAt(j)};
    if (std::size_t blanks{tok.CountLeadingBlanks()};
        blanks < tok.size() && tok[blanks] == '!') {
      // Retain active compiler directive sentinels (e.g. "!dir$")
      // Extend 'tok' over immediately adjacent tokens so that a sentinel
      // split across several tokens can still be recognized.
      for (std::size_t k{j + 1}; k < tokens && tok.size() <= blanks + 5; ++k) {
        if (tok.begin() + tok.size() == TokenAt(k).begin()) {
          tok.ExtendToCover(TokenAt(k));
        } else {
          break;
        }
      }
      bool isSentinel{false};
      if (tok.size() > blanks + 5) {
        // Probe the text after the '!' for a recognized sentinel.
        isSentinel = prescanner.IsCompilerDirectiveSentinel(&tok[blanks + 1])
                         .has_value();
      }
      if (isSentinel) {
      } else if (skipFirst) {
        skipFirst = false;
      } else {
        // Truncate just before the comment.
        // NOTE(review): this keeps j-1 tokens (indices 0..j-2), so the
        // token immediately preceding the comment is dropped as well --
        // confirm that it is always a blank separator.
        TokenSequence result;
        if (j > 0) {
          result.AppendRange(*this, 0, j - 1);
        }
        swap(result);
        return *this;
      }
    }
  }
  return *this;
}
| 308 | |
| 309 | void TokenSequence::Emit(CookedSource &cooked) const { |
| 310 | if (auto n{char_.size()}) { |
| 311 | cooked.Put(&char_[0], n); |
| 312 | cooked.PutProvenanceMappings(provenances_); |
| 313 | } |
| 314 | } |
| 315 | |
| 316 | llvm::raw_ostream &TokenSequence::Dump(llvm::raw_ostream &o) const { |
| 317 | o << "TokenSequence has " << char_.size() << " chars; nextStart_ " |
| 318 | << nextStart_ << '\n'; |
| 319 | for (std::size_t j{0}; j < start_.size(); ++j) { |
| 320 | o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString() |
| 321 | << "'\n" ; |
| 322 | } |
| 323 | provenances_.Dump(o << "provenances_:\n" ); |
| 324 | return o; |
| 325 | } |
| 326 | |
| 327 | Provenance TokenSequence::GetCharProvenance(std::size_t offset) const { |
| 328 | ProvenanceRange range{provenances_.Map(offset)}; |
| 329 | return range.start(); |
| 330 | } |
| 331 | |
// Provenance of the character at 'offset' within token number 'token'.
Provenance TokenSequence::GetTokenProvenance(
    std::size_t token, std::size_t offset) const {
  return GetCharProvenance(start_[token] + offset);
}
| 336 | |
// Provenance range covering token number 'token' starting at 'offset',
// clipped to the remaining length of the token.
ProvenanceRange TokenSequence::GetTokenProvenanceRange(
    std::size_t token, std::size_t offset) const {
  ProvenanceRange range{provenances_.Map(start_[token] + offset)};
  return range.Prefix(TokenBytes(token) - offset);
}
| 342 | |
// Provenance range of 'tokens' consecutive tokens beginning at 'token'.
// Successive tokens are annexed only while their provenances are
// contiguous; the range ends at the first discontinuity.
ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
    std::size_t token, std::size_t tokens) const {
  if (tokens == 0) {
    return {};
  }
  ProvenanceRange range{provenances_.Map(start_[token])};
  // Loop body is empty: AnnexIfPredecessor both tests contiguity and
  // extends 'range' as a side effect.
  while (--tokens > 0 &&
      range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) {
  }
  return range;
}
| 354 | |
// Provenance range of the whole sequence (subject to the contiguity
// clipping performed by GetIntervalProvenanceRange).
ProvenanceRange TokenSequence::GetProvenanceRange() const {
  return GetIntervalProvenanceRange(0, start_.size());
}
| 358 | |
// Scan every token for characters that are not valid in Fortran source
// and report each offender via 'messages'.  Compiler directive sentinels
// are exempt, as are '!' and '&' when only preprocessing.
const TokenSequence &TokenSequence::CheckBadFortranCharacters(
    Messages &messages, const Prescanner &prescanner,
    bool preprocessingOnly) const {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{TokenAt(j)};
    char ch{token.FirstNonBlank()};
    if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) {
      if (ch == '!') {
        if (prescanner.IsCompilerDirectiveSentinel(token)) {
          // The token is itself a complete sentinel.
          continue;
        } else if (j + 1 < tokens &&
            prescanner.IsCompilerDirectiveSentinel(
                TokenAt(j + 1))) { // !dir$, &c.
          ++j; // also skip the sentinel token that follows the '!'
          continue;
        } else if (preprocessingOnly) {
          continue;
        }
      } else if (ch == '&' && preprocessingOnly) {
        // Free-form continuation marker is acceptable here.
        continue;
      }
      if (ch < ' ' || ch >= '\x7f') {
        // Not printable ASCII: report the code point in hex.
        messages.Say(GetTokenProvenanceRange(j),
            "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
      } else {
        messages.Say(GetTokenProvenanceRange(j),
            "bad character ('%c') in Fortran token"_err_en_US, ch);
      }
    }
  }
  return *this;
}
| 392 | |
| 393 | bool TokenSequence::BadlyNestedParentheses() const { |
| 394 | int nesting{0}; |
| 395 | std::size_t tokens{SizeInTokens()}; |
| 396 | for (std::size_t j{0}; j < tokens; ++j) { |
| 397 | CharBlock token{TokenAt(j)}; |
| 398 | char ch{token.OnlyNonBlank()}; |
| 399 | if (ch == '(') { |
| 400 | ++nesting; |
| 401 | } else if (ch == ')') { |
| 402 | if (nesting-- == 0) { |
| 403 | break; |
| 404 | } |
| 405 | } |
| 406 | } |
| 407 | return nesting != 0; |
| 408 | } |
| 409 | |
// If parentheses are unbalanced, emit an error pinpointing either the
// first unmatched ')' or the last unmatched '('.
const TokenSequence &TokenSequence::CheckBadParentheses(
    Messages &messages) const {
  if (BadlyNestedParentheses()) {
    // There's an error; diagnose it
    std::size_t tokens{SizeInTokens()};
    std::vector<std::size_t> stack; // indices of currently-open '(' tokens
    for (std::size_t j{0}; j < tokens; ++j) {
      CharBlock token{TokenAt(j)};
      char ch{token.OnlyNonBlank()};
      if (ch == '(') {
        stack.push_back(j);
      } else if (ch == ')') {
        if (stack.empty()) {
          messages.Say(GetTokenProvenanceRange(j), "Unmatched ')'"_err_en_US);
          return *this;
        }
        stack.pop_back();
      }
    }
    // No unmatched ')' was found, so a '(' must remain open.
    CHECK(!stack.empty());
    messages.Say(
        GetTokenProvenanceRange(stack.back()), "Unmatched '('"_err_en_US);
  }
  return *this;
}
| 435 | } // namespace Fortran::parser |
| 436 | |