source.cpp source code [flang/lib/Parser/source.cpp]

1	//===-- lib/Parser/source.cpp ---------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "flang/Parser/source.h"
10	#include "flang/Common/idioms.h"
11	#include "flang/Parser/char-buffer.h"
12	#include "flang/Parser/characters.h"
13	#include "llvm/Support/Errno.h"
14	#include "llvm/Support/FileSystem.h"
15	#include "llvm/Support/Path.h"
16	#include "llvm/Support/raw_ostream.h"
17	#include <algorithm>
18	#include <cstring>
19	#include <memory>
20	#include <string>
21	#include <vector>
22
23	namespace Fortran::parser {
24
25	SourceFile::~SourceFile() { Close(); }
26
27	void SourceFile::RecordLineStarts() {
28	if (std::size_t chars{bytes()}; chars > `0`) {
29	origins_.emplace(`1`, SourcePositionOrigin{path_, `1`});
30	const char *source{content().data()};
31	CHECK(source[chars - `1`] == `'\n'` && "missing ultimate newline");
32	std::size_t at{`0`};
33	do { // "at" is always at the beginning of a source line
34	lineStart_.push_back(at);
35	at = reinterpret_cast<const char *>(
36	std::memchr(source + at, `'\n'`, chars - at)) -
37	source + `1`;
38	} while (at < chars);
39	CHECK(at == chars);
40	lineStart_.shrink_to_fit();
41	}
42	}
43
44	// Check for a Unicode byte order mark (BOM).
45	// Module files all have one; so can source files.
46	void SourceFile::IdentifyPayload() {
47	llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()};
48	constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"};
49	if (content.starts_with(UTF8_BOM)) {
50	bom_end_ = UTF8_BOM.size();
51	encoding_ = Encoding::UTF_8;
52	}
53	}
54
55	std::string DirectoryName(std::string path) {
56	llvm::SmallString<`128`> pathBuf{path};
57	llvm::sys::path::remove_filename(path&: pathBuf);
58	return pathBuf.str().str();
59	}
60
61	std::optional<std::string> LocateSourceFile(
62	std::string name, const std::list<std::string> &searchPath) {
63	if (name == "-" \|\| llvm::sys::path::is_absolute(path: name)) {
64	return name;
65	}
66	for (const std::string &dir : searchPath) {
67	llvm::SmallString<`128`> path{dir};
68	llvm::sys::path::append(path, name);
69	bool isDir{false};
70	auto er = llvm::sys::fs::is_directory(path, isDir);
71	if (!er && !isDir) {
72	return path.str().str();
73	}
74	}
75	return std::nullopt;
76	}
77
78	std::vector<std::string> LocateSourceFileAll(
79	std::string name, const std::vector<std::string> &searchPath) {
80	if (name == "-" \|\| llvm::sys::path::is_absolute(path: name)) {
81	return {name};
82	}
83	std::vector<std::string> result;
84	for (const std::string &dir : searchPath) {
85	llvm::SmallString<`128`> path{dir};
86	llvm::sys::path::append(path, a: name);
87	bool isDir{false};
88	auto er = llvm::sys::fs::is_directory(path, result&: isDir);
89	if (!er && !isDir) {
90	result.emplace_back(args: path.str().str());
91	}
92	}
93	return result;
94	}
95
96	std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) {
97	std::size_t wrote{`0`};
98	char *buffer{buf.data()};
99	char *p{buf.data()};
100	std::size_t bytes = buf.size();
101	while (bytes > `0`) {
102	void vp{static_cast<void* *>(p)};
103	void *crvp{std::memchr(s: vp, c: `'\r'`, n: bytes)};
104	char crcp{static_cast<char* *>(crvp)};
105	if (!crcp) {
106	std::memmove(dest: buffer + wrote, src: p, n: bytes);
107	wrote += bytes;
108	break;
109	}
110	std::size_t chunk = crcp - p;
111	auto advance{chunk + `1`};
112	if (chunk + `1` >= bytes \|\| crcp[`1`] == `'\n'`) {
113	// CR followed by LF or EOF: omit
114	} else if ((chunk == `0` && p == buf.data()) \|\| crcp[-`1`] == `'\n'`) {
115	// CR preceded by LF or BOF: omit
116	} else {
117	// CR in line: retain
118	++chunk;
119	}
120	std::memmove(dest: buffer + wrote, src: p, n: chunk);
121	wrote += chunk;
122	p += advance;
123	bytes -= advance;
124	}
125	return wrote;
126	}
127
128	bool SourceFile::Open(std::string path, llvm::raw_ostream &error) {
129	Close();
130	path_ = path;
131	std::string errorPath{"'"s + path_ + "'"};
132	auto bufOr{llvm::WritableMemoryBuffer::getFile(path)};
133	if (!bufOr) {
134	auto err = bufOr.getError();
135	error << "Could not open " << errorPath << ": " << err.message();
136	return false;
137	}
138	buf_ = std::move(bufOr.get());
139	ReadFile();
140	return true;
141	}
142
143	bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) {
144	Close();
145	path_ = "standard input";
146	auto buf_or = llvm::MemoryBuffer::getSTDIN();
147	if (!buf_or) {
148	auto err = buf_or.getError();
149	error << err.message();
150	return false;
151	}
152	auto inbuf = std::move(buf_or.get());
153	buf_ =
154	llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize());
155	llvm::copy(inbuf->getBuffer(), buf_->getBufferStart());
156	ReadFile();
157	return true;
158	}
159
160	void SourceFile::ReadFile() {
161	buf_end_ = RemoveCarriageReturns(buf_->getBuffer());
162	if (content().size() == `0` \|\| content().back() != `'\n'`) {
163	// Don't bother to copy if we have spare memory
164	if (content().size() >= buf_->getBufferSize()) {
165	auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
166	content().size() + `1`)};
167	llvm::copy(content(), tmp_buf->getBufferStart());
168	buf_ = std::move(tmp_buf);
169	}
170	buf_end_++;
171	buf_->getBuffer()[buf_end_ - `1`] = `'\n'`;
172	}
173	IdentifyPayload();
174	RecordLineStarts();
175	}
176
177	void SourceFile::Close() {
178	path_.clear();
179	buf_.reset();
180	distinctPaths_.clear();
181	origins_.clear();
182	}
183
184	SourcePosition SourceFile::GetSourcePosition(std::size_t at) const {
185	CHECK(at < bytes());
186	auto it{llvm::upper_bound(lineStart_, at)};
187	auto trueLineNumber{std::distance(lineStart_.begin(), it - `1`) + `1`};
188	auto ub{origins_.upper_bound(trueLineNumber)};
189	auto column{static_cast<int>(at - lineStart_[trueLineNumber - `1`] + `1`)};
190	if (ub == origins_.begin()) {
191	return {*this, path_, static_cast<int>(trueLineNumber), column,
192	static_cast<int>(trueLineNumber)};
193	} else {
194	--ub;
195	const SourcePositionOrigin &origin{ub->second};
196	auto lineNumber{
197	trueLineNumber - ub->first + static_cast<std::size_t>(origin.line)};
198	return {*this, origin.path, static_cast<int>(lineNumber), column,
199	static_cast<int>(trueLineNumber)};
200	}
201	}
202
203	const std::string &SourceFile::SavePath(std::string &&path) {
204	return *distinctPaths_.emplace(std::move(path)).first;
205	}
206
207	void SourceFile::LineDirective(
208	int trueLineNumber, const std::string &path, int lineNumber) {
209	origins_.emplace(trueLineNumber, SourcePositionOrigin{path, lineNumber});
210	}
211
212	llvm::raw_ostream &SourceFile::Dump(llvm::raw_ostream &o) const {
213	o << "SourceFile '" << path_ << "'\n";
214	for (const auto &[at, spo] : origins_) {
215	o << " origin_[" << at << "] -> '" << spo.path << "' " << spo.line << `'\n'`;
216	}
217	return o;
218	}
219	} // namespace Fortran::parser
220

source code of flang/lib/Parser/source.cpp