1//===-- lib/Parser/source.cpp ---------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "flang/Parser/source.h"
10#include "flang/Common/idioms.h"
11#include "flang/Parser/char-buffer.h"
12#include "flang/Parser/characters.h"
13#include "llvm/Support/Errno.h"
14#include "llvm/Support/FileSystem.h"
15#include "llvm/Support/Path.h"
16#include "llvm/Support/raw_ostream.h"
17#include <algorithm>
18#include <cstring>
19#include <memory>
20#include <string>
21#include <vector>
22
23namespace Fortran::parser {
24
25SourceFile::~SourceFile() { Close(); }
26
27void SourceFile::RecordLineStarts() {
28 if (std::size_t chars{bytes()}; chars > 0) {
29 origins_.emplace(1, SourcePositionOrigin{path_, 1});
30 const char *source{content().data()};
31 CHECK(source[chars - 1] == '\n' && "missing ultimate newline");
32 std::size_t at{0};
33 do { // "at" is always at the beginning of a source line
34 lineStart_.push_back(at);
35 at = reinterpret_cast<const char *>(
36 std::memchr(source + at, '\n', chars - at)) -
37 source + 1;
38 } while (at < chars);
39 CHECK(at == chars);
40 lineStart_.shrink_to_fit();
41 }
42}
43
44// Check for a Unicode byte order mark (BOM).
45// Module files all have one; so can source files.
46void SourceFile::IdentifyPayload() {
47 llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()};
48 constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"};
49 if (content.starts_with(UTF8_BOM)) {
50 bom_end_ = UTF8_BOM.size();
51 encoding_ = Encoding::UTF_8;
52 }
53}
54
55std::string DirectoryName(std::string path) {
56 llvm::SmallString<128> pathBuf{path};
57 llvm::sys::path::remove_filename(path&: pathBuf);
58 return pathBuf.str().str();
59}
60
61std::optional<std::string> LocateSourceFile(
62 std::string name, const std::list<std::string> &searchPath) {
63 if (name == "-" || llvm::sys::path::is_absolute(path: name)) {
64 return name;
65 }
66 for (const std::string &dir : searchPath) {
67 llvm::SmallString<128> path{dir};
68 llvm::sys::path::append(path, name);
69 bool isDir{false};
70 auto er = llvm::sys::fs::is_directory(path, isDir);
71 if (!er && !isDir) {
72 return path.str().str();
73 }
74 }
75 return std::nullopt;
76}
77
78std::vector<std::string> LocateSourceFileAll(
79 std::string name, const std::vector<std::string> &searchPath) {
80 if (name == "-" || llvm::sys::path::is_absolute(path: name)) {
81 return {name};
82 }
83 std::vector<std::string> result;
84 for (const std::string &dir : searchPath) {
85 llvm::SmallString<128> path{dir};
86 llvm::sys::path::append(path, a: name);
87 bool isDir{false};
88 auto er = llvm::sys::fs::is_directory(path, result&: isDir);
89 if (!er && !isDir) {
90 result.emplace_back(args: path.str().str());
91 }
92 }
93 return result;
94}
95
96std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) {
97 std::size_t wrote{0};
98 char *buffer{buf.data()};
99 char *p{buf.data()};
100 std::size_t bytes = buf.size();
101 while (bytes > 0) {
102 void *vp{static_cast<void *>(p)};
103 void *crvp{std::memchr(s: vp, c: '\r', n: bytes)};
104 char *crcp{static_cast<char *>(crvp)};
105 if (!crcp) {
106 std::memmove(dest: buffer + wrote, src: p, n: bytes);
107 wrote += bytes;
108 break;
109 }
110 std::size_t chunk = crcp - p;
111 auto advance{chunk + 1};
112 if (chunk + 1 >= bytes || crcp[1] == '\n') {
113 // CR followed by LF or EOF: omit
114 } else if ((chunk == 0 && p == buf.data()) || crcp[-1] == '\n') {
115 // CR preceded by LF or BOF: omit
116 } else {
117 // CR in line: retain
118 ++chunk;
119 }
120 std::memmove(dest: buffer + wrote, src: p, n: chunk);
121 wrote += chunk;
122 p += advance;
123 bytes -= advance;
124 }
125 return wrote;
126}
127
128bool SourceFile::Open(std::string path, llvm::raw_ostream &error) {
129 Close();
130 path_ = path;
131 std::string errorPath{"'"s + path_ + "'"};
132 auto bufOr{llvm::WritableMemoryBuffer::getFile(path)};
133 if (!bufOr) {
134 auto err = bufOr.getError();
135 error << "Could not open " << errorPath << ": " << err.message();
136 return false;
137 }
138 buf_ = std::move(bufOr.get());
139 ReadFile();
140 return true;
141}
142
143bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) {
144 Close();
145 path_ = "standard input";
146 auto buf_or = llvm::MemoryBuffer::getSTDIN();
147 if (!buf_or) {
148 auto err = buf_or.getError();
149 error << err.message();
150 return false;
151 }
152 auto inbuf = std::move(buf_or.get());
153 buf_ =
154 llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize());
155 llvm::copy(inbuf->getBuffer(), buf_->getBufferStart());
156 ReadFile();
157 return true;
158}
159
160void SourceFile::ReadFile() {
161 buf_end_ = RemoveCarriageReturns(buf_->getBuffer());
162 if (content().size() == 0 || content().back() != '\n') {
163 // Don't bother to copy if we have spare memory
164 if (content().size() >= buf_->getBufferSize()) {
165 auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
166 content().size() + 1)};
167 llvm::copy(content(), tmp_buf->getBufferStart());
168 buf_ = std::move(tmp_buf);
169 }
170 buf_end_++;
171 buf_->getBuffer()[buf_end_ - 1] = '\n';
172 }
173 IdentifyPayload();
174 RecordLineStarts();
175}
176
177void SourceFile::Close() {
178 path_.clear();
179 buf_.reset();
180 distinctPaths_.clear();
181 origins_.clear();
182}
183
184SourcePosition SourceFile::GetSourcePosition(std::size_t at) const {
185 CHECK(at < bytes());
186 auto it{llvm::upper_bound(lineStart_, at)};
187 auto trueLineNumber{std::distance(lineStart_.begin(), it - 1) + 1};
188 auto ub{origins_.upper_bound(trueLineNumber)};
189 auto column{static_cast<int>(at - lineStart_[trueLineNumber - 1] + 1)};
190 if (ub == origins_.begin()) {
191 return {*this, path_, static_cast<int>(trueLineNumber), column,
192 static_cast<int>(trueLineNumber)};
193 } else {
194 --ub;
195 const SourcePositionOrigin &origin{ub->second};
196 auto lineNumber{
197 trueLineNumber - ub->first + static_cast<std::size_t>(origin.line)};
198 return {*this, origin.path, static_cast<int>(lineNumber), column,
199 static_cast<int>(trueLineNumber)};
200 }
201}
202
203const std::string &SourceFile::SavePath(std::string &&path) {
204 return *distinctPaths_.emplace(std::move(path)).first;
205}
206
207void SourceFile::LineDirective(
208 int trueLineNumber, const std::string &path, int lineNumber) {
209 origins_.emplace(trueLineNumber, SourcePositionOrigin{path, lineNumber});
210}
211
212llvm::raw_ostream &SourceFile::Dump(llvm::raw_ostream &o) const {
213 o << "SourceFile '" << path_ << "'\n";
214 for (const auto &[at, spo] : origins_) {
215 o << " origin_[" << at << "] -> '" << spo.path << "' " << spo.line << '\n';
216 }
217 return o;
218}
219} // namespace Fortran::parser
220

// Source code of flang/lib/Parser/source.cpp