1 | //===-- lib/Parser/source.cpp ---------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "flang/Parser/source.h" |
10 | #include "flang/Common/idioms.h" |
11 | #include "flang/Parser/char-buffer.h" |
12 | #include "flang/Parser/characters.h" |
13 | #include "llvm/Support/Errno.h" |
14 | #include "llvm/Support/FileSystem.h" |
15 | #include "llvm/Support/Path.h" |
16 | #include "llvm/Support/raw_ostream.h" |
17 | #include <algorithm> |
18 | #include <cstring> |
19 | #include <memory> |
20 | #include <string> |
21 | #include <vector> |
22 | |
23 | namespace Fortran::parser { |
24 | |
25 | SourceFile::~SourceFile() { Close(); } |
26 | |
27 | void SourceFile::RecordLineStarts() { |
28 | if (std::size_t chars{bytes()}; chars > 0) { |
29 | origins_.emplace(1, SourcePositionOrigin{path_, 1}); |
30 | const char *source{content().data()}; |
31 | CHECK(source[chars - 1] == '\n' && "missing ultimate newline" ); |
32 | std::size_t at{0}; |
33 | do { // "at" is always at the beginning of a source line |
34 | lineStart_.push_back(at); |
35 | at = reinterpret_cast<const char *>( |
36 | std::memchr(source + at, '\n', chars - at)) - |
37 | source + 1; |
38 | } while (at < chars); |
39 | CHECK(at == chars); |
40 | lineStart_.shrink_to_fit(); |
41 | } |
42 | } |
43 | |
44 | // Check for a Unicode byte order mark (BOM). |
45 | // Module files all have one; so can source files. |
46 | void SourceFile::IdentifyPayload() { |
47 | llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()}; |
48 | constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf" }; |
49 | if (content.starts_with(UTF8_BOM)) { |
50 | bom_end_ = UTF8_BOM.size(); |
51 | encoding_ = Encoding::UTF_8; |
52 | } |
53 | } |
54 | |
55 | std::string DirectoryName(std::string path) { |
56 | llvm::SmallString<128> pathBuf{path}; |
57 | llvm::sys::path::remove_filename(path&: pathBuf); |
58 | return pathBuf.str().str(); |
59 | } |
60 | |
61 | std::optional<std::string> LocateSourceFile( |
62 | std::string name, const std::list<std::string> &searchPath) { |
63 | if (name == "-" || llvm::sys::path::is_absolute(path: name)) { |
64 | return name; |
65 | } |
66 | for (const std::string &dir : searchPath) { |
67 | llvm::SmallString<128> path{dir}; |
68 | llvm::sys::path::append(path, name); |
69 | bool isDir{false}; |
70 | auto er = llvm::sys::fs::is_directory(path, isDir); |
71 | if (!er && !isDir) { |
72 | return path.str().str(); |
73 | } |
74 | } |
75 | return std::nullopt; |
76 | } |
77 | |
78 | std::vector<std::string> LocateSourceFileAll( |
79 | std::string name, const std::vector<std::string> &searchPath) { |
80 | if (name == "-" || llvm::sys::path::is_absolute(path: name)) { |
81 | return {name}; |
82 | } |
83 | std::vector<std::string> result; |
84 | for (const std::string &dir : searchPath) { |
85 | llvm::SmallString<128> path{dir}; |
86 | llvm::sys::path::append(path, a: name); |
87 | bool isDir{false}; |
88 | auto er = llvm::sys::fs::is_directory(path, result&: isDir); |
89 | if (!er && !isDir) { |
90 | result.emplace_back(args: path.str().str()); |
91 | } |
92 | } |
93 | return result; |
94 | } |
95 | |
96 | std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) { |
97 | std::size_t wrote{0}; |
98 | char *buffer{buf.data()}; |
99 | char *p{buf.data()}; |
100 | std::size_t bytes = buf.size(); |
101 | while (bytes > 0) { |
102 | void *vp{static_cast<void *>(p)}; |
103 | void *crvp{std::memchr(s: vp, c: '\r', n: bytes)}; |
104 | char *crcp{static_cast<char *>(crvp)}; |
105 | if (!crcp) { |
106 | std::memmove(dest: buffer + wrote, src: p, n: bytes); |
107 | wrote += bytes; |
108 | break; |
109 | } |
110 | std::size_t chunk = crcp - p; |
111 | auto advance{chunk + 1}; |
112 | if (chunk + 1 >= bytes || crcp[1] == '\n') { |
113 | // CR followed by LF or EOF: omit |
114 | } else if ((chunk == 0 && p == buf.data()) || crcp[-1] == '\n') { |
115 | // CR preceded by LF or BOF: omit |
116 | } else { |
117 | // CR in line: retain |
118 | ++chunk; |
119 | } |
120 | std::memmove(dest: buffer + wrote, src: p, n: chunk); |
121 | wrote += chunk; |
122 | p += advance; |
123 | bytes -= advance; |
124 | } |
125 | return wrote; |
126 | } |
127 | |
128 | bool SourceFile::Open(std::string path, llvm::raw_ostream &error) { |
129 | Close(); |
130 | path_ = path; |
131 | std::string errorPath{"'"s + path_ + "'" }; |
132 | auto bufOr{llvm::WritableMemoryBuffer::getFile(path)}; |
133 | if (!bufOr) { |
134 | auto err = bufOr.getError(); |
135 | error << "Could not open " << errorPath << ": " << err.message(); |
136 | return false; |
137 | } |
138 | buf_ = std::move(bufOr.get()); |
139 | ReadFile(); |
140 | return true; |
141 | } |
142 | |
143 | bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) { |
144 | Close(); |
145 | path_ = "standard input" ; |
146 | auto buf_or = llvm::MemoryBuffer::getSTDIN(); |
147 | if (!buf_or) { |
148 | auto err = buf_or.getError(); |
149 | error << err.message(); |
150 | return false; |
151 | } |
152 | auto inbuf = std::move(buf_or.get()); |
153 | buf_ = |
154 | llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize()); |
155 | llvm::copy(inbuf->getBuffer(), buf_->getBufferStart()); |
156 | ReadFile(); |
157 | return true; |
158 | } |
159 | |
160 | void SourceFile::ReadFile() { |
161 | buf_end_ = RemoveCarriageReturns(buf_->getBuffer()); |
162 | if (content().size() == 0 || content().back() != '\n') { |
163 | // Don't bother to copy if we have spare memory |
164 | if (content().size() >= buf_->getBufferSize()) { |
165 | auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer( |
166 | content().size() + 1)}; |
167 | llvm::copy(content(), tmp_buf->getBufferStart()); |
168 | buf_ = std::move(tmp_buf); |
169 | } |
170 | buf_end_++; |
171 | buf_->getBuffer()[buf_end_ - 1] = '\n'; |
172 | } |
173 | IdentifyPayload(); |
174 | RecordLineStarts(); |
175 | } |
176 | |
177 | void SourceFile::Close() { |
178 | path_.clear(); |
179 | buf_.reset(); |
180 | distinctPaths_.clear(); |
181 | origins_.clear(); |
182 | } |
183 | |
184 | SourcePosition SourceFile::GetSourcePosition(std::size_t at) const { |
185 | CHECK(at < bytes()); |
186 | auto it{llvm::upper_bound(lineStart_, at)}; |
187 | auto trueLineNumber{std::distance(lineStart_.begin(), it - 1) + 1}; |
188 | auto ub{origins_.upper_bound(trueLineNumber)}; |
189 | auto column{static_cast<int>(at - lineStart_[trueLineNumber - 1] + 1)}; |
190 | if (ub == origins_.begin()) { |
191 | return {*this, path_, static_cast<int>(trueLineNumber), column, |
192 | static_cast<int>(trueLineNumber)}; |
193 | } else { |
194 | --ub; |
195 | const SourcePositionOrigin &origin{ub->second}; |
196 | auto lineNumber{ |
197 | trueLineNumber - ub->first + static_cast<std::size_t>(origin.line)}; |
198 | return {*this, origin.path, static_cast<int>(lineNumber), column, |
199 | static_cast<int>(trueLineNumber)}; |
200 | } |
201 | } |
202 | |
203 | const std::string &SourceFile::SavePath(std::string &&path) { |
204 | return *distinctPaths_.emplace(std::move(path)).first; |
205 | } |
206 | |
207 | void SourceFile::LineDirective( |
208 | int trueLineNumber, const std::string &path, int lineNumber) { |
209 | origins_.emplace(trueLineNumber, SourcePositionOrigin{path, lineNumber}); |
210 | } |
211 | |
212 | llvm::raw_ostream &SourceFile::Dump(llvm::raw_ostream &o) const { |
213 | o << "SourceFile '" << path_ << "'\n" ; |
214 | for (const auto &[at, spo] : origins_) { |
215 | o << " origin_[" << at << "] -> '" << spo.path << "' " << spo.line << '\n'; |
216 | } |
217 | return o; |
218 | } |
219 | } // namespace Fortran::parser |
220 | |