| 1 | //===-- lib/Parser/source.cpp ---------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "flang/Parser/source.h" |
| 10 | #include "flang/Common/idioms.h" |
| 11 | #include "flang/Parser/char-buffer.h" |
| 12 | #include "flang/Parser/characters.h" |
| 13 | #include "llvm/Support/Errno.h" |
| 14 | #include "llvm/Support/FileSystem.h" |
| 15 | #include "llvm/Support/Path.h" |
| 16 | #include "llvm/Support/raw_ostream.h" |
| 17 | #include <algorithm> |
| 18 | #include <cstring> |
| 19 | #include <memory> |
| 20 | #include <string> |
| 21 | #include <vector> |
| 22 | |
| 23 | namespace Fortran::parser { |
| 24 | |
| 25 | SourceFile::~SourceFile() { Close(); } |
| 26 | |
| 27 | void SourceFile::RecordLineStarts() { |
| 28 | if (std::size_t chars{bytes()}; chars > 0) { |
| 29 | origins_.emplace(1, SourcePositionOrigin{path_, 1}); |
| 30 | const char *source{content().data()}; |
| 31 | CHECK(source[chars - 1] == '\n' && "missing ultimate newline" ); |
| 32 | std::size_t at{0}; |
| 33 | do { // "at" is always at the beginning of a source line |
| 34 | lineStart_.push_back(at); |
| 35 | at = reinterpret_cast<const char *>( |
| 36 | std::memchr(source + at, '\n', chars - at)) - |
| 37 | source + 1; |
| 38 | } while (at < chars); |
| 39 | CHECK(at == chars); |
| 40 | lineStart_.shrink_to_fit(); |
| 41 | } |
| 42 | } |
| 43 | |
| 44 | // Check for a Unicode byte order mark (BOM). |
| 45 | // Module files all have one; so can source files. |
| 46 | void SourceFile::IdentifyPayload() { |
| 47 | llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()}; |
| 48 | constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf" }; |
| 49 | if (content.starts_with(UTF8_BOM)) { |
| 50 | bom_end_ = UTF8_BOM.size(); |
| 51 | encoding_ = Encoding::UTF_8; |
| 52 | } |
| 53 | } |
| 54 | |
| 55 | std::string DirectoryName(std::string path) { |
| 56 | llvm::SmallString<128> pathBuf{path}; |
| 57 | llvm::sys::path::remove_filename(path&: pathBuf); |
| 58 | return pathBuf.str().str(); |
| 59 | } |
| 60 | |
| 61 | std::optional<std::string> LocateSourceFile( |
| 62 | std::string name, const std::list<std::string> &searchPath) { |
| 63 | if (name == "-" || llvm::sys::path::is_absolute(path: name)) { |
| 64 | return name; |
| 65 | } |
| 66 | for (const std::string &dir : searchPath) { |
| 67 | llvm::SmallString<128> path{dir}; |
| 68 | llvm::sys::path::append(path, name); |
| 69 | bool isDir{false}; |
| 70 | auto er = llvm::sys::fs::is_directory(path, isDir); |
| 71 | if (!er && !isDir) { |
| 72 | return path.str().str(); |
| 73 | } |
| 74 | } |
| 75 | return std::nullopt; |
| 76 | } |
| 77 | |
| 78 | std::vector<std::string> LocateSourceFileAll( |
| 79 | std::string name, const std::vector<std::string> &searchPath) { |
| 80 | if (name == "-" || llvm::sys::path::is_absolute(path: name)) { |
| 81 | return {name}; |
| 82 | } |
| 83 | std::vector<std::string> result; |
| 84 | for (const std::string &dir : searchPath) { |
| 85 | llvm::SmallString<128> path{dir}; |
| 86 | llvm::sys::path::append(path, a: name); |
| 87 | bool isDir{false}; |
| 88 | auto er = llvm::sys::fs::is_directory(path, result&: isDir); |
| 89 | if (!er && !isDir) { |
| 90 | result.emplace_back(args: path.str().str()); |
| 91 | } |
| 92 | } |
| 93 | return result; |
| 94 | } |
| 95 | |
| 96 | std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) { |
| 97 | std::size_t wrote{0}; |
| 98 | char *buffer{buf.data()}; |
| 99 | char *p{buf.data()}; |
| 100 | std::size_t bytes = buf.size(); |
| 101 | while (bytes > 0) { |
| 102 | void *vp{static_cast<void *>(p)}; |
| 103 | void *crvp{std::memchr(s: vp, c: '\r', n: bytes)}; |
| 104 | char *crcp{static_cast<char *>(crvp)}; |
| 105 | if (!crcp) { |
| 106 | std::memmove(dest: buffer + wrote, src: p, n: bytes); |
| 107 | wrote += bytes; |
| 108 | break; |
| 109 | } |
| 110 | std::size_t chunk = crcp - p; |
| 111 | auto advance{chunk + 1}; |
| 112 | if (chunk + 1 >= bytes || crcp[1] == '\n') { |
| 113 | // CR followed by LF or EOF: omit |
| 114 | } else if ((chunk == 0 && p == buf.data()) || crcp[-1] == '\n') { |
| 115 | // CR preceded by LF or BOF: omit |
| 116 | } else { |
| 117 | // CR in line: retain |
| 118 | ++chunk; |
| 119 | } |
| 120 | std::memmove(dest: buffer + wrote, src: p, n: chunk); |
| 121 | wrote += chunk; |
| 122 | p += advance; |
| 123 | bytes -= advance; |
| 124 | } |
| 125 | return wrote; |
| 126 | } |
| 127 | |
| 128 | bool SourceFile::Open(std::string path, llvm::raw_ostream &error) { |
| 129 | Close(); |
| 130 | path_ = path; |
| 131 | std::string errorPath{"'"s + path_ + "'" }; |
| 132 | auto bufOr{llvm::WritableMemoryBuffer::getFile(path)}; |
| 133 | if (!bufOr) { |
| 134 | auto err = bufOr.getError(); |
| 135 | error << "Could not open " << errorPath << ": " << err.message(); |
| 136 | return false; |
| 137 | } |
| 138 | buf_ = std::move(bufOr.get()); |
| 139 | ReadFile(); |
| 140 | return true; |
| 141 | } |
| 142 | |
| 143 | bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) { |
| 144 | Close(); |
| 145 | path_ = "standard input" ; |
| 146 | auto buf_or = llvm::MemoryBuffer::getSTDIN(); |
| 147 | if (!buf_or) { |
| 148 | auto err = buf_or.getError(); |
| 149 | error << err.message(); |
| 150 | return false; |
| 151 | } |
| 152 | auto inbuf = std::move(buf_or.get()); |
| 153 | buf_ = |
| 154 | llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize()); |
| 155 | llvm::copy(inbuf->getBuffer(), buf_->getBufferStart()); |
| 156 | ReadFile(); |
| 157 | return true; |
| 158 | } |
| 159 | |
| 160 | void SourceFile::ReadFile() { |
| 161 | buf_end_ = RemoveCarriageReturns(buf_->getBuffer()); |
| 162 | if (content().size() == 0 || content().back() != '\n') { |
| 163 | // Don't bother to copy if we have spare memory |
| 164 | if (content().size() >= buf_->getBufferSize()) { |
| 165 | auto tmp_buf{llvm::WritableMemoryBuffer::getNewUninitMemBuffer( |
| 166 | content().size() + 1)}; |
| 167 | llvm::copy(content(), tmp_buf->getBufferStart()); |
| 168 | buf_ = std::move(tmp_buf); |
| 169 | } |
| 170 | buf_end_++; |
| 171 | buf_->getBuffer()[buf_end_ - 1] = '\n'; |
| 172 | } |
| 173 | IdentifyPayload(); |
| 174 | RecordLineStarts(); |
| 175 | } |
| 176 | |
| 177 | void SourceFile::Close() { |
| 178 | path_.clear(); |
| 179 | buf_.reset(); |
| 180 | distinctPaths_.clear(); |
| 181 | origins_.clear(); |
| 182 | } |
| 183 | |
| 184 | SourcePosition SourceFile::GetSourcePosition(std::size_t at) const { |
| 185 | CHECK(at < bytes()); |
| 186 | auto it{llvm::upper_bound(lineStart_, at)}; |
| 187 | auto trueLineNumber{std::distance(lineStart_.begin(), it - 1) + 1}; |
| 188 | auto ub{origins_.upper_bound(trueLineNumber)}; |
| 189 | auto column{static_cast<int>(at - lineStart_[trueLineNumber - 1] + 1)}; |
| 190 | if (ub == origins_.begin()) { |
| 191 | return {*this, path_, static_cast<int>(trueLineNumber), column, |
| 192 | static_cast<int>(trueLineNumber)}; |
| 193 | } else { |
| 194 | --ub; |
| 195 | const SourcePositionOrigin &origin{ub->second}; |
| 196 | auto lineNumber{ |
| 197 | trueLineNumber - ub->first + static_cast<std::size_t>(origin.line)}; |
| 198 | return {*this, origin.path, static_cast<int>(lineNumber), column, |
| 199 | static_cast<int>(trueLineNumber)}; |
| 200 | } |
| 201 | } |
| 202 | |
| 203 | const std::string &SourceFile::SavePath(std::string &&path) { |
| 204 | return *distinctPaths_.emplace(std::move(path)).first; |
| 205 | } |
| 206 | |
| 207 | void SourceFile::LineDirective( |
| 208 | int trueLineNumber, const std::string &path, int lineNumber) { |
| 209 | origins_.emplace(trueLineNumber, SourcePositionOrigin{path, lineNumber}); |
| 210 | } |
| 211 | |
| 212 | llvm::raw_ostream &SourceFile::Dump(llvm::raw_ostream &o) const { |
| 213 | o << "SourceFile '" << path_ << "'\n" ; |
| 214 | for (const auto &[at, spo] : origins_) { |
| 215 | o << " origin_[" << at << "] -> '" << spo.path << "' " << spo.line << '\n'; |
| 216 | } |
| 217 | return o; |
| 218 | } |
| 219 | } // namespace Fortran::parser |
| 220 | |