1 | //===-- runtime/buffer.h ----------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | // External file buffering |
10 | |
11 | #ifndef FORTRAN_RUNTIME_BUFFER_H_ |
12 | #define FORTRAN_RUNTIME_BUFFER_H_ |
13 | |
14 | #include "io-error.h" |
15 | #include "flang/Runtime/memory.h" |
16 | #include <algorithm> |
17 | #include <cinttypes> |
18 | #include <cstring> |
19 | |
20 | namespace Fortran::runtime::io { |
21 | |
// Declared here, defined elsewhere.  Judging by its one use in
// FileFrame::MakeDataContiguous ("cdeab" -> "abcde" with shift == 3), it
// presumably rotates the first `bytes` bytes of the buffer left by `shift`
// positions in place -- confirm against the definition.
void LeftShiftBufferCircularly(char *, std::size_t bytes, std::size_t shift);
23 | |
24 | // Maintains a view of a contiguous region of a file in a memory buffer. |
25 | // The valid data in the buffer may be circular, but any active frame |
26 | // will also be contiguous in memory. The requirement stems from the need to |
27 | // preserve read data that may be reused by means of Tn/TLn edit descriptors |
28 | // without needing to position the file (which may not always be possible, |
29 | // e.g. a socket) and a general desire to reduce system call counts. |
30 | // |
31 | // Possible scenario with a tiny 32-byte buffer after a ReadFrame or |
32 | // WriteFrame with a file offset of 103 to access "DEF": |
33 | // |
//   fileOffset_ 100 --+  +-+ frame of interest (103:105)
//   file: ............ABCDEFGHIJKLMNOPQRSTUVWXYZ....
//   buffer: [NOPQRSTUVWXYZ......ABCDEFGHIJKLM] (size_ == 32)
//                                |  +-- frame_ == 3
//                                +----- start_ == 19, length_ == 26
39 | // |
40 | // The buffer holds length_ == 26 bytes from file offsets 100:125. |
41 | // Those 26 bytes "wrap around" the end of the circular buffer, |
42 | // so file offsets 100:112 map to buffer offsets 19:31 ("A..M") and |
43 | // file offsets 113:125 map to buffer offsets 0:12 ("N..Z") |
44 | // The 3-byte frame of file offsets 103:105 is contiguous in the buffer |
45 | // at buffer offset (start_ + frame_) == 22 ("DEF"). |
46 | |
47 | template <typename STORE, std::size_t minBuffer = 65536> class FileFrame { |
48 | public: |
49 | using FileOffset = std::int64_t; |
50 | |
51 | ~FileFrame() { FreeMemoryAndNullify(buffer_); } |
52 | |
53 | // The valid data in the buffer begins at buffer_[start_] and proceeds |
54 | // with possible wrap-around for length_ bytes. The current frame |
55 | // is offset by frame_ bytes into that region and is guaranteed to |
56 | // be contiguous for at least as many bytes as were requested. |
57 | |
58 | FileOffset FrameAt() const { return fileOffset_ + frame_; } |
59 | char *Frame() const { return buffer_ + start_ + frame_; } |
60 | std::size_t FrameLength() const { |
61 | return std::min<std::size_t>(a: length_ - frame_, b: size_ - (start_ + frame_)); |
62 | } |
63 | std::size_t BytesBufferedBeforeFrame() const { return frame_ - start_; } |
64 | |
65 | // Returns a short frame at a non-fatal EOF. Can return a long frame as well. |
66 | std::size_t ReadFrame( |
67 | FileOffset at, std::size_t bytes, IoErrorHandler &handler) { |
68 | Flush(handler); |
69 | Reallocate(bytes, terminator: handler); |
70 | std::int64_t newFrame{at - fileOffset_}; |
71 | if (newFrame < 0 || newFrame > length_) { |
72 | Reset(at); |
73 | } else { |
74 | frame_ = newFrame; |
75 | } |
76 | RUNTIME_CHECK(handler, at == fileOffset_ + frame_); |
77 | if (static_cast<std::int64_t>(start_ + frame_ + bytes) > size_) { |
78 | DiscardLeadingBytes(n: frame_, terminator: handler); |
79 | MakeDataContiguous(handler, bytes); |
80 | RUNTIME_CHECK(handler, at == fileOffset_ + frame_); |
81 | } |
82 | if (FrameLength() < bytes) { |
83 | auto next{start_ + length_}; |
84 | RUNTIME_CHECK(handler, next < size_); |
85 | auto minBytes{bytes - FrameLength()}; |
86 | auto maxBytes{size_ - next}; |
87 | auto got{Store().Read( |
88 | fileOffset_ + length_, buffer_ + next, minBytes, maxBytes, handler)}; |
89 | length_ += got; |
90 | RUNTIME_CHECK(handler, length_ <= size_); |
91 | } |
92 | return FrameLength(); |
93 | } |
94 | |
95 | void WriteFrame(FileOffset at, std::size_t bytes, IoErrorHandler &handler) { |
96 | Reallocate(bytes, terminator: handler); |
97 | std::int64_t newFrame{at - fileOffset_}; |
98 | if (!dirty_ || newFrame < 0 || newFrame > length_) { |
99 | Flush(handler); |
100 | Reset(at); |
101 | } else if (start_ + newFrame + static_cast<std::int64_t>(bytes) > size_) { |
102 | // Flush leading data before "at", retain from "at" onward |
103 | Flush(handler, keep: length_ - newFrame); |
104 | MakeDataContiguous(handler, bytes); |
105 | } else { |
106 | frame_ = newFrame; |
107 | } |
108 | RUNTIME_CHECK(handler, at == fileOffset_ + frame_); |
109 | dirty_ = true; |
110 | length_ = std::max<std::int64_t>(a: length_, b: frame_ + bytes); |
111 | } |
112 | |
113 | void Flush(IoErrorHandler &handler, std::int64_t keep = 0) { |
114 | if (dirty_) { |
115 | while (length_ > keep) { |
116 | std::size_t chunk{ |
117 | std::min<std::size_t>(a: length_ - keep, b: size_ - start_)}; |
118 | std::size_t put{ |
119 | Store().Write(fileOffset_, buffer_ + start_, chunk, handler)}; |
120 | DiscardLeadingBytes(n: put, terminator: handler); |
121 | if (put < chunk) { |
122 | break; |
123 | } |
124 | } |
125 | if (length_ == 0) { |
126 | Reset(at: fileOffset_); |
127 | } |
128 | } |
129 | } |
130 | |
131 | void TruncateFrame(std::int64_t at, IoErrorHandler &handler) { |
132 | RUNTIME_CHECK(handler, !dirty_); |
133 | if (at <= fileOffset_) { |
134 | Reset(at); |
135 | } else if (at < fileOffset_ + length_) { |
136 | length_ = at - fileOffset_; |
137 | } |
138 | } |
139 | |
140 | private: |
141 | STORE &Store() { return static_cast<STORE &>(*this); } |
142 | |
143 | void Reallocate(std::int64_t bytes, const Terminator &terminator) { |
144 | if (bytes > size_) { |
145 | char *old{buffer_}; |
146 | auto oldSize{size_}; |
147 | size_ = std::max<std::int64_t>(a: bytes, b: size_ + minBuffer); |
148 | buffer_ = |
149 | reinterpret_cast<char *>(AllocateMemoryOrCrash(terminator, size_)); |
150 | auto chunk{std::min<std::int64_t>(a: length_, b: oldSize - start_)}; |
151 | std::memcpy(dest: buffer_, src: old + start_, n: chunk); |
152 | start_ = 0; |
153 | std::memcpy(dest: buffer_ + chunk, src: old, n: length_ - chunk); |
154 | FreeMemory(old); |
155 | } |
156 | } |
157 | |
158 | void Reset(FileOffset at) { |
159 | start_ = length_ = frame_ = 0; |
160 | fileOffset_ = at; |
161 | dirty_ = false; |
162 | } |
163 | |
164 | void DiscardLeadingBytes(std::int64_t n, const Terminator &terminator) { |
165 | RUNTIME_CHECK(terminator, length_ >= n); |
166 | length_ -= n; |
167 | if (length_ == 0) { |
168 | start_ = 0; |
169 | } else { |
170 | start_ += n; |
171 | if (start_ >= size_) { |
172 | start_ -= size_; |
173 | } |
174 | } |
175 | if (frame_ >= n) { |
176 | frame_ -= n; |
177 | } else { |
178 | frame_ = 0; |
179 | } |
180 | fileOffset_ += n; |
181 | } |
182 | |
183 | void MakeDataContiguous(IoErrorHandler &handler, std::size_t bytes) { |
184 | if (static_cast<std::int64_t>(start_ + bytes) > size_) { |
185 | // Frame would wrap around; shift current data (if any) to force |
186 | // contiguity. |
187 | RUNTIME_CHECK(handler, length_ < size_); |
188 | if (start_ + length_ <= size_) { |
189 | // [......abcde..] -> [abcde........] |
190 | std::memmove(dest: buffer_, src: buffer_ + start_, n: length_); |
191 | } else { |
192 | // [cde........ab] -> [abcde........] |
193 | auto n{start_ + length_ - size_}; // 3 for cde |
194 | RUNTIME_CHECK(handler, length_ >= n); |
195 | std::memmove(dest: buffer_ + n, src: buffer_ + start_, n: length_ - n); // cdeab |
196 | LeftShiftBufferCircularly(buffer_, bytes: length_, shift: n); // abcde |
197 | } |
198 | start_ = 0; |
199 | } |
200 | } |
201 | |
202 | char *buffer_{nullptr}; |
203 | std::int64_t size_{0}; // current allocated buffer size |
204 | FileOffset fileOffset_{0}; // file offset corresponding to buffer valid data |
205 | std::int64_t start_{0}; // buffer_[] offset of valid data |
206 | std::int64_t length_{0}; // valid data length (can wrap) |
207 | std::int64_t frame_{0}; // offset of current frame in valid data |
208 | bool dirty_{false}; |
209 | }; |
210 | } // namespace Fortran::runtime::io |
211 | #endif // FORTRAN_RUNTIME_BUFFER_H_ |
212 | |