1 | //===-- runtime/edit-input.cpp --------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "edit-input.h" |
10 | #include "namelist.h" |
11 | #include "utf.h" |
12 | #include "flang/Common/real.h" |
13 | #include "flang/Common/uint128.h" |
14 | #include <algorithm> |
15 | #include <cfenv> |
16 | |
17 | namespace Fortran::runtime::io { |
18 | |
19 | // Checks that a list-directed input value has been entirely consumed and |
20 | // doesn't contain unparsed characters before the next value separator. |
21 | static inline bool IsCharValueSeparator(const DataEdit &edit, char32_t ch) { |
22 | char32_t comma{ |
23 | edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}}; |
24 | return ch == ' ' || ch == '\t' || ch == comma || ch == '/' || |
25 | (edit.IsNamelist() && (ch == '&' || ch == '$')); |
26 | } |
27 | |
28 | static bool CheckCompleteListDirectedField( |
29 | IoStatementState &io, const DataEdit &edit) { |
30 | if (edit.IsListDirected()) { |
31 | std::size_t byteCount; |
32 | if (auto ch{io.GetCurrentChar(byteCount)}) { |
33 | if (IsCharValueSeparator(edit, ch: *ch)) { |
34 | return true; |
35 | } else { |
36 | const auto &connection{io.GetConnectionState()}; |
37 | io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator, |
38 | "invalid character (0x%x) after list-directed input value, " |
39 | "at column %d in record %d" , |
40 | static_cast<unsigned>(*ch), |
41 | static_cast<int>(connection.positionInRecord + 1), |
42 | static_cast<int>(connection.currentRecordNumber)); |
43 | return false; |
44 | } |
45 | } else { |
46 | return true; // end of record: ok |
47 | } |
48 | } else { |
49 | return true; |
50 | } |
51 | } |
52 | |
53 | template <int LOG2_BASE> |
54 | static bool EditBOZInput( |
55 | IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) { |
56 | // Skip leading white space & zeroes |
57 | std::optional<int> remaining{io.CueUpInput(edit)}; |
58 | auto start{io.GetConnectionState().positionInRecord}; |
59 | std::optional<char32_t> next{io.NextInField(remaining, edit)}; |
60 | if (next.value_or(u: '?') == '0') { |
61 | do { |
62 | start = io.GetConnectionState().positionInRecord; |
63 | next = io.NextInField(remaining, edit); |
64 | } while (next && *next == '0'); |
65 | } |
66 | // Count significant digits after any leading white space & zeroes |
67 | int digits{0}; |
68 | int significantBits{0}; |
69 | for (; next; next = io.NextInField(remaining, edit)) { |
70 | char32_t ch{*next}; |
71 | if (ch == ' ' || ch == '\t') { |
72 | if (edit.modes.editingFlags & blankZero) { |
73 | ch = '0'; // BZ mode - treat blank as if it were zero |
74 | } else { |
75 | continue; |
76 | } |
77 | } |
78 | if (ch >= '0' && ch <= '1') { |
79 | } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') { |
80 | } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') { |
81 | } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') { |
82 | } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') { |
83 | } else if (ch == ',') { |
84 | break; // end non-list-directed field early |
85 | } else { |
86 | io.GetIoErrorHandler().SignalError( |
87 | msg: "Bad character '%lc' in B/O/Z input field" , xs&: ch); |
88 | return false; |
89 | } |
90 | if (digits++ == 0) { |
91 | significantBits = 4; |
92 | if (ch >= '0' && ch <= '1') { |
93 | significantBits = 1; |
94 | } else if (ch >= '2' && ch <= '3') { |
95 | significantBits = 2; |
96 | } else if (ch >= '4' && ch <= '7') { |
97 | significantBits = 3; |
98 | } else { |
99 | significantBits = 4; |
100 | } |
101 | } else { |
102 | significantBits += LOG2_BASE; |
103 | } |
104 | } |
105 | auto significantBytes{static_cast<std::size_t>(significantBits + 7) / 8}; |
106 | if (significantBytes > bytes) { |
107 | io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow, |
108 | "B/O/Z input of %d digits overflows %zd-byte variable" , digits, bytes); |
109 | return false; |
110 | } |
111 | // Reset to start of significant digits |
112 | io.HandleAbsolutePosition(byteOffset: start); |
113 | remaining.reset(); |
114 | // Make a second pass now that the digit count is known |
115 | std::memset(n, 0, bytes); |
116 | int increment{isHostLittleEndian ? -1 : 1}; |
117 | auto *data{reinterpret_cast<unsigned char *>(n) + |
118 | (isHostLittleEndian ? significantBytes - 1 : 0)}; |
119 | int shift{((digits - 1) * LOG2_BASE) & 7}; |
120 | while (digits > 0) { |
121 | char32_t ch{*io.NextInField(remaining, edit)}; |
122 | int digit{0}; |
123 | if (ch == ' ' || ch == '\t') { |
124 | if (edit.modes.editingFlags & blankZero) { |
125 | ch = '0'; // BZ mode - treat blank as if it were zero |
126 | } else { |
127 | continue; |
128 | } |
129 | } |
130 | --digits; |
131 | if (ch >= '0' && ch <= '9') { |
132 | digit = ch - '0'; |
133 | } else if (ch >= 'A' && ch <= 'F') { |
134 | digit = ch + 10 - 'A'; |
135 | } else if (ch >= 'a' && ch <= 'f') { |
136 | digit = ch + 10 - 'a'; |
137 | } else { |
138 | continue; |
139 | } |
140 | if (shift < 0) { |
141 | if (shift + LOG2_BASE > 0) { // misaligned octal |
142 | *data |= digit >> -shift; |
143 | } |
144 | shift += 8; |
145 | data += increment; |
146 | } |
147 | *data |= digit << shift; |
148 | shift -= LOG2_BASE; |
149 | } |
150 | return CheckCompleteListDirectedField(io, edit); |
151 | } |
152 | |
153 | static inline char32_t GetRadixPointChar(const DataEdit &edit) { |
154 | return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'}; |
155 | } |
156 | |
157 | // Prepares input from a field, and returns the sign, if any, else '\0'. |
158 | static char ScanNumericPrefix(IoStatementState &io, const DataEdit &edit, |
159 | std::optional<char32_t> &next, std::optional<int> &remaining) { |
160 | remaining = io.CueUpInput(edit); |
161 | next = io.NextInField(remaining, edit); |
162 | char sign{'\0'}; |
163 | if (next) { |
164 | if (*next == '-' || *next == '+') { |
165 | sign = *next; |
166 | if (!edit.IsListDirected()) { |
167 | io.SkipSpaces(remaining); |
168 | } |
169 | next = io.NextInField(remaining, edit); |
170 | } |
171 | } |
172 | return sign; |
173 | } |
174 | |
175 | bool EditIntegerInput( |
176 | IoStatementState &io, const DataEdit &edit, void *n, int kind) { |
177 | RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1))); |
178 | switch (edit.descriptor) { |
179 | case DataEdit::ListDirected: |
180 | if (IsNamelistNameOrSlash(io)) { |
181 | return false; |
182 | } |
183 | break; |
184 | case 'G': |
185 | case 'I': |
186 | break; |
187 | case 'B': |
188 | return EditBOZInput<1>(io, edit, n, bytes: kind); |
189 | case 'O': |
190 | return EditBOZInput<3>(io, edit, n, bytes: kind); |
191 | case 'Z': |
192 | return EditBOZInput<4>(io, edit, n, bytes: kind); |
193 | case 'A': // legacy extension |
194 | return EditCharacterInput(io, edit, x: reinterpret_cast<char *>(n), lengthChars: kind); |
195 | default: |
196 | io.GetIoErrorHandler().SignalError(IostatErrorInFormat, |
197 | "Data edit descriptor '%c' may not be used with an INTEGER data item" , |
198 | edit.descriptor); |
199 | return false; |
200 | } |
201 | std::optional<int> remaining; |
202 | std::optional<char32_t> next; |
203 | char sign{ScanNumericPrefix(io, edit, next, remaining)}; |
204 | common::UnsignedInt128 value{0}; |
205 | bool any{!!sign}; |
206 | bool overflow{false}; |
207 | for (; next; next = io.NextInField(remaining, edit)) { |
208 | char32_t ch{*next}; |
209 | if (ch == ' ' || ch == '\t') { |
210 | if (edit.modes.editingFlags & blankZero) { |
211 | ch = '0'; // BZ mode - treat blank as if it were zero |
212 | } else { |
213 | continue; |
214 | } |
215 | } |
216 | int digit{0}; |
217 | if (ch >= '0' && ch <= '9') { |
218 | digit = ch - '0'; |
219 | } else if (ch == ',') { |
220 | break; // end non-list-directed field early |
221 | } else { |
222 | io.GetIoErrorHandler().SignalError( |
223 | msg: "Bad character '%lc' in INTEGER input field" , xs&: ch); |
224 | return false; |
225 | } |
226 | static constexpr auto maxu128{~common::UnsignedInt128{0}}; |
227 | static constexpr auto maxu128OverTen{maxu128 / 10}; |
228 | static constexpr int maxLastDigit{ |
229 | static_cast<int>(maxu128 - (maxu128OverTen * 10))}; |
230 | overflow |= value >= maxu128OverTen && |
231 | (value > maxu128OverTen || digit > maxLastDigit); |
232 | value *= 10; |
233 | value += digit; |
234 | any = true; |
235 | } |
236 | if (!any && !remaining) { |
237 | io.GetIoErrorHandler().SignalError( |
238 | msg: "Integer value absent from NAMELIST or list-directed input" ); |
239 | return false; |
240 | } |
241 | auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)}; |
242 | overflow |= value >= maxForKind && (value > maxForKind || sign != '-'); |
243 | if (overflow) { |
244 | io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow, |
245 | "Decimal input overflows INTEGER(%d) variable" , kind); |
246 | return false; |
247 | } |
248 | if (sign == '-') { |
249 | value = -value; |
250 | } |
251 | if (any || !io.GetIoErrorHandler().InError()) { |
252 | // The value is stored in the lower order bits on big endian platform. |
253 | // When memcpy, shift the value to the higher order bit. |
254 | auto shft{static_cast<int>(sizeof(value.low())) - kind}; |
255 | // For kind==8 (i.e. shft==0), the value is stored in low_ in big endian. |
256 | if (!isHostLittleEndian && shft >= 0) { |
257 | auto l{value.low() << (8 * shft)}; |
258 | std::memcpy(n, &l, kind); |
259 | } else { |
260 | std::memcpy(n, &value, kind); // a blank field means zero |
261 | } |
262 | return true; |
263 | } else { |
264 | return false; |
265 | } |
266 | } |
267 | |
268 | // Parses a REAL input number from the input source as a normalized |
269 | // fraction into a supplied buffer -- there's an optional '-', a |
270 | // decimal point when the input is not hexadecimal, and at least one |
271 | // digit. Replaces blanks with zeroes where appropriate. |
272 | struct ScannedRealInput { |
273 | // Number of characters that (should) have been written to the |
274 | // buffer -- this can be larger than the buffer size, which |
275 | // indicates buffer overflow. Zero indicates an error. |
276 | int got{0}; |
277 | int exponent{0}; // adjusted as necessary; binary if isHexadecimal |
278 | bool isHexadecimal{false}; // 0X... |
279 | }; |
280 | static ScannedRealInput ScanRealInput( |
281 | char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) { |
282 | std::optional<int> remaining; |
283 | std::optional<char32_t> next; |
284 | int got{0}; |
285 | std::optional<int> radixPointOffset; |
286 | auto Put{[&](char ch) -> void { |
287 | if (got < bufferSize) { |
288 | buffer[got] = ch; |
289 | } |
290 | ++got; |
291 | }}; |
292 | char sign{ScanNumericPrefix(io, edit, next, remaining)}; |
293 | if (sign == '-') { |
294 | Put('-'); |
295 | } |
296 | bool bzMode{(edit.modes.editingFlags & blankZero) != 0}; |
297 | int exponent{0}; |
298 | if (!next || (!bzMode && *next == ' ') || |
299 | (!(edit.modes.editingFlags & decimalComma) && *next == ',')) { |
300 | if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) { |
301 | // An empty/blank field means zero when not list-directed. |
302 | // A fixed-width field containing only a sign is also zero; |
303 | // this behavior isn't standard-conforming in F'2023 but it is |
304 | // required to pass FCVS. |
305 | Put('0'); |
306 | } |
307 | return {.got: got, .exponent: exponent, .isHexadecimal: false}; |
308 | } |
309 | char32_t radixPointChar{GetRadixPointChar(edit)}; |
310 | char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next}; |
311 | bool isHexadecimal{false}; |
312 | if (first == 'N' || first == 'I') { |
313 | // NaN or infinity - convert to upper case |
314 | // Subtle: a blank field of digits could be followed by 'E' or 'D', |
315 | for (; next && |
316 | ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z')); |
317 | next = io.NextInField(remaining, edit)) { |
318 | if (*next >= 'a' && *next <= 'z') { |
319 | Put(*next - 'a' + 'A'); |
320 | } else { |
321 | Put(*next); |
322 | } |
323 | } |
324 | if (next && *next == '(') { // NaN(...) |
325 | Put('('); |
326 | int depth{1}; |
327 | while (true) { |
328 | next = io.NextInField(remaining, edit); |
329 | if (depth == 0) { |
330 | break; |
331 | } else if (!next) { |
332 | return {}; // error |
333 | } else if (*next == '(') { |
334 | ++depth; |
335 | } else if (*next == ')') { |
336 | --depth; |
337 | } |
338 | Put(*next); |
339 | } |
340 | } |
341 | } else if (first == radixPointChar || (first >= '0' && first <= '9') || |
342 | (bzMode && (first == ' ' || first == '\t')) || first == 'E' || |
343 | first == 'D' || first == 'Q') { |
344 | if (first == '0') { |
345 | next = io.NextInField(remaining, edit); |
346 | if (next && (*next == 'x' || *next == 'X')) { // 0X... |
347 | isHexadecimal = true; |
348 | next = io.NextInField(remaining, edit); |
349 | } else { |
350 | Put('0'); |
351 | } |
352 | } |
353 | // input field is normalized to a fraction |
354 | if (!isHexadecimal) { |
355 | Put('.'); |
356 | } |
357 | auto start{got}; |
358 | for (; next; next = io.NextInField(remaining, edit)) { |
359 | char32_t ch{*next}; |
360 | if (ch == ' ' || ch == '\t') { |
361 | if (isHexadecimal) { |
362 | return {}; // error |
363 | } else if (bzMode) { |
364 | ch = '0'; // BZ mode - treat blank as if it were zero |
365 | } else { |
366 | continue; // ignore blank in fixed field |
367 | } |
368 | } |
369 | if (ch == '0' && got == start && !radixPointOffset) { |
370 | // omit leading zeroes before the radix point |
371 | } else if (ch >= '0' && ch <= '9') { |
372 | Put(ch); |
373 | } else if (ch == radixPointChar && !radixPointOffset) { |
374 | // The radix point character is *not* copied to the buffer. |
375 | radixPointOffset = got - start; // # of digits before the radix point |
376 | } else if (isHexadecimal && ch >= 'A' && ch <= 'F') { |
377 | Put(ch); |
378 | } else if (isHexadecimal && ch >= 'a' && ch <= 'f') { |
379 | Put(ch - 'a' + 'A'); // normalize to capitals |
380 | } else { |
381 | break; |
382 | } |
383 | } |
384 | if (got == start) { |
385 | // Nothing but zeroes and maybe a radix point. F'2018 requires |
386 | // at least one digit, but F'77 did not, and a bare "." shows up in |
387 | // the FCVS suite. |
388 | Put('0'); // emit at least one digit |
389 | } |
390 | // In list-directed input, a bad exponent is not consumed. |
391 | auto nextBeforeExponent{next}; |
392 | auto startExponent{io.GetConnectionState().positionInRecord}; |
393 | bool hasGoodExponent{false}; |
394 | if (next) { |
395 | if (isHexadecimal) { |
396 | if (*next == 'p' || *next == 'P') { |
397 | next = io.NextInField(remaining, edit); |
398 | } else { |
399 | // The binary exponent is not optional in the standard. |
400 | return {}; // error |
401 | } |
402 | } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' || |
403 | *next == 'q' || *next == 'Q') { |
404 | // Optional exponent letter. Blanks are allowed between the |
405 | // optional exponent letter and the exponent value. |
406 | io.SkipSpaces(remaining); |
407 | next = io.NextInField(remaining, edit); |
408 | } |
409 | } |
410 | if (next && |
411 | (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') || |
412 | *next == ' ' || *next == '\t')) { |
413 | bool negExpo{*next == '-'}; |
414 | if (negExpo || *next == '+') { |
415 | next = io.NextInField(remaining, edit); |
416 | } |
417 | for (; next; next = io.NextInField(remaining, edit)) { |
418 | if (*next >= '0' && *next <= '9') { |
419 | hasGoodExponent = true; |
420 | if (exponent < 10000) { |
421 | exponent = 10 * exponent + *next - '0'; |
422 | } |
423 | } else if (*next == ' ' || *next == '\t') { |
424 | if (isHexadecimal) { |
425 | break; |
426 | } else if (bzMode) { |
427 | hasGoodExponent = true; |
428 | exponent = 10 * exponent; |
429 | } |
430 | } else { |
431 | break; |
432 | } |
433 | } |
434 | if (negExpo) { |
435 | exponent = -exponent; |
436 | } |
437 | } |
438 | if (!hasGoodExponent) { |
439 | if (isHexadecimal) { |
440 | return {}; // error |
441 | } |
442 | // There isn't a good exponent; do not consume it. |
443 | next = nextBeforeExponent; |
444 | io.HandleAbsolutePosition(byteOffset: startExponent); |
445 | // The default exponent is -kP, but the scale factor doesn't affect |
446 | // an explicit exponent. |
447 | exponent = -edit.modes.scale; |
448 | } |
449 | // Adjust exponent by number of digits before the radix point. |
450 | if (isHexadecimal) { |
451 | // Exponents for hexadecimal input are binary. |
452 | exponent += radixPointOffset.value_or(u: got - start) * 4; |
453 | } else if (radixPointOffset) { |
454 | exponent += *radixPointOffset; |
455 | } else { |
456 | // When no redix point (or comma) appears in the value, the 'd' |
457 | // part of the edit descriptor must be interpreted as the number of |
458 | // digits in the value to be interpreted as being to the *right* of |
459 | // the assumed radix point (13.7.2.3.2) |
460 | exponent += got - start - edit.digits.value_or(u: 0); |
461 | } |
462 | } |
463 | // Consume the trailing ')' of a list-directed or NAMELIST complex |
464 | // input value. |
465 | if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) { |
466 | if (next && (*next == ' ' || *next == '\t')) { |
467 | io.SkipSpaces(remaining); |
468 | next = io.NextInField(remaining, edit); |
469 | } |
470 | if (!next) { // NextInField fails on separators like ')' |
471 | std::size_t byteCount{0}; |
472 | next = io.GetCurrentChar(byteCount); |
473 | if (next && *next == ')') { |
474 | io.HandleRelativePosition(byteOffset: byteCount); |
475 | } |
476 | } |
477 | } else if (remaining) { |
478 | while (next && (*next == ' ' || *next == '\t')) { |
479 | next = io.NextInField(remaining, edit); |
480 | } |
481 | if (next && (*next != ',' || (edit.modes.editingFlags & decimalComma))) { |
482 | return {}; // error: unused nonblank character in fixed-width field |
483 | } |
484 | } |
485 | return {.got: got, .exponent: exponent, .isHexadecimal: isHexadecimal}; |
486 | } |
487 | |
488 | static void RaiseFPExceptions(decimal::ConversionResultFlags flags) { |
489 | #undef RAISE |
490 | #ifdef feraisexcept // a macro in some environments; omit std:: |
491 | #define RAISE feraiseexcept |
492 | #else |
493 | #define RAISE std::feraiseexcept |
494 | #endif |
495 | if (flags & decimal::ConversionResultFlags::Overflow) { |
496 | RAISE(FE_OVERFLOW); |
497 | } |
498 | if (flags & decimal::ConversionResultFlags::Underflow) { |
499 | RAISE(FE_UNDERFLOW); |
500 | } |
501 | if (flags & decimal::ConversionResultFlags::Inexact) { |
502 | RAISE(FE_INEXACT); |
503 | } |
504 | if (flags & decimal::ConversionResultFlags::Invalid) { |
505 | RAISE(FE_INVALID); |
506 | } |
507 | #undef RAISE |
508 | } |
509 | |
510 | // If no special modes are in effect and the form of the input value |
511 | // that's present in the input stream is acceptable to the decimal->binary |
512 | // converter without modification, this fast path for real input |
513 | // saves time by avoiding memory copies and reformatting of the exponent. |
514 | template <int PRECISION> |
515 | static bool TryFastPathRealDecimalInput( |
516 | IoStatementState &io, const DataEdit &edit, void *n) { |
517 | if (edit.modes.editingFlags & (blankZero | decimalComma)) { |
518 | return false; |
519 | } |
520 | if (edit.modes.scale != 0) { |
521 | return false; |
522 | } |
523 | const ConnectionState &connection{io.GetConnectionState()}; |
524 | if (connection.internalIoCharKind > 1) { |
525 | return false; // reading non-default character |
526 | } |
527 | const char *str{nullptr}; |
528 | std::size_t got{io.GetNextInputBytes(str)}; |
529 | if (got == 0 || str == nullptr || !connection.recordLength.has_value()) { |
530 | return false; // could not access reliably-terminated input stream |
531 | } |
532 | const char *p{str}; |
533 | std::int64_t maxConsume{ |
534 | std::min<std::int64_t>(a: got, b: edit.width.value_or(u&: got))}; |
535 | const char *limit{str + maxConsume}; |
536 | decimal::ConversionToBinaryResult<PRECISION> converted{ |
537 | decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)}; |
538 | if (converted.flags & (decimal::Invalid | decimal::Overflow)) { |
539 | return false; |
540 | } |
541 | if (edit.digits.value_or(u: 0) != 0) { |
542 | // Edit descriptor is Fw.d (or other) with d != 0, which |
543 | // implies scaling |
544 | const char *q{str}; |
545 | for (; q < limit; ++q) { |
546 | if (*q == '.' || *q == 'n' || *q == 'N') { |
547 | break; |
548 | } |
549 | } |
550 | if (q == limit) { |
551 | // No explicit decimal point, and not NaN/Inf. |
552 | return false; |
553 | } |
554 | } |
555 | if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) { |
556 | // Need to consume a trailing ')', possibly with leading spaces |
557 | for (; p < limit && (*p == ' ' || *p == '\t'); ++p) { |
558 | } |
559 | if (p < limit && *p == ')') { |
560 | ++p; |
561 | } else { |
562 | return false; |
563 | } |
564 | } else if (edit.IsListDirected()) { |
565 | if (p < limit && !IsCharValueSeparator(edit, ch: *p)) { |
566 | return false; |
567 | } |
568 | } else { |
569 | for (; p < limit && (*p == ' ' || *p == '\t'); ++p) { |
570 | } |
571 | if (edit.width && p < str + *edit.width) { |
572 | return false; // unconverted characters remain in fixed width field |
573 | } |
574 | } |
575 | // Success on the fast path! |
576 | *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) = |
577 | converted.binary; |
578 | io.HandleRelativePosition(byteOffset: p - str); |
579 | // Set FP exception flags |
580 | if (converted.flags != decimal::ConversionResultFlags::Exact) { |
581 | RaiseFPExceptions(converted.flags); |
582 | } |
583 | return true; |
584 | } |
585 | |
586 | template <int binaryPrecision> |
587 | decimal::ConversionToBinaryResult<binaryPrecision> ConvertHexadecimal( |
588 | const char *&p, enum decimal::FortranRounding rounding, int expo) { |
589 | using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>; |
590 | using RawType = typename RealType::RawType; |
591 | bool isNegative{*p == '-'}; |
592 | constexpr RawType one{1}; |
593 | RawType signBit{0}; |
594 | if (isNegative) { |
595 | ++p; |
596 | signBit = one << (RealType::bits - 1); |
597 | } |
598 | RawType fraction{0}; |
599 | // Adjust the incoming binary P+/- exponent to shift the radix point |
600 | // to below the LSB and add in the bias. |
601 | expo += binaryPrecision - 1 + RealType::exponentBias; |
602 | // Input the fraction. |
603 | int roundingBit{0}; |
604 | int guardBit{0}; |
605 | for (; *p; ++p) { |
606 | fraction <<= 4; |
607 | expo -= 4; |
608 | if (*p >= '0' && *p <= '9') { |
609 | fraction |= *p - '0'; |
610 | } else if (*p >= 'A' && *p <= 'F') { |
611 | fraction |= *p - 'A' + 10; // data were normalized to capitals |
612 | } else { |
613 | break; |
614 | } |
615 | while (fraction >> binaryPrecision) { |
616 | guardBit |= roundingBit; |
617 | roundingBit = (int)fraction & 1; |
618 | fraction >>= 1; |
619 | ++expo; |
620 | } |
621 | } |
622 | if (fraction) { |
623 | // Boost biased expo if too small |
624 | while (expo < 1) { |
625 | guardBit |= roundingBit; |
626 | roundingBit = (int)fraction & 1; |
627 | fraction >>= 1; |
628 | ++expo; |
629 | } |
630 | // Normalize |
631 | while (expo > 1 && !(fraction >> (binaryPrecision - 1))) { |
632 | fraction <<= 1; |
633 | --expo; |
634 | } |
635 | // Rounding |
636 | bool increase{false}; |
637 | switch (rounding) { |
638 | case decimal::RoundNearest: // RN & RP |
639 | increase = roundingBit && (guardBit | ((int)fraction & 1)); |
640 | break; |
641 | case decimal::RoundUp: // RU |
642 | increase = !isNegative && (roundingBit | guardBit); |
643 | break; |
644 | case decimal::RoundDown: // RD |
645 | increase = isNegative && (roundingBit | guardBit); |
646 | break; |
647 | case decimal::RoundToZero: // RZ |
648 | break; |
649 | case decimal::RoundCompatible: // RC |
650 | increase = roundingBit != 0; |
651 | break; |
652 | } |
653 | if (increase) { |
654 | ++fraction; |
655 | if (fraction >> binaryPrecision) { |
656 | fraction >>= 1; |
657 | ++expo; |
658 | } |
659 | } |
660 | } |
661 | // Package & return result |
662 | constexpr RawType significandMask{(one << RealType::significandBits) - 1}; |
663 | int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact}; |
664 | if (!fraction) { |
665 | expo = 0; |
666 | } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) { |
667 | expo = 0; // subnormal |
668 | flags |= decimal::Underflow; |
669 | } else if (expo >= RealType::maxExponent) { |
670 | if (rounding == decimal::RoundToZero || |
671 | (rounding == decimal::RoundDown && !isNegative) || |
672 | (rounding == decimal::RoundUp && isNegative)) { |
673 | expo = RealType::maxExponent - 1; // +/-HUGE() |
674 | fraction = significandMask; |
675 | } else { |
676 | expo = RealType::maxExponent; // +/-Inf |
677 | fraction = 0; |
678 | flags |= decimal::Overflow; |
679 | } |
680 | } else { |
681 | fraction &= significandMask; // remove explicit normalization unless x87 |
682 | } |
683 | return decimal::ConversionToBinaryResult<binaryPrecision>{ |
684 | RealType{static_cast<RawType>(signBit | |
685 | static_cast<RawType>(expo) << RealType::significandBits | fraction)}, |
686 | static_cast<decimal::ConversionResultFlags>(flags)}; |
687 | } |
688 | |
689 | template <int KIND> |
690 | bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) { |
691 | constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)}; |
692 | if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) { |
693 | return CheckCompleteListDirectedField(io, edit); |
694 | } |
695 | // Fast path wasn't available or didn't work; go the more general route |
696 | static constexpr int maxDigits{ |
697 | common::MaxDecimalConversionDigits(binaryPrecision)}; |
698 | static constexpr int bufferSize{maxDigits + 18}; |
699 | char buffer[bufferSize]; |
700 | auto scanned{ScanRealInput(buffer, bufferSize: maxDigits + 2, io, edit)}; |
701 | int got{scanned.got}; |
702 | if (got >= maxDigits + 2) { |
703 | io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small" ); |
704 | return false; |
705 | } |
706 | if (got == 0) { |
707 | const auto &connection{io.GetConnectionState()}; |
708 | io.GetIoErrorHandler().SignalError(IostatBadRealInput, |
709 | "Bad real input data at column %d of record %d" , |
710 | static_cast<int>(connection.positionInRecord + 1), |
711 | static_cast<int>(connection.currentRecordNumber)); |
712 | return false; |
713 | } |
714 | decimal::ConversionToBinaryResult<binaryPrecision> converted; |
715 | const char *p{buffer}; |
716 | if (scanned.isHexadecimal) { |
717 | buffer[got] = '\0'; |
718 | converted = ConvertHexadecimal<binaryPrecision>( |
719 | p, edit.modes.round, scanned.exponent); |
720 | } else { |
721 | bool {got > maxDigits}; |
722 | int exponent{scanned.exponent}; |
723 | if (exponent != 0) { |
724 | buffer[got++] = 'e'; |
725 | if (exponent < 0) { |
726 | buffer[got++] = '-'; |
727 | exponent = -exponent; |
728 | } |
729 | if (exponent > 9999) { |
730 | exponent = 9999; // will convert to +/-Inf |
731 | } |
732 | if (exponent > 999) { |
733 | int dig{exponent / 1000}; |
734 | buffer[got++] = '0' + dig; |
735 | int rest{exponent - 1000 * dig}; |
736 | dig = rest / 100; |
737 | buffer[got++] = '0' + dig; |
738 | rest -= 100 * dig; |
739 | dig = rest / 10; |
740 | buffer[got++] = '0' + dig; |
741 | buffer[got++] = '0' + (rest - 10 * dig); |
742 | } else if (exponent > 99) { |
743 | int dig{exponent / 100}; |
744 | buffer[got++] = '0' + dig; |
745 | int rest{exponent - 100 * dig}; |
746 | dig = rest / 10; |
747 | buffer[got++] = '0' + dig; |
748 | buffer[got++] = '0' + (rest - 10 * dig); |
749 | } else if (exponent > 9) { |
750 | int dig{exponent / 10}; |
751 | buffer[got++] = '0' + dig; |
752 | buffer[got++] = '0' + (exponent - 10 * dig); |
753 | } else { |
754 | buffer[got++] = '0' + exponent; |
755 | } |
756 | } |
757 | buffer[got] = '\0'; |
758 | converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round); |
759 | if (hadExtra) { |
760 | converted.flags = static_cast<enum decimal::ConversionResultFlags>( |
761 | converted.flags | decimal::Inexact); |
762 | } |
763 | } |
764 | if (*p) { // unprocessed junk after value |
765 | const auto &connection{io.GetConnectionState()}; |
766 | io.GetIoErrorHandler().SignalError(IostatBadRealInput, |
767 | "Trailing characters after real input data at column %d of record %d" , |
768 | static_cast<int>(connection.positionInRecord + 1), |
769 | static_cast<int>(connection.currentRecordNumber)); |
770 | return false; |
771 | } |
772 | *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) = |
773 | converted.binary; |
774 | // Set FP exception flags |
775 | if (converted.flags != decimal::ConversionResultFlags::Exact) { |
776 | if (converted.flags & decimal::ConversionResultFlags::Overflow) { |
777 | io.GetIoErrorHandler().SignalError(IostatRealInputOverflow); |
778 | return false; |
779 | } |
780 | RaiseFPExceptions(converted.flags); |
781 | } |
782 | return CheckCompleteListDirectedField(io, edit); |
783 | } |
784 | |
785 | template <int KIND> |
786 | bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) { |
787 | switch (edit.descriptor) { |
788 | case DataEdit::ListDirected: |
789 | if (IsNamelistNameOrSlash(io)) { |
790 | return false; |
791 | } |
792 | return EditCommonRealInput<KIND>(io, edit, n); |
793 | case DataEdit::ListDirectedRealPart: |
794 | case DataEdit::ListDirectedImaginaryPart: |
795 | case 'F': |
796 | case 'E': // incl. EN, ES, & EX |
797 | case 'D': |
798 | case 'G': |
799 | return EditCommonRealInput<KIND>(io, edit, n); |
800 | case 'B': |
801 | return EditBOZInput<1>(io, edit, n, |
802 | common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); |
803 | case 'O': |
804 | return EditBOZInput<3>(io, edit, n, |
805 | common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); |
806 | case 'Z': |
807 | return EditBOZInput<4>(io, edit, n, |
808 | common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); |
809 | case 'A': // legacy extension |
810 | return EditCharacterInput(io, edit, x: reinterpret_cast<char *>(n), lengthChars: KIND); |
811 | default: |
812 | io.GetIoErrorHandler().SignalError(IostatErrorInFormat, |
813 | "Data edit descriptor '%c' may not be used for REAL input" , |
814 | edit.descriptor); |
815 | return false; |
816 | } |
817 | } |
818 | |
819 | // 13.7.3 in Fortran 2018 |
820 | bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) { |
821 | switch (edit.descriptor) { |
822 | case DataEdit::ListDirected: |
823 | if (IsNamelistNameOrSlash(io)) { |
824 | return false; |
825 | } |
826 | break; |
827 | case 'L': |
828 | case 'G': |
829 | break; |
830 | default: |
831 | io.GetIoErrorHandler().SignalError(IostatErrorInFormat, |
832 | "Data edit descriptor '%c' may not be used for LOGICAL input" , |
833 | edit.descriptor); |
834 | return false; |
835 | } |
836 | std::optional<int> remaining{io.CueUpInput(edit)}; |
837 | std::optional<char32_t> next{io.NextInField(remaining, edit)}; |
838 | if (next && *next == '.') { // skip optional period |
839 | next = io.NextInField(remaining, edit); |
840 | } |
841 | if (!next) { |
842 | io.GetIoErrorHandler().SignalError(msg: "Empty LOGICAL input field" ); |
843 | return false; |
844 | } |
845 | switch (*next) { |
846 | case 'T': |
847 | case 't': |
848 | x = true; |
849 | break; |
850 | case 'F': |
851 | case 'f': |
852 | x = false; |
853 | break; |
854 | default: |
855 | io.GetIoErrorHandler().SignalError( |
856 | msg: "Bad character '%lc' in LOGICAL input field" , xs&: *next); |
857 | return false; |
858 | } |
859 | if (remaining) { // ignore the rest of a fixed-width field |
860 | io.HandleRelativePosition(byteOffset: *remaining); |
861 | } else if (edit.descriptor == DataEdit::ListDirected) { |
862 | while (io.NextInField(remaining, edit)) { // discard rest of field |
863 | } |
864 | } |
865 | return CheckCompleteListDirectedField(io, edit); |
866 | } |
867 | |
868 | // See 13.10.3.1 paragraphs 7-9 in Fortran 2018 |
869 | template <typename CHAR> |
870 | static bool EditDelimitedCharacterInput( |
871 | IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) { |
872 | bool result{true}; |
873 | while (true) { |
874 | std::size_t byteCount{0}; |
875 | auto ch{io.GetCurrentChar(byteCount)}; |
876 | if (!ch) { |
877 | if (io.AdvanceRecord()) { |
878 | continue; |
879 | } else { |
880 | result = false; // EOF in character value |
881 | break; |
882 | } |
883 | } |
884 | io.HandleRelativePosition(byteOffset: byteCount); |
885 | if (*ch == delimiter) { |
886 | auto next{io.GetCurrentChar(byteCount)}; |
887 | if (next && *next == delimiter) { |
888 | // Repeated delimiter: use as character value |
889 | io.HandleRelativePosition(byteOffset: byteCount); |
890 | } else { |
891 | break; // closing delimiter |
892 | } |
893 | } |
894 | if (length > 0) { |
895 | *x++ = *ch; |
896 | --length; |
897 | } |
898 | } |
899 | std::fill_n(x, length, ' '); |
900 | return result; |
901 | } |
902 | |
903 | template <typename CHAR> |
904 | static bool EditListDirectedCharacterInput( |
905 | IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) { |
906 | std::size_t byteCount{0}; |
907 | auto ch{io.GetCurrentChar(byteCount)}; |
908 | if (ch && (*ch == '\'' || *ch == '"')) { |
909 | io.HandleRelativePosition(byteOffset: byteCount); |
910 | return EditDelimitedCharacterInput(io, x, length, *ch); |
911 | } |
912 | if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) { |
913 | return false; |
914 | } |
915 | // Undelimited list-directed character input: stop at a value separator |
916 | // or the end of the current record. Subtlety: the "remaining" count |
917 | // here is a dummy that's used to avoid the interpretation of separators |
918 | // in NextInField. |
919 | std::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0}; |
920 | while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) { |
921 | bool isSep{false}; |
922 | switch (*next) { |
923 | case ' ': |
924 | case '\t': |
925 | case '/': |
926 | isSep = true; |
927 | break; |
928 | case '&': |
929 | case '$': |
930 | isSep = edit.IsNamelist(); |
931 | break; |
932 | case ',': |
933 | isSep = !(edit.modes.editingFlags & decimalComma); |
934 | break; |
935 | case ';': |
936 | isSep = !!(edit.modes.editingFlags & decimalComma); |
937 | break; |
938 | default: |
939 | break; |
940 | } |
941 | if (isSep) { |
942 | remaining = 0; |
943 | } else { |
944 | *x++ = *next; |
945 | remaining = --length > 0 ? maxUTF8Bytes : 0; |
946 | } |
947 | } |
948 | std::fill_n(x, length, ' '); |
949 | return true; |
950 | } |
951 | |
952 | template <typename CHAR> |
953 | bool EditCharacterInput(IoStatementState &io, const DataEdit &edit, CHAR *x, |
954 | std::size_t lengthChars) { |
955 | switch (edit.descriptor) { |
956 | case DataEdit::ListDirected: |
957 | return EditListDirectedCharacterInput(io, x, lengthChars, edit); |
958 | case 'A': |
959 | case 'G': |
960 | break; |
961 | case 'B': |
962 | return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x); |
963 | case 'O': |
964 | return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x); |
965 | case 'Z': |
966 | return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x); |
967 | default: |
968 | io.GetIoErrorHandler().SignalError(IostatErrorInFormat, |
969 | "Data edit descriptor '%c' may not be used with a CHARACTER data item" , |
970 | edit.descriptor); |
971 | return false; |
972 | } |
973 | const ConnectionState &connection{io.GetConnectionState()}; |
974 | std::size_t remainingChars{lengthChars}; |
975 | // Skip leading characters. |
976 | // Their bytes don't count towards INQUIRE(IOLENGTH=). |
977 | std::size_t skipChars{0}; |
978 | if (edit.width && *edit.width > 0) { |
979 | remainingChars = *edit.width; |
980 | if (remainingChars > lengthChars) { |
981 | skipChars = remainingChars - lengthChars; |
982 | } |
983 | } |
984 | // When the field is wider than the variable, we drop the leading |
985 | // characters. When the variable is wider than the field, there can be |
986 | // trailing padding or an EOR condition. |
987 | const char *input{nullptr}; |
988 | std::size_t readyBytes{0}; |
989 | // Transfer payload bytes; these do count. |
990 | while (remainingChars > 0) { |
991 | if (readyBytes == 0) { |
992 | readyBytes = io.GetNextInputBytes(input); |
993 | if (readyBytes == 0 || |
994 | (readyBytes < remainingChars && edit.modes.nonAdvancing)) { |
995 | if (io.CheckForEndOfRecord(afterReading: readyBytes)) { |
996 | if (readyBytes == 0) { |
997 | // PAD='YES' and no more data |
998 | std::fill_n(x, lengthChars, ' '); |
999 | return !io.GetIoErrorHandler().InError(); |
1000 | } else { |
1001 | // Do partial read(s) then pad on last iteration |
1002 | } |
1003 | } else { |
1004 | return !io.GetIoErrorHandler().InError(); |
1005 | } |
1006 | } |
1007 | } |
1008 | std::size_t chunkBytes; |
1009 | std::size_t chunkChars{1}; |
1010 | bool skipping{skipChars > 0}; |
1011 | if (connection.isUTF8) { |
1012 | chunkBytes = MeasureUTF8Bytes(first: *input); |
1013 | if (skipping) { |
1014 | --skipChars; |
1015 | } else if (auto ucs{DecodeUTF8(input)}) { |
1016 | if ((sizeof *x == 1 && *ucs > 0xff) || |
1017 | (sizeof *x == 2 && *ucs > 0xffff)) { |
1018 | *x++ = '?'; |
1019 | } else { |
1020 | *x++ = *ucs; |
1021 | } |
1022 | --lengthChars; |
1023 | } else if (chunkBytes == 0) { |
1024 | // error recovery: skip bad encoding |
1025 | chunkBytes = 1; |
1026 | } |
1027 | } else if (connection.internalIoCharKind > 1) { |
1028 | // Reading from non-default character internal unit |
1029 | chunkBytes = connection.internalIoCharKind; |
1030 | if (skipping) { |
1031 | --skipChars; |
1032 | } else { |
1033 | char32_t buffer{0}; |
1034 | std::memcpy(&buffer, input, chunkBytes); |
1035 | if ((sizeof *x == 1 && buffer > 0xff) || |
1036 | (sizeof *x == 2 && buffer > 0xffff)) { |
1037 | *x++ = '?'; |
1038 | } else { |
1039 | *x++ = buffer; |
1040 | } |
1041 | --lengthChars; |
1042 | } |
1043 | } else if constexpr (sizeof *x > 1) { |
1044 | // Read single byte with expansion into multi-byte CHARACTER |
1045 | chunkBytes = 1; |
1046 | if (skipping) { |
1047 | --skipChars; |
1048 | } else { |
1049 | *x++ = static_cast<unsigned char>(*input); |
1050 | --lengthChars; |
1051 | } |
1052 | } else { // single bytes -> default CHARACTER |
1053 | if (skipping) { |
1054 | chunkBytes = std::min<std::size_t>(a: skipChars, b: readyBytes); |
1055 | chunkChars = chunkBytes; |
1056 | skipChars -= chunkChars; |
1057 | } else { |
1058 | chunkBytes = std::min<std::size_t>(a: remainingChars, b: readyBytes); |
1059 | chunkBytes = std::min<std::size_t>(a: lengthChars, b: chunkBytes); |
1060 | chunkChars = chunkBytes; |
1061 | std::memcpy(x, input, chunkBytes); |
1062 | x += chunkBytes; |
1063 | lengthChars -= chunkChars; |
1064 | } |
1065 | } |
1066 | input += chunkBytes; |
1067 | remainingChars -= chunkChars; |
1068 | if (!skipping) { |
1069 | io.GotChar(chunkBytes); |
1070 | } |
1071 | io.HandleRelativePosition(byteOffset: chunkBytes); |
1072 | readyBytes -= chunkBytes; |
1073 | } |
1074 | // Pad the remainder of the input variable, if any. |
1075 | std::fill_n(x, lengthChars, ' '); |
1076 | return CheckCompleteListDirectedField(io, edit); |
1077 | } |
1078 | |
1079 | template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *); |
1080 | template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *); |
1081 | template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *); |
1082 | template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *); |
1083 | template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *); |
1084 | // TODO: double/double |
1085 | template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *); |
1086 | |
1087 | template bool EditCharacterInput( |
1088 | IoStatementState &, const DataEdit &, char *, std::size_t); |
1089 | template bool EditCharacterInput( |
1090 | IoStatementState &, const DataEdit &, char16_t *, std::size_t); |
1091 | template bool EditCharacterInput( |
1092 | IoStatementState &, const DataEdit &, char32_t *, std::size_t); |
1093 | |
1094 | } // namespace Fortran::runtime::io |
1095 | |