1//===-- runtime/edit-input.cpp --------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "edit-input.h"
10#include "namelist.h"
11#include "utf.h"
12#include "flang/Common/real.h"
13#include "flang/Common/uint128.h"
14#include <algorithm>
15#include <cfenv>
16
17namespace Fortran::runtime::io {
18
19// Checks that a list-directed input value has been entirely consumed and
20// doesn't contain unparsed characters before the next value separator.
21static inline bool IsCharValueSeparator(const DataEdit &edit, char32_t ch) {
22 char32_t comma{
23 edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}};
24 return ch == ' ' || ch == '\t' || ch == comma || ch == '/' ||
25 (edit.IsNamelist() && (ch == '&' || ch == '$'));
26}
27
28static bool CheckCompleteListDirectedField(
29 IoStatementState &io, const DataEdit &edit) {
30 if (edit.IsListDirected()) {
31 std::size_t byteCount;
32 if (auto ch{io.GetCurrentChar(byteCount)}) {
33 if (IsCharValueSeparator(edit, ch: *ch)) {
34 return true;
35 } else {
36 const auto &connection{io.GetConnectionState()};
37 io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator,
38 "invalid character (0x%x) after list-directed input value, "
39 "at column %d in record %d",
40 static_cast<unsigned>(*ch),
41 static_cast<int>(connection.positionInRecord + 1),
42 static_cast<int>(connection.currentRecordNumber));
43 return false;
44 }
45 } else {
46 return true; // end of record: ok
47 }
48 } else {
49 return true;
50 }
51}
52
53template <int LOG2_BASE>
54static bool EditBOZInput(
55 IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
56 // Skip leading white space & zeroes
57 std::optional<int> remaining{io.CueUpInput(edit)};
58 auto start{io.GetConnectionState().positionInRecord};
59 std::optional<char32_t> next{io.NextInField(remaining, edit)};
60 if (next.value_or(u: '?') == '0') {
61 do {
62 start = io.GetConnectionState().positionInRecord;
63 next = io.NextInField(remaining, edit);
64 } while (next && *next == '0');
65 }
66 // Count significant digits after any leading white space & zeroes
67 int digits{0};
68 int significantBits{0};
69 for (; next; next = io.NextInField(remaining, edit)) {
70 char32_t ch{*next};
71 if (ch == ' ' || ch == '\t') {
72 if (edit.modes.editingFlags & blankZero) {
73 ch = '0'; // BZ mode - treat blank as if it were zero
74 } else {
75 continue;
76 }
77 }
78 if (ch >= '0' && ch <= '1') {
79 } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
80 } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
81 } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
82 } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
83 } else if (ch == ',') {
84 break; // end non-list-directed field early
85 } else {
86 io.GetIoErrorHandler().SignalError(
87 msg: "Bad character '%lc' in B/O/Z input field", xs&: ch);
88 return false;
89 }
90 if (digits++ == 0) {
91 significantBits = 4;
92 if (ch >= '0' && ch <= '1') {
93 significantBits = 1;
94 } else if (ch >= '2' && ch <= '3') {
95 significantBits = 2;
96 } else if (ch >= '4' && ch <= '7') {
97 significantBits = 3;
98 } else {
99 significantBits = 4;
100 }
101 } else {
102 significantBits += LOG2_BASE;
103 }
104 }
105 auto significantBytes{static_cast<std::size_t>(significantBits + 7) / 8};
106 if (significantBytes > bytes) {
107 io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow,
108 "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
109 return false;
110 }
111 // Reset to start of significant digits
112 io.HandleAbsolutePosition(byteOffset: start);
113 remaining.reset();
114 // Make a second pass now that the digit count is known
115 std::memset(n, 0, bytes);
116 int increment{isHostLittleEndian ? -1 : 1};
117 auto *data{reinterpret_cast<unsigned char *>(n) +
118 (isHostLittleEndian ? significantBytes - 1 : 0)};
119 int shift{((digits - 1) * LOG2_BASE) & 7};
120 while (digits > 0) {
121 char32_t ch{*io.NextInField(remaining, edit)};
122 int digit{0};
123 if (ch == ' ' || ch == '\t') {
124 if (edit.modes.editingFlags & blankZero) {
125 ch = '0'; // BZ mode - treat blank as if it were zero
126 } else {
127 continue;
128 }
129 }
130 --digits;
131 if (ch >= '0' && ch <= '9') {
132 digit = ch - '0';
133 } else if (ch >= 'A' && ch <= 'F') {
134 digit = ch + 10 - 'A';
135 } else if (ch >= 'a' && ch <= 'f') {
136 digit = ch + 10 - 'a';
137 } else {
138 continue;
139 }
140 if (shift < 0) {
141 if (shift + LOG2_BASE > 0) { // misaligned octal
142 *data |= digit >> -shift;
143 }
144 shift += 8;
145 data += increment;
146 }
147 *data |= digit << shift;
148 shift -= LOG2_BASE;
149 }
150 return CheckCompleteListDirectedField(io, edit);
151}
152
153static inline char32_t GetRadixPointChar(const DataEdit &edit) {
154 return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'};
155}
156
157// Prepares input from a field, and returns the sign, if any, else '\0'.
158static char ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
159 std::optional<char32_t> &next, std::optional<int> &remaining) {
160 remaining = io.CueUpInput(edit);
161 next = io.NextInField(remaining, edit);
162 char sign{'\0'};
163 if (next) {
164 if (*next == '-' || *next == '+') {
165 sign = *next;
166 if (!edit.IsListDirected()) {
167 io.SkipSpaces(remaining);
168 }
169 next = io.NextInField(remaining, edit);
170 }
171 }
172 return sign;
173}
174
175bool EditIntegerInput(
176 IoStatementState &io, const DataEdit &edit, void *n, int kind) {
177 RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
178 switch (edit.descriptor) {
179 case DataEdit::ListDirected:
180 if (IsNamelistNameOrSlash(io)) {
181 return false;
182 }
183 break;
184 case 'G':
185 case 'I':
186 break;
187 case 'B':
188 return EditBOZInput<1>(io, edit, n, bytes: kind);
189 case 'O':
190 return EditBOZInput<3>(io, edit, n, bytes: kind);
191 case 'Z':
192 return EditBOZInput<4>(io, edit, n, bytes: kind);
193 case 'A': // legacy extension
194 return EditCharacterInput(io, edit, x: reinterpret_cast<char *>(n), lengthChars: kind);
195 default:
196 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
197 "Data edit descriptor '%c' may not be used with an INTEGER data item",
198 edit.descriptor);
199 return false;
200 }
201 std::optional<int> remaining;
202 std::optional<char32_t> next;
203 char sign{ScanNumericPrefix(io, edit, next, remaining)};
204 common::UnsignedInt128 value{0};
205 bool any{!!sign};
206 bool overflow{false};
207 for (; next; next = io.NextInField(remaining, edit)) {
208 char32_t ch{*next};
209 if (ch == ' ' || ch == '\t') {
210 if (edit.modes.editingFlags & blankZero) {
211 ch = '0'; // BZ mode - treat blank as if it were zero
212 } else {
213 continue;
214 }
215 }
216 int digit{0};
217 if (ch >= '0' && ch <= '9') {
218 digit = ch - '0';
219 } else if (ch == ',') {
220 break; // end non-list-directed field early
221 } else {
222 io.GetIoErrorHandler().SignalError(
223 msg: "Bad character '%lc' in INTEGER input field", xs&: ch);
224 return false;
225 }
226 static constexpr auto maxu128{~common::UnsignedInt128{0}};
227 static constexpr auto maxu128OverTen{maxu128 / 10};
228 static constexpr int maxLastDigit{
229 static_cast<int>(maxu128 - (maxu128OverTen * 10))};
230 overflow |= value >= maxu128OverTen &&
231 (value > maxu128OverTen || digit > maxLastDigit);
232 value *= 10;
233 value += digit;
234 any = true;
235 }
236 if (!any && !remaining) {
237 io.GetIoErrorHandler().SignalError(
238 msg: "Integer value absent from NAMELIST or list-directed input");
239 return false;
240 }
241 auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)};
242 overflow |= value >= maxForKind && (value > maxForKind || sign != '-');
243 if (overflow) {
244 io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow,
245 "Decimal input overflows INTEGER(%d) variable", kind);
246 return false;
247 }
248 if (sign == '-') {
249 value = -value;
250 }
251 if (any || !io.GetIoErrorHandler().InError()) {
252 // The value is stored in the lower order bits on big endian platform.
253 // When memcpy, shift the value to the higher order bit.
254 auto shft{static_cast<int>(sizeof(value.low())) - kind};
255 // For kind==8 (i.e. shft==0), the value is stored in low_ in big endian.
256 if (!isHostLittleEndian && shft >= 0) {
257 auto l{value.low() << (8 * shft)};
258 std::memcpy(n, &l, kind);
259 } else {
260 std::memcpy(n, &value, kind); // a blank field means zero
261 }
262 return true;
263 } else {
264 return false;
265 }
266}
267
268// Parses a REAL input number from the input source as a normalized
269// fraction into a supplied buffer -- there's an optional '-', a
270// decimal point when the input is not hexadecimal, and at least one
271// digit. Replaces blanks with zeroes where appropriate.
272struct ScannedRealInput {
273 // Number of characters that (should) have been written to the
274 // buffer -- this can be larger than the buffer size, which
275 // indicates buffer overflow. Zero indicates an error.
276 int got{0};
277 int exponent{0}; // adjusted as necessary; binary if isHexadecimal
278 bool isHexadecimal{false}; // 0X...
279};
280static ScannedRealInput ScanRealInput(
281 char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) {
282 std::optional<int> remaining;
283 std::optional<char32_t> next;
284 int got{0};
285 std::optional<int> radixPointOffset;
286 auto Put{[&](char ch) -> void {
287 if (got < bufferSize) {
288 buffer[got] = ch;
289 }
290 ++got;
291 }};
292 char sign{ScanNumericPrefix(io, edit, next, remaining)};
293 if (sign == '-') {
294 Put('-');
295 }
296 bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
297 int exponent{0};
298 if (!next || (!bzMode && *next == ' ') ||
299 (!(edit.modes.editingFlags & decimalComma) && *next == ',')) {
300 if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) {
301 // An empty/blank field means zero when not list-directed.
302 // A fixed-width field containing only a sign is also zero;
303 // this behavior isn't standard-conforming in F'2023 but it is
304 // required to pass FCVS.
305 Put('0');
306 }
307 return {.got: got, .exponent: exponent, .isHexadecimal: false};
308 }
309 char32_t radixPointChar{GetRadixPointChar(edit)};
310 char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
311 bool isHexadecimal{false};
312 if (first == 'N' || first == 'I') {
313 // NaN or infinity - convert to upper case
314 // Subtle: a blank field of digits could be followed by 'E' or 'D',
315 for (; next &&
316 ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
317 next = io.NextInField(remaining, edit)) {
318 if (*next >= 'a' && *next <= 'z') {
319 Put(*next - 'a' + 'A');
320 } else {
321 Put(*next);
322 }
323 }
324 if (next && *next == '(') { // NaN(...)
325 Put('(');
326 int depth{1};
327 while (true) {
328 next = io.NextInField(remaining, edit);
329 if (depth == 0) {
330 break;
331 } else if (!next) {
332 return {}; // error
333 } else if (*next == '(') {
334 ++depth;
335 } else if (*next == ')') {
336 --depth;
337 }
338 Put(*next);
339 }
340 }
341 } else if (first == radixPointChar || (first >= '0' && first <= '9') ||
342 (bzMode && (first == ' ' || first == '\t')) || first == 'E' ||
343 first == 'D' || first == 'Q') {
344 if (first == '0') {
345 next = io.NextInField(remaining, edit);
346 if (next && (*next == 'x' || *next == 'X')) { // 0X...
347 isHexadecimal = true;
348 next = io.NextInField(remaining, edit);
349 } else {
350 Put('0');
351 }
352 }
353 // input field is normalized to a fraction
354 if (!isHexadecimal) {
355 Put('.');
356 }
357 auto start{got};
358 for (; next; next = io.NextInField(remaining, edit)) {
359 char32_t ch{*next};
360 if (ch == ' ' || ch == '\t') {
361 if (isHexadecimal) {
362 return {}; // error
363 } else if (bzMode) {
364 ch = '0'; // BZ mode - treat blank as if it were zero
365 } else {
366 continue; // ignore blank in fixed field
367 }
368 }
369 if (ch == '0' && got == start && !radixPointOffset) {
370 // omit leading zeroes before the radix point
371 } else if (ch >= '0' && ch <= '9') {
372 Put(ch);
373 } else if (ch == radixPointChar && !radixPointOffset) {
374 // The radix point character is *not* copied to the buffer.
375 radixPointOffset = got - start; // # of digits before the radix point
376 } else if (isHexadecimal && ch >= 'A' && ch <= 'F') {
377 Put(ch);
378 } else if (isHexadecimal && ch >= 'a' && ch <= 'f') {
379 Put(ch - 'a' + 'A'); // normalize to capitals
380 } else {
381 break;
382 }
383 }
384 if (got == start) {
385 // Nothing but zeroes and maybe a radix point. F'2018 requires
386 // at least one digit, but F'77 did not, and a bare "." shows up in
387 // the FCVS suite.
388 Put('0'); // emit at least one digit
389 }
390 // In list-directed input, a bad exponent is not consumed.
391 auto nextBeforeExponent{next};
392 auto startExponent{io.GetConnectionState().positionInRecord};
393 bool hasGoodExponent{false};
394 if (next) {
395 if (isHexadecimal) {
396 if (*next == 'p' || *next == 'P') {
397 next = io.NextInField(remaining, edit);
398 } else {
399 // The binary exponent is not optional in the standard.
400 return {}; // error
401 }
402 } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
403 *next == 'q' || *next == 'Q') {
404 // Optional exponent letter. Blanks are allowed between the
405 // optional exponent letter and the exponent value.
406 io.SkipSpaces(remaining);
407 next = io.NextInField(remaining, edit);
408 }
409 }
410 if (next &&
411 (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
412 *next == ' ' || *next == '\t')) {
413 bool negExpo{*next == '-'};
414 if (negExpo || *next == '+') {
415 next = io.NextInField(remaining, edit);
416 }
417 for (; next; next = io.NextInField(remaining, edit)) {
418 if (*next >= '0' && *next <= '9') {
419 hasGoodExponent = true;
420 if (exponent < 10000) {
421 exponent = 10 * exponent + *next - '0';
422 }
423 } else if (*next == ' ' || *next == '\t') {
424 if (isHexadecimal) {
425 break;
426 } else if (bzMode) {
427 hasGoodExponent = true;
428 exponent = 10 * exponent;
429 }
430 } else {
431 break;
432 }
433 }
434 if (negExpo) {
435 exponent = -exponent;
436 }
437 }
438 if (!hasGoodExponent) {
439 if (isHexadecimal) {
440 return {}; // error
441 }
442 // There isn't a good exponent; do not consume it.
443 next = nextBeforeExponent;
444 io.HandleAbsolutePosition(byteOffset: startExponent);
445 // The default exponent is -kP, but the scale factor doesn't affect
446 // an explicit exponent.
447 exponent = -edit.modes.scale;
448 }
449 // Adjust exponent by number of digits before the radix point.
450 if (isHexadecimal) {
451 // Exponents for hexadecimal input are binary.
452 exponent += radixPointOffset.value_or(u: got - start) * 4;
453 } else if (radixPointOffset) {
454 exponent += *radixPointOffset;
455 } else {
456 // When no redix point (or comma) appears in the value, the 'd'
457 // part of the edit descriptor must be interpreted as the number of
458 // digits in the value to be interpreted as being to the *right* of
459 // the assumed radix point (13.7.2.3.2)
460 exponent += got - start - edit.digits.value_or(u: 0);
461 }
462 }
463 // Consume the trailing ')' of a list-directed or NAMELIST complex
464 // input value.
465 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
466 if (next && (*next == ' ' || *next == '\t')) {
467 io.SkipSpaces(remaining);
468 next = io.NextInField(remaining, edit);
469 }
470 if (!next) { // NextInField fails on separators like ')'
471 std::size_t byteCount{0};
472 next = io.GetCurrentChar(byteCount);
473 if (next && *next == ')') {
474 io.HandleRelativePosition(byteOffset: byteCount);
475 }
476 }
477 } else if (remaining) {
478 while (next && (*next == ' ' || *next == '\t')) {
479 next = io.NextInField(remaining, edit);
480 }
481 if (next && (*next != ',' || (edit.modes.editingFlags & decimalComma))) {
482 return {}; // error: unused nonblank character in fixed-width field
483 }
484 }
485 return {.got: got, .exponent: exponent, .isHexadecimal: isHexadecimal};
486}
487
488static void RaiseFPExceptions(decimal::ConversionResultFlags flags) {
489#undef RAISE
490#ifdef feraisexcept // a macro in some environments; omit std::
491#define RAISE feraiseexcept
492#else
493#define RAISE std::feraiseexcept
494#endif
495 if (flags & decimal::ConversionResultFlags::Overflow) {
496 RAISE(FE_OVERFLOW);
497 }
498 if (flags & decimal::ConversionResultFlags::Underflow) {
499 RAISE(FE_UNDERFLOW);
500 }
501 if (flags & decimal::ConversionResultFlags::Inexact) {
502 RAISE(FE_INEXACT);
503 }
504 if (flags & decimal::ConversionResultFlags::Invalid) {
505 RAISE(FE_INVALID);
506 }
507#undef RAISE
508}
509
510// If no special modes are in effect and the form of the input value
511// that's present in the input stream is acceptable to the decimal->binary
512// converter without modification, this fast path for real input
513// saves time by avoiding memory copies and reformatting of the exponent.
514template <int PRECISION>
515static bool TryFastPathRealDecimalInput(
516 IoStatementState &io, const DataEdit &edit, void *n) {
517 if (edit.modes.editingFlags & (blankZero | decimalComma)) {
518 return false;
519 }
520 if (edit.modes.scale != 0) {
521 return false;
522 }
523 const ConnectionState &connection{io.GetConnectionState()};
524 if (connection.internalIoCharKind > 1) {
525 return false; // reading non-default character
526 }
527 const char *str{nullptr};
528 std::size_t got{io.GetNextInputBytes(str)};
529 if (got == 0 || str == nullptr || !connection.recordLength.has_value()) {
530 return false; // could not access reliably-terminated input stream
531 }
532 const char *p{str};
533 std::int64_t maxConsume{
534 std::min<std::int64_t>(a: got, b: edit.width.value_or(u&: got))};
535 const char *limit{str + maxConsume};
536 decimal::ConversionToBinaryResult<PRECISION> converted{
537 decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
538 if (converted.flags & (decimal::Invalid | decimal::Overflow)) {
539 return false;
540 }
541 if (edit.digits.value_or(u: 0) != 0) {
542 // Edit descriptor is Fw.d (or other) with d != 0, which
543 // implies scaling
544 const char *q{str};
545 for (; q < limit; ++q) {
546 if (*q == '.' || *q == 'n' || *q == 'N') {
547 break;
548 }
549 }
550 if (q == limit) {
551 // No explicit decimal point, and not NaN/Inf.
552 return false;
553 }
554 }
555 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
556 // Need to consume a trailing ')', possibly with leading spaces
557 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
558 }
559 if (p < limit && *p == ')') {
560 ++p;
561 } else {
562 return false;
563 }
564 } else if (edit.IsListDirected()) {
565 if (p < limit && !IsCharValueSeparator(edit, ch: *p)) {
566 return false;
567 }
568 } else {
569 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
570 }
571 if (edit.width && p < str + *edit.width) {
572 return false; // unconverted characters remain in fixed width field
573 }
574 }
575 // Success on the fast path!
576 *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
577 converted.binary;
578 io.HandleRelativePosition(byteOffset: p - str);
579 // Set FP exception flags
580 if (converted.flags != decimal::ConversionResultFlags::Exact) {
581 RaiseFPExceptions(converted.flags);
582 }
583 return true;
584}
585
586template <int binaryPrecision>
587decimal::ConversionToBinaryResult<binaryPrecision> ConvertHexadecimal(
588 const char *&p, enum decimal::FortranRounding rounding, int expo) {
589 using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>;
590 using RawType = typename RealType::RawType;
591 bool isNegative{*p == '-'};
592 constexpr RawType one{1};
593 RawType signBit{0};
594 if (isNegative) {
595 ++p;
596 signBit = one << (RealType::bits - 1);
597 }
598 RawType fraction{0};
599 // Adjust the incoming binary P+/- exponent to shift the radix point
600 // to below the LSB and add in the bias.
601 expo += binaryPrecision - 1 + RealType::exponentBias;
602 // Input the fraction.
603 int roundingBit{0};
604 int guardBit{0};
605 for (; *p; ++p) {
606 fraction <<= 4;
607 expo -= 4;
608 if (*p >= '0' && *p <= '9') {
609 fraction |= *p - '0';
610 } else if (*p >= 'A' && *p <= 'F') {
611 fraction |= *p - 'A' + 10; // data were normalized to capitals
612 } else {
613 break;
614 }
615 while (fraction >> binaryPrecision) {
616 guardBit |= roundingBit;
617 roundingBit = (int)fraction & 1;
618 fraction >>= 1;
619 ++expo;
620 }
621 }
622 if (fraction) {
623 // Boost biased expo if too small
624 while (expo < 1) {
625 guardBit |= roundingBit;
626 roundingBit = (int)fraction & 1;
627 fraction >>= 1;
628 ++expo;
629 }
630 // Normalize
631 while (expo > 1 && !(fraction >> (binaryPrecision - 1))) {
632 fraction <<= 1;
633 --expo;
634 }
635 // Rounding
636 bool increase{false};
637 switch (rounding) {
638 case decimal::RoundNearest: // RN & RP
639 increase = roundingBit && (guardBit | ((int)fraction & 1));
640 break;
641 case decimal::RoundUp: // RU
642 increase = !isNegative && (roundingBit | guardBit);
643 break;
644 case decimal::RoundDown: // RD
645 increase = isNegative && (roundingBit | guardBit);
646 break;
647 case decimal::RoundToZero: // RZ
648 break;
649 case decimal::RoundCompatible: // RC
650 increase = roundingBit != 0;
651 break;
652 }
653 if (increase) {
654 ++fraction;
655 if (fraction >> binaryPrecision) {
656 fraction >>= 1;
657 ++expo;
658 }
659 }
660 }
661 // Package & return result
662 constexpr RawType significandMask{(one << RealType::significandBits) - 1};
663 int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
664 if (!fraction) {
665 expo = 0;
666 } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) {
667 expo = 0; // subnormal
668 flags |= decimal::Underflow;
669 } else if (expo >= RealType::maxExponent) {
670 if (rounding == decimal::RoundToZero ||
671 (rounding == decimal::RoundDown && !isNegative) ||
672 (rounding == decimal::RoundUp && isNegative)) {
673 expo = RealType::maxExponent - 1; // +/-HUGE()
674 fraction = significandMask;
675 } else {
676 expo = RealType::maxExponent; // +/-Inf
677 fraction = 0;
678 flags |= decimal::Overflow;
679 }
680 } else {
681 fraction &= significandMask; // remove explicit normalization unless x87
682 }
683 return decimal::ConversionToBinaryResult<binaryPrecision>{
684 RealType{static_cast<RawType>(signBit |
685 static_cast<RawType>(expo) << RealType::significandBits | fraction)},
686 static_cast<decimal::ConversionResultFlags>(flags)};
687}
688
689template <int KIND>
690bool EditCommonRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
691 constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
692 if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) {
693 return CheckCompleteListDirectedField(io, edit);
694 }
695 // Fast path wasn't available or didn't work; go the more general route
696 static constexpr int maxDigits{
697 common::MaxDecimalConversionDigits(binaryPrecision)};
698 static constexpr int bufferSize{maxDigits + 18};
699 char buffer[bufferSize];
700 auto scanned{ScanRealInput(buffer, bufferSize: maxDigits + 2, io, edit)};
701 int got{scanned.got};
702 if (got >= maxDigits + 2) {
703 io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
704 return false;
705 }
706 if (got == 0) {
707 const auto &connection{io.GetConnectionState()};
708 io.GetIoErrorHandler().SignalError(IostatBadRealInput,
709 "Bad real input data at column %d of record %d",
710 static_cast<int>(connection.positionInRecord + 1),
711 static_cast<int>(connection.currentRecordNumber));
712 return false;
713 }
714 decimal::ConversionToBinaryResult<binaryPrecision> converted;
715 const char *p{buffer};
716 if (scanned.isHexadecimal) {
717 buffer[got] = '\0';
718 converted = ConvertHexadecimal<binaryPrecision>(
719 p, edit.modes.round, scanned.exponent);
720 } else {
721 bool hadExtra{got > maxDigits};
722 int exponent{scanned.exponent};
723 if (exponent != 0) {
724 buffer[got++] = 'e';
725 if (exponent < 0) {
726 buffer[got++] = '-';
727 exponent = -exponent;
728 }
729 if (exponent > 9999) {
730 exponent = 9999; // will convert to +/-Inf
731 }
732 if (exponent > 999) {
733 int dig{exponent / 1000};
734 buffer[got++] = '0' + dig;
735 int rest{exponent - 1000 * dig};
736 dig = rest / 100;
737 buffer[got++] = '0' + dig;
738 rest -= 100 * dig;
739 dig = rest / 10;
740 buffer[got++] = '0' + dig;
741 buffer[got++] = '0' + (rest - 10 * dig);
742 } else if (exponent > 99) {
743 int dig{exponent / 100};
744 buffer[got++] = '0' + dig;
745 int rest{exponent - 100 * dig};
746 dig = rest / 10;
747 buffer[got++] = '0' + dig;
748 buffer[got++] = '0' + (rest - 10 * dig);
749 } else if (exponent > 9) {
750 int dig{exponent / 10};
751 buffer[got++] = '0' + dig;
752 buffer[got++] = '0' + (exponent - 10 * dig);
753 } else {
754 buffer[got++] = '0' + exponent;
755 }
756 }
757 buffer[got] = '\0';
758 converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round);
759 if (hadExtra) {
760 converted.flags = static_cast<enum decimal::ConversionResultFlags>(
761 converted.flags | decimal::Inexact);
762 }
763 }
764 if (*p) { // unprocessed junk after value
765 const auto &connection{io.GetConnectionState()};
766 io.GetIoErrorHandler().SignalError(IostatBadRealInput,
767 "Trailing characters after real input data at column %d of record %d",
768 static_cast<int>(connection.positionInRecord + 1),
769 static_cast<int>(connection.currentRecordNumber));
770 return false;
771 }
772 *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
773 converted.binary;
774 // Set FP exception flags
775 if (converted.flags != decimal::ConversionResultFlags::Exact) {
776 if (converted.flags & decimal::ConversionResultFlags::Overflow) {
777 io.GetIoErrorHandler().SignalError(IostatRealInputOverflow);
778 return false;
779 }
780 RaiseFPExceptions(converted.flags);
781 }
782 return CheckCompleteListDirectedField(io, edit);
783}
784
785template <int KIND>
786bool EditRealInput(IoStatementState &io, const DataEdit &edit, void *n) {
787 switch (edit.descriptor) {
788 case DataEdit::ListDirected:
789 if (IsNamelistNameOrSlash(io)) {
790 return false;
791 }
792 return EditCommonRealInput<KIND>(io, edit, n);
793 case DataEdit::ListDirectedRealPart:
794 case DataEdit::ListDirectedImaginaryPart:
795 case 'F':
796 case 'E': // incl. EN, ES, & EX
797 case 'D':
798 case 'G':
799 return EditCommonRealInput<KIND>(io, edit, n);
800 case 'B':
801 return EditBOZInput<1>(io, edit, n,
802 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
803 case 'O':
804 return EditBOZInput<3>(io, edit, n,
805 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
806 case 'Z':
807 return EditBOZInput<4>(io, edit, n,
808 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
809 case 'A': // legacy extension
810 return EditCharacterInput(io, edit, x: reinterpret_cast<char *>(n), lengthChars: KIND);
811 default:
812 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
813 "Data edit descriptor '%c' may not be used for REAL input",
814 edit.descriptor);
815 return false;
816 }
817}
818
819// 13.7.3 in Fortran 2018
820bool EditLogicalInput(IoStatementState &io, const DataEdit &edit, bool &x) {
821 switch (edit.descriptor) {
822 case DataEdit::ListDirected:
823 if (IsNamelistNameOrSlash(io)) {
824 return false;
825 }
826 break;
827 case 'L':
828 case 'G':
829 break;
830 default:
831 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
832 "Data edit descriptor '%c' may not be used for LOGICAL input",
833 edit.descriptor);
834 return false;
835 }
836 std::optional<int> remaining{io.CueUpInput(edit)};
837 std::optional<char32_t> next{io.NextInField(remaining, edit)};
838 if (next && *next == '.') { // skip optional period
839 next = io.NextInField(remaining, edit);
840 }
841 if (!next) {
842 io.GetIoErrorHandler().SignalError(msg: "Empty LOGICAL input field");
843 return false;
844 }
845 switch (*next) {
846 case 'T':
847 case 't':
848 x = true;
849 break;
850 case 'F':
851 case 'f':
852 x = false;
853 break;
854 default:
855 io.GetIoErrorHandler().SignalError(
856 msg: "Bad character '%lc' in LOGICAL input field", xs&: *next);
857 return false;
858 }
859 if (remaining) { // ignore the rest of a fixed-width field
860 io.HandleRelativePosition(byteOffset: *remaining);
861 } else if (edit.descriptor == DataEdit::ListDirected) {
862 while (io.NextInField(remaining, edit)) { // discard rest of field
863 }
864 }
865 return CheckCompleteListDirectedField(io, edit);
866}
867
868// See 13.10.3.1 paragraphs 7-9 in Fortran 2018
869template <typename CHAR>
870static bool EditDelimitedCharacterInput(
871 IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) {
872 bool result{true};
873 while (true) {
874 std::size_t byteCount{0};
875 auto ch{io.GetCurrentChar(byteCount)};
876 if (!ch) {
877 if (io.AdvanceRecord()) {
878 continue;
879 } else {
880 result = false; // EOF in character value
881 break;
882 }
883 }
884 io.HandleRelativePosition(byteOffset: byteCount);
885 if (*ch == delimiter) {
886 auto next{io.GetCurrentChar(byteCount)};
887 if (next && *next == delimiter) {
888 // Repeated delimiter: use as character value
889 io.HandleRelativePosition(byteOffset: byteCount);
890 } else {
891 break; // closing delimiter
892 }
893 }
894 if (length > 0) {
895 *x++ = *ch;
896 --length;
897 }
898 }
899 std::fill_n(x, length, ' ');
900 return result;
901}
902
903template <typename CHAR>
904static bool EditListDirectedCharacterInput(
905 IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) {
906 std::size_t byteCount{0};
907 auto ch{io.GetCurrentChar(byteCount)};
908 if (ch && (*ch == '\'' || *ch == '"')) {
909 io.HandleRelativePosition(byteOffset: byteCount);
910 return EditDelimitedCharacterInput(io, x, length, *ch);
911 }
912 if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) {
913 return false;
914 }
915 // Undelimited list-directed character input: stop at a value separator
916 // or the end of the current record. Subtlety: the "remaining" count
917 // here is a dummy that's used to avoid the interpretation of separators
918 // in NextInField.
919 std::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0};
920 while (std::optional<char32_t> next{io.NextInField(remaining, edit)}) {
921 bool isSep{false};
922 switch (*next) {
923 case ' ':
924 case '\t':
925 case '/':
926 isSep = true;
927 break;
928 case '&':
929 case '$':
930 isSep = edit.IsNamelist();
931 break;
932 case ',':
933 isSep = !(edit.modes.editingFlags & decimalComma);
934 break;
935 case ';':
936 isSep = !!(edit.modes.editingFlags & decimalComma);
937 break;
938 default:
939 break;
940 }
941 if (isSep) {
942 remaining = 0;
943 } else {
944 *x++ = *next;
945 remaining = --length > 0 ? maxUTF8Bytes : 0;
946 }
947 }
948 std::fill_n(x, length, ' ');
949 return true;
950}
951
952template <typename CHAR>
953bool EditCharacterInput(IoStatementState &io, const DataEdit &edit, CHAR *x,
954 std::size_t lengthChars) {
955 switch (edit.descriptor) {
956 case DataEdit::ListDirected:
957 return EditListDirectedCharacterInput(io, x, lengthChars, edit);
958 case 'A':
959 case 'G':
960 break;
961 case 'B':
962 return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x);
963 case 'O':
964 return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x);
965 case 'Z':
966 return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x);
967 default:
968 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
969 "Data edit descriptor '%c' may not be used with a CHARACTER data item",
970 edit.descriptor);
971 return false;
972 }
973 const ConnectionState &connection{io.GetConnectionState()};
974 std::size_t remainingChars{lengthChars};
975 // Skip leading characters.
976 // Their bytes don't count towards INQUIRE(IOLENGTH=).
977 std::size_t skipChars{0};
978 if (edit.width && *edit.width > 0) {
979 remainingChars = *edit.width;
980 if (remainingChars > lengthChars) {
981 skipChars = remainingChars - lengthChars;
982 }
983 }
984 // When the field is wider than the variable, we drop the leading
985 // characters. When the variable is wider than the field, there can be
986 // trailing padding or an EOR condition.
987 const char *input{nullptr};
988 std::size_t readyBytes{0};
989 // Transfer payload bytes; these do count.
990 while (remainingChars > 0) {
991 if (readyBytes == 0) {
992 readyBytes = io.GetNextInputBytes(input);
993 if (readyBytes == 0 ||
994 (readyBytes < remainingChars && edit.modes.nonAdvancing)) {
995 if (io.CheckForEndOfRecord(afterReading: readyBytes)) {
996 if (readyBytes == 0) {
997 // PAD='YES' and no more data
998 std::fill_n(x, lengthChars, ' ');
999 return !io.GetIoErrorHandler().InError();
1000 } else {
1001 // Do partial read(s) then pad on last iteration
1002 }
1003 } else {
1004 return !io.GetIoErrorHandler().InError();
1005 }
1006 }
1007 }
1008 std::size_t chunkBytes;
1009 std::size_t chunkChars{1};
1010 bool skipping{skipChars > 0};
1011 if (connection.isUTF8) {
1012 chunkBytes = MeasureUTF8Bytes(first: *input);
1013 if (skipping) {
1014 --skipChars;
1015 } else if (auto ucs{DecodeUTF8(input)}) {
1016 if ((sizeof *x == 1 && *ucs > 0xff) ||
1017 (sizeof *x == 2 && *ucs > 0xffff)) {
1018 *x++ = '?';
1019 } else {
1020 *x++ = *ucs;
1021 }
1022 --lengthChars;
1023 } else if (chunkBytes == 0) {
1024 // error recovery: skip bad encoding
1025 chunkBytes = 1;
1026 }
1027 } else if (connection.internalIoCharKind > 1) {
1028 // Reading from non-default character internal unit
1029 chunkBytes = connection.internalIoCharKind;
1030 if (skipping) {
1031 --skipChars;
1032 } else {
1033 char32_t buffer{0};
1034 std::memcpy(&buffer, input, chunkBytes);
1035 if ((sizeof *x == 1 && buffer > 0xff) ||
1036 (sizeof *x == 2 && buffer > 0xffff)) {
1037 *x++ = '?';
1038 } else {
1039 *x++ = buffer;
1040 }
1041 --lengthChars;
1042 }
1043 } else if constexpr (sizeof *x > 1) {
1044 // Read single byte with expansion into multi-byte CHARACTER
1045 chunkBytes = 1;
1046 if (skipping) {
1047 --skipChars;
1048 } else {
1049 *x++ = static_cast<unsigned char>(*input);
1050 --lengthChars;
1051 }
1052 } else { // single bytes -> default CHARACTER
1053 if (skipping) {
1054 chunkBytes = std::min<std::size_t>(a: skipChars, b: readyBytes);
1055 chunkChars = chunkBytes;
1056 skipChars -= chunkChars;
1057 } else {
1058 chunkBytes = std::min<std::size_t>(a: remainingChars, b: readyBytes);
1059 chunkBytes = std::min<std::size_t>(a: lengthChars, b: chunkBytes);
1060 chunkChars = chunkBytes;
1061 std::memcpy(x, input, chunkBytes);
1062 x += chunkBytes;
1063 lengthChars -= chunkChars;
1064 }
1065 }
1066 input += chunkBytes;
1067 remainingChars -= chunkChars;
1068 if (!skipping) {
1069 io.GotChar(chunkBytes);
1070 }
1071 io.HandleRelativePosition(byteOffset: chunkBytes);
1072 readyBytes -= chunkBytes;
1073 }
1074 // Pad the remainder of the input variable, if any.
1075 std::fill_n(x, lengthChars, ' ');
1076 return CheckCompleteListDirectedField(io, edit);
1077}
1078
1079template bool EditRealInput<2>(IoStatementState &, const DataEdit &, void *);
1080template bool EditRealInput<3>(IoStatementState &, const DataEdit &, void *);
1081template bool EditRealInput<4>(IoStatementState &, const DataEdit &, void *);
1082template bool EditRealInput<8>(IoStatementState &, const DataEdit &, void *);
1083template bool EditRealInput<10>(IoStatementState &, const DataEdit &, void *);
1084// TODO: double/double
1085template bool EditRealInput<16>(IoStatementState &, const DataEdit &, void *);
1086
1087template bool EditCharacterInput(
1088 IoStatementState &, const DataEdit &, char *, std::size_t);
1089template bool EditCharacterInput(
1090 IoStatementState &, const DataEdit &, char16_t *, std::size_t);
1091template bool EditCharacterInput(
1092 IoStatementState &, const DataEdit &, char32_t *, std::size_t);
1093
1094} // namespace Fortran::runtime::io
1095

source code of flang/runtime/edit-input.cpp