1//===-- lib/runtime/edit-input.cpp ------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "edit-input.h"
10#include "flang-rt/runtime/namelist.h"
11#include "flang-rt/runtime/utf.h"
12#include "flang/Common/optional.h"
13#include "flang/Common/real.h"
14#include "flang/Common/uint128.h"
15#include "flang/Runtime/freestanding-tools.h"
16#include <algorithm>
17#include <cfenv>
18
19namespace Fortran::runtime::io {
20RT_OFFLOAD_API_GROUP_BEGIN
21
22// Checks that a list-directed input value has been entirely consumed and
23// doesn't contain unparsed characters before the next value separator.
24static inline RT_API_ATTRS bool IsCharValueSeparator(
25 const DataEdit &edit, char32_t ch) {
26 char32_t comma{
27 edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}};
28 return ch == ' ' || ch == '\t' || ch == comma || ch == '/' ||
29 (edit.IsNamelist() && (ch == '&' || ch == '$'));
30}
31
32static RT_API_ATTRS bool CheckCompleteListDirectedField(
33 IoStatementState &io, const DataEdit &edit) {
34 if (edit.IsListDirected()) {
35 std::size_t byteCount;
36 if (auto ch{io.GetCurrentChar(byteCount)}) {
37 if (IsCharValueSeparator(edit, *ch)) {
38 return true;
39 } else {
40 const auto &connection{io.GetConnectionState()};
41 io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator,
42 "invalid character (0x%x) after list-directed input value, "
43 "at column %d in record %d",
44 static_cast<unsigned>(*ch),
45 static_cast<int>(connection.positionInRecord + 1),
46 static_cast<int>(connection.currentRecordNumber));
47 return false;
48 }
49 } else {
50 return true; // end of record: ok
51 }
52 } else {
53 return true;
54 }
55}
56
57static inline RT_API_ATTRS char32_t GetSeparatorChar(const DataEdit &edit) {
58 return edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','};
59}
60
61template <int LOG2_BASE>
62static RT_API_ATTRS bool EditBOZInput(
63 IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
64 // Skip leading white space & zeroes
65 Fortran::common::optional<int> remaining{io.CueUpInput(edit)};
66 auto start{io.GetConnectionState().positionInRecord};
67 Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)};
68 if (next.value_or('?') == '0') {
69 do {
70 start = io.GetConnectionState().positionInRecord;
71 next = io.NextInField(remaining, edit);
72 } while (next && *next == '0');
73 }
74 // Count significant digits after any leading white space & zeroes
75 int digits{0};
76 int significantBits{0};
77 const char32_t comma{GetSeparatorChar(edit)};
78 for (; next; next = io.NextInField(remaining, edit)) {
79 char32_t ch{*next};
80 if (ch == ' ' || ch == '\t') {
81 if (edit.modes.editingFlags & blankZero) {
82 ch = '0'; // BZ mode - treat blank as if it were zero
83 } else {
84 continue;
85 }
86 }
87 if (ch >= '0' && ch <= '1') {
88 } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
89 } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
90 } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
91 } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
92 } else if (ch == comma) {
93 break; // end non-list-directed field early
94 } else {
95 io.GetIoErrorHandler().SignalError(
96 "Bad character '%lc' in B/O/Z input field", ch);
97 return false;
98 }
99 if (digits++ == 0) {
100 if (ch >= '0' && ch <= '1') {
101 significantBits = 1;
102 } else if (ch >= '2' && ch <= '3') {
103 significantBits = 2;
104 } else if (ch >= '4' && ch <= '7') {
105 significantBits = 3;
106 } else {
107 significantBits = 4;
108 }
109 } else {
110 significantBits += LOG2_BASE;
111 }
112 }
113 auto significantBytes{static_cast<std::size_t>(significantBits + 7) / 8};
114 if (significantBytes > bytes) {
115 io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow,
116 "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
117 return false;
118 }
119 // Reset to start of significant digits
120 io.HandleAbsolutePosition(start);
121 remaining.reset();
122 // Make a second pass now that the digit count is known
123 std::memset(n, 0, bytes);
124 int increment{isHostLittleEndian ? -1 : 1};
125 auto *data{reinterpret_cast<unsigned char *>(n) +
126 (isHostLittleEndian ? significantBytes - 1 : bytes - significantBytes)};
127 int bitsAfterFirstDigit{(digits - 1) * LOG2_BASE};
128 int shift{bitsAfterFirstDigit & 7};
129 if (shift + (significantBits - bitsAfterFirstDigit) > 8) {
130 shift = shift - 8; // misaligned octal
131 }
132 while (digits > 0) {
133 char32_t ch{io.NextInField(remaining, edit).value_or(' ')};
134 int digit{0};
135 if (ch == ' ' || ch == '\t') {
136 if (edit.modes.editingFlags & blankZero) {
137 ch = '0'; // BZ mode - treat blank as if it were zero
138 } else {
139 continue;
140 }
141 }
142 --digits;
143 if (ch >= '0' && ch <= '9') {
144 digit = ch - '0';
145 } else if (ch >= 'A' && ch <= 'F') {
146 digit = ch + 10 - 'A';
147 } else if (ch >= 'a' && ch <= 'f') {
148 digit = ch + 10 - 'a';
149 } else {
150 continue;
151 }
152 if (shift < 0) {
153 if (shift + LOG2_BASE > 0) { // misaligned octal
154 *data |= digit >> -shift;
155 }
156 shift += 8;
157 data += increment;
158 }
159 *data |= digit << shift;
160 shift -= LOG2_BASE;
161 }
162 return CheckCompleteListDirectedField(io, edit);
163}
164
165static inline RT_API_ATTRS char32_t GetRadixPointChar(const DataEdit &edit) {
166 return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'};
167}
168
169// Prepares input from a field, and returns the sign, if any, else '\0'.
170static RT_API_ATTRS char ScanNumericPrefix(IoStatementState &io,
171 const DataEdit &edit, Fortran::common::optional<char32_t> &next,
172 Fortran::common::optional<int> &remaining,
173 IoStatementState::FastAsciiField *fastField = nullptr) {
174 remaining = io.CueUpInput(edit, fastField);
175 next = io.NextInField(remaining, edit, fastField);
176 char sign{'\0'};
177 if (next) {
178 if (*next == '-' || *next == '+') {
179 sign = *next;
180 if (!edit.IsListDirected()) {
181 io.SkipSpaces(remaining, fastField);
182 }
183 next = io.NextInField(remaining, edit, fastField);
184 }
185 }
186 return sign;
187}
188
189RT_API_ATTRS bool EditIntegerInput(IoStatementState &io, const DataEdit &edit,
190 void *n, int kind, bool isSigned) {
191 auto &handler{io.GetIoErrorHandler()};
192 RUNTIME_CHECK(handler, kind >= 1 && !(kind & (kind - 1)));
193 if (!n) {
194 handler.Crash("Null address for integer input item");
195 }
196 switch (edit.descriptor) {
197 case DataEdit::ListDirected:
198 if (IsNamelistNameOrSlash(io)) {
199 return false;
200 }
201 break;
202 case 'G':
203 case 'I':
204 break;
205 case 'B':
206 return EditBOZInput<1>(io, edit, n, kind);
207 case 'O':
208 return EditBOZInput<3>(io, edit, n, kind);
209 case 'Z':
210 return EditBOZInput<4>(io, edit, n, kind);
211 case 'A': // legacy extension
212 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
213 default:
214 handler.SignalError(IostatErrorInFormat,
215 "Data edit descriptor '%c' may not be used with an INTEGER data item",
216 edit.descriptor);
217 return false;
218 }
219 Fortran::common::optional<int> remaining;
220 Fortran::common::optional<char32_t> next;
221 auto fastField{io.GetUpcomingFastAsciiField()};
222 char sign{ScanNumericPrefix(io, edit, next, remaining, &fastField)};
223 if (sign == '-' && !isSigned) {
224 handler.SignalError("Negative sign in UNSIGNED input field");
225 return false;
226 }
227 common::uint128_t value{0};
228 bool any{!!sign};
229 bool overflow{false};
230 const char32_t comma{GetSeparatorChar(edit)};
231 static constexpr auto maxu128{~common::uint128_t{0}};
232 for (; next; next = io.NextInField(remaining, edit, &fastField)) {
233 char32_t ch{*next};
234 if (ch == ' ' || ch == '\t') {
235 if (edit.modes.editingFlags & blankZero) {
236 ch = '0'; // BZ mode - treat blank as if it were zero
237 } else {
238 continue;
239 }
240 }
241 int digit{0};
242 if (ch >= '0' && ch <= '9') {
243 digit = ch - '0';
244 } else if (ch == comma) {
245 break; // end non-list-directed field early
246 } else {
247 if (edit.modes.inNamelist && ch == GetRadixPointChar(edit)) {
248 // Ignore any fractional part that might appear in NAMELIST integer
249 // input, like a few other Fortran compilers do.
250 // TODO: also process exponents? Some compilers do, but they obviously
251 // can't just be ignored.
252 while ((next = io.NextInField(remaining, edit, &fastField))) {
253 if (*next < '0' || *next > '9') {
254 break;
255 }
256 }
257 if (!next || *next == comma) {
258 break;
259 }
260 }
261 handler.SignalError("Bad character '%lc' in INTEGER input field", ch);
262 return false;
263 }
264 static constexpr auto maxu128OverTen{maxu128 / 10};
265 static constexpr int maxLastDigit{
266 static_cast<int>(maxu128 - (maxu128OverTen * 10))};
267 overflow |= value >= maxu128OverTen &&
268 (value > maxu128OverTen || digit > maxLastDigit);
269 value *= 10;
270 value += digit;
271 any = true;
272 }
273 if (!any && !remaining) {
274 handler.SignalError(
275 "Integer value absent from NAMELIST or list-directed input");
276 return false;
277 }
278 if (isSigned) {
279 auto maxForKind{common::uint128_t{1} << ((8 * kind) - 1)};
280 overflow |= value >= maxForKind && (value > maxForKind || sign != '-');
281 } else {
282 auto maxForKind{maxu128 >> (((16 - kind) * 8) + (isSigned ? 1 : 0))};
283 overflow |= value >= maxForKind;
284 }
285 if (overflow) {
286 handler.SignalError(IostatIntegerInputOverflow,
287 "Decimal input overflows INTEGER(%d) variable", kind);
288 return false;
289 }
290 if (sign == '-') {
291 value = -value;
292 }
293 if (any || !handler.InError()) {
294 // The value is stored in the lower order bits on big endian platform.
295 // For memcpy, shift the value to the highest order bits.
296#if USING_NATIVE_INT128_T
297 auto shft{static_cast<int>(sizeof value - kind)};
298 if (!isHostLittleEndian && shft >= 0) {
299 auto shifted{value << (8 * shft)};
300 std::memcpy(n, &shifted, kind);
301 } else {
302 std::memcpy(n, &value, kind); // a blank field means zero
303 }
304#else
305 auto shft{static_cast<int>(sizeof(value.low())) - kind};
306 // For kind==8 (i.e. shft==0), the value is stored in low_ in big endian.
307 if (!isHostLittleEndian && shft >= 0) {
308 auto l{value.low() << (8 * shft)};
309 std::memcpy(n, &l, kind);
310 } else {
311 std::memcpy(n, &value, kind); // a blank field means zero
312 }
313#endif
314 io.GotChar(fastField.got());
315 return true;
316 } else {
317 return false;
318 }
319}
320
321// Parses a REAL input number from the input source as a normalized
322// fraction into a supplied buffer -- there's an optional '-', a
323// decimal point when the input is not hexadecimal, and at least one
324// digit. Replaces blanks with zeroes where appropriate.
325struct ScannedRealInput {
326 // Number of characters that (should) have been written to the
327 // buffer -- this can be larger than the buffer size, which
328 // indicates buffer overflow. Zero indicates an error.
329 int got{0};
330 int exponent{0}; // adjusted as necessary; binary if isHexadecimal
331 bool isHexadecimal{false}; // 0X...
332};
333static RT_API_ATTRS ScannedRealInput ScanRealInput(
334 char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) {
335 Fortran::common::optional<int> remaining;
336 Fortran::common::optional<char32_t> next;
337 int got{0};
338 Fortran::common::optional<int> radixPointOffset;
339 // The following lambda definition violates the conding style,
340 // but cuda-11.8 nvcc hits an internal error with the brace initialization.
341 auto Put = [&](char ch) -> void {
342 if (got < bufferSize) {
343 buffer[got] = ch;
344 }
345 ++got;
346 };
347 char sign{ScanNumericPrefix(io, edit, next, remaining)};
348 if (sign == '-') {
349 Put('-');
350 }
351 bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
352 int exponent{0};
353 if (!next || (!bzMode && *next == ' ') ||
354 (!(edit.modes.editingFlags & decimalComma) && *next == ',')) {
355 if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) {
356 // An empty/blank field means zero when not list-directed.
357 // A fixed-width field containing only a sign is also zero;
358 // this behavior isn't standard-conforming in F'2023 but it is
359 // required to pass FCVS.
360 Put('0');
361 }
362 return {got, exponent, false};
363 }
364 char32_t radixPointChar{GetRadixPointChar(edit)};
365 char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
366 bool isHexadecimal{false};
367 if (first == 'N' || first == 'I') {
368 // NaN or infinity - convert to upper case
369 // Subtle: a blank field of digits could be followed by 'E' or 'D',
370 for (; next &&
371 ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
372 next = io.NextInField(remaining, edit)) {
373 if (*next >= 'a' && *next <= 'z') {
374 Put(*next - 'a' + 'A');
375 } else {
376 Put(*next);
377 }
378 }
379 if (next && *next == '(') { // NaN(...)
380 Put('(');
381 int depth{1};
382 while (true) {
383 next = io.NextInField(remaining, edit);
384 if (depth == 0) {
385 break;
386 } else if (!next) {
387 return {}; // error
388 } else if (*next == '(') {
389 ++depth;
390 } else if (*next == ')') {
391 --depth;
392 }
393 Put(*next);
394 }
395 }
396 } else if (first == radixPointChar || (first >= '0' && first <= '9') ||
397 (bzMode && (first == ' ' || first == '\t')) || first == 'E' ||
398 first == 'D' || first == 'Q') {
399 if (first == '0') {
400 next = io.NextInField(remaining, edit);
401 if (next && (*next == 'x' || *next == 'X')) { // 0X...
402 isHexadecimal = true;
403 next = io.NextInField(remaining, edit);
404 } else {
405 Put('0');
406 }
407 }
408 // input field is normalized to a fraction
409 if (!isHexadecimal) {
410 Put('.');
411 }
412 auto start{got};
413 for (; next; next = io.NextInField(remaining, edit)) {
414 char32_t ch{*next};
415 if (ch == ' ' || ch == '\t') {
416 if (isHexadecimal) {
417 return {}; // error
418 } else if (bzMode) {
419 ch = '0'; // BZ mode - treat blank as if it were zero
420 } else {
421 continue; // ignore blank in fixed field
422 }
423 }
424 if (ch == '0' && got == start && !radixPointOffset) {
425 // omit leading zeroes before the radix point
426 } else if (ch >= '0' && ch <= '9') {
427 Put(ch);
428 } else if (ch == radixPointChar && !radixPointOffset) {
429 // The radix point character is *not* copied to the buffer.
430 radixPointOffset = got - start; // # of digits before the radix point
431 } else if (isHexadecimal && ch >= 'A' && ch <= 'F') {
432 Put(ch);
433 } else if (isHexadecimal && ch >= 'a' && ch <= 'f') {
434 Put(ch - 'a' + 'A'); // normalize to capitals
435 } else {
436 break;
437 }
438 }
439 if (got == start) {
440 // Nothing but zeroes and maybe a radix point. F'2018 requires
441 // at least one digit, but F'77 did not, and a bare "." shows up in
442 // the FCVS suite.
443 Put('0'); // emit at least one digit
444 }
445 // In list-directed input, a bad exponent is not consumed.
446 auto nextBeforeExponent{next};
447 auto startExponent{io.GetConnectionState().positionInRecord};
448 bool hasGoodExponent{false};
449 if (next) {
450 if (isHexadecimal) {
451 if (*next == 'p' || *next == 'P') {
452 next = io.NextInField(remaining, edit);
453 } else {
454 // The binary exponent is not optional in the standard.
455 return {}; // error
456 }
457 } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
458 *next == 'q' || *next == 'Q') {
459 // Optional exponent letter. Blanks are allowed between the
460 // optional exponent letter and the exponent value.
461 io.SkipSpaces(remaining);
462 next = io.NextInField(remaining, edit);
463 }
464 }
465 if (next &&
466 (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
467 *next == ' ' || *next == '\t')) {
468 bool negExpo{*next == '-'};
469 if (negExpo || *next == '+') {
470 next = io.NextInField(remaining, edit);
471 }
472 for (; next; next = io.NextInField(remaining, edit)) {
473 if (*next >= '0' && *next <= '9') {
474 hasGoodExponent = true;
475 if (exponent < 10000) {
476 exponent = 10 * exponent + *next - '0';
477 }
478 } else if (*next == ' ' || *next == '\t') {
479 if (isHexadecimal) {
480 break;
481 } else if (bzMode) {
482 hasGoodExponent = true;
483 exponent = 10 * exponent;
484 }
485 } else {
486 break;
487 }
488 }
489 if (negExpo) {
490 exponent = -exponent;
491 }
492 }
493 if (!hasGoodExponent) {
494 if (isHexadecimal) {
495 return {}; // error
496 }
497 // There isn't a good exponent; do not consume it.
498 next = nextBeforeExponent;
499 io.HandleAbsolutePosition(startExponent);
500 // The default exponent is -kP, but the scale factor doesn't affect
501 // an explicit exponent.
502 exponent = -edit.modes.scale;
503 }
504 // Adjust exponent by number of digits before the radix point.
505 if (isHexadecimal) {
506 // Exponents for hexadecimal input are binary.
507 exponent += radixPointOffset.value_or(got - start) * 4;
508 } else if (radixPointOffset) {
509 exponent += *radixPointOffset;
510 } else {
511 // When no redix point (or comma) appears in the value, the 'd'
512 // part of the edit descriptor must be interpreted as the number of
513 // digits in the value to be interpreted as being to the *right* of
514 // the assumed radix point (13.7.2.3.2)
515 exponent += got - start - edit.digits.value_or(0);
516 }
517 }
518 // Consume the trailing ')' of a list-directed or NAMELIST complex
519 // input value.
520 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
521 if (next && (*next == ' ' || *next == '\t')) {
522 io.SkipSpaces(remaining);
523 next = io.NextInField(remaining, edit);
524 }
525 if (!next) { // NextInField fails on separators like ')'
526 std::size_t byteCount{0};
527 next = io.GetCurrentChar(byteCount);
528 if (next && *next == ')') {
529 io.HandleRelativePosition(byteCount);
530 }
531 }
532 } else if (remaining) {
533 while (next && (*next == ' ' || *next == '\t')) {
534 next = io.NextInField(remaining, edit);
535 }
536 if (next && (*next != ',' || (edit.modes.editingFlags & decimalComma))) {
537 return {}; // error: unused nonblank character in fixed-width field
538 }
539 }
540 return {got, exponent, isHexadecimal};
541}
542
543static RT_API_ATTRS void RaiseFPExceptions(
544 decimal::ConversionResultFlags flags) {
545#undef RAISE
546#if defined(RT_DEVICE_COMPILATION)
547 Terminator terminator(__FILE__, __LINE__);
548#define RAISE(e) \
549 terminator.Crash( \
550 "not implemented yet: raising FP exception in device code: %s", #e);
551#else // !defined(RT_DEVICE_COMPILATION)
552#ifdef feraisexcept // a macro in some environments; omit std::
553#define RAISE feraiseexcept
554#else
555#define RAISE std::feraiseexcept
556#endif
557#endif // !defined(RT_DEVICE_COMPILATION)
558
559// Some environment (e.g. emscripten, musl) don't define FE_OVERFLOW as allowed
560// by c99 (but not c++11) :-/
561#if defined(FE_OVERFLOW) || defined(RT_DEVICE_COMPILATION)
562 if (flags & decimal::ConversionResultFlags::Overflow) {
563 RAISE(FE_OVERFLOW);
564 }
565#endif
566#if defined(FE_UNDERFLOW) || defined(RT_DEVICE_COMPILATION)
567 if (flags & decimal::ConversionResultFlags::Underflow) {
568 RAISE(FE_UNDERFLOW);
569 }
570#endif
571#if defined(FE_INEXACT) || defined(RT_DEVICE_COMPILATION)
572 if (flags & decimal::ConversionResultFlags::Inexact) {
573 RAISE(FE_INEXACT);
574 }
575#endif
576#if defined(FE_INVALID) || defined(RT_DEVICE_COMPILATION)
577 if (flags & decimal::ConversionResultFlags::Invalid) {
578 RAISE(FE_INVALID);
579 }
580#endif
581#undef RAISE
582}
583
584// If no special modes are in effect and the form of the input value
585// that's present in the input stream is acceptable to the decimal->binary
586// converter without modification, this fast path for real input
587// saves time by avoiding memory copies and reformatting of the exponent.
588template <int PRECISION>
589static RT_API_ATTRS bool TryFastPathRealDecimalInput(
590 IoStatementState &io, const DataEdit &edit, void *n) {
591 if (edit.modes.editingFlags & (blankZero | decimalComma)) {
592 return false;
593 }
594 if (edit.modes.scale != 0) {
595 return false;
596 }
597 const ConnectionState &connection{io.GetConnectionState()};
598 if (connection.internalIoCharKind > 1) {
599 return false; // reading non-default character
600 }
601 const char *str{nullptr};
602 std::size_t got{io.GetNextInputBytes(str)};
603 if (got == 0 || str == nullptr || !connection.recordLength.has_value()) {
604 return false; // could not access reliably-terminated input stream
605 }
606 const char *p{str};
607 std::int64_t maxConsume{
608 std::min<std::int64_t>(got, edit.width.value_or(got))};
609 const char *limit{str + maxConsume};
610 decimal::ConversionToBinaryResult<PRECISION> converted{
611 decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
612 if (converted.flags & (decimal::Invalid | decimal::Overflow)) {
613 return false;
614 }
615 if (edit.digits.value_or(0) != 0) {
616 // Edit descriptor is Fw.d (or other) with d != 0, which
617 // implies scaling
618 const char *q{str};
619 for (; q < limit; ++q) {
620 if (*q == '.' || *q == 'n' || *q == 'N') {
621 break;
622 }
623 }
624 if (q == limit) {
625 // No explicit decimal point, and not NaN/Inf.
626 return false;
627 }
628 }
629 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
630 // Need to consume a trailing ')', possibly with leading spaces
631 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
632 }
633 if (p < limit && *p == ')') {
634 ++p;
635 } else {
636 return false;
637 }
638 } else if (edit.IsListDirected()) {
639 if (p < limit && !IsCharValueSeparator(edit, *p)) {
640 return false;
641 }
642 } else {
643 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
644 }
645 if (edit.width && p < str + *edit.width) {
646 return false; // unconverted characters remain in fixed width field
647 }
648 }
649 // Success on the fast path!
650 *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
651 converted.binary;
652 io.HandleRelativePosition(p - str);
653 // Set FP exception flags
654 if (converted.flags != decimal::ConversionResultFlags::Exact) {
655 RaiseFPExceptions(converted.flags);
656 }
657 return true;
658}
659
660template <int binaryPrecision>
661RT_API_ATTRS decimal::ConversionToBinaryResult<binaryPrecision>
662ConvertHexadecimal(
663 const char *&p, enum decimal::FortranRounding rounding, int expo) {
664 using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>;
665 using RawType = typename RealType::RawType;
666 bool isNegative{*p == '-'};
667 constexpr RawType one{1};
668 RawType signBit{0};
669 if (isNegative) {
670 ++p;
671 signBit = one << (RealType::bits - 1);
672 }
673 RawType fraction{0};
674 // Adjust the incoming binary P+/- exponent to shift the radix point
675 // to below the LSB and add in the bias.
676 expo += binaryPrecision - 1 + RealType::exponentBias;
677 // Input the fraction.
678 int roundingBit{0};
679 int guardBit{0};
680 for (; *p; ++p) {
681 fraction <<= 4;
682 expo -= 4;
683 if (*p >= '0' && *p <= '9') {
684 fraction |= *p - '0';
685 } else if (*p >= 'A' && *p <= 'F') {
686 fraction |= *p - 'A' + 10; // data were normalized to capitals
687 } else {
688 break;
689 }
690 if (fraction >> binaryPrecision) {
691 while (fraction >> binaryPrecision) {
692 guardBit |= roundingBit;
693 roundingBit = (int)fraction & 1;
694 fraction >>= 1;
695 ++expo;
696 }
697 // Consume excess digits
698 while (*++p) {
699 if (*p == '0') {
700 } else if ((*p >= '1' && *p <= '9') || (*p >= 'A' && *p <= 'F')) {
701 guardBit = 1;
702 } else {
703 break;
704 }
705 }
706 break;
707 }
708 }
709 if (fraction) {
710 // Boost biased expo if too small
711 while (expo < 1) {
712 guardBit |= roundingBit;
713 roundingBit = (int)fraction & 1;
714 fraction >>= 1;
715 ++expo;
716 }
717 // Normalize
718 while (expo > 1 && !(fraction >> (binaryPrecision - 1))) {
719 fraction <<= 1;
720 --expo;
721 guardBit = roundingBit = 0;
722 }
723 }
724 // Rounding
725 bool increase{false};
726 switch (rounding) {
727 case decimal::RoundNearest: // RN & RP
728 increase = roundingBit && (guardBit | ((int)fraction & 1));
729 break;
730 case decimal::RoundUp: // RU
731 increase = !isNegative && (roundingBit | guardBit);
732 break;
733 case decimal::RoundDown: // RD
734 increase = isNegative && (roundingBit | guardBit);
735 break;
736 case decimal::RoundToZero: // RZ
737 break;
738 case decimal::RoundCompatible: // RC
739 increase = roundingBit != 0;
740 break;
741 }
742 if (increase) {
743 ++fraction;
744 if (fraction >> binaryPrecision) {
745 fraction >>= 1;
746 ++expo;
747 }
748 }
749 // Package & return result
750 constexpr RawType significandMask{(one << RealType::significandBits) - 1};
751 int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
752 if (!fraction) {
753 expo = 0;
754 } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) {
755 expo = 0; // subnormal
756 flags |= decimal::Underflow;
757 } else if (expo >= RealType::maxExponent) {
758 if (rounding == decimal::RoundToZero ||
759 (rounding == decimal::RoundDown && !isNegative) ||
760 (rounding == decimal::RoundUp && isNegative)) {
761 expo = RealType::maxExponent - 1; // +/-HUGE()
762 fraction = significandMask;
763 } else {
764 expo = RealType::maxExponent; // +/-Inf
765 fraction = 0;
766 flags |= decimal::Overflow;
767 }
768 } else {
769 fraction &= significandMask; // remove explicit normalization unless x87
770 }
771 return decimal::ConversionToBinaryResult<binaryPrecision>{
772 RealType{static_cast<RawType>(signBit |
773 static_cast<RawType>(expo) << RealType::significandBits | fraction)},
774 static_cast<decimal::ConversionResultFlags>(flags)};
775}
776
777template <int KIND>
778RT_API_ATTRS bool EditCommonRealInput(
779 IoStatementState &io, const DataEdit &edit, void *n) {
780 constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
781 if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) {
782 return CheckCompleteListDirectedField(io, edit);
783 }
784 // Fast path wasn't available or didn't work; go the more general route
785 static constexpr int maxDigits{
786 common::MaxDecimalConversionDigits(binaryPrecision)};
787 static constexpr int bufferSize{maxDigits + 18};
788 char buffer[bufferSize];
789 auto scanned{ScanRealInput(buffer, maxDigits + 2, io, edit)};
790 int got{scanned.got};
791 if (got >= maxDigits + 2) {
792 io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
793 return false;
794 }
795 if (got == 0) {
796 const auto &connection{io.GetConnectionState()};
797 io.GetIoErrorHandler().SignalError(IostatBadRealInput,
798 "Bad real input data at column %d of record %d",
799 static_cast<int>(connection.positionInRecord + 1),
800 static_cast<int>(connection.currentRecordNumber));
801 return false;
802 }
803 decimal::ConversionToBinaryResult<binaryPrecision> converted;
804 const char *p{buffer};
805 if (scanned.isHexadecimal) {
806 buffer[got] = '\0';
807 converted = ConvertHexadecimal<binaryPrecision>(
808 p, edit.modes.round, scanned.exponent);
809 } else {
810 bool hadExtra{got > maxDigits};
811 int exponent{scanned.exponent};
812 if (exponent != 0) {
813 buffer[got++] = 'e';
814 if (exponent < 0) {
815 buffer[got++] = '-';
816 exponent = -exponent;
817 }
818 if (exponent > 9999) {
819 exponent = 9999; // will convert to +/-Inf
820 }
821 if (exponent > 999) {
822 int dig{exponent / 1000};
823 buffer[got++] = '0' + dig;
824 int rest{exponent - 1000 * dig};
825 dig = rest / 100;
826 buffer[got++] = '0' + dig;
827 rest -= 100 * dig;
828 dig = rest / 10;
829 buffer[got++] = '0' + dig;
830 buffer[got++] = '0' + (rest - 10 * dig);
831 } else if (exponent > 99) {
832 int dig{exponent / 100};
833 buffer[got++] = '0' + dig;
834 int rest{exponent - 100 * dig};
835 dig = rest / 10;
836 buffer[got++] = '0' + dig;
837 buffer[got++] = '0' + (rest - 10 * dig);
838 } else if (exponent > 9) {
839 int dig{exponent / 10};
840 buffer[got++] = '0' + dig;
841 buffer[got++] = '0' + (exponent - 10 * dig);
842 } else {
843 buffer[got++] = '0' + exponent;
844 }
845 }
846 buffer[got] = '\0';
847 converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round);
848 if (hadExtra) {
849 converted.flags = static_cast<enum decimal::ConversionResultFlags>(
850 converted.flags | decimal::Inexact);
851 }
852 }
853 if (*p) { // unprocessed junk after value
854 const auto &connection{io.GetConnectionState()};
855 io.GetIoErrorHandler().SignalError(IostatBadRealInput,
856 "Trailing characters after real input data at column %d of record %d",
857 static_cast<int>(connection.positionInRecord + 1),
858 static_cast<int>(connection.currentRecordNumber));
859 return false;
860 }
861 *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
862 converted.binary;
863 // Set FP exception flags
864 if (converted.flags != decimal::ConversionResultFlags::Exact) {
865 if (converted.flags & decimal::ConversionResultFlags::Overflow) {
866 io.GetIoErrorHandler().SignalError(IostatRealInputOverflow);
867 return false;
868 }
869 RaiseFPExceptions(converted.flags);
870 }
871 return CheckCompleteListDirectedField(io, edit);
872}
873
874template <int KIND>
875RT_API_ATTRS bool EditRealInput(
876 IoStatementState &io, const DataEdit &edit, void *n) {
877 switch (edit.descriptor) {
878 case DataEdit::ListDirected:
879 if (IsNamelistNameOrSlash(io)) {
880 return false;
881 }
882 return EditCommonRealInput<KIND>(io, edit, n);
883 case DataEdit::ListDirectedRealPart:
884 case DataEdit::ListDirectedImaginaryPart:
885 case 'F':
886 case 'E': // incl. EN, ES, & EX
887 case 'D':
888 case 'G':
889 return EditCommonRealInput<KIND>(io, edit, n);
890 case 'B':
891 return EditBOZInput<1>(io, edit, n,
892 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
893 case 'O':
894 return EditBOZInput<3>(io, edit, n,
895 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
896 case 'Z':
897 return EditBOZInput<4>(io, edit, n,
898 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
899 case 'A': // legacy extension
900 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
901 default:
902 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
903 "Data edit descriptor '%c' may not be used for REAL input",
904 edit.descriptor);
905 return false;
906 }
907}
908
909// 13.7.3 in Fortran 2018
910RT_API_ATTRS bool EditLogicalInput(
911 IoStatementState &io, const DataEdit &edit, bool &x) {
912 switch (edit.descriptor) {
913 case DataEdit::ListDirected:
914 if (IsNamelistNameOrSlash(io)) {
915 return false;
916 }
917 break;
918 case 'L':
919 case 'G':
920 break;
921 default:
922 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
923 "Data edit descriptor '%c' may not be used for LOGICAL input",
924 edit.descriptor);
925 return false;
926 }
927 Fortran::common::optional<int> remaining{io.CueUpInput(edit)};
928 Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)};
929 if (next && *next == '.') { // skip optional period
930 next = io.NextInField(remaining, edit);
931 }
932 if (!next) {
933 io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
934 return false;
935 }
936 switch (*next) {
937 case 'T':
938 case 't':
939 x = true;
940 break;
941 case 'F':
942 case 'f':
943 x = false;
944 break;
945 default:
946 io.GetIoErrorHandler().SignalError(
947 "Bad character '%lc' in LOGICAL input field", *next);
948 return false;
949 }
950 if (remaining) { // ignore the rest of a fixed-width field
951 io.HandleRelativePosition(*remaining);
952 } else if (edit.descriptor == DataEdit::ListDirected) {
953 while (io.NextInField(remaining, edit)) { // discard rest of field
954 }
955 }
956 return CheckCompleteListDirectedField(io, edit);
957}
958
959// See 13.10.3.1 paragraphs 7-9 in Fortran 2018
960template <typename CHAR>
961static RT_API_ATTRS bool EditDelimitedCharacterInput(
962 IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) {
963 bool result{true};
964 while (true) {
965 std::size_t byteCount{0};
966 auto ch{io.GetCurrentChar(byteCount)};
967 if (!ch) {
968 if (io.AdvanceRecord()) {
969 continue;
970 } else {
971 result = false; // EOF in character value
972 break;
973 }
974 }
975 io.HandleRelativePosition(byteCount);
976 if (*ch == delimiter) {
977 auto next{io.GetCurrentChar(byteCount)};
978 if (next && *next == delimiter) {
979 // Repeated delimiter: use as character value
980 io.HandleRelativePosition(byteCount);
981 } else {
982 break; // closing delimiter
983 }
984 }
985 if (length > 0) {
986 *x++ = static_cast<CHAR>(*ch);
987 --length;
988 }
989 }
990 Fortran::runtime::fill_n(x, length, ' ');
991 return result;
992}
993
994template <typename CHAR>
995static RT_API_ATTRS bool EditListDirectedCharacterInput(
996 IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) {
997 std::size_t byteCount{0};
998 auto ch{io.GetCurrentChar(byteCount)};
999 if (ch && (*ch == '\'' || *ch == '"')) {
1000 io.HandleRelativePosition(byteCount);
1001 return EditDelimitedCharacterInput(io, x, length, *ch);
1002 }
1003 if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) {
1004 return false;
1005 }
1006 // Undelimited list-directed character input: stop at a value separator
1007 // or the end of the current record.
1008 while (auto ch{io.GetCurrentChar(byteCount)}) {
1009 bool isSep{false};
1010 switch (*ch) {
1011 case ' ':
1012 case '\t':
1013 case '/':
1014 isSep = true;
1015 break;
1016 case '&':
1017 case '$':
1018 isSep = edit.IsNamelist();
1019 break;
1020 case ',':
1021 isSep = !(edit.modes.editingFlags & decimalComma);
1022 break;
1023 case ';':
1024 isSep = !!(edit.modes.editingFlags & decimalComma);
1025 break;
1026 default:
1027 break;
1028 }
1029 if (isSep) {
1030 break;
1031 }
1032 if (length > 0) {
1033 *x++ = static_cast<CHAR>(*ch);
1034 --length;
1035 } else if (edit.IsNamelist()) {
1036 // GNU compatibility
1037 break;
1038 }
1039 io.HandleRelativePosition(byteCount);
1040 io.GotChar(byteCount);
1041 }
1042 Fortran::runtime::fill_n(x, length, ' ');
1043 return true;
1044}
1045
1046template <typename CHAR>
1047RT_API_ATTRS bool EditCharacterInput(IoStatementState &io, const DataEdit &edit,
1048 CHAR *x, std::size_t lengthChars) {
1049 switch (edit.descriptor) {
1050 case DataEdit::ListDirected:
1051 return EditListDirectedCharacterInput(io, x, lengthChars, edit);
1052 case 'A':
1053 case 'G':
1054 break;
1055 case 'B':
1056 return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x);
1057 case 'O':
1058 return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x);
1059 case 'Z':
1060 return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x);
1061 default:
1062 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
1063 "Data edit descriptor '%c' may not be used with a CHARACTER data item",
1064 edit.descriptor);
1065 return false;
1066 }
1067 const ConnectionState &connection{io.GetConnectionState()};
1068 std::size_t remainingChars{lengthChars};
1069 // Skip leading characters.
1070 // Their bytes don't count towards INQUIRE(IOLENGTH=).
1071 std::size_t skipChars{0};
1072 if (edit.width && *edit.width > 0) {
1073 remainingChars = *edit.width;
1074 if (remainingChars > lengthChars) {
1075 skipChars = remainingChars - lengthChars;
1076 }
1077 }
1078 // When the field is wider than the variable, we drop the leading
1079 // characters. When the variable is wider than the field, there can be
1080 // trailing padding or an EOR condition.
1081 const char *input{nullptr};
1082 std::size_t readyBytes{0};
1083 // Transfer payload bytes; these do count.
1084 while (remainingChars > 0) {
1085 if (readyBytes == 0) {
1086 readyBytes = io.GetNextInputBytes(input);
1087 if (readyBytes == 0 ||
1088 (readyBytes < remainingChars && edit.modes.nonAdvancing)) {
1089 if (io.CheckForEndOfRecord(readyBytes, connection)) {
1090 if (readyBytes == 0) {
1091 // PAD='YES' and no more data
1092 Fortran::runtime::fill_n(x, lengthChars, ' ');
1093 return !io.GetIoErrorHandler().InError();
1094 } else {
1095 // Do partial read(s) then pad on last iteration
1096 }
1097 } else {
1098 return !io.GetIoErrorHandler().InError();
1099 }
1100 }
1101 }
1102 std::size_t chunkBytes;
1103 std::size_t chunkChars{1};
1104 bool skipping{skipChars > 0};
1105 if (connection.isUTF8) {
1106 chunkBytes = MeasureUTF8Bytes(*input);
1107 if (skipping) {
1108 --skipChars;
1109 } else if (auto ucs{DecodeUTF8(input)}) {
1110 if ((sizeof *x == 1 && *ucs > 0xff) ||
1111 (sizeof *x == 2 && *ucs > 0xffff)) {
1112 *x++ = '?';
1113 } else {
1114 *x++ = static_cast<CHAR>(*ucs);
1115 }
1116 --lengthChars;
1117 } else if (chunkBytes == 0) {
1118 // error recovery: skip bad encoding
1119 chunkBytes = 1;
1120 }
1121 } else if (connection.internalIoCharKind > 1) {
1122 // Reading from non-default character internal unit
1123 chunkBytes = connection.internalIoCharKind;
1124 if (skipping) {
1125 --skipChars;
1126 } else {
1127 char32_t buffer{0};
1128 std::memcpy(&buffer, input, chunkBytes);
1129 if ((sizeof *x == 1 && buffer > 0xff) ||
1130 (sizeof *x == 2 && buffer > 0xffff)) {
1131 *x++ = '?';
1132 } else {
1133 *x++ = static_cast<CHAR>(buffer);
1134 }
1135 --lengthChars;
1136 }
1137 } else if constexpr (sizeof *x > 1) {
1138 // Read single byte with expansion into multi-byte CHARACTER
1139 chunkBytes = 1;
1140 if (skipping) {
1141 --skipChars;
1142 } else {
1143 *x++ = static_cast<unsigned char>(*input);
1144 --lengthChars;
1145 }
1146 } else { // single bytes -> default CHARACTER
1147 if (skipping) {
1148 chunkBytes = std::min<std::size_t>(a: skipChars, b: readyBytes);
1149 chunkChars = chunkBytes;
1150 skipChars -= chunkChars;
1151 } else {
1152 chunkBytes = std::min<std::size_t>(a: remainingChars, b: readyBytes);
1153 chunkBytes = std::min<std::size_t>(a: lengthChars, b: chunkBytes);
1154 chunkChars = chunkBytes;
1155 std::memcpy(x, input, chunkBytes);
1156 x += chunkBytes;
1157 lengthChars -= chunkChars;
1158 }
1159 }
1160 input += chunkBytes;
1161 remainingChars -= chunkChars;
1162 if (!skipping) {
1163 io.GotChar(chunkBytes);
1164 }
1165 io.HandleRelativePosition(chunkBytes);
1166 readyBytes -= chunkBytes;
1167 }
1168 // Pad the remainder of the input variable, if any.
1169 Fortran::runtime::fill_n(x, lengthChars, ' ');
1170 return CheckCompleteListDirectedField(io, edit);
1171}
1172
1173template RT_API_ATTRS bool EditRealInput<2>(
1174 IoStatementState &, const DataEdit &, void *);
1175template RT_API_ATTRS bool EditRealInput<3>(
1176 IoStatementState &, const DataEdit &, void *);
1177template RT_API_ATTRS bool EditRealInput<4>(
1178 IoStatementState &, const DataEdit &, void *);
1179template RT_API_ATTRS bool EditRealInput<8>(
1180 IoStatementState &, const DataEdit &, void *);
1181template RT_API_ATTRS bool EditRealInput<10>(
1182 IoStatementState &, const DataEdit &, void *);
1183// TODO: double/double
1184template RT_API_ATTRS bool EditRealInput<16>(
1185 IoStatementState &, const DataEdit &, void *);
1186
1187template RT_API_ATTRS bool EditCharacterInput(
1188 IoStatementState &, const DataEdit &, char *, std::size_t);
1189template RT_API_ATTRS bool EditCharacterInput(
1190 IoStatementState &, const DataEdit &, char16_t *, std::size_t);
1191template RT_API_ATTRS bool EditCharacterInput(
1192 IoStatementState &, const DataEdit &, char32_t *, std::size_t);
1193
1194RT_OFFLOAD_API_GROUP_END
1195} // namespace Fortran::runtime::io
1196

source code of flang-rt/lib/runtime/edit-input.cpp