edit-input.cpp source code [flang-rt/lib/runtime/edit-input.cpp]

1	//===-- lib/runtime/edit-input.cpp ------------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "edit-input.h"
10	#include "flang-rt/runtime/namelist.h"
11	#include "flang-rt/runtime/utf.h"
12	#include "flang/Common/optional.h"
13	#include "flang/Common/real.h"
14	#include "flang/Common/uint128.h"
15	#include "flang/Runtime/freestanding-tools.h"
16	#include <algorithm>
17	#include <cfenv>
18
19	namespace Fortran::runtime::io {
20	RT_OFFLOAD_API_GROUP_BEGIN
21
22	// Checks that a list-directed input value has been entirely consumed and
23	// doesn't contain unparsed characters before the next value separator.
24	static inline RT_API_ATTRS bool IsCharValueSeparator(
25	const DataEdit &edit, char32_t ch) {
26	char32_t comma{
27	edit.modes.editingFlags & decimalComma ? char32_t{`';'`} : char32_t{`','`}};
28	return ch == `' '` \|\| ch == `'\t'` \|\| ch == comma \|\| ch == `'/'` \|\|
29	(edit.IsNamelist() && (ch == `'&'` \|\| ch == `'$'`));
30	}
31
32	static RT_API_ATTRS bool CheckCompleteListDirectedField(
33	IoStatementState &io, const DataEdit &edit) {
34	if (edit.IsListDirected()) {
35	std::size_t byteCount;
36	if (auto ch{io.GetCurrentChar(byteCount)}) {
37	if (IsCharValueSeparator(edit, *ch)) {
38	return true;
39	} else {
40	const auto &connection{io.GetConnectionState()};
41	io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator,
42	"invalid character (0x%x) after list-directed input value, "
43	"at column %d in record %d",
44	static_cast<unsigned>(*ch),
45	static_cast<int>(connection.positionInRecord + `1`),
46	static_cast<int>(connection.currentRecordNumber));
47	return false;
48	}
49	} else {
50	return true; // end of record: ok
51	}
52	} else {
53	return true;
54	}
55	}
56
57	static inline RT_API_ATTRS char32_t GetSeparatorChar(const DataEdit &edit) {
58	return edit.modes.editingFlags & decimalComma ? char32_t{`';'`} : char32_t{`','`};
59	}
60
61	template <int LOG2_BASE>
62	static RT_API_ATTRS bool EditBOZInput(
63	IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
64	// Skip leading white space & zeroes
65	Fortran::common::optional<int> remaining{io.CueUpInput(edit)};
66	auto start{io.GetConnectionState().positionInRecord};
67	Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)};
68	if (next.value_or(`'?'`) == `'0'`) {
69	do {
70	start = io.GetConnectionState().positionInRecord;
71	next = io.NextInField(remaining, edit);
72	} while (next && *next == `'0'`);
73	}
74	// Count significant digits after any leading white space & zeroes
75	int digits{`0`};
76	int significantBits{`0`};
77	const char32_t comma{GetSeparatorChar(edit)};
78	for (; next; next = io.NextInField(remaining, edit)) {
79	char32_t ch{*next};
80	if (ch == `' '` \|\| ch == `'\t'`) {
81	if (edit.modes.editingFlags & blankZero) {
82	ch = `'0'`; // BZ mode - treat blank as if it were zero
83	} else {
84	continue;
85	}
86	}
87	if (ch >= `'0'` && ch <= `'1'`) {
88	} else if (LOG2_BASE >= `3` && ch >= `'2'` && ch <= `'7'`) {
89	} else if (LOG2_BASE >= `4` && ch >= `'8'` && ch <= `'9'`) {
90	} else if (LOG2_BASE >= `4` && ch >= `'A'` && ch <= `'F'`) {
91	} else if (LOG2_BASE >= `4` && ch >= `'a'` && ch <= `'f'`) {
92	} else if (ch == comma) {
93	break; // end non-list-directed field early
94	} else {
95	io.GetIoErrorHandler().SignalError(
96	"Bad character '%lc' in B/O/Z input field", ch);
97	return false;
98	}
99	if (digits++ == `0`) {
100	if (ch >= `'0'` && ch <= `'1'`) {
101	significantBits = `1`;
102	} else if (ch >= `'2'` && ch <= `'3'`) {
103	significantBits = `2`;
104	} else if (ch >= `'4'` && ch <= `'7'`) {
105	significantBits = `3`;
106	} else {
107	significantBits = `4`;
108	}
109	} else {
110	significantBits += LOG2_BASE;
111	}
112	}
113	auto significantBytes{static_cast<std::size_t>(significantBits + `7`) / `8`};
114	if (significantBytes > bytes) {
115	io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow,
116	"B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
117	return false;
118	}
119	// Reset to start of significant digits
120	io.HandleAbsolutePosition(start);
121	remaining.reset();
122	// Make a second pass now that the digit count is known
123	std::memset(n, `0`, bytes);
124	int increment{isHostLittleEndian ? -`1` : `1`};
125	auto data{reinterpret_cast<unsigned* char *>(n) +
126	(isHostLittleEndian ? significantBytes - `1` : bytes - significantBytes)};
127	int bitsAfterFirstDigit{(digits - `1`) * LOG2_BASE};
128	int shift{bitsAfterFirstDigit & `7`};
129	if (shift + (significantBits - bitsAfterFirstDigit) > `8`) {
130	shift = shift - `8`; // misaligned octal
131	}
132	while (digits > `0`) {
133	char32_t ch{io.NextInField(remaining, edit).value_or(`' '`)};
134	int digit{`0`};
135	if (ch == `' '` \|\| ch == `'\t'`) {
136	if (edit.modes.editingFlags & blankZero) {
137	ch = `'0'`; // BZ mode - treat blank as if it were zero
138	} else {
139	continue;
140	}
141	}
142	--digits;
143	if (ch >= `'0'` && ch <= `'9'`) {
144	digit = ch - `'0'`;
145	} else if (ch >= `'A'` && ch <= `'F'`) {
146	digit = ch + `10` - `'A'`;
147	} else if (ch >= `'a'` && ch <= `'f'`) {
148	digit = ch + `10` - `'a'`;
149	} else {
150	continue;
151	}
152	if (shift < `0`) {
153	if (shift + LOG2_BASE > `0`) { // misaligned octal
154	*data \|= digit >> -shift;
155	}
156	shift += `8`;
157	data += increment;
158	}
159	*data \|= digit << shift;
160	shift -= LOG2_BASE;
161	}
162	return CheckCompleteListDirectedField(io, edit);
163	}
164
165	static inline RT_API_ATTRS char32_t GetRadixPointChar(const DataEdit &edit) {
166	return edit.modes.editingFlags & decimalComma ? char32_t{`','`} : char32_t{`'.'`};
167	}
168
169	// Prepares input from a field, and returns the sign, if any, else '\0'.
170	static RT_API_ATTRS char ScanNumericPrefix(IoStatementState &io,
171	const DataEdit &edit, Fortran::common::optional<char32_t> &next,
172	Fortran::common::optional<int> &remaining,
173	IoStatementState::FastAsciiField fastField = nullptr*) {
174	remaining = io.CueUpInput(edit, fastField);
175	next = io.NextInField(remaining, edit, fastField);
176	char sign{`'\0'`};
177	if (next) {
178	if (next == `'-'` \|\| next == `'+'`) {
179	sign = *next;
180	if (!edit.IsListDirected()) {
181	io.SkipSpaces(remaining, fastField);
182	}
183	next = io.NextInField(remaining, edit, fastField);
184	}
185	}
186	return sign;
187	}
188
189	RT_API_ATTRS bool EditIntegerInput(IoStatementState &io, const DataEdit &edit,
190	void n, int* kind, bool isSigned) {
191	auto &handler{io.GetIoErrorHandler()};
192	RUNTIME_CHECK(handler, kind >= `1` && !(kind & (kind - `1`)));
193	if (!n) {
194	handler.Crash("Null address for integer input item");
195	}
196	switch (edit.descriptor) {
197	case DataEdit::ListDirected:
198	if (IsNamelistNameOrSlash(io)) {
199	return false;
200	}
201	break;
202	case `'G'`:
203	case `'I'`:
204	break;
205	case `'B'`:
206	return EditBOZInput<`1`>(io, edit, n, kind);
207	case `'O'`:
208	return EditBOZInput<`3`>(io, edit, n, kind);
209	case `'Z'`:
210	return EditBOZInput<`4`>(io, edit, n, kind);
211	case `'A'`: // legacy extension
212	return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
213	default:
214	handler.SignalError(IostatErrorInFormat,
215	"Data edit descriptor '%c' may not be used with an INTEGER data item",
216	edit.descriptor);
217	return false;
218	}
219	Fortran::common::optional<int> remaining;
220	Fortran::common::optional<char32_t> next;
221	auto fastField{io.GetUpcomingFastAsciiField()};
222	char sign{ScanNumericPrefix(io, edit, next, remaining, &fastField)};
223	if (sign == `'-'` && !isSigned) {
224	handler.SignalError("Negative sign in UNSIGNED input field");
225	return false;
226	}
227	common::uint128_t value{`0`};
228	bool any{!!sign};
229	bool overflow{false};
230	const char32_t comma{GetSeparatorChar(edit)};
231	static constexpr auto maxu128{~common::uint128_t{`0`}};
232	for (; next; next = io.NextInField(remaining, edit, &fastField)) {
233	char32_t ch{*next};
234	if (ch == `' '` \|\| ch == `'\t'`) {
235	if (edit.modes.editingFlags & blankZero) {
236	ch = `'0'`; // BZ mode - treat blank as if it were zero
237	} else {
238	continue;
239	}
240	}
241	int digit{`0`};
242	if (ch >= `'0'` && ch <= `'9'`) {
243	digit = ch - `'0'`;
244	} else if (ch == comma) {
245	break; // end non-list-directed field early
246	} else {
247	if (edit.modes.inNamelist && ch == GetRadixPointChar(edit)) {
248	// Ignore any fractional part that might appear in NAMELIST integer
249	// input, like a few other Fortran compilers do.
250	// TODO: also process exponents? Some compilers do, but they obviously
251	// can't just be ignored.
252	while ((next = io.NextInField(remaining, edit, &fastField))) {
253	if (next < `'0'` \|\| next > `'9'`) {
254	break;
255	}
256	}
257	if (!next \|\| *next == comma) {
258	break;
259	}
260	}
261	handler.SignalError("Bad character '%lc' in INTEGER input field", ch);
262	return false;
263	}
264	static constexpr auto maxu128OverTen{maxu128 / `10`};
265	static constexpr int maxLastDigit{
266	static_cast<int>(maxu128 - (maxu128OverTen * `10`))};
267	overflow \|= value >= maxu128OverTen &&
268	(value > maxu128OverTen \|\| digit > maxLastDigit);
269	value *= `10`;
270	value += digit;
271	any = true;
272	}
273	if (!any && !remaining) {
274	handler.SignalError(
275	"Integer value absent from NAMELIST or list-directed input");
276	return false;
277	}
278	if (isSigned) {
279	auto maxForKind{common::uint128_t{`1`} << ((`8` * kind) - `1`)};
280	overflow \|= value >= maxForKind && (value > maxForKind \|\| sign != `'-'`);
281	} else {
282	auto maxForKind{maxu128 >> (((`16` - kind) * `8`) + (isSigned ? `1` : `0`))};
283	overflow \|= value >= maxForKind;
284	}
285	if (overflow) {
286	handler.SignalError(IostatIntegerInputOverflow,
287	"Decimal input overflows INTEGER(%d) variable", kind);
288	return false;
289	}
290	if (sign == `'-'`) {
291	value = -value;
292	}
293	if (any \|\| !handler.InError()) {
294	// The value is stored in the lower order bits on big endian platform.
295	// For memcpy, shift the value to the highest order bits.
296	#if USING_NATIVE_INT128_T
297	auto shft{static_cast<int>(sizeof value - kind)};
298	if (!isHostLittleEndian && shft >= `0`) {
299	auto shifted{value << (`8` * shft)};
300	std::memcpy(n, &shifted, kind);
301	} else {
302	std::memcpy(n, &value, kind); // a blank field means zero
303	}
304	#else
305	auto shft{static_cast<int>(sizeof(value.low())) - kind};
306	// For kind==8 (i.e. shft==0), the value is stored in low_ in big endian.
307	if (!isHostLittleEndian && shft >= `0`) {
308	auto l{value.low() << (`8` * shft)};
309	std::memcpy(n, &l, kind);
310	} else {
311	std::memcpy(n, &value, kind); // a blank field means zero
312	}
313	#endif
314	io.GotChar(fastField.got());
315	return true;
316	} else {
317	return false;
318	}
319	}
320
321	// Parses a REAL input number from the input source as a normalized
322	// fraction into a supplied buffer -- there's an optional '-', a
323	// decimal point when the input is not hexadecimal, and at least one
324	// digit. Replaces blanks with zeroes where appropriate.
325	struct ScannedRealInput {
326	// Number of characters that (should) have been written to the
327	// buffer -- this can be larger than the buffer size, which
328	// indicates buffer overflow. Zero indicates an error.
329	int got{`0`};
330	int exponent{`0`}; // adjusted as necessary; binary if isHexadecimal
331	bool isHexadecimal{false}; // 0X...
332	};
333	static RT_API_ATTRS ScannedRealInput ScanRealInput(
334	char buffer, int* bufferSize, IoStatementState &io, const DataEdit &edit) {
335	Fortran::common::optional<int> remaining;
336	Fortran::common::optional<char32_t> next;
337	int got{`0`};
338	Fortran::common::optional<int> radixPointOffset;
339	// The following lambda definition violates the conding style,
340	// but cuda-11.8 nvcc hits an internal error with the brace initialization.
341	auto Put = [&](char ch) -> void {
342	if (got < bufferSize) {
343	buffer[got] = ch;
344	}
345	++got;
346	};
347	char sign{ScanNumericPrefix(io, edit, next, remaining)};
348	if (sign == `'-'`) {
349	Put(`'-'`);
350	}
351	bool bzMode{(edit.modes.editingFlags & blankZero) != `0`};
352	int exponent{`0`};
353	if (!next \|\| (!bzMode && *next == `' '`) \|\|
354	(!(edit.modes.editingFlags & decimalComma) && *next == `','`)) {
355	if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) {
356	// An empty/blank field means zero when not list-directed.
357	// A fixed-width field containing only a sign is also zero;
358	// this behavior isn't standard-conforming in F'2023 but it is
359	// required to pass FCVS.
360	Put(`'0'`);
361	}
362	return {got, exponent, false};
363	}
364	char32_t radixPointChar{GetRadixPointChar(edit)};
365	char32_t first{next >= `'a'` && next <= `'z'` ? next + `'A'` - `'a'` : next};
366	bool isHexadecimal{false};
367	if (first == `'N'` \|\| first == `'I'`) {
368	// NaN or infinity - convert to upper case
369	// Subtle: a blank field of digits could be followed by 'E' or 'D',
370	for (; next &&
371	((next >= `'a'` && next <= `'z'`) \|\| (next >= `'A'` && next <= `'Z'`));
372	next = io.NextInField(remaining, edit)) {
373	if (next >= `'a'` && next <= `'z'`) {
374	Put(*next - `'a'` + `'A'`);
375	} else {
376	Put(*next);
377	}
378	}
379	if (next && next == `'('`) { // NaN(...)*
380	Put(`'('`);
381	int depth{`1`};
382	while (true) {
383	next = io.NextInField(remaining, edit);
384	if (depth == `0`) {
385	break;
386	} else if (!next) {
387	return {}; // error
388	} else if (*next == `'('`) {
389	++depth;
390	} else if (*next == `')'`) {
391	--depth;
392	}
393	Put(*next);
394	}
395	}
396	} else if (first == radixPointChar \|\| (first >= `'0'` && first <= `'9'`) \|\|
397	(bzMode && (first == `' '` \|\| first == `'\t'`)) \|\| first == `'E'` \|\|
398	first == `'D'` \|\| first == `'Q'`) {
399	if (first == `'0'`) {
400	next = io.NextInField(remaining, edit);
401	if (next && (next == `'x'` \|\| next == `'X'`)) { // 0X...
402	isHexadecimal = true;
403	next = io.NextInField(remaining, edit);
404	} else {
405	Put(`'0'`);
406	}
407	}
408	// input field is normalized to a fraction
409	if (!isHexadecimal) {
410	Put(`'.'`);
411	}
412	auto start{got};
413	for (; next; next = io.NextInField(remaining, edit)) {
414	char32_t ch{*next};
415	if (ch == `' '` \|\| ch == `'\t'`) {
416	if (isHexadecimal) {
417	return {}; // error
418	} else if (bzMode) {
419	ch = `'0'`; // BZ mode - treat blank as if it were zero
420	} else {
421	continue; // ignore blank in fixed field
422	}
423	}
424	if (ch == `'0'` && got == start && !radixPointOffset) {
425	// omit leading zeroes before the radix point
426	} else if (ch >= `'0'` && ch <= `'9'`) {
427	Put(ch);
428	} else if (ch == radixPointChar && !radixPointOffset) {
429	// The radix point character is not* copied to the buffer.*
430	radixPointOffset = got - start; // # of digits before the radix point
431	} else if (isHexadecimal && ch >= `'A'` && ch <= `'F'`) {
432	Put(ch);
433	} else if (isHexadecimal && ch >= `'a'` && ch <= `'f'`) {
434	Put(ch - `'a'` + `'A'`); // normalize to capitals
435	} else {
436	break;
437	}
438	}
439	if (got == start) {
440	// Nothing but zeroes and maybe a radix point. F'2018 requires
441	// at least one digit, but F'77 did not, and a bare "." shows up in
442	// the FCVS suite.
443	Put(`'0'`); // emit at least one digit
444	}
445	// In list-directed input, a bad exponent is not consumed.
446	auto nextBeforeExponent{next};
447	auto startExponent{io.GetConnectionState().positionInRecord};
448	bool hasGoodExponent{false};
449	if (next) {
450	if (isHexadecimal) {
451	if (next == `'p'` \|\| next == `'P'`) {
452	next = io.NextInField(remaining, edit);
453	} else {
454	// The binary exponent is not optional in the standard.
455	return {}; // error
456	}
457	} else if (next == `'e'` \|\| next == `'E'` \|\| next == `'d'` \|\| next == `'D'` \|\|
458	next == `'q'` \|\| next == `'Q'`) {
459	// Optional exponent letter. Blanks are allowed between the
460	// optional exponent letter and the exponent value.
461	io.SkipSpaces(remaining);
462	next = io.NextInField(remaining, edit);
463	}
464	}
465	if (next &&
466	(next == `'-'` \|\| next == `'+'` \|\| (next >= `'0'` && next <= `'9'`) \|\|
467	next == `' '` \|\| next == `'\t'`)) {
468	bool negExpo{*next == `'-'`};
469	if (negExpo \|\| *next == `'+'`) {
470	next = io.NextInField(remaining, edit);
471	}
472	for (; next; next = io.NextInField(remaining, edit)) {
473	if (next >= `'0'` && next <= `'9'`) {
474	hasGoodExponent = true;
475	if (exponent < `10000`) {
476	exponent = `10` * exponent + *next - `'0'`;
477	}
478	} else if (next == `' '` \|\| next == `'\t'`) {
479	if (isHexadecimal) {
480	break;
481	} else if (bzMode) {
482	hasGoodExponent = true;
483	exponent = `10` * exponent;
484	}
485	} else {
486	break;
487	}
488	}
489	if (negExpo) {
490	exponent = -exponent;
491	}
492	}
493	if (!hasGoodExponent) {
494	if (isHexadecimal) {
495	return {}; // error
496	}
497	// There isn't a good exponent; do not consume it.
498	next = nextBeforeExponent;
499	io.HandleAbsolutePosition(startExponent);
500	// The default exponent is -kP, but the scale factor doesn't affect
501	// an explicit exponent.
502	exponent = -edit.modes.scale;
503	}
504	// Adjust exponent by number of digits before the radix point.
505	if (isHexadecimal) {
506	// Exponents for hexadecimal input are binary.
507	exponent += radixPointOffset.value_or(got - start) * `4`;
508	} else if (radixPointOffset) {
509	exponent += *radixPointOffset;
510	} else {
511	// When no redix point (or comma) appears in the value, the 'd'
512	// part of the edit descriptor must be interpreted as the number of
513	// digits in the value to be interpreted as being to the right* of*
514	// the assumed radix point (13.7.2.3.2)
515	exponent += got - start - edit.digits.value_or(`0`);
516	}
517	}
518	// Consume the trailing ')' of a list-directed or NAMELIST complex
519	// input value.
520	if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
521	if (next && (next == `' '` \|\| next == `'\t'`)) {
522	io.SkipSpaces(remaining);
523	next = io.NextInField(remaining, edit);
524	}
525	if (!next) { // NextInField fails on separators like ')'
526	std::size_t byteCount{`0`};
527	next = io.GetCurrentChar(byteCount);
528	if (next && *next == `')'`) {
529	io.HandleRelativePosition(byteCount);
530	}
531	}
532	} else if (remaining) {
533	while (next && (next == `' '` \|\| next == `'\t'`)) {
534	next = io.NextInField(remaining, edit);
535	}
536	if (next && (*next != `','` \|\| (edit.modes.editingFlags & decimalComma))) {
537	return {}; // error: unused nonblank character in fixed-width field
538	}
539	}
540	return {got, exponent, isHexadecimal};
541	}
542
543	static RT_API_ATTRS void RaiseFPExceptions(
544	decimal::ConversionResultFlags flags) {
545	#undef RAISE
546	#if defined(RT_DEVICE_COMPILATION)
547	Terminator terminator(__FILE__, __LINE__);
548	#define RAISE(e) \
549	terminator.Crash( \
550	"not implemented yet: raising FP exception in device code: %s", #e);
551	#else // !defined(RT_DEVICE_COMPILATION)
552	#ifdef feraisexcept // a macro in some environments; omit std::
553	#define RAISE feraiseexcept
554	#else
555	#define RAISE std::feraiseexcept
556	#endif
557	#endif // !defined(RT_DEVICE_COMPILATION)
558
559	// Some environment (e.g. emscripten, musl) don't define FE_OVERFLOW as allowed
560	// by c99 (but not c++11) :-/
561	#if defined(FE_OVERFLOW) \|\| defined(RT_DEVICE_COMPILATION)
562	if (flags & decimal::ConversionResultFlags::Overflow) {
563	RAISE(FE_OVERFLOW);
564	}
565	#endif
566	#if defined(FE_UNDERFLOW) \|\| defined(RT_DEVICE_COMPILATION)
567	if (flags & decimal::ConversionResultFlags::Underflow) {
568	RAISE(FE_UNDERFLOW);
569	}
570	#endif
571	#if defined(FE_INEXACT) \|\| defined(RT_DEVICE_COMPILATION)
572	if (flags & decimal::ConversionResultFlags::Inexact) {
573	RAISE(FE_INEXACT);
574	}
575	#endif
576	#if defined(FE_INVALID) \|\| defined(RT_DEVICE_COMPILATION)
577	if (flags & decimal::ConversionResultFlags::Invalid) {
578	RAISE(FE_INVALID);
579	}
580	#endif
581	#undef RAISE
582	}
583
584	// If no special modes are in effect and the form of the input value
585	// that's present in the input stream is acceptable to the decimal->binary
586	// converter without modification, this fast path for real input
587	// saves time by avoiding memory copies and reformatting of the exponent.
588	template <int PRECISION>
589	static RT_API_ATTRS bool TryFastPathRealDecimalInput(
590	IoStatementState &io, const DataEdit &edit, void *n) {
591	if (edit.modes.editingFlags & (blankZero \| decimalComma)) {
592	return false;
593	}
594	if (edit.modes.scale != `0`) {
595	return false;
596	}
597	const ConnectionState &connection{io.GetConnectionState()};
598	if (connection.internalIoCharKind > `1`) {
599	return false; // reading non-default character
600	}
601	const char str{nullptr*};
602	std::size_t got{io.GetNextInputBytes(str)};
603	if (got == `0` \|\| str == nullptr \|\| !connection.recordLength.has_value()) {
604	return false; // could not access reliably-terminated input stream
605	}
606	const char *p{str};
607	std::int64_t maxConsume{
608	std::min<std::int64_t>(got, edit.width.value_or(got))};
609	const char *limit{str + maxConsume};
610	decimal::ConversionToBinaryResult<PRECISION> converted{
611	decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
612	if (converted.flags & (decimal::Invalid \| decimal::Overflow)) {
613	return false;
614	}
615	if (edit.digits.value_or(`0`) != `0`) {
616	// Edit descriptor is Fw.d (or other) with d != 0, which
617	// implies scaling
618	const char *q{str};
619	for (; q < limit; ++q) {
620	if (q == `'.'` \|\| q == `'n'` \|\| *q == `'N'`) {
621	break;
622	}
623	}
624	if (q == limit) {
625	// No explicit decimal point, and not NaN/Inf.
626	return false;
627	}
628	}
629	if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
630	// Need to consume a trailing ')', possibly with leading spaces
631	for (; p < limit && (p == `' '` \|\| p == `'\t'`); ++p) {
632	}
633	if (p < limit && *p == `')'`) {
634	++p;
635	} else {
636	return false;
637	}
638	} else if (edit.IsListDirected()) {
639	if (p < limit && !IsCharValueSeparator(edit, *p)) {
640	return false;
641	}
642	} else {
643	for (; p < limit && (p == `' '` \|\| p == `'\t'`); ++p) {
644	}
645	if (edit.width && p < str + *edit.width) {
646	return false; // unconverted characters remain in fixed width field
647	}
648	}
649	// Success on the fast path!
650	*reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
651	converted.binary;
652	io.HandleRelativePosition(p - str);
653	// Set FP exception flags
654	if (converted.flags != decimal::ConversionResultFlags::Exact) {
655	RaiseFPExceptions(converted.flags);
656	}
657	return true;
658	}
659
660	template <int binaryPrecision>
661	RT_API_ATTRS decimal::ConversionToBinaryResult<binaryPrecision>
662	ConvertHexadecimal(
663	const char &p, enum* decimal::FortranRounding rounding, int expo) {
664	using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>;
665	using RawType = typename RealType::RawType;
666	bool isNegative{*p == `'-'`};
667	constexpr RawType one{`1`};
668	RawType signBit{`0`};
669	if (isNegative) {
670	++p;
671	signBit = one << (RealType::bits - `1`);
672	}
673	RawType fraction{`0`};
674	// Adjust the incoming binary P+/- exponent to shift the radix point
675	// to below the LSB and add in the bias.
676	expo += binaryPrecision - `1` + RealType::exponentBias;
677	// Input the fraction.
678	int roundingBit{`0`};
679	int guardBit{`0`};
680	for (; *p; ++p) {
681	fraction <<= `4`;
682	expo -= `4`;
683	if (p >= `'0'` && p <= `'9'`) {
684	fraction \|= *p - `'0'`;
685	} else if (p >= `'A'` && p <= `'F'`) {
686	fraction \|= p - `'A'` + `10`; // data were normalized to capitals*
687	} else {
688	break;
689	}
690	if (fraction >> binaryPrecision) {
691	while (fraction >> binaryPrecision) {
692	guardBit \|= roundingBit;
693	roundingBit = (int)fraction & `1`;
694	fraction >>= `1`;
695	++expo;
696	}
697	// Consume excess digits
698	while (*++p) {
699	if (*p == `'0'`) {
700	} else if ((p >= `'1'` && p <= `'9'`) \|\| (p >= `'A'` && p <= `'F'`)) {
701	guardBit = `1`;
702	} else {
703	break;
704	}
705	}
706	break;
707	}
708	}
709	if (fraction) {
710	// Boost biased expo if too small
711	while (expo < `1`) {
712	guardBit \|= roundingBit;
713	roundingBit = (int)fraction & `1`;
714	fraction >>= `1`;
715	++expo;
716	}
717	// Normalize
718	while (expo > `1` && !(fraction >> (binaryPrecision - `1`))) {
719	fraction <<= `1`;
720	--expo;
721	guardBit = roundingBit = `0`;
722	}
723	}
724	// Rounding
725	bool increase{false};
726	switch (rounding) {
727	case decimal::RoundNearest: // RN & RP
728	increase = roundingBit && (guardBit \| ((int)fraction & `1`));
729	break;
730	case decimal::RoundUp: // RU
731	increase = !isNegative && (roundingBit \| guardBit);
732	break;
733	case decimal::RoundDown: // RD
734	increase = isNegative && (roundingBit \| guardBit);
735	break;
736	case decimal::RoundToZero: // RZ
737	break;
738	case decimal::RoundCompatible: // RC
739	increase = roundingBit != `0`;
740	break;
741	}
742	if (increase) {
743	++fraction;
744	if (fraction >> binaryPrecision) {
745	fraction >>= `1`;
746	++expo;
747	}
748	}
749	// Package & return result
750	constexpr RawType significandMask{(one << RealType::significandBits) - `1`};
751	int flags{(roundingBit \| guardBit) ? decimal::Inexact : decimal::Exact};
752	if (!fraction) {
753	expo = `0`;
754	} else if (expo == `1` && !(fraction >> (binaryPrecision - `1`))) {
755	expo = `0`; // subnormal
756	flags \|= decimal::Underflow;
757	} else if (expo >= RealType::maxExponent) {
758	if (rounding == decimal::RoundToZero \|\|
759	(rounding == decimal::RoundDown && !isNegative) \|\|
760	(rounding == decimal::RoundUp && isNegative)) {
761	expo = RealType::maxExponent - `1`; // +/-HUGE()
762	fraction = significandMask;
763	} else {
764	expo = RealType::maxExponent; // +/-Inf
765	fraction = `0`;
766	flags \|= decimal::Overflow;
767	}
768	} else {
769	fraction &= significandMask; // remove explicit normalization unless x87
770	}
771	return decimal::ConversionToBinaryResult<binaryPrecision>{
772	RealType{static_cast<RawType>(signBit \|
773	static_cast<RawType>(expo) << RealType::significandBits \| fraction)},
774	static_cast<decimal::ConversionResultFlags>(flags)};
775	}
776
777	template <int KIND>
778	RT_API_ATTRS bool EditCommonRealInput(
779	IoStatementState &io, const DataEdit &edit, void *n) {
780	constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
781	if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) {
782	return CheckCompleteListDirectedField(io, edit);
783	}
784	// Fast path wasn't available or didn't work; go the more general route
785	static constexpr int maxDigits{
786	common::MaxDecimalConversionDigits(binaryPrecision)};
787	static constexpr int bufferSize{maxDigits + `18`};
788	char buffer[bufferSize];
789	auto scanned{ScanRealInput(buffer, maxDigits + `2`, io, edit)};
790	int got{scanned.got};
791	if (got >= maxDigits + `2`) {
792	io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
793	return false;
794	}
795	if (got == `0`) {
796	const auto &connection{io.GetConnectionState()};
797	io.GetIoErrorHandler().SignalError(IostatBadRealInput,
798	"Bad real input data at column %d of record %d",
799	static_cast<int>(connection.positionInRecord + `1`),
800	static_cast<int>(connection.currentRecordNumber));
801	return false;
802	}
803	decimal::ConversionToBinaryResult<binaryPrecision> converted;
804	const char *p{buffer};
805	if (scanned.isHexadecimal) {
806	buffer[got] = `'\0'`;
807	converted = ConvertHexadecimal<binaryPrecision>(
808	p, edit.modes.round, scanned.exponent);
809	} else {
810	bool hadExtra{got > maxDigits};
811	int exponent{scanned.exponent};
812	if (exponent != `0`) {
813	buffer[got++] = `'e'`;
814	if (exponent < `0`) {
815	buffer[got++] = `'-'`;
816	exponent = -exponent;
817	}
818	if (exponent > `9999`) {
819	exponent = `9999`; // will convert to +/-Inf
820	}
821	if (exponent > `999`) {
822	int dig{exponent / `1000`};
823	buffer[got++] = `'0'` + dig;
824	int rest{exponent - `1000` * dig};
825	dig = rest / `100`;
826	buffer[got++] = `'0'` + dig;
827	rest -= `100` * dig;
828	dig = rest / `10`;
829	buffer[got++] = `'0'` + dig;
830	buffer[got++] = `'0'` + (rest - `10` * dig);
831	} else if (exponent > `99`) {
832	int dig{exponent / `100`};
833	buffer[got++] = `'0'` + dig;
834	int rest{exponent - `100` * dig};
835	dig = rest / `10`;
836	buffer[got++] = `'0'` + dig;
837	buffer[got++] = `'0'` + (rest - `10` * dig);
838	} else if (exponent > `9`) {
839	int dig{exponent / `10`};
840	buffer[got++] = `'0'` + dig;
841	buffer[got++] = `'0'` + (exponent - `10` * dig);
842	} else {
843	buffer[got++] = `'0'` + exponent;
844	}
845	}
846	buffer[got] = `'\0'`;
847	converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round);
848	if (hadExtra) {
849	converted.flags = static_cast<enum decimal::ConversionResultFlags>(
850	converted.flags \| decimal::Inexact);
851	}
852	}
853	if (p) { // unprocessed junk after value*
854	const auto &connection{io.GetConnectionState()};
855	io.GetIoErrorHandler().SignalError(IostatBadRealInput,
856	"Trailing characters after real input data at column %d of record %d",
857	static_cast<int>(connection.positionInRecord + `1`),
858	static_cast<int>(connection.currentRecordNumber));
859	return false;
860	}
861	*reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
862	converted.binary;
863	// Set FP exception flags
864	if (converted.flags != decimal::ConversionResultFlags::Exact) {
865	if (converted.flags & decimal::ConversionResultFlags::Overflow) {
866	io.GetIoErrorHandler().SignalError(IostatRealInputOverflow);
867	return false;
868	}
869	RaiseFPExceptions(converted.flags);
870	}
871	return CheckCompleteListDirectedField(io, edit);
872	}
873
874	template <int KIND>
875	RT_API_ATTRS bool EditRealInput(
876	IoStatementState &io, const DataEdit &edit, void *n) {
877	switch (edit.descriptor) {
878	case DataEdit::ListDirected:
879	if (IsNamelistNameOrSlash(io)) {
880	return false;
881	}
882	return EditCommonRealInput<KIND>(io, edit, n);
883	case DataEdit::ListDirectedRealPart:
884	case DataEdit::ListDirectedImaginaryPart:
885	case `'F'`:
886	case `'E'`: // incl. EN, ES, & EX
887	case `'D'`:
888	case `'G'`:
889	return EditCommonRealInput<KIND>(io, edit, n);
890	case `'B'`:
891	return EditBOZInput<`1`>(io, edit, n,
892	common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> `3`);
893	case `'O'`:
894	return EditBOZInput<`3`>(io, edit, n,
895	common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> `3`);
896	case `'Z'`:
897	return EditBOZInput<`4`>(io, edit, n,
898	common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> `3`);
899	case `'A'`: // legacy extension
900	return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
901	default:
902	io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
903	"Data edit descriptor '%c' may not be used for REAL input",
904	edit.descriptor);
905	return false;
906	}
907	}
908
909	// 13.7.3 in Fortran 2018
910	RT_API_ATTRS bool EditLogicalInput(
911	IoStatementState &io, const DataEdit &edit, bool &x) {
912	switch (edit.descriptor) {
913	case DataEdit::ListDirected:
914	if (IsNamelistNameOrSlash(io)) {
915	return false;
916	}
917	break;
918	case `'L'`:
919	case `'G'`:
920	break;
921	default:
922	io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
923	"Data edit descriptor '%c' may not be used for LOGICAL input",
924	edit.descriptor);
925	return false;
926	}
927	Fortran::common::optional<int> remaining{io.CueUpInput(edit)};
928	Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)};
929	if (next && next == `'.'`) { // skip optional period*
930	next = io.NextInField(remaining, edit);
931	}
932	if (!next) {
933	io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
934	return false;
935	}
936	switch (*next) {
937	case `'T'`:
938	case `'t'`:
939	x = true;
940	break;
941	case `'F'`:
942	case `'f'`:
943	x = false;
944	break;
945	default:
946	io.GetIoErrorHandler().SignalError(
947	"Bad character '%lc' in LOGICAL input field", *next);
948	return false;
949	}
950	if (remaining) { // ignore the rest of a fixed-width field
951	io.HandleRelativePosition(*remaining);
952	} else if (edit.descriptor == DataEdit::ListDirected) {
953	while (io.NextInField(remaining, edit)) { // discard rest of field
954	}
955	}
956	return CheckCompleteListDirectedField(io, edit);
957	}
958
959	// See 13.10.3.1 paragraphs 7-9 in Fortran 2018
960	template <typename CHAR>
961	static RT_API_ATTRS bool EditDelimitedCharacterInput(
962	IoStatementState &io, CHAR x, std::size_t length, char32_t* delimiter) {
963	bool result{true};
964	while (true) {
965	std::size_t byteCount{`0`};
966	auto ch{io.GetCurrentChar(byteCount)};
967	if (!ch) {
968	if (io.AdvanceRecord()) {
969	continue;
970	} else {
971	result = false; // EOF in character value
972	break;
973	}
974	}
975	io.HandleRelativePosition(byteCount);
976	if (*ch == delimiter) {
977	auto next{io.GetCurrentChar(byteCount)};
978	if (next && *next == delimiter) {
979	// Repeated delimiter: use as character value
980	io.HandleRelativePosition(byteCount);
981	} else {
982	break; // closing delimiter
983	}
984	}
985	if (length > `0`) {
986	x++ = static_cast<CHAR>(ch);
987	--length;
988	}
989	}
990	Fortran::runtime::fill_n(x, length, `' '`);
991	return result;
992	}
993
994	template <typename CHAR>
995	static RT_API_ATTRS bool EditListDirectedCharacterInput(
996	IoStatementState &io, CHAR x, std::size_t length, const* DataEdit &edit) {
997	std::size_t byteCount{`0`};
998	auto ch{io.GetCurrentChar(byteCount)};
999	if (ch && (ch == `'\''` \|\| ch == `'"'`)) {
1000	io.HandleRelativePosition(byteCount);
1001	return EditDelimitedCharacterInput(io, x, length, *ch);
1002	}
1003	if (IsNamelistNameOrSlash(io) \|\| io.GetConnectionState().IsAtEOF()) {
1004	return false;
1005	}
1006	// Undelimited list-directed character input: stop at a value separator
1007	// or the end of the current record.
1008	while (auto ch{io.GetCurrentChar(byteCount)}) {
1009	bool isSep{false};
1010	switch (*ch) {
1011	case `' '`:
1012	case `'\t'`:
1013	case `'/'`:
1014	isSep = true;
1015	break;
1016	case `'&'`:
1017	case `'$'`:
1018	isSep = edit.IsNamelist();
1019	break;
1020	case `','`:
1021	isSep = !(edit.modes.editingFlags & decimalComma);
1022	break;
1023	case `';'`:
1024	isSep = !!(edit.modes.editingFlags & decimalComma);
1025	break;
1026	default:
1027	break;
1028	}
1029	if (isSep) {
1030	break;
1031	}
1032	if (length > `0`) {
1033	x++ = static_cast<CHAR>(ch);
1034	--length;
1035	} else if (edit.IsNamelist()) {
1036	// GNU compatibility
1037	break;
1038	}
1039	io.HandleRelativePosition(byteCount);
1040	io.GotChar(byteCount);
1041	}
1042	Fortran::runtime::fill_n(x, length, `' '`);
1043	return true;
1044	}
1045
1046	template <typename CHAR>
1047	RT_API_ATTRS bool EditCharacterInput(IoStatementState &io, const DataEdit &edit,
1048	CHAR *x, std::size_t lengthChars) {
1049	switch (edit.descriptor) {
1050	case DataEdit::ListDirected:
1051	return EditListDirectedCharacterInput(io, x, lengthChars, edit);
1052	case `'A'`:
1053	case `'G'`:
1054	break;
1055	case `'B'`:
1056	return EditBOZInput<`1`>(io, edit, x, lengthChars * sizeof *x);
1057	case `'O'`:
1058	return EditBOZInput<`3`>(io, edit, x, lengthChars * sizeof *x);
1059	case `'Z'`:
1060	return EditBOZInput<`4`>(io, edit, x, lengthChars * sizeof *x);
1061	default:
1062	io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
1063	"Data edit descriptor '%c' may not be used with a CHARACTER data item",
1064	edit.descriptor);
1065	return false;
1066	}
1067	const ConnectionState &connection{io.GetConnectionState()};
1068	std::size_t remainingChars{lengthChars};
1069	// Skip leading characters.
1070	// Their bytes don't count towards INQUIRE(IOLENGTH=).
1071	std::size_t skipChars{`0`};
1072	if (edit.width && *edit.width > `0`) {
1073	remainingChars = *edit.width;
1074	if (remainingChars > lengthChars) {
1075	skipChars = remainingChars - lengthChars;
1076	}
1077	}
1078	// When the field is wider than the variable, we drop the leading
1079	// characters. When the variable is wider than the field, there can be
1080	// trailing padding or an EOR condition.
1081	const char input{nullptr*};
1082	std::size_t readyBytes{`0`};
1083	// Transfer payload bytes; these do count.
1084	while (remainingChars > `0`) {
1085	if (readyBytes == `0`) {
1086	readyBytes = io.GetNextInputBytes(input);
1087	if (readyBytes == `0` \|\|
1088	(readyBytes < remainingChars && edit.modes.nonAdvancing)) {
1089	if (io.CheckForEndOfRecord(readyBytes, connection)) {
1090	if (readyBytes == `0`) {
1091	// PAD='YES' and no more data
1092	Fortran::runtime::fill_n(x, lengthChars, `' '`);
1093	return !io.GetIoErrorHandler().InError();
1094	} else {
1095	// Do partial read(s) then pad on last iteration
1096	}
1097	} else {
1098	return !io.GetIoErrorHandler().InError();
1099	}
1100	}
1101	}
1102	std::size_t chunkBytes;
1103	std::size_t chunkChars{`1`};
1104	bool skipping{skipChars > `0`};
1105	if (connection.isUTF8) {
1106	chunkBytes = MeasureUTF8Bytes(*input);
1107	if (skipping) {
1108	--skipChars;
1109	} else if (auto ucs{DecodeUTF8(input)}) {
1110	if ((sizeof x == `1` && ucs > `0xff`) \|\|
1111	(sizeof x == `2` && ucs > `0xffff`)) {
1112	*x++ = `'?'`;
1113	} else {
1114	x++ = static_cast<CHAR>(ucs);
1115	}
1116	--lengthChars;
1117	} else if (chunkBytes == `0`) {
1118	// error recovery: skip bad encoding
1119	chunkBytes = `1`;
1120	}
1121	} else if (connection.internalIoCharKind > `1`) {
1122	// Reading from non-default character internal unit
1123	chunkBytes = connection.internalIoCharKind;
1124	if (skipping) {
1125	--skipChars;
1126	} else {
1127	char32_t buffer{`0`};
1128	std::memcpy(&buffer, input, chunkBytes);
1129	if ((sizeof *x == `1` && buffer > `0xff`) \|\|
1130	(sizeof *x == `2` && buffer > `0xffff`)) {
1131	*x++ = `'?'`;
1132	} else {
1133	x++ = static_cast*<CHAR>(buffer);
1134	}
1135	--lengthChars;
1136	}
1137	} else if constexpr (sizeof *x > `1`) {
1138	// Read single byte with expansion into multi-byte CHARACTER
1139	chunkBytes = `1`;
1140	if (skipping) {
1141	--skipChars;
1142	} else {
1143	x++ = static_cast<unsigned* char>(*input);
1144	--lengthChars;
1145	}
1146	} else { // single bytes -> default CHARACTER
1147	if (skipping) {
1148	chunkBytes = std::min<std::size_t>(a: skipChars, b: readyBytes);
1149	chunkChars = chunkBytes;
1150	skipChars -= chunkChars;
1151	} else {
1152	chunkBytes = std::min<std::size_t>(a: remainingChars, b: readyBytes);
1153	chunkBytes = std::min<std::size_t>(a: lengthChars, b: chunkBytes);
1154	chunkChars = chunkBytes;
1155	std::memcpy(x, input, chunkBytes);
1156	x += chunkBytes;
1157	lengthChars -= chunkChars;
1158	}
1159	}
1160	input += chunkBytes;
1161	remainingChars -= chunkChars;
1162	if (!skipping) {
1163	io.GotChar(chunkBytes);
1164	}
1165	io.HandleRelativePosition(chunkBytes);
1166	readyBytes -= chunkBytes;
1167	}
1168	// Pad the remainder of the input variable, if any.
1169	Fortran::runtime::fill_n(x, lengthChars, `' '`);
1170	return CheckCompleteListDirectedField(io, edit);
1171	}
1172
1173	template RT_API_ATTRS bool EditRealInput<`2`>(
1174	IoStatementState &, const DataEdit &, void *);
1175	template RT_API_ATTRS bool EditRealInput<`3`>(
1176	IoStatementState &, const DataEdit &, void *);
1177	template RT_API_ATTRS bool EditRealInput<`4`>(
1178	IoStatementState &, const DataEdit &, void *);
1179	template RT_API_ATTRS bool EditRealInput<`8`>(
1180	IoStatementState &, const DataEdit &, void *);
1181	template RT_API_ATTRS bool EditRealInput<`10`>(
1182	IoStatementState &, const DataEdit &, void *);
1183	// TODO: double/double
1184	template RT_API_ATTRS bool EditRealInput<`16`>(
1185	IoStatementState &, const DataEdit &, void *);
1186
1187	template RT_API_ATTRS bool EditCharacterInput(
1188	IoStatementState &, const DataEdit &, char *, std::size_t);
1189	template RT_API_ATTRS bool EditCharacterInput(
1190	IoStatementState &, const DataEdit &, char16_t *, std::size_t);
1191	template RT_API_ATTRS bool EditCharacterInput(
1192	IoStatementState &, const DataEdit &, char32_t *, std::size_t);
1193
1194	RT_OFFLOAD_API_GROUP_END
1195	} // namespace Fortran::runtime::io
1196

source code of flang-rt/lib/runtime/edit-input.cpp