edit-input.cpp source code [flang/runtime/edit-input.cpp]

1	//===-- runtime/edit-input.cpp --------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "edit-input.h"
10	#include "namelist.h"
11	#include "utf.h"
12	#include "flang/Common/optional.h"
13	#include "flang/Common/real.h"
14	#include "flang/Common/uint128.h"
15	#include "flang/Runtime/freestanding-tools.h"
16	#include <algorithm>
17	#include <cfenv>
18
19	namespace Fortran::runtime::io {
20	RT_OFFLOAD_API_GROUP_BEGIN
21
22	// Checks that a list-directed input value has been entirely consumed and
23	// doesn't contain unparsed characters before the next value separator.
24	static inline RT_API_ATTRS bool IsCharValueSeparator(
25	const DataEdit &edit, char32_t ch) {
26	char32_t comma{
27	edit.modes.editingFlags & decimalComma ? char32_t{`';'`} : char32_t{`','`}};
28	return ch == `' '` \|\| ch == `'\t'` \|\| ch == comma \|\| ch == `'/'` \|\|
29	(edit.IsNamelist() && (ch == `'&'` \|\| ch == `'$'`));
30	}
31
32	static RT_API_ATTRS bool CheckCompleteListDirectedField(
33	IoStatementState &io, const DataEdit &edit) {
34	if (edit.IsListDirected()) {
35	std::size_t byteCount;
36	if (auto ch{io.GetCurrentChar(byteCount)}) {
37	if (IsCharValueSeparator(edit, *ch)) {
38	return true;
39	} else {
40	const auto &connection{io.GetConnectionState()};
41	io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator,
42	"invalid character (0x%x) after list-directed input value, "
43	"at column %d in record %d",
44	static_cast<unsigned>(*ch),
45	static_cast<int>(connection.positionInRecord + `1`),
46	static_cast<int>(connection.currentRecordNumber));
47	return false;
48	}
49	} else {
50	return true; // end of record: ok
51	}
52	} else {
53	return true;
54	}
55	}
56
57	template <int LOG2_BASE>
58	static RT_API_ATTRS bool EditBOZInput(
59	IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
60	// Skip leading white space & zeroes
61	Fortran::common::optional<int> remaining{io.CueUpInput(edit)};
62	auto start{io.GetConnectionState().positionInRecord};
63	Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)};
64	if (next.value_or(`'?'`) == `'0'`) {
65	do {
66	start = io.GetConnectionState().positionInRecord;
67	next = io.NextInField(remaining, edit);
68	} while (next && *next == `'0'`);
69	}
70	// Count significant digits after any leading white space & zeroes
71	int digits{`0`};
72	int significantBits{`0`};
73	for (; next; next = io.NextInField(remaining, edit)) {
74	char32_t ch{*next};
75	if (ch == `' '` \|\| ch == `'\t'`) {
76	if (edit.modes.editingFlags & blankZero) {
77	ch = `'0'`; // BZ mode - treat blank as if it were zero
78	} else {
79	continue;
80	}
81	}
82	if (ch >= `'0'` && ch <= `'1'`) {
83	} else if (LOG2_BASE >= `3` && ch >= `'2'` && ch <= `'7'`) {
84	} else if (LOG2_BASE >= `4` && ch >= `'8'` && ch <= `'9'`) {
85	} else if (LOG2_BASE >= `4` && ch >= `'A'` && ch <= `'F'`) {
86	} else if (LOG2_BASE >= `4` && ch >= `'a'` && ch <= `'f'`) {
87	} else if (ch == `','`) {
88	break; // end non-list-directed field early
89	} else {
90	io.GetIoErrorHandler().SignalError(
91	"Bad character '%lc' in B/O/Z input field", ch);
92	return false;
93	}
94	if (digits++ == `0`) {
95	significantBits = `4`;
96	if (ch >= `'0'` && ch <= `'1'`) {
97	significantBits = `1`;
98	} else if (ch >= `'2'` && ch <= `'3'`) {
99	significantBits = `2`;
100	} else if (ch >= `'4'` && ch <= `'7'`) {
101	significantBits = `3`;
102	} else {
103	significantBits = `4`;
104	}
105	} else {
106	significantBits += LOG2_BASE;
107	}
108	}
109	auto significantBytes{static_cast<std::size_t>(significantBits + `7`) / `8`};
110	if (significantBytes > bytes) {
111	io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow,
112	"B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
113	return false;
114	}
115	// Reset to start of significant digits
116	io.HandleAbsolutePosition(start);
117	remaining.reset();
118	// Make a second pass now that the digit count is known
119	std::memset(n, `0`, bytes);
120	int increment{isHostLittleEndian ? -`1` : `1`};
121	auto data{reinterpret_cast<unsigned* char *>(n) +
122	(isHostLittleEndian ? significantBytes - `1` : `0`)};
123	int shift{((digits - `1`) * LOG2_BASE) & `7`};
124	while (digits > `0`) {
125	char32_t ch{*io.NextInField(remaining, edit)};
126	int digit{`0`};
127	if (ch == `' '` \|\| ch == `'\t'`) {
128	if (edit.modes.editingFlags & blankZero) {
129	ch = `'0'`; // BZ mode - treat blank as if it were zero
130	} else {
131	continue;
132	}
133	}
134	--digits;
135	if (ch >= `'0'` && ch <= `'9'`) {
136	digit = ch - `'0'`;
137	} else if (ch >= `'A'` && ch <= `'F'`) {
138	digit = ch + `10` - `'A'`;
139	} else if (ch >= `'a'` && ch <= `'f'`) {
140	digit = ch + `10` - `'a'`;
141	} else {
142	continue;
143	}
144	if (shift < `0`) {
145	if (shift + LOG2_BASE > `0`) { // misaligned octal
146	*data \|= digit >> -shift;
147	}
148	shift += `8`;
149	data += increment;
150	}
151	*data \|= digit << shift;
152	shift -= LOG2_BASE;
153	}
154	return CheckCompleteListDirectedField(io, edit);
155	}
156
157	static inline RT_API_ATTRS char32_t GetRadixPointChar(const DataEdit &edit) {
158	return edit.modes.editingFlags & decimalComma ? char32_t{`','`} : char32_t{`'.'`};
159	}
160
161	// Prepares input from a field, and returns the sign, if any, else '\0'.
162	static RT_API_ATTRS char ScanNumericPrefix(IoStatementState &io,
163	const DataEdit &edit, Fortran::common::optional<char32_t> &next,
164	Fortran::common::optional<int> &remaining) {
165	remaining = io.CueUpInput(edit);
166	next = io.NextInField(remaining, edit);
167	char sign{`'\0'`};
168	if (next) {
169	if (next == `'-'` \|\| next == `'+'`) {
170	sign = *next;
171	if (!edit.IsListDirected()) {
172	io.SkipSpaces(remaining);
173	}
174	next = io.NextInField(remaining, edit);
175	}
176	}
177	return sign;
178	}
179
180	RT_API_ATTRS bool EditIntegerInput(
181	IoStatementState &io, const DataEdit &edit, void n, int* kind) {
182	RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= `1` && !(kind & (kind - `1`)));
183	switch (edit.descriptor) {
184	case DataEdit::ListDirected:
185	if (IsNamelistNameOrSlash(io)) {
186	return false;
187	}
188	break;
189	case `'G'`:
190	case `'I'`:
191	break;
192	case `'B'`:
193	return EditBOZInput<`1`>(io, edit, n, kind);
194	case `'O'`:
195	return EditBOZInput<`3`>(io, edit, n, kind);
196	case `'Z'`:
197	return EditBOZInput<`4`>(io, edit, n, kind);
198	case `'A'`: // legacy extension
199	return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
200	default:
201	io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
202	"Data edit descriptor '%c' may not be used with an INTEGER data item",
203	edit.descriptor);
204	return false;
205	}
206	Fortran::common::optional<int> remaining;
207	Fortran::common::optional<char32_t> next;
208	char sign{ScanNumericPrefix(io, edit, next, remaining)};
209	common::UnsignedInt128 value{`0`};
210	bool any{!!sign};
211	bool overflow{false};
212	for (; next; next = io.NextInField(remaining, edit)) {
213	char32_t ch{*next};
214	if (ch == `' '` \|\| ch == `'\t'`) {
215	if (edit.modes.editingFlags & blankZero) {
216	ch = `'0'`; // BZ mode - treat blank as if it were zero
217	} else {
218	continue;
219	}
220	}
221	int digit{`0`};
222	if (ch >= `'0'` && ch <= `'9'`) {
223	digit = ch - `'0'`;
224	} else if (ch == `','`) {
225	break; // end non-list-directed field early
226	} else {
227	io.GetIoErrorHandler().SignalError(
228	"Bad character '%lc' in INTEGER input field", ch);
229	return false;
230	}
231	static constexpr auto maxu128{~common::UnsignedInt128{`0`}};
232	static constexpr auto maxu128OverTen{maxu128 / `10`};
233	static constexpr int maxLastDigit{
234	static_cast<int>(maxu128 - (maxu128OverTen * `10`))};
235	overflow \|= value >= maxu128OverTen &&
236	(value > maxu128OverTen \|\| digit > maxLastDigit);
237	value *= `10`;
238	value += digit;
239	any = true;
240	}
241	if (!any && !remaining) {
242	io.GetIoErrorHandler().SignalError(
243	"Integer value absent from NAMELIST or list-directed input");
244	return false;
245	}
246	auto maxForKind{common::UnsignedInt128{`1`} << ((`8` * kind) - `1`)};
247	overflow \|= value >= maxForKind && (value > maxForKind \|\| sign != `'-'`);
248	if (overflow) {
249	io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow,
250	"Decimal input overflows INTEGER(%d) variable", kind);
251	return false;
252	}
253	if (sign == `'-'`) {
254	value = -value;
255	}
256	if (any \|\| !io.GetIoErrorHandler().InError()) {
257	// The value is stored in the lower order bits on big endian platform.
258	// When memcpy, shift the value to the higher order bit.
259	auto shft{static_cast<int>(sizeof(value.low())) - kind};
260	// For kind==8 (i.e. shft==0), the value is stored in low_ in big endian.
261	if (!isHostLittleEndian && shft >= `0`) {
262	auto l{value.low() << (`8` * shft)};
263	std::memcpy(n, &l, kind);
264	} else {
265	std::memcpy(n, &value, kind); // a blank field means zero
266	}
267	return true;
268	} else {
269	return false;
270	}
271	}
272
273	// Parses a REAL input number from the input source as a normalized
274	// fraction into a supplied buffer -- there's an optional '-', a
275	// decimal point when the input is not hexadecimal, and at least one
276	// digit. Replaces blanks with zeroes where appropriate.
277	struct ScannedRealInput {
278	// Number of characters that (should) have been written to the
279	// buffer -- this can be larger than the buffer size, which
280	// indicates buffer overflow. Zero indicates an error.
281	int got{`0`};
282	int exponent{`0`}; // adjusted as necessary; binary if isHexadecimal
283	bool isHexadecimal{false}; // 0X...
284	};
285	static RT_API_ATTRS ScannedRealInput ScanRealInput(
286	char buffer, int* bufferSize, IoStatementState &io, const DataEdit &edit) {
287	Fortran::common::optional<int> remaining;
288	Fortran::common::optional<char32_t> next;
289	int got{`0`};
290	Fortran::common::optional<int> radixPointOffset;
291	// The following lambda definition violates the conding style,
292	// but cuda-11.8 nvcc hits an internal error with the brace initialization.
293	auto Put = [&](char ch) -> void {
294	if (got < bufferSize) {
295	buffer[got] = ch;
296	}
297	++got;
298	};
299	char sign{ScanNumericPrefix(io, edit, next, remaining)};
300	if (sign == `'-'`) {
301	Put(`'-'`);
302	}
303	bool bzMode{(edit.modes.editingFlags & blankZero) != `0`};
304	int exponent{`0`};
305	if (!next \|\| (!bzMode && *next == `' '`) \|\|
306	(!(edit.modes.editingFlags & decimalComma) && *next == `','`)) {
307	if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) {
308	// An empty/blank field means zero when not list-directed.
309	// A fixed-width field containing only a sign is also zero;
310	// this behavior isn't standard-conforming in F'2023 but it is
311	// required to pass FCVS.
312	Put(`'0'`);
313	}
314	return {got, exponent, false};
315	}
316	char32_t radixPointChar{GetRadixPointChar(edit)};
317	char32_t first{next >= `'a'` && next <= `'z'` ? next + `'A'` - `'a'` : next};
318	bool isHexadecimal{false};
319	if (first == `'N'` \|\| first == `'I'`) {
320	// NaN or infinity - convert to upper case
321	// Subtle: a blank field of digits could be followed by 'E' or 'D',
322	for (; next &&
323	((next >= `'a'` && next <= `'z'`) \|\| (next >= `'A'` && next <= `'Z'`));
324	next = io.NextInField(remaining, edit)) {
325	if (next >= `'a'` && next <= `'z'`) {
326	Put(*next - `'a'` + `'A'`);
327	} else {
328	Put(*next);
329	}
330	}
331	if (next && next == `'('`) { // NaN(...)*
332	Put(`'('`);
333	int depth{`1`};
334	while (true) {
335	next = io.NextInField(remaining, edit);
336	if (depth == `0`) {
337	break;
338	} else if (!next) {
339	return {}; // error
340	} else if (*next == `'('`) {
341	++depth;
342	} else if (*next == `')'`) {
343	--depth;
344	}
345	Put(*next);
346	}
347	}
348	} else if (first == radixPointChar \|\| (first >= `'0'` && first <= `'9'`) \|\|
349	(bzMode && (first == `' '` \|\| first == `'\t'`)) \|\| first == `'E'` \|\|
350	first == `'D'` \|\| first == `'Q'`) {
351	if (first == `'0'`) {
352	next = io.NextInField(remaining, edit);
353	if (next && (next == `'x'` \|\| next == `'X'`)) { // 0X...
354	isHexadecimal = true;
355	next = io.NextInField(remaining, edit);
356	} else {
357	Put(`'0'`);
358	}
359	}
360	// input field is normalized to a fraction
361	if (!isHexadecimal) {
362	Put(`'.'`);
363	}
364	auto start{got};
365	for (; next; next = io.NextInField(remaining, edit)) {
366	char32_t ch{*next};
367	if (ch == `' '` \|\| ch == `'\t'`) {
368	if (isHexadecimal) {
369	return {}; // error
370	} else if (bzMode) {
371	ch = `'0'`; // BZ mode - treat blank as if it were zero
372	} else {
373	continue; // ignore blank in fixed field
374	}
375	}
376	if (ch == `'0'` && got == start && !radixPointOffset) {
377	// omit leading zeroes before the radix point
378	} else if (ch >= `'0'` && ch <= `'9'`) {
379	Put(ch);
380	} else if (ch == radixPointChar && !radixPointOffset) {
381	// The radix point character is not* copied to the buffer.*
382	radixPointOffset = got - start; // # of digits before the radix point
383	} else if (isHexadecimal && ch >= `'A'` && ch <= `'F'`) {
384	Put(ch);
385	} else if (isHexadecimal && ch >= `'a'` && ch <= `'f'`) {
386	Put(ch - `'a'` + `'A'`); // normalize to capitals
387	} else {
388	break;
389	}
390	}
391	if (got == start) {
392	// Nothing but zeroes and maybe a radix point. F'2018 requires
393	// at least one digit, but F'77 did not, and a bare "." shows up in
394	// the FCVS suite.
395	Put(`'0'`); // emit at least one digit
396	}
397	// In list-directed input, a bad exponent is not consumed.
398	auto nextBeforeExponent{next};
399	auto startExponent{io.GetConnectionState().positionInRecord};
400	bool hasGoodExponent{false};
401	if (next) {
402	if (isHexadecimal) {
403	if (next == `'p'` \|\| next == `'P'`) {
404	next = io.NextInField(remaining, edit);
405	} else {
406	// The binary exponent is not optional in the standard.
407	return {}; // error
408	}
409	} else if (next == `'e'` \|\| next == `'E'` \|\| next == `'d'` \|\| next == `'D'` \|\|
410	next == `'q'` \|\| next == `'Q'`) {
411	// Optional exponent letter. Blanks are allowed between the
412	// optional exponent letter and the exponent value.
413	io.SkipSpaces(remaining);
414	next = io.NextInField(remaining, edit);
415	}
416	}
417	if (next &&
418	(next == `'-'` \|\| next == `'+'` \|\| (next >= `'0'` && next <= `'9'`) \|\|
419	next == `' '` \|\| next == `'\t'`)) {
420	bool negExpo{*next == `'-'`};
421	if (negExpo \|\| *next == `'+'`) {
422	next = io.NextInField(remaining, edit);
423	}
424	for (; next; next = io.NextInField(remaining, edit)) {
425	if (next >= `'0'` && next <= `'9'`) {
426	hasGoodExponent = true;
427	if (exponent < `10000`) {
428	exponent = `10` * exponent + *next - `'0'`;
429	}
430	} else if (next == `' '` \|\| next == `'\t'`) {
431	if (isHexadecimal) {
432	break;
433	} else if (bzMode) {
434	hasGoodExponent = true;
435	exponent = `10` * exponent;
436	}
437	} else {
438	break;
439	}
440	}
441	if (negExpo) {
442	exponent = -exponent;
443	}
444	}
445	if (!hasGoodExponent) {
446	if (isHexadecimal) {
447	return {}; // error
448	}
449	// There isn't a good exponent; do not consume it.
450	next = nextBeforeExponent;
451	io.HandleAbsolutePosition(startExponent);
452	// The default exponent is -kP, but the scale factor doesn't affect
453	// an explicit exponent.
454	exponent = -edit.modes.scale;
455	}
456	// Adjust exponent by number of digits before the radix point.
457	if (isHexadecimal) {
458	// Exponents for hexadecimal input are binary.
459	exponent += radixPointOffset.value_or(got - start) * `4`;
460	} else if (radixPointOffset) {
461	exponent += *radixPointOffset;
462	} else {
463	// When no redix point (or comma) appears in the value, the 'd'
464	// part of the edit descriptor must be interpreted as the number of
465	// digits in the value to be interpreted as being to the right* of*
466	// the assumed radix point (13.7.2.3.2)
467	exponent += got - start - edit.digits.value_or(`0`);
468	}
469	}
470	// Consume the trailing ')' of a list-directed or NAMELIST complex
471	// input value.
472	if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
473	if (next && (next == `' '` \|\| next == `'\t'`)) {
474	io.SkipSpaces(remaining);
475	next = io.NextInField(remaining, edit);
476	}
477	if (!next) { // NextInField fails on separators like ')'
478	std::size_t byteCount{`0`};
479	next = io.GetCurrentChar(byteCount);
480	if (next && *next == `')'`) {
481	io.HandleRelativePosition(byteCount);
482	}
483	}
484	} else if (remaining) {
485	while (next && (next == `' '` \|\| next == `'\t'`)) {
486	next = io.NextInField(remaining, edit);
487	}
488	if (next && (*next != `','` \|\| (edit.modes.editingFlags & decimalComma))) {
489	return {}; // error: unused nonblank character in fixed-width field
490	}
491	}
492	return {got, exponent, isHexadecimal};
493	}
494
495	static RT_API_ATTRS void RaiseFPExceptions(
496	decimal::ConversionResultFlags flags) {
497	#undef RAISE
498	#if defined(RT_DEVICE_COMPILATION)
499	Terminator terminator(__FILE__, __LINE__);
500	#define RAISE(e) \
501	terminator.Crash( \
502	"not implemented yet: raising FP exception in device code: %s", #e);
503	#else // !defined(RT_DEVICE_COMPILATION)
504	#ifdef feraisexcept // a macro in some environments; omit std::
505	#define RAISE feraiseexcept
506	#else
507	#define RAISE std::feraiseexcept
508	#endif
509	#endif // !defined(RT_DEVICE_COMPILATION)
510	if (flags & decimal::ConversionResultFlags::Overflow) {
511	RAISE(FE_OVERFLOW);
512	}
513	if (flags & decimal::ConversionResultFlags::Underflow) {
514	RAISE(FE_UNDERFLOW);
515	}
516	if (flags & decimal::ConversionResultFlags::Inexact) {
517	RAISE(FE_INEXACT);
518	}
519	if (flags & decimal::ConversionResultFlags::Invalid) {
520	RAISE(FE_INVALID);
521	}
522	#undef RAISE
523	}
524
525	// If no special modes are in effect and the form of the input value
526	// that's present in the input stream is acceptable to the decimal->binary
527	// converter without modification, this fast path for real input
528	// saves time by avoiding memory copies and reformatting of the exponent.
529	template <int PRECISION>
530	static RT_API_ATTRS bool TryFastPathRealDecimalInput(
531	IoStatementState &io, const DataEdit &edit, void *n) {
532	if (edit.modes.editingFlags & (blankZero \| decimalComma)) {
533	return false;
534	}
535	if (edit.modes.scale != `0`) {
536	return false;
537	}
538	const ConnectionState &connection{io.GetConnectionState()};
539	if (connection.internalIoCharKind > `1`) {
540	return false; // reading non-default character
541	}
542	const char str{nullptr*};
543	std::size_t got{io.GetNextInputBytes(str)};
544	if (got == `0` \|\| str == nullptr \|\| !connection.recordLength.has_value()) {
545	return false; // could not access reliably-terminated input stream
546	}
547	const char *p{str};
548	std::int64_t maxConsume{
549	std::min<std::int64_t>(got, edit.width.value_or(got))};
550	const char *limit{str + maxConsume};
551	decimal::ConversionToBinaryResult<PRECISION> converted{
552	decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
553	if (converted.flags & (decimal::Invalid \| decimal::Overflow)) {
554	return false;
555	}
556	if (edit.digits.value_or(`0`) != `0`) {
557	// Edit descriptor is Fw.d (or other) with d != 0, which
558	// implies scaling
559	const char *q{str};
560	for (; q < limit; ++q) {
561	if (q == `'.'` \|\| q == `'n'` \|\| *q == `'N'`) {
562	break;
563	}
564	}
565	if (q == limit) {
566	// No explicit decimal point, and not NaN/Inf.
567	return false;
568	}
569	}
570	if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
571	// Need to consume a trailing ')', possibly with leading spaces
572	for (; p < limit && (p == `' '` \|\| p == `'\t'`); ++p) {
573	}
574	if (p < limit && *p == `')'`) {
575	++p;
576	} else {
577	return false;
578	}
579	} else if (edit.IsListDirected()) {
580	if (p < limit && !IsCharValueSeparator(edit, ch: *p)) {
581	return false;
582	}
583	} else {
584	for (; p < limit && (p == `' '` \|\| p == `'\t'`); ++p) {
585	}
586	if (edit.width && p < str + *edit.width) {
587	return false; // unconverted characters remain in fixed width field
588	}
589	}
590	// Success on the fast path!
591	*reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
592	converted.binary;
593	io.HandleRelativePosition(p - str);
594	// Set FP exception flags
595	if (converted.flags != decimal::ConversionResultFlags::Exact) {
596	RaiseFPExceptions(converted.flags);
597	}
598	return true;
599	}
600
601	template <int binaryPrecision>
602	RT_API_ATTRS decimal::ConversionToBinaryResult<binaryPrecision>
603	ConvertHexadecimal(
604	const char &p, enum* decimal::FortranRounding rounding, int expo) {
605	using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>;
606	using RawType = typename RealType::RawType;
607	bool isNegative{*p == `'-'`};
608	constexpr RawType one{`1`};
609	RawType signBit{`0`};
610	if (isNegative) {
611	++p;
612	signBit = one << (RealType::bits - `1`);
613	}
614	RawType fraction{`0`};
615	// Adjust the incoming binary P+/- exponent to shift the radix point
616	// to below the LSB and add in the bias.
617	expo += binaryPrecision - `1` + RealType::exponentBias;
618	// Input the fraction.
619	int roundingBit{`0`};
620	int guardBit{`0`};
621	for (; *p; ++p) {
622	fraction <<= `4`;
623	expo -= `4`;
624	if (p >= `'0'` && p <= `'9'`) {
625	fraction \|= *p - `'0'`;
626	} else if (p >= `'A'` && p <= `'F'`) {
627	fraction \|= p - `'A'` + `10`; // data were normalized to capitals*
628	} else {
629	break;
630	}
631	if (fraction >> binaryPrecision) {
632	while (fraction >> binaryPrecision) {
633	guardBit \|= roundingBit;
634	roundingBit = (int)fraction & `1`;
635	fraction >>= `1`;
636	++expo;
637	}
638	// Consume excess digits
639	while (*++p) {
640	if (*p == `'0'`) {
641	} else if ((p >= `'1'` && p <= `'9'`) \|\| (p >= `'A'` && p <= `'F'`)) {
642	guardBit = `1`;
643	} else {
644	break;
645	}
646	}
647	break;
648	}
649	}
650	if (fraction) {
651	// Boost biased expo if too small
652	while (expo < `1`) {
653	guardBit \|= roundingBit;
654	roundingBit = (int)fraction & `1`;
655	fraction >>= `1`;
656	++expo;
657	}
658	// Normalize
659	while (expo > `1` && !(fraction >> (binaryPrecision - `1`))) {
660	fraction <<= `1`;
661	--expo;
662	guardBit = roundingBit = `0`;
663	}
664	}
665	// Rounding
666	bool increase{false};
667	switch (rounding) {
668	case decimal::RoundNearest: // RN & RP
669	increase = roundingBit && (guardBit \| ((int)fraction & `1`));
670	break;
671	case decimal::RoundUp: // RU
672	increase = !isNegative && (roundingBit \| guardBit);
673	break;
674	case decimal::RoundDown: // RD
675	increase = isNegative && (roundingBit \| guardBit);
676	break;
677	case decimal::RoundToZero: // RZ
678	break;
679	case decimal::RoundCompatible: // RC
680	increase = roundingBit != `0`;
681	break;
682	}
683	if (increase) {
684	++fraction;
685	if (fraction >> binaryPrecision) {
686	fraction >>= `1`;
687	++expo;
688	}
689	}
690	// Package & return result
691	constexpr RawType significandMask{(one << RealType::significandBits) - `1`};
692	int flags{(roundingBit \| guardBit) ? decimal::Inexact : decimal::Exact};
693	if (!fraction) {
694	expo = `0`;
695	} else if (expo == `1` && !(fraction >> (binaryPrecision - `1`))) {
696	expo = `0`; // subnormal
697	flags \|= decimal::Underflow;
698	} else if (expo >= RealType::maxExponent) {
699	if (rounding == decimal::RoundToZero \|\|
700	(rounding == decimal::RoundDown && !isNegative) \|\|
701	(rounding == decimal::RoundUp && isNegative)) {
702	expo = RealType::maxExponent - `1`; // +/-HUGE()
703	fraction = significandMask;
704	} else {
705	expo = RealType::maxExponent; // +/-Inf
706	fraction = `0`;
707	flags \|= decimal::Overflow;
708	}
709	} else {
710	fraction &= significandMask; // remove explicit normalization unless x87
711	}
712	return decimal::ConversionToBinaryResult<binaryPrecision>{
713	RealType{static_cast<RawType>(signBit \|
714	static_cast<RawType>(expo) << RealType::significandBits \| fraction)},
715	static_cast<decimal::ConversionResultFlags>(flags)};
716	}
717
718	template <int KIND>
719	RT_API_ATTRS bool EditCommonRealInput(
720	IoStatementState &io, const DataEdit &edit, void *n) {
721	constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
722	if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) {
723	return CheckCompleteListDirectedField(io, edit);
724	}
725	// Fast path wasn't available or didn't work; go the more general route
726	static constexpr int maxDigits{
727	common::MaxDecimalConversionDigits(binaryPrecision)};
728	static constexpr int bufferSize{maxDigits + `18`};
729	char buffer[bufferSize];
730	auto scanned{ScanRealInput(buffer, maxDigits + `2`, io, edit)};
731	int got{scanned.got};
732	if (got >= maxDigits + `2`) {
733	io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
734	return false;
735	}
736	if (got == `0`) {
737	const auto &connection{io.GetConnectionState()};
738	io.GetIoErrorHandler().SignalError(IostatBadRealInput,
739	"Bad real input data at column %d of record %d",
740	static_cast<int>(connection.positionInRecord + `1`),
741	static_cast<int>(connection.currentRecordNumber));
742	return false;
743	}
744	decimal::ConversionToBinaryResult<binaryPrecision> converted;
745	const char *p{buffer};
746	if (scanned.isHexadecimal) {
747	buffer[got] = `'\0'`;
748	converted = ConvertHexadecimal<binaryPrecision>(
749	p, edit.modes.round, scanned.exponent);
750	} else {
751	bool hadExtra{got > maxDigits};
752	int exponent{scanned.exponent};
753	if (exponent != `0`) {
754	buffer[got++] = `'e'`;
755	if (exponent < `0`) {
756	buffer[got++] = `'-'`;
757	exponent = -exponent;
758	}
759	if (exponent > `9999`) {
760	exponent = `9999`; // will convert to +/-Inf
761	}
762	if (exponent > `999`) {
763	int dig{exponent / `1000`};
764	buffer[got++] = `'0'` + dig;
765	int rest{exponent - `1000` * dig};
766	dig = rest / `100`;
767	buffer[got++] = `'0'` + dig;
768	rest -= `100` * dig;
769	dig = rest / `10`;
770	buffer[got++] = `'0'` + dig;
771	buffer[got++] = `'0'` + (rest - `10` * dig);
772	} else if (exponent > `99`) {
773	int dig{exponent / `100`};
774	buffer[got++] = `'0'` + dig;
775	int rest{exponent - `100` * dig};
776	dig = rest / `10`;
777	buffer[got++] = `'0'` + dig;
778	buffer[got++] = `'0'` + (rest - `10` * dig);
779	} else if (exponent > `9`) {
780	int dig{exponent / `10`};
781	buffer[got++] = `'0'` + dig;
782	buffer[got++] = `'0'` + (exponent - `10` * dig);
783	} else {
784	buffer[got++] = `'0'` + exponent;
785	}
786	}
787	buffer[got] = `'\0'`;
788	converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round);
789	if (hadExtra) {
790	converted.flags = static_cast<enum decimal::ConversionResultFlags>(
791	converted.flags \| decimal::Inexact);
792	}
793	}
794	if (p) { // unprocessed junk after value*
795	const auto &connection{io.GetConnectionState()};
796	io.GetIoErrorHandler().SignalError(IostatBadRealInput,
797	"Trailing characters after real input data at column %d of record %d",
798	static_cast<int>(connection.positionInRecord + `1`),
799	static_cast<int>(connection.currentRecordNumber));
800	return false;
801	}
802	*reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
803	converted.binary;
804	// Set FP exception flags
805	if (converted.flags != decimal::ConversionResultFlags::Exact) {
806	if (converted.flags & decimal::ConversionResultFlags::Overflow) {
807	io.GetIoErrorHandler().SignalError(IostatRealInputOverflow);
808	return false;
809	}
810	RaiseFPExceptions(converted.flags);
811	}
812	return CheckCompleteListDirectedField(io, edit);
813	}
814
815	template <int KIND>
816	RT_API_ATTRS bool EditRealInput(
817	IoStatementState &io, const DataEdit &edit, void *n) {
818	switch (edit.descriptor) {
819	case DataEdit::ListDirected:
820	if (IsNamelistNameOrSlash(io)) {
821	return false;
822	}
823	return EditCommonRealInput<KIND>(io, edit, n);
824	case DataEdit::ListDirectedRealPart:
825	case DataEdit::ListDirectedImaginaryPart:
826	case `'F'`:
827	case `'E'`: // incl. EN, ES, & EX
828	case `'D'`:
829	case `'G'`:
830	return EditCommonRealInput<KIND>(io, edit, n);
831	case `'B'`:
832	return EditBOZInput<`1`>(io, edit, n,
833	common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> `3`);
834	case `'O'`:
835	return EditBOZInput<`3`>(io, edit, n,
836	common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> `3`);
837	case `'Z'`:
838	return EditBOZInput<`4`>(io, edit, n,
839	common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> `3`);
840	case `'A'`: // legacy extension
841	return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
842	default:
843	io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
844	"Data edit descriptor '%c' may not be used for REAL input",
845	edit.descriptor);
846	return false;
847	}
848	}
849
850	// 13.7.3 in Fortran 2018
851	RT_API_ATTRS bool EditLogicalInput(
852	IoStatementState &io, const DataEdit &edit, bool &x) {
853	switch (edit.descriptor) {
854	case DataEdit::ListDirected:
855	if (IsNamelistNameOrSlash(io)) {
856	return false;
857	}
858	break;
859	case `'L'`:
860	case `'G'`:
861	break;
862	default:
863	io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
864	"Data edit descriptor '%c' may not be used for LOGICAL input",
865	edit.descriptor);
866	return false;
867	}
868	Fortran::common::optional<int> remaining{io.CueUpInput(edit)};
869	Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)};
870	if (next && next == `'.'`) { // skip optional period*
871	next = io.NextInField(remaining, edit);
872	}
873	if (!next) {
874	io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
875	return false;
876	}
877	switch (*next) {
878	case `'T'`:
879	case `'t'`:
880	x = true;
881	break;
882	case `'F'`:
883	case `'f'`:
884	x = false;
885	break;
886	default:
887	io.GetIoErrorHandler().SignalError(
888	"Bad character '%lc' in LOGICAL input field", *next);
889	return false;
890	}
891	if (remaining) { // ignore the rest of a fixed-width field
892	io.HandleRelativePosition(*remaining);
893	} else if (edit.descriptor == DataEdit::ListDirected) {
894	while (io.NextInField(remaining, edit)) { // discard rest of field
895	}
896	}
897	return CheckCompleteListDirectedField(io, edit);
898	}
899
900	// See 13.10.3.1 paragraphs 7-9 in Fortran 2018
901	template <typename CHAR>
902	static RT_API_ATTRS bool EditDelimitedCharacterInput(
903	IoStatementState &io, CHAR x, std::size_t length, char32_t* delimiter) {
904	bool result{true};
905	while (true) {
906	std::size_t byteCount{`0`};
907	auto ch{io.GetCurrentChar(byteCount)};
908	if (!ch) {
909	if (io.AdvanceRecord()) {
910	continue;
911	} else {
912	result = false; // EOF in character value
913	break;
914	}
915	}
916	io.HandleRelativePosition(byteCount);
917	if (*ch == delimiter) {
918	auto next{io.GetCurrentChar(byteCount)};
919	if (next && *next == delimiter) {
920	// Repeated delimiter: use as character value
921	io.HandleRelativePosition(byteCount);
922	} else {
923	break; // closing delimiter
924	}
925	}
926	if (length > `0`) {
927	x++ = ch;
928	--length;
929	}
930	}
931	Fortran::runtime::fill_n(x, length, `' '`);
932	return result;
933	}
934
935	template <typename CHAR>
936	static RT_API_ATTRS bool EditListDirectedCharacterInput(
937	IoStatementState &io, CHAR x, std::size_t length, const* DataEdit &edit) {
938	std::size_t byteCount{`0`};
939	auto ch{io.GetCurrentChar(byteCount)};
940	if (ch && (ch == `'\''` \|\| ch == `'"'`)) {
941	io.HandleRelativePosition(byteCount);
942	return EditDelimitedCharacterInput(io, x, length, *ch);
943	}
944	if (IsNamelistNameOrSlash(io) \|\| io.GetConnectionState().IsAtEOF()) {
945	return false;
946	}
947	// Undelimited list-directed character input: stop at a value separator
948	// or the end of the current record. Subtlety: the "remaining" count
949	// here is a dummy that's used to avoid the interpretation of separators
950	// in NextInField.
951	Fortran::common::optional<int> remaining{length > `0` ? maxUTF8Bytes : `0`};
952	while (Fortran::common::optional<char32_t> next{
953	io.NextInField(remaining, edit)}) {
954	bool isSep{false};
955	switch (*next) {
956	case `' '`:
957	case `'\t'`:
958	case `'/'`:
959	isSep = true;
960	break;
961	case `'&'`:
962	case `'$'`:
963	isSep = edit.IsNamelist();
964	break;
965	case `','`:
966	isSep = !(edit.modes.editingFlags & decimalComma);
967	break;
968	case `';'`:
969	isSep = !!(edit.modes.editingFlags & decimalComma);
970	break;
971	default:
972	break;
973	}
974	if (isSep) {
975	remaining = `0`;
976	} else {
977	x++ = next;
978	remaining = --length > `0` ? maxUTF8Bytes : `0`;
979	}
980	}
981	Fortran::runtime::fill_n(x, length, `' '`);
982	return true;
983	}
984
985	template <typename CHAR>
986	RT_API_ATTRS bool EditCharacterInput(IoStatementState &io, const DataEdit &edit,
987	CHAR *x, std::size_t lengthChars) {
988	switch (edit.descriptor) {
989	case DataEdit::ListDirected:
990	return EditListDirectedCharacterInput(io, x, lengthChars, edit);
991	case `'A'`:
992	case `'G'`:
993	break;
994	case `'B'`:
995	return EditBOZInput<`1`>(io, edit, x, lengthChars * sizeof *x);
996	case `'O'`:
997	return EditBOZInput<`3`>(io, edit, x, lengthChars * sizeof *x);
998	case `'Z'`:
999	return EditBOZInput<`4`>(io, edit, x, lengthChars * sizeof *x);
1000	default:
1001	io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
1002	"Data edit descriptor '%c' may not be used with a CHARACTER data item",
1003	edit.descriptor);
1004	return false;
1005	}
1006	const ConnectionState &connection{io.GetConnectionState()};
1007	std::size_t remainingChars{lengthChars};
1008	// Skip leading characters.
1009	// Their bytes don't count towards INQUIRE(IOLENGTH=).
1010	std::size_t skipChars{`0`};
1011	if (edit.width && *edit.width > `0`) {
1012	remainingChars = *edit.width;
1013	if (remainingChars > lengthChars) {
1014	skipChars = remainingChars - lengthChars;
1015	}
1016	}
1017	// When the field is wider than the variable, we drop the leading
1018	// characters. When the variable is wider than the field, there can be
1019	// trailing padding or an EOR condition.
1020	const char input{nullptr*};
1021	std::size_t readyBytes{`0`};
1022	// Transfer payload bytes; these do count.
1023	while (remainingChars > `0`) {
1024	if (readyBytes == `0`) {
1025	readyBytes = io.GetNextInputBytes(input);
1026	if (readyBytes == `0` \|\|
1027	(readyBytes < remainingChars && edit.modes.nonAdvancing)) {
1028	if (io.CheckForEndOfRecord(readyBytes)) {
1029	if (readyBytes == `0`) {
1030	// PAD='YES' and no more data
1031	Fortran::runtime::fill_n(x, lengthChars, `' '`);
1032	return !io.GetIoErrorHandler().InError();
1033	} else {
1034	// Do partial read(s) then pad on last iteration
1035	}
1036	} else {
1037	return !io.GetIoErrorHandler().InError();
1038	}
1039	}
1040	}
1041	std::size_t chunkBytes;
1042	std::size_t chunkChars{`1`};
1043	bool skipping{skipChars > `0`};
1044	if (connection.isUTF8) {
1045	chunkBytes = MeasureUTF8Bytes(first: *input);
1046	if (skipping) {
1047	--skipChars;
1048	} else if (auto ucs{DecodeUTF8(input)}) {
1049	if ((sizeof x == `1` && ucs > `0xff`) \|\|
1050	(sizeof x == `2` && ucs > `0xffff`)) {
1051	*x++ = `'?'`;
1052	} else {
1053	x++ = ucs;
1054	}
1055	--lengthChars;
1056	} else if (chunkBytes == `0`) {
1057	// error recovery: skip bad encoding
1058	chunkBytes = `1`;
1059	}
1060	} else if (connection.internalIoCharKind > `1`) {
1061	// Reading from non-default character internal unit
1062	chunkBytes = connection.internalIoCharKind;
1063	if (skipping) {
1064	--skipChars;
1065	} else {
1066	char32_t buffer{`0`};
1067	std::memcpy(&buffer, input, chunkBytes);
1068	if ((sizeof *x == `1` && buffer > `0xff`) \|\|
1069	(sizeof *x == `2` && buffer > `0xffff`)) {
1070	*x++ = `'?'`;
1071	} else {
1072	*x++ = buffer;
1073	}
1074	--lengthChars;
1075	}
1076	} else if constexpr (sizeof *x > `1`) {
1077	// Read single byte with expansion into multi-byte CHARACTER
1078	chunkBytes = `1`;
1079	if (skipping) {
1080	--skipChars;
1081	} else {
1082	x++ = static_cast<unsigned* char>(*input);
1083	--lengthChars;
1084	}
1085	} else { // single bytes -> default CHARACTER
1086	if (skipping) {
1087	chunkBytes = std::min<std::size_t>(a: skipChars, b: readyBytes);
1088	chunkChars = chunkBytes;
1089	skipChars -= chunkChars;
1090	} else {
1091	chunkBytes = std::min<std::size_t>(a: remainingChars, b: readyBytes);
1092	chunkBytes = std::min<std::size_t>(a: lengthChars, b: chunkBytes);
1093	chunkChars = chunkBytes;
1094	std::memcpy(x, input, chunkBytes);
1095	x += chunkBytes;
1096	lengthChars -= chunkChars;
1097	}
1098	}
1099	input += chunkBytes;
1100	remainingChars -= chunkChars;
1101	if (!skipping) {
1102	io.GotChar(chunkBytes);
1103	}
1104	io.HandleRelativePosition(chunkBytes);
1105	readyBytes -= chunkBytes;
1106	}
1107	// Pad the remainder of the input variable, if any.
1108	Fortran::runtime::fill_n(x, lengthChars, `' '`);
1109	return CheckCompleteListDirectedField(io, edit);
1110	}
1111
1112	template RT_API_ATTRS bool EditRealInput<`2`>(
1113	IoStatementState &, const DataEdit &, void *);
1114	template RT_API_ATTRS bool EditRealInput<`3`>(
1115	IoStatementState &, const DataEdit &, void *);
1116	template RT_API_ATTRS bool EditRealInput<`4`>(
1117	IoStatementState &, const DataEdit &, void *);
1118	template RT_API_ATTRS bool EditRealInput<`8`>(
1119	IoStatementState &, const DataEdit &, void *);
1120	template RT_API_ATTRS bool EditRealInput<`10`>(
1121	IoStatementState &, const DataEdit &, void *);
1122	// TODO: double/double
1123	template RT_API_ATTRS bool EditRealInput<`16`>(
1124	IoStatementState &, const DataEdit &, void *);
1125
1126	template RT_API_ATTRS bool EditCharacterInput(
1127	IoStatementState &, const DataEdit &, char *, std::size_t);
1128	template RT_API_ATTRS bool EditCharacterInput(
1129	IoStatementState &, const DataEdit &, char16_t *, std::size_t);
1130	template RT_API_ATTRS bool EditCharacterInput(
1131	IoStatementState &, const DataEdit &, char32_t *, std::size_t);
1132
1133	RT_OFFLOAD_API_GROUP_END
1134	} // namespace Fortran::runtime::io
1135

source code of flang/runtime/edit-input.cpp