1//===-- runtime/format-implementation.h -------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// Implements out-of-line member functions of template class FormatControl
10
11#ifndef FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
12#define FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
13
14#include "emit-encoded.h"
15#include "format.h"
16#include "io-stmt.h"
17#include "memory.h"
18#include "flang/Common/format.h"
19#include "flang/Decimal/decimal.h"
20#include "flang/Runtime/main.h"
21#include <algorithm>
22#include <cstring>
23#include <limits>
24
25namespace Fortran::runtime::io {
26
27template <typename CONTEXT>
28FormatControl<CONTEXT>::FormatControl(const Terminator &terminator,
29 const CharType *format, std::size_t formatLength,
30 const Descriptor *formatDescriptor, int maxHeight)
31 : maxHeight_{static_cast<std::uint8_t>(maxHeight)}, format_{format},
32 formatLength_{static_cast<int>(formatLength)} {
33 RUNTIME_CHECK(terminator, maxHeight == maxHeight_);
34 if (!format && formatDescriptor) {
35 // The format is a character array passed via a descriptor.
36 std::size_t elements{formatDescriptor->Elements()};
37 std::size_t elementBytes{formatDescriptor->ElementBytes()};
38 formatLength = elements * elementBytes / sizeof(CharType);
39 formatLength_ = static_cast<int>(formatLength);
40 if (formatDescriptor->IsContiguous()) {
41 // Treat the contiguous array as a single character value.
42 format_ = const_cast<const CharType *>(
43 reinterpret_cast<CharType *>(formatDescriptor->raw().base_addr));
44 } else {
45 // Concatenate its elements into a temporary array.
46 char *p{reinterpret_cast<char *>(
47 AllocateMemoryOrCrash(terminator, formatLength * sizeof(CharType)))};
48 format_ = p;
49 SubscriptValue at[maxRank];
50 formatDescriptor->GetLowerBounds(at);
51 for (std::size_t j{0}; j < elements; ++j) {
52 std::memcpy(dest: p, src: formatDescriptor->Element<char>(at), n: elementBytes);
53 p += elementBytes;
54 formatDescriptor->IncrementSubscripts(at);
55 }
56 freeFormat_ = true;
57 }
58 }
59 RUNTIME_CHECK(
60 terminator, formatLength == static_cast<std::size_t>(formatLength_));
61 stack_[0].start = offset_;
62 stack_[0].remaining = Iteration::unlimited; // 13.4(8)
63}
64
65template <typename CONTEXT>
66int FormatControl<CONTEXT>::GetIntField(
67 IoErrorHandler &handler, CharType firstCh, bool *hadError) {
68 CharType ch{firstCh ? firstCh : PeekNext()};
69 if (ch != '-' && ch != '+' && (ch < '0' || ch > '9')) {
70 handler.SignalError(IostatErrorInFormat,
71 "Invalid FORMAT: integer expected at '%c'", static_cast<char>(ch));
72 if (hadError) {
73 *hadError = true;
74 }
75 return 0;
76 }
77 int result{0};
78 bool negate{ch == '-'};
79 if (negate || ch == '+') {
80 if (firstCh) {
81 firstCh = '\0';
82 } else {
83 ++offset_;
84 }
85 ch = PeekNext();
86 }
87 while (ch >= '0' && ch <= '9') {
88 constexpr int tenth{std::numeric_limits<int>::max() / 10};
89 if (result > tenth ||
90 ch - '0' > std::numeric_limits<int>::max() - 10 * result) {
91 handler.SignalError(
92 IostatErrorInFormat, "FORMAT integer field out of range");
93 if (hadError) {
94 *hadError = true;
95 }
96 return result;
97 }
98 result = 10 * result + ch - '0';
99 if (firstCh) {
100 firstCh = '\0';
101 } else {
102 ++offset_;
103 }
104 ch = PeekNext();
105 }
106 if (negate && (result *= -1) > 0) {
107 handler.SignalError(
108 IostatErrorInFormat, "FORMAT integer field out of range");
109 if (hadError) {
110 *hadError = true;
111 }
112 }
113 return result;
114}
115
116template <typename CONTEXT>
117static void HandleControl(CONTEXT &context, char ch, char next, int n) {
118 MutableModes &modes{context.mutableModes()};
119 switch (ch) {
120 case 'B':
121 if (next == 'Z') {
122 modes.editingFlags |= blankZero;
123 return;
124 }
125 if (next == 'N') {
126 modes.editingFlags &= ~blankZero;
127 return;
128 }
129 break;
130 case 'D':
131 if (next == 'C') {
132 modes.editingFlags |= decimalComma;
133 return;
134 }
135 if (next == 'P') {
136 modes.editingFlags &= ~decimalComma;
137 return;
138 }
139 break;
140 case 'P':
141 if (!next) {
142 modes.scale = n; // kP - decimal scaling by 10**k
143 return;
144 }
145 break;
146 case 'R':
147 switch (next) {
148 case 'N':
149 modes.round = decimal::RoundNearest;
150 return;
151 case 'Z':
152 modes.round = decimal::RoundToZero;
153 return;
154 case 'U':
155 modes.round = decimal::RoundUp;
156 return;
157 case 'D':
158 modes.round = decimal::RoundDown;
159 return;
160 case 'C':
161 modes.round = decimal::RoundCompatible;
162 return;
163 case 'P':
164 modes.round = executionEnvironment.defaultOutputRoundingMode;
165 return;
166 default:
167 break;
168 }
169 break;
170 case 'X':
171 if (!next) {
172 ConnectionState &connection{context.GetConnectionState()};
173 if (connection.internalIoCharKind > 1) {
174 n *= connection.internalIoCharKind;
175 }
176 context.HandleRelativePosition(n);
177 return;
178 }
179 break;
180 case 'S':
181 if (next == 'P') {
182 modes.editingFlags |= signPlus;
183 return;
184 }
185 if (!next || next == 'S') {
186 modes.editingFlags &= ~signPlus;
187 return;
188 }
189 break;
190 case 'T': {
191 if (!next) { // Tn
192 --n; // convert 1-based to 0-based
193 }
194 ConnectionState &connection{context.GetConnectionState()};
195 if (connection.internalIoCharKind > 1) {
196 n *= connection.internalIoCharKind;
197 }
198 if (!next) { // Tn
199 context.HandleAbsolutePosition(n);
200 return;
201 }
202 if (next == 'L' || next == 'R') { // TLn & TRn
203 context.HandleRelativePosition(next == 'L' ? -n : n);
204 return;
205 }
206 } break;
207 default:
208 break;
209 }
210 if (next) {
211 context.SignalError(IostatErrorInFormat,
212 "Unknown '%c%c' edit descriptor in FORMAT", ch, next);
213 } else {
214 context.SignalError(
215 IostatErrorInFormat, "Unknown '%c' edit descriptor in FORMAT", ch);
216 }
217}
218
219// Locates the next data edit descriptor in the format.
220// Handles all repetition counts and control edit descriptors.
221// Generally assumes that the format string has survived the common
222// format validator gauntlet.
223template <typename CONTEXT>
224int FormatControl<CONTEXT>::CueUpNextDataEdit(Context &context, bool stop) {
225 bool hitUnlimitedLoopEnd{false};
226 // Do repetitions remain on an unparenthesized data edit?
227 while (height_ > 1 && format_[stack_[height_ - 1].start] != '(') {
228 offset_ = stack_[height_ - 1].start;
229 int repeat{stack_[height_ - 1].remaining};
230 --height_;
231 if (repeat > 0) {
232 return repeat;
233 }
234 }
235 while (true) {
236 std::optional<int> repeat;
237 bool unlimited{false};
238 auto maybeReversionPoint{offset_};
239 CharType ch{GetNextChar(handler&: context)};
240 while (ch == ',' || ch == ':') {
241 // Skip commas, and don't complain if they're missing; the format
242 // validator does that.
243 if (stop && ch == ':') {
244 return 0;
245 }
246 ch = GetNextChar(handler&: context);
247 }
248 if (ch == '-' || ch == '+' || (ch >= '0' && ch <= '9')) {
249 repeat = GetIntField(handler&: context, firstCh: ch);
250 ch = GetNextChar(handler&: context);
251 } else if (ch == '*') {
252 unlimited = true;
253 ch = GetNextChar(handler&: context);
254 if (ch != '(') {
255 ReportBadFormat(context,
256 msg: "Invalid FORMAT: '*' may appear only before '('",
257 offset: maybeReversionPoint);
258 return 0;
259 }
260 if (height_ != 1) {
261 ReportBadFormat(context,
262 msg: "Invalid FORMAT: '*' must be nested in exactly one set of "
263 "parentheses",
264 offset: maybeReversionPoint);
265 return 0;
266 }
267 }
268 ch = Capitalize(ch);
269 if (ch == '(') {
270 if (height_ >= maxHeight_) {
271 ReportBadFormat(context,
272 msg: "FORMAT stack overflow: too many nested parentheses",
273 offset: maybeReversionPoint);
274 return 0;
275 }
276 stack_[height_].start = offset_ - 1; // the '('
277 RUNTIME_CHECK(context, format_[stack_[height_].start] == '(');
278 if (unlimited || height_ == 0) {
279 stack_[height_].remaining = Iteration::unlimited;
280 } else if (repeat) {
281 if (*repeat <= 0) {
282 *repeat = 1; // error recovery
283 }
284 stack_[height_].remaining = *repeat - 1;
285 } else {
286 stack_[height_].remaining = 0;
287 }
288 if (height_ == 1 && !hitEnd_) {
289 // Subtle point (F'2018 13.4 para 9): the last parenthesized group
290 // at height 1 becomes the restart point after control reaches the
291 // end of the format, including its repeat count.
292 stack_[0].start = maybeReversionPoint;
293 }
294 ++height_;
295 } else if (height_ == 0) {
296 ReportBadFormat(context, msg: "FORMAT lacks initial '('", offset: maybeReversionPoint);
297 return 0;
298 } else if (ch == ')') {
299 if (height_ == 1) {
300 if (stop) {
301 return 0; // end of FORMAT and no data items remain
302 }
303 context.AdvanceRecord(); // implied / before rightmost )
304 hitEnd_ = true;
305 }
306 auto restart{stack_[height_ - 1].start};
307 if (format_[restart] == '(') {
308 ++restart;
309 }
310 if (stack_[height_ - 1].remaining == Iteration::unlimited) {
311 if (height_ > 1 && GetNextChar(handler&: context) != ')') {
312 ReportBadFormat(context,
313 msg: "Unlimited repetition in FORMAT may not be followed by more "
314 "items",
315 offset: restart);
316 return 0;
317 }
318 if (hitUnlimitedLoopEnd) {
319 ReportBadFormat(context,
320 msg: "Unlimited repetition in FORMAT lacks data edit descriptors",
321 offset: restart);
322 return 0;
323 }
324 hitUnlimitedLoopEnd = true;
325 offset_ = restart;
326 } else if (stack_[height_ - 1].remaining-- > 0) {
327 offset_ = restart;
328 } else {
329 --height_;
330 }
331 } else if (ch == '\'' || ch == '"') {
332 // Quoted 'character literal'
333 CharType quote{ch};
334 auto start{offset_};
335 while (offset_ < formatLength_ && format_[offset_] != quote) {
336 ++offset_;
337 }
338 if (offset_ >= formatLength_) {
339 ReportBadFormat(context,
340 msg: "FORMAT missing closing quote on character literal",
341 offset: maybeReversionPoint);
342 return 0;
343 }
344 ++offset_;
345 std::size_t chars{
346 static_cast<std::size_t>(&format_[offset_] - &format_[start])};
347 if (offset_ < formatLength_ && format_[offset_] == quote) {
348 // subtle: handle doubled quote character in a literal by including
349 // the first in the output, then treating the second as the start
350 // of another character literal.
351 } else {
352 --chars;
353 }
354 EmitAscii(context, format_ + start, chars);
355 } else if (ch == 'H') {
356 // 9HHOLLERITH
357 if (!repeat || *repeat < 1 || offset_ + *repeat > formatLength_) {
358 ReportBadFormat(context, msg: "Invalid width on Hollerith in FORMAT",
359 offset: maybeReversionPoint);
360 return 0;
361 }
362 EmitAscii(context, format_ + offset_, static_cast<std::size_t>(*repeat));
363 offset_ += *repeat;
364 } else if (ch >= 'A' && ch <= 'Z') {
365 int start{offset_ - 1};
366 CharType next{'\0'};
367 if (ch != 'P') { // 1PE5.2 - comma not required (C1302)
368 CharType peek{Capitalize(ch: PeekNext())};
369 if (peek >= 'A' && peek <= 'Z') {
370 if (ch == 'A' /* anticipate F'202X AT editing */ || ch == 'B' ||
371 ch == 'D' || ch == 'E' || ch == 'R' || ch == 'S' || ch == 'T') {
372 // Assume a two-letter edit descriptor
373 next = peek;
374 ++offset_;
375 } else {
376 // extension: assume a comma between 'ch' and 'peek'
377 }
378 }
379 }
380 if ((!next &&
381 (ch == 'A' || ch == 'I' || ch == 'B' || ch == 'E' || ch == 'D' ||
382 ch == 'O' || ch == 'Z' || ch == 'F' || ch == 'G' ||
383 ch == 'L')) ||
384 (ch == 'E' && (next == 'N' || next == 'S' || next == 'X')) ||
385 (ch == 'D' && next == 'T')) {
386 // Data edit descriptor found
387 offset_ = start;
388 return repeat && *repeat > 0 ? *repeat : 1;
389 } else {
390 // Control edit descriptor
391 if (ch == 'T') { // Tn, TLn, TRn
392 repeat = GetIntField(handler&: context);
393 }
394 HandleControl(context, static_cast<char>(ch), static_cast<char>(next),
395 repeat ? *repeat : 1);
396 }
397 } else if (ch == '/') {
398 context.AdvanceRecord(repeat && *repeat > 0 ? *repeat : 1);
399 } else if (ch == '$' || ch == '\\') {
400 context.mutableModes().nonAdvancing = true;
401 } else if (ch == '\t' || ch == '\v') {
402 // Tabs (extension)
403 // TODO: any other raw characters?
404 EmitAscii(context, format_ + offset_ - 1, 1);
405 } else {
406 ReportBadFormat(
407 context, msg: "Invalid character in FORMAT", offset: maybeReversionPoint);
408 return 0;
409 }
410 }
411}
412
413// Returns the next data edit descriptor
414template <typename CONTEXT>
415std::optional<DataEdit> FormatControl<CONTEXT>::GetNextDataEdit(
416 Context &context, int maxRepeat) {
417 int repeat{CueUpNextDataEdit(context)};
418 auto start{offset_};
419 DataEdit edit;
420 edit.modes = context.mutableModes();
421 // Handle repeated nonparenthesized edit descriptors
422 edit.repeat = std::min(a: repeat, b: maxRepeat); // 0 if maxRepeat==0
423 if (repeat > maxRepeat) {
424 stack_[height_].start = start; // after repeat count
425 stack_[height_].remaining = repeat - edit.repeat;
426 ++height_;
427 }
428 edit.descriptor = static_cast<char>(Capitalize(ch: GetNextChar(handler&: context)));
429 if (edit.descriptor == 'D' && Capitalize(ch: PeekNext()) == 'T') {
430 // DT['iotype'][(v_list)] defined I/O
431 edit.descriptor = DataEdit::DefinedDerivedType;
432 ++offset_;
433 if (auto quote{static_cast<char>(PeekNext())};
434 quote == '\'' || quote == '"') {
435 // Capture the quoted 'iotype'
436 bool ok{false};
437 for (++offset_; offset_ < formatLength_;) {
438 auto ch{static_cast<char>(format_[offset_++])};
439 if (ch == quote &&
440 (offset_ == formatLength_ ||
441 static_cast<char>(format_[offset_]) != quote)) {
442 ok = true;
443 break; // that was terminating quote
444 }
445 if (edit.ioTypeChars >= edit.maxIoTypeChars) {
446 ReportBadFormat(context, msg: "Excessive DT'iotype' in FORMAT", offset: start);
447 return std::nullopt;
448 }
449 edit.ioType[edit.ioTypeChars++] = ch;
450 if (ch == quote) {
451 ++offset_;
452 }
453 }
454 if (!ok) {
455 ReportBadFormat(context, msg: "Unclosed DT'iotype' in FORMAT", offset: start);
456 return std::nullopt;
457 }
458 }
459 if (PeekNext() == '(') {
460 // Capture the v_list arguments
461 bool ok{false};
462 for (++offset_; offset_ < formatLength_;) {
463 bool hadError{false};
464 int n{GetIntField(handler&: context, firstCh: '\0', hadError: &hadError)};
465 if (hadError) {
466 ok = false;
467 break;
468 }
469 if (edit.vListEntries >= edit.maxVListEntries) {
470 ReportBadFormat(context, msg: "Excessive DT(v_list) in FORMAT", offset: start);
471 return std::nullopt;
472 }
473 edit.vList[edit.vListEntries++] = n;
474 auto ch{static_cast<char>(GetNextChar(handler&: context))};
475 if (ch != ',') {
476 ok = ch == ')';
477 break;
478 }
479 }
480 if (!ok) {
481 ReportBadFormat(context, msg: "Unclosed DT(v_list) in FORMAT", offset: start);
482 return std::nullopt;
483 }
484 }
485 } else { // not DT'iotype'
486 if (edit.descriptor == 'E') {
487 if (auto next{static_cast<char>(Capitalize(ch: PeekNext()))};
488 next == 'N' || next == 'S' || next == 'X') {
489 edit.variation = next;
490 ++offset_;
491 }
492 }
493 // Width is optional for A[w] in the standard and optional
494 // for Lw in most compilers.
495 // Intel & (presumably, from bug report) Fujitsu allow
496 // a missing 'w' & 'd'/'m' for other edit descriptors -- but not
497 // 'd'/'m' with a missing 'w' -- and so interpret "(E)" as "(E0)".
498 if (CharType ch{PeekNext()}; (ch >= '0' && ch <= '9') || ch == '.') {
499 edit.width = GetIntField(handler&: context);
500 if constexpr (std::is_base_of_v<InputStatementState, CONTEXT>) {
501 if (edit.width.value_or(u: -1) == 0) {
502 ReportBadFormat(context, msg: "Input field width is zero", offset: start);
503 }
504 }
505 if (PeekNext() == '.') {
506 ++offset_;
507 edit.digits = GetIntField(handler&: context);
508 if (CharType ch{PeekNext()};
509 ch == 'e' || ch == 'E' || ch == 'd' || ch == 'D') {
510 ++offset_;
511 edit.expoDigits = GetIntField(handler&: context);
512 }
513 }
514 }
515 }
516 return edit;
517}
518
519template <typename CONTEXT>
520void FormatControl<CONTEXT>::Finish(Context &context) {
521 CueUpNextDataEdit(context, stop: true /* stop at colon or end of FORMAT */);
522 if (freeFormat_) {
523 FreeMemory(const_cast<CharType *>(format_));
524 }
525}
526} // namespace Fortran::runtime::io
527#endif // FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
528

source code of flang/runtime/format-implementation.h