1 | //===-- runtime/format-implementation.h -------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | // Implements out-of-line member functions of template class FormatControl |
10 | |
11 | #ifndef FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_ |
12 | #define FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_ |
13 | |
14 | #include "emit-encoded.h" |
15 | #include "format.h" |
16 | #include "io-stmt.h" |
17 | #include "memory.h" |
18 | #include "flang/Common/format.h" |
19 | #include "flang/Decimal/decimal.h" |
20 | #include "flang/Runtime/main.h" |
21 | #include <algorithm> |
22 | #include <cstring> |
23 | #include <limits> |
24 | |
25 | namespace Fortran::runtime::io { |
26 | |
27 | template <typename CONTEXT> |
28 | RT_API_ATTRS FormatControl<CONTEXT>::FormatControl(const Terminator &terminator, |
29 | const CharType *format, std::size_t formatLength, |
30 | const Descriptor *formatDescriptor, int maxHeight) |
31 | : maxHeight_{static_cast<std::uint8_t>(maxHeight)}, format_{format}, |
32 | formatLength_{static_cast<int>(formatLength)} { |
33 | RUNTIME_CHECK(terminator, maxHeight == maxHeight_); |
34 | if (!format && formatDescriptor) { |
35 | // The format is a character array passed via a descriptor. |
36 | std::size_t elements{formatDescriptor->Elements()}; |
37 | std::size_t elementBytes{formatDescriptor->ElementBytes()}; |
38 | formatLength = elements * elementBytes / sizeof(CharType); |
39 | formatLength_ = static_cast<int>(formatLength); |
40 | if (formatDescriptor->IsContiguous()) { |
41 | // Treat the contiguous array as a single character value. |
42 | format_ = const_cast<const CharType *>( |
43 | reinterpret_cast<CharType *>(formatDescriptor->raw().base_addr)); |
44 | } else { |
45 | // Concatenate its elements into a temporary array. |
46 | char *p{reinterpret_cast<char *>( |
47 | AllocateMemoryOrCrash(terminator, formatLength * sizeof(CharType)))}; |
48 | format_ = p; |
49 | SubscriptValue at[maxRank]; |
50 | formatDescriptor->GetLowerBounds(at); |
51 | for (std::size_t j{0}; j < elements; ++j) { |
52 | std::memcpy(p, formatDescriptor->Element<char>(at), elementBytes); |
53 | p += elementBytes; |
54 | formatDescriptor->IncrementSubscripts(at); |
55 | } |
56 | freeFormat_ = true; |
57 | } |
58 | } |
59 | RUNTIME_CHECK( |
60 | terminator, formatLength == static_cast<std::size_t>(formatLength_)); |
61 | stack_[0].start = offset_; |
62 | stack_[0].remaining = Iteration::unlimited; // 13.4(8) |
63 | } |
64 | |
65 | template <typename CONTEXT> |
66 | RT_API_ATTRS int FormatControl<CONTEXT>::GetIntField( |
67 | IoErrorHandler &handler, CharType firstCh, bool *hadError) { |
68 | CharType ch{firstCh ? firstCh : PeekNext()}; |
69 | bool negate{ch == '-'}; |
70 | if (negate || ch == '+') { |
71 | if (firstCh) { |
72 | firstCh = '\0'; |
73 | } else { |
74 | ++offset_; |
75 | } |
76 | ch = PeekNext(); |
77 | } |
78 | if (ch < '0' || ch > '9') { |
79 | handler.SignalError(IostatErrorInFormat, |
80 | "Invalid FORMAT: integer expected at '%c'" , static_cast<char>(ch)); |
81 | if (hadError) { |
82 | *hadError = true; |
83 | } |
84 | return 0; |
85 | } |
86 | int result{0}; |
87 | while (ch >= '0' && ch <= '9') { |
88 | constexpr int tenth{std::numeric_limits<int>::max() / 10}; |
89 | if (result > tenth || |
90 | ch - '0' > std::numeric_limits<int>::max() - 10 * result) { |
91 | handler.SignalError( |
92 | IostatErrorInFormat, "FORMAT integer field out of range" ); |
93 | if (hadError) { |
94 | *hadError = true; |
95 | } |
96 | return result; |
97 | } |
98 | result = 10 * result + ch - '0'; |
99 | if (firstCh) { |
100 | firstCh = '\0'; |
101 | } else { |
102 | ++offset_; |
103 | } |
104 | ch = PeekNext(); |
105 | } |
106 | if (negate && (result *= -1) > 0) { |
107 | handler.SignalError( |
108 | IostatErrorInFormat, "FORMAT integer field out of range" ); |
109 | if (hadError) { |
110 | *hadError = true; |
111 | } |
112 | } |
113 | return result; |
114 | } |
115 | |
116 | template <typename CONTEXT> |
117 | static RT_API_ATTRS void HandleControl( |
118 | CONTEXT &context, char ch, char next, int n) { |
119 | MutableModes &modes{context.mutableModes()}; |
120 | switch (ch) { |
121 | case 'B': |
122 | if (next == 'Z') { |
123 | modes.editingFlags |= blankZero; |
124 | return; |
125 | } |
126 | if (next == 'N') { |
127 | modes.editingFlags &= ~blankZero; |
128 | return; |
129 | } |
130 | break; |
131 | case 'D': |
132 | if (next == 'C') { |
133 | modes.editingFlags |= decimalComma; |
134 | return; |
135 | } |
136 | if (next == 'P') { |
137 | modes.editingFlags &= ~decimalComma; |
138 | return; |
139 | } |
140 | break; |
141 | case 'P': |
142 | if (!next) { |
143 | modes.scale = n; // kP - decimal scaling by 10**k |
144 | return; |
145 | } |
146 | break; |
147 | case 'R': |
148 | switch (next) { |
149 | case 'N': |
150 | modes.round = decimal::RoundNearest; |
151 | return; |
152 | case 'Z': |
153 | modes.round = decimal::RoundToZero; |
154 | return; |
155 | case 'U': |
156 | modes.round = decimal::RoundUp; |
157 | return; |
158 | case 'D': |
159 | modes.round = decimal::RoundDown; |
160 | return; |
161 | case 'C': |
162 | modes.round = decimal::RoundCompatible; |
163 | return; |
164 | case 'P': |
165 | modes.round = executionEnvironment.defaultOutputRoundingMode; |
166 | return; |
167 | default: |
168 | break; |
169 | } |
170 | break; |
171 | case 'X': |
172 | if (!next) { |
173 | ConnectionState &connection{context.GetConnectionState()}; |
174 | if (connection.internalIoCharKind > 1) { |
175 | n *= connection.internalIoCharKind; |
176 | } |
177 | context.HandleRelativePosition(n); |
178 | return; |
179 | } |
180 | break; |
181 | case 'S': |
182 | if (next == 'P') { |
183 | modes.editingFlags |= signPlus; |
184 | return; |
185 | } |
186 | if (!next || next == 'S') { |
187 | modes.editingFlags &= ~signPlus; |
188 | return; |
189 | } |
190 | break; |
191 | case 'T': { |
192 | if (!next) { // Tn |
193 | --n; // convert 1-based to 0-based |
194 | } |
195 | ConnectionState &connection{context.GetConnectionState()}; |
196 | if (connection.internalIoCharKind > 1) { |
197 | n *= connection.internalIoCharKind; |
198 | } |
199 | if (!next) { // Tn |
200 | context.HandleAbsolutePosition(n); |
201 | return; |
202 | } |
203 | if (next == 'L' || next == 'R') { // TLn & TRn |
204 | context.HandleRelativePosition(next == 'L' ? -n : n); |
205 | return; |
206 | } |
207 | } break; |
208 | default: |
209 | break; |
210 | } |
211 | if (next) { |
212 | context.SignalError(IostatErrorInFormat, |
213 | "Unknown '%c%c' edit descriptor in FORMAT" , ch, next); |
214 | } else { |
215 | context.SignalError( |
216 | IostatErrorInFormat, "Unknown '%c' edit descriptor in FORMAT" , ch); |
217 | } |
218 | } |
219 | |
220 | // Locates the next data edit descriptor in the format. |
221 | // Handles all repetition counts and control edit descriptors. |
222 | // Generally assumes that the format string has survived the common |
223 | // format validator gauntlet. |
224 | template <typename CONTEXT> |
225 | RT_API_ATTRS int FormatControl<CONTEXT>::CueUpNextDataEdit( |
226 | Context &context, bool stop) { |
227 | bool hitUnlimitedLoopEnd{false}; |
228 | // Do repetitions remain on an unparenthesized data edit? |
229 | while (height_ > 1 && format_[stack_[height_ - 1].start] != '(') { |
230 | offset_ = stack_[height_ - 1].start; |
231 | int repeat{stack_[height_ - 1].remaining}; |
232 | --height_; |
233 | if (repeat > 0) { |
234 | return repeat; |
235 | } |
236 | } |
237 | while (true) { |
238 | Fortran::common::optional<int> repeat; |
239 | bool unlimited{false}; |
240 | auto maybeReversionPoint{offset_}; |
241 | CharType ch{GetNextChar(context)}; |
242 | while (ch == ',' || ch == ':') { |
243 | // Skip commas, and don't complain if they're missing; the format |
244 | // validator does that. |
245 | if (stop && ch == ':') { |
246 | return 0; |
247 | } |
248 | ch = GetNextChar(context); |
249 | } |
250 | if (ch == '-' || ch == '+' || (ch >= '0' && ch <= '9')) { |
251 | bool hadSign{ch == '-' || ch == '+'}; |
252 | repeat = GetIntField(context, ch); |
253 | ch = GetNextChar(context); |
254 | if (hadSign && ch != 'p' && ch != 'P') { |
255 | ReportBadFormat(context, |
256 | "Invalid FORMAT: signed integer may appear only before 'P" , |
257 | maybeReversionPoint); |
258 | return 0; |
259 | } |
260 | } else if (ch == '*') { |
261 | unlimited = true; |
262 | ch = GetNextChar(context); |
263 | if (ch != '(') { |
264 | ReportBadFormat(context, |
265 | "Invalid FORMAT: '*' may appear only before '('" , |
266 | maybeReversionPoint); |
267 | return 0; |
268 | } |
269 | if (height_ != 1) { |
270 | ReportBadFormat(context, |
271 | "Invalid FORMAT: '*' must be nested in exactly one set of " |
272 | "parentheses" , |
273 | maybeReversionPoint); |
274 | return 0; |
275 | } |
276 | } |
277 | ch = Capitalize(ch); |
278 | if (ch == '(') { |
279 | if (height_ >= maxHeight_) { |
280 | ReportBadFormat(context, |
281 | "FORMAT stack overflow: too many nested parentheses" , |
282 | maybeReversionPoint); |
283 | return 0; |
284 | } |
285 | stack_[height_].start = offset_ - 1; // the '(' |
286 | RUNTIME_CHECK(context, format_[stack_[height_].start] == '('); |
287 | if (unlimited || height_ == 0) { |
288 | stack_[height_].remaining = Iteration::unlimited; |
289 | } else if (repeat) { |
290 | if (*repeat <= 0) { |
291 | *repeat = 1; // error recovery |
292 | } |
293 | stack_[height_].remaining = *repeat - 1; |
294 | } else { |
295 | stack_[height_].remaining = 0; |
296 | } |
297 | if (height_ == 1 && !hitEnd_) { |
298 | // Subtle point (F'2018 13.4 para 9): the last parenthesized group |
299 | // at height 1 becomes the restart point after control reaches the |
300 | // end of the format, including its repeat count. |
301 | stack_[0].start = maybeReversionPoint; |
302 | } |
303 | ++height_; |
304 | } else if (height_ == 0) { |
305 | ReportBadFormat(context, "FORMAT lacks initial '('" , maybeReversionPoint); |
306 | return 0; |
307 | } else if (ch == ')') { |
308 | if (height_ == 1) { |
309 | hitEnd_ = true; |
310 | if (stop) { |
311 | return 0; // end of FORMAT and no data items remain |
312 | } |
313 | context.AdvanceRecord(); // implied / before rightmost ) |
314 | } |
315 | auto restart{stack_[height_ - 1].start}; |
316 | if (format_[restart] == '(') { |
317 | ++restart; |
318 | } |
319 | if (stack_[height_ - 1].remaining == Iteration::unlimited) { |
320 | if (height_ > 1 && GetNextChar(context) != ')') { |
321 | ReportBadFormat(context, |
322 | "Unlimited repetition in FORMAT may not be followed by more " |
323 | "items" , |
324 | restart); |
325 | return 0; |
326 | } |
327 | if (hitUnlimitedLoopEnd) { |
328 | ReportBadFormat(context, |
329 | "Unlimited repetition in FORMAT lacks data edit descriptors" , |
330 | restart); |
331 | return 0; |
332 | } |
333 | hitUnlimitedLoopEnd = true; |
334 | offset_ = restart; |
335 | } else if (stack_[height_ - 1].remaining-- > 0) { |
336 | offset_ = restart; |
337 | } else { |
338 | --height_; |
339 | } |
340 | } else if (ch == '\'' || ch == '"') { |
341 | // Quoted 'character literal' |
342 | CharType quote{ch}; |
343 | auto start{offset_}; |
344 | while (offset_ < formatLength_ && format_[offset_] != quote) { |
345 | ++offset_; |
346 | } |
347 | if (offset_ >= formatLength_) { |
348 | ReportBadFormat(context, |
349 | "FORMAT missing closing quote on character literal" , |
350 | maybeReversionPoint); |
351 | return 0; |
352 | } |
353 | ++offset_; |
354 | std::size_t chars{ |
355 | static_cast<std::size_t>(&format_[offset_] - &format_[start])}; |
356 | if (offset_ < formatLength_ && format_[offset_] == quote) { |
357 | // subtle: handle doubled quote character in a literal by including |
358 | // the first in the output, then treating the second as the start |
359 | // of another character literal. |
360 | } else { |
361 | --chars; |
362 | } |
363 | EmitAscii(context, format_ + start, chars); |
364 | } else if (ch == 'H') { |
365 | // 9HHOLLERITH |
366 | if (!repeat || *repeat < 1 || offset_ + *repeat > formatLength_) { |
367 | ReportBadFormat(context, "Invalid width on Hollerith in FORMAT" , |
368 | maybeReversionPoint); |
369 | return 0; |
370 | } |
371 | EmitAscii(context, format_ + offset_, static_cast<std::size_t>(*repeat)); |
372 | offset_ += *repeat; |
373 | } else if (ch >= 'A' && ch <= 'Z') { |
374 | int start{offset_ - 1}; |
375 | CharType next{'\0'}; |
376 | if (ch != 'P') { // 1PE5.2 - comma not required (C1302) |
377 | CharType peek{Capitalize(PeekNext())}; |
378 | if (peek >= 'A' && peek <= 'Z') { |
379 | if (ch == 'A' /* anticipate F'202X AT editing */ || ch == 'B' || |
380 | ch == 'D' || ch == 'E' || ch == 'R' || ch == 'S' || ch == 'T') { |
381 | // Assume a two-letter edit descriptor |
382 | next = peek; |
383 | ++offset_; |
384 | } else { |
385 | // extension: assume a comma between 'ch' and 'peek' |
386 | } |
387 | } |
388 | } |
389 | if ((!next && |
390 | (ch == 'A' || ch == 'I' || ch == 'B' || ch == 'E' || ch == 'D' || |
391 | ch == 'O' || ch == 'Z' || ch == 'F' || ch == 'G' || |
392 | ch == 'L')) || |
393 | (ch == 'E' && (next == 'N' || next == 'S' || next == 'X')) || |
394 | (ch == 'D' && next == 'T')) { |
395 | // Data edit descriptor found |
396 | offset_ = start; |
397 | return repeat && *repeat > 0 ? *repeat : 1; |
398 | } else { |
399 | // Control edit descriptor |
400 | if (ch == 'T') { // Tn, TLn, TRn |
401 | repeat = GetIntField(context); |
402 | } |
403 | HandleControl(context, static_cast<char>(ch), static_cast<char>(next), |
404 | repeat ? *repeat : 1); |
405 | } |
406 | } else if (ch == '/') { |
407 | context.AdvanceRecord(repeat && *repeat > 0 ? *repeat : 1); |
408 | } else if (ch == '$' || ch == '\\') { |
409 | context.mutableModes().nonAdvancing = true; |
410 | } else if (ch == '\t' || ch == '\v') { |
411 | // Tabs (extension) |
412 | // TODO: any other raw characters? |
413 | EmitAscii(context, format_ + offset_ - 1, 1); |
414 | } else { |
415 | ReportBadFormat( |
416 | context, "Invalid character in FORMAT" , maybeReversionPoint); |
417 | return 0; |
418 | } |
419 | } |
420 | } |
421 | |
422 | // Returns the next data edit descriptor |
423 | template <typename CONTEXT> |
424 | RT_API_ATTRS Fortran::common::optional<DataEdit> |
425 | FormatControl<CONTEXT>::GetNextDataEdit(Context &context, int maxRepeat) { |
426 | int repeat{CueUpNextDataEdit(context)}; |
427 | auto start{offset_}; |
428 | DataEdit edit; |
429 | edit.modes = context.mutableModes(); |
430 | // Handle repeated nonparenthesized edit descriptors |
431 | edit.repeat = std::min(repeat, maxRepeat); // 0 if maxRepeat==0 |
432 | if (repeat > maxRepeat) { |
433 | stack_[height_].start = start; // after repeat count |
434 | stack_[height_].remaining = repeat - edit.repeat; |
435 | ++height_; |
436 | } |
437 | edit.descriptor = static_cast<char>(Capitalize(GetNextChar(context))); |
438 | if (edit.descriptor == 'D' && Capitalize(PeekNext()) == 'T') { |
439 | // DT['iotype'][(v_list)] defined I/O |
440 | edit.descriptor = DataEdit::DefinedDerivedType; |
441 | ++offset_; |
442 | if (auto quote{static_cast<char>(PeekNext())}; |
443 | quote == '\'' || quote == '"') { |
444 | // Capture the quoted 'iotype' |
445 | bool ok{false}; |
446 | for (++offset_; offset_ < formatLength_;) { |
447 | auto ch{static_cast<char>(format_[offset_++])}; |
448 | if (ch == quote && |
449 | (offset_ == formatLength_ || |
450 | static_cast<char>(format_[offset_]) != quote)) { |
451 | ok = true; |
452 | break; // that was terminating quote |
453 | } |
454 | if (edit.ioTypeChars >= edit.maxIoTypeChars) { |
455 | ReportBadFormat(context, "Excessive DT'iotype' in FORMAT" , start); |
456 | return Fortran::common::nullopt; |
457 | } |
458 | edit.ioType[edit.ioTypeChars++] = ch; |
459 | if (ch == quote) { |
460 | ++offset_; |
461 | } |
462 | } |
463 | if (!ok) { |
464 | ReportBadFormat(context, "Unclosed DT'iotype' in FORMAT" , start); |
465 | return Fortran::common::nullopt; |
466 | } |
467 | } |
468 | if (PeekNext() == '(') { |
469 | // Capture the v_list arguments |
470 | bool ok{false}; |
471 | for (++offset_; offset_ < formatLength_;) { |
472 | bool hadError{false}; |
473 | int n{GetIntField(context, '\0', &hadError)}; |
474 | if (hadError) { |
475 | ok = false; |
476 | break; |
477 | } |
478 | if (edit.vListEntries >= edit.maxVListEntries) { |
479 | ReportBadFormat(context, "Excessive DT(v_list) in FORMAT" , start); |
480 | return Fortran::common::nullopt; |
481 | } |
482 | edit.vList[edit.vListEntries++] = n; |
483 | auto ch{static_cast<char>(GetNextChar(context))}; |
484 | if (ch != ',') { |
485 | ok = ch == ')'; |
486 | break; |
487 | } |
488 | } |
489 | if (!ok) { |
490 | ReportBadFormat(context, "Unclosed DT(v_list) in FORMAT" , start); |
491 | return Fortran::common::nullopt; |
492 | } |
493 | } |
494 | } else { // not DT'iotype' |
495 | if (edit.descriptor == 'E') { |
496 | if (auto next{static_cast<char>(Capitalize(PeekNext()))}; |
497 | next == 'N' || next == 'S' || next == 'X') { |
498 | edit.variation = next; |
499 | ++offset_; |
500 | } |
501 | } |
502 | // Width is optional for A[w] in the standard and optional |
503 | // for Lw in most compilers. |
504 | // Intel & (presumably, from bug report) Fujitsu allow |
505 | // a missing 'w' & 'd'/'m' for other edit descriptors -- but not |
506 | // 'd'/'m' with a missing 'w' -- and so interpret "(E)" as "(E0)". |
507 | if (CharType ch{PeekNext()}; (ch >= '0' && ch <= '9') || ch == '.') { |
508 | edit.width = GetIntField(context); |
509 | if constexpr (std::is_base_of_v<InputStatementState, CONTEXT>) { |
510 | if (edit.width.value_or(-1) == 0) { |
511 | ReportBadFormat(context, "Input field width is zero" , start); |
512 | } |
513 | } |
514 | if (PeekNext() == '.') { |
515 | ++offset_; |
516 | edit.digits = GetIntField(context); |
517 | if (CharType ch{PeekNext()}; |
518 | ch == 'e' || ch == 'E' || ch == 'd' || ch == 'D') { |
519 | ++offset_; |
520 | edit.expoDigits = GetIntField(context); |
521 | } |
522 | } |
523 | } |
524 | } |
525 | return edit; |
526 | } |
527 | |
528 | template <typename CONTEXT> |
529 | RT_API_ATTRS void FormatControl<CONTEXT>::Finish(Context &context) { |
530 | CueUpNextDataEdit(context, true /* stop at colon or end of FORMAT */); |
531 | if (freeFormat_) { |
532 | FreeMemory(const_cast<CharType *>(format_)); |
533 | } |
534 | } |
535 | } // namespace Fortran::runtime::io |
536 | #endif // FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_ |
537 | |