Warning: This file is not a C or C++ file. It does not have highlighting.

1// -*- C++ -*-
2//===----------------------------------------------------------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
11#define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
12
13/// \file Contains the std-format-spec parser.
14///
15/// Most of the code can be reused in the chrono-format-spec.
16/// This header has some support for the chrono-format-spec since it doesn't
17/// affect the std-format-spec.
18
19#include <__algorithm/copy_n.h>
20#include <__algorithm/min.h>
21#include <__assert>
22#include <__concepts/arithmetic.h>
23#include <__concepts/same_as.h>
24#include <__config>
25#include <__format/format_arg.h>
26#include <__format/format_error.h>
27#include <__format/format_parse_context.h>
28#include <__format/format_string.h>
29#include <__format/unicode.h>
30#include <__format/width_estimation_table.h>
31#include <__iterator/concepts.h>
32#include <__iterator/iterator_traits.h> // iter_value_t
33#include <__memory/addressof.h>
34#include <__type_traits/common_type.h>
35#include <__type_traits/is_constant_evaluated.h>
36#include <__type_traits/is_trivially_copyable.h>
37#include <__variant/monostate.h>
38#include <cstdint>
39#include <string>
40#include <string_view>
41
42#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
43# pragma GCC system_header
44#endif
45
46_LIBCPP_PUSH_MACROS
47#include <__undef_macros>
48
49_LIBCPP_BEGIN_NAMESPACE_STD
50
51#if _LIBCPP_STD_VER >= 20
52
53namespace __format_spec {
54
55[[noreturn]] _LIBCPP_HIDE_FROM_ABI inline void
56__throw_invalid_option_format_error(const char* __id, const char* __option) {
57 std::__throw_format_error(
58 (string("The format specifier for ") + __id + " does not allow the " + __option + " option").c_str());
59}
60
61[[noreturn]] _LIBCPP_HIDE_FROM_ABI inline void __throw_invalid_type_format_error(const char* __id) {
62 std::__throw_format_error(
63 (string("The type option contains an invalid value for ") + __id + " formatting argument").c_str());
64}
65
66template <contiguous_iterator _Iterator, class _ParseContext>
67_LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result<_Iterator>
68__parse_arg_id(_Iterator __begin, _Iterator __end, _ParseContext& __ctx) {
69 using _CharT = iter_value_t<_Iterator>;
70 // This function is a wrapper to call the real parser. But it does the
71 // validation for the pre-conditions and post-conditions.
72 if (__begin == __end)
73 std::__throw_format_error("End of input while parsing an argument index");
74
75 __format::__parse_number_result __r = __format::__parse_arg_id(__begin, __end, __ctx);
76
77 if (__r.__last == __end || *__r.__last != _CharT('}'))
78 std::__throw_format_error("The argument index is invalid");
79
80 ++__r.__last;
81 return __r;
82}
83
84template <class _Context>
85_LIBCPP_HIDE_FROM_ABI constexpr uint32_t __substitute_arg_id(basic_format_arg<_Context> __format_arg) {
86 // [format.string.std]/8
87 // If the corresponding formatting argument is not of integral type...
88 // This wording allows char and bool too. LWG-3720 changes the wording to
89 // If the corresponding formatting argument is not of standard signed or
90 // unsigned integer type,
91 // This means the 128-bit will not be valid anymore.
92 // TODO FMT Verify this resolution is accepted and add a test to verify
93 // 128-bit integrals fail and switch to visit_format_arg.
94 return std::__visit_format_arg(
95 [](auto __arg) -> uint32_t {
96 using _Type = decltype(__arg);
97 if constexpr (same_as<_Type, monostate>)
98 std::__throw_format_error("The argument index value is too large for the number of arguments supplied");
99
100 // [format.string.std]/8
101 // If { arg-idopt } is used in a width or precision, the value of the
102 // corresponding formatting argument is used in its place. If the
103 // corresponding formatting argument is not of standard signed or unsigned
104 // integer type, or its value is negative for precision or non-positive for
105 // width, an exception of type format_error is thrown.
106 //
107 // When an integral is used in a format function, it is stored as one of
108 // the types checked below. Other integral types are promoted. For example,
109 // a signed char is stored as an int.
110 if constexpr (same_as<_Type, int> || same_as<_Type, unsigned int> || //
111 same_as<_Type, long long> || same_as<_Type, unsigned long long>) {
112 if constexpr (signed_integral<_Type>) {
113 if (__arg < 0)
114 std::__throw_format_error("An argument index may not have a negative value");
115 }
116
117 using _CT = common_type_t<_Type, decltype(__format::__number_max)>;
118 if (static_cast<_CT>(__arg) > static_cast<_CT>(__format::__number_max))
119 std::__throw_format_error("The value of the argument index exceeds its maximum value");
120
121 return __arg;
122 } else
123 std::__throw_format_error("Replacement argument isn't a standard signed or unsigned integer type");
124 },
125 __format_arg);
126}
127
128/// These fields are a filter for which elements to parse.
129///
130/// They default to false so when a new field is added it needs to be opted in
131/// explicitly.
132struct _LIBCPP_HIDE_FROM_ABI __fields {
133 uint16_t __sign_ : 1 {false};
134 uint16_t __alternate_form_ : 1 {false};
135 uint16_t __zero_padding_ : 1 {false};
136 uint16_t __precision_ : 1 {false};
137 uint16_t __locale_specific_form_ : 1 {false};
138 uint16_t __type_ : 1 {false};
139 // Determines the valid values for fill.
140 //
141 // Originally the fill could be any character except { and }. Range-based
142 // formatters use the colon to mark the beginning of the
143 // underlying-format-spec. To avoid parsing ambiguities these formatter
144 // specializations prohibit the use of the colon as a fill character.
145 uint16_t __use_range_fill_ : 1 {false};
146 uint16_t __clear_brackets_ : 1 {false};
147 uint16_t __consume_all_ : 1 {false};
148};
149
150// By not placing this constant in the formatter class it's not duplicated for
151// char and wchar_t.
152inline constexpr __fields __fields_bool{.__locale_specific_form_ = true, .__type_ = true, .__consume_all_ = true};
153inline constexpr __fields __fields_integral{
154 .__sign_ = true,
155 .__alternate_form_ = true,
156 .__zero_padding_ = true,
157 .__locale_specific_form_ = true,
158 .__type_ = true,
159 .__consume_all_ = true};
160inline constexpr __fields __fields_floating_point{
161 .__sign_ = true,
162 .__alternate_form_ = true,
163 .__zero_padding_ = true,
164 .__precision_ = true,
165 .__locale_specific_form_ = true,
166 .__type_ = true,
167 .__consume_all_ = true};
168inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true, .__consume_all_ = true};
169inline constexpr __fields __fields_pointer{.__zero_padding_ = true, .__type_ = true, .__consume_all_ = true};
170
171# if _LIBCPP_STD_VER >= 23
172inline constexpr __fields __fields_tuple{.__use_range_fill_ = true, .__clear_brackets_ = true};
173inline constexpr __fields __fields_range{.__use_range_fill_ = true, .__clear_brackets_ = true};
174inline constexpr __fields __fields_fill_align_width{};
175# endif
176
/// The alignment requested by the format-spec.
enum class __alignment : uint8_t {
  /// No alignment is set in the format string.
  __default      = 0,
  __left         = 1,
  __center       = 2,
  __right        = 3,
  __zero_padding = 4
};
185
/// The sign option requested by the format-spec.
enum class __sign : uint8_t {
  /// No sign is set in the format string.
  ///
  /// The sign isn't allowed for certain format-types. By using this value
  /// it's possible to detect whether or not the user explicitly set the sign
  /// flag. For formatting purposes it behaves the same as \ref __minus.
  __default = 0,
  __minus   = 1,
  __plus    = 2,
  __space   = 3
};
197
/// The display type selected by the type option of the format-spec.
///
/// The numeric value of an enumerator is used as a bit position by
/// __create_type_mask.
enum class __type : uint8_t {
  __default                = 0,
  __string                 = 1,
  __binary_lower_case      = 2,
  __binary_upper_case      = 3,
  __octal                  = 4,
  __decimal                = 5,
  __hexadecimal_lower_case = 6,
  __hexadecimal_upper_case = 7,
  __pointer_lower_case     = 8,
  __pointer_upper_case     = 9,
  __char                   = 10,
  __hexfloat_lower_case    = 11,
  __hexfloat_upper_case    = 12,
  __scientific_lower_case  = 13,
  __scientific_upper_case  = 14,
  __fixed_lower_case       = 15,
  __fixed_upper_case       = 16,
  __general_lower_case     = 17,
  __general_upper_case     = 18,
  __debug                  = 19
};
220
221_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __create_type_mask(__type __t) {
222 uint32_t __shift = static_cast<uint32_t>(__t);
223 if (__shift == 0)
224 return 1;
225
226 if (__shift > 31)
227 std::__throw_format_error("The type does not fit in the mask");
228
229 return 1 << __shift;
230}
231
232inline constexpr uint32_t __type_mask_integer =
233 __create_type_mask(__type::__binary_lower_case) | //
234 __create_type_mask(__type::__binary_upper_case) | //
235 __create_type_mask(__type::__decimal) | //
236 __create_type_mask(__type::__octal) | //
237 __create_type_mask(__type::__hexadecimal_lower_case) | //
238 __create_type_mask(__type::__hexadecimal_upper_case);
239
240struct __std {
241 __alignment __alignment_ : 3;
242 __sign __sign_ : 2;
243 bool __alternate_form_ : 1;
244 bool __locale_specific_form_ : 1;
245 __type __type_;
246};
247
248struct __chrono {
249 __alignment __alignment_ : 3;
250 bool __locale_specific_form_ : 1;
251 bool __hour_ : 1;
252 bool __weekday_name_ : 1;
253 bool __weekday_ : 1;
254 bool __day_of_year_ : 1;
255 bool __week_of_year_ : 1;
256 bool __month_name_ : 1;
257};
258
// Storage for the fill character, a UCS scalar value.
//
// The value is always stored in an array of code units; depending on the
// character type the array has 1, 2, or 4 elements, so the data structure
// always occupies 32 bits. The default fill is a single space.
template <class _CharT>
struct __code_point;

template <>
struct __code_point<char> {
  char __data[4] = {' '};
};

#  if _LIBCPP_HAS_WIDE_CHARACTERS
template <>
struct __code_point<wchar_t> {
  wchar_t __data[4 / sizeof(wchar_t)] = {L' '};
};
#  endif
277
278/// Contains the parsed formatting specifications.
279///
280/// This contains information for both the std-format-spec and the
281/// chrono-format-spec. This results in some unused members for both
282/// specifications. However these unused members don't increase the size
283/// of the structure.
284///
285/// This struct doesn't cross ABI boundaries so its layout doesn't need to be
286/// kept stable.
287template <class _CharT>
288struct __parsed_specifications {
289 union {
290 // The field __alignment_ is the first element in __std_ and __chrono_.
291 // This allows the code to always inspect this value regards which member
292 // of the union is the active member [class.union.general]/2.
293 //
294 // This is needed since the generic output routines handle the alignment of
295 // the output.
296 __alignment __alignment_ : 3;
297 __std __std_;
298 __chrono __chrono_;
299 };
300
301 /// The requested width.
302 ///
303 /// When the format-spec used an arg-id for this field it has already been
304 /// replaced with the value of that arg-id.
305 int32_t __width_;
306
307 /// The requested precision.
308 ///
309 /// When the format-spec used an arg-id for this field it has already been
310 /// replaced with the value of that arg-id.
311 int32_t __precision_;
312
313 __code_point<_CharT> __fill_;
314
315 _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; }
316
317 _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; }
318};
319
320// Validate the struct is small and cheap to copy since the struct is passed by
321// value in formatting functions.
322static_assert(sizeof(__parsed_specifications<char>) == 16);
323static_assert(is_trivially_copyable_v<__parsed_specifications<char>>);
324# if _LIBCPP_HAS_WIDE_CHARACTERS
325static_assert(sizeof(__parsed_specifications<wchar_t>) == 16);
326static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>);
327# endif
328
329/// The parser for the std-format-spec.
330///
331/// Note this class is a member of std::formatter specializations. It's
332/// expected developers will create their own formatter specializations that
333/// inherit from the std::formatter specializations. This means this class
334/// must be ABI stable. To aid the stability the unused bits in the class are
335/// set to zero. That way they can be repurposed if a future revision of the
336/// Standards adds new fields to std-format-spec.
337template <class _CharT>
338class __parser {
339public:
340 // Parses the format specification.
341 //
342 // Depending on whether the parsing is done compile-time or run-time
343 // the method slightly differs.
344 // - Only parses a field when it is in the __fields. Accepting all
345 // fields and then validating the valid ones has a performance impact.
346 // This is faster but gives slighly worse error messages.
347 // - At compile-time when a field is not accepted the parser will still
348 // parse it and give an error when it's present. This gives a more
349 // accurate error.
350 // The idea is that most times the format instead of the vformat
351 // functions are used. In that case the error will be detected during
352 // compilation and there is no need to pay for the run-time overhead.
353 template <class _ParseContext>
354 _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator __parse(_ParseContext& __ctx, __fields __fields) {
355 auto __begin = __ctx.begin();
356 auto __end = __ctx.end();
357 if (__begin == __end || *__begin == _CharT('}') || (__fields.__use_range_fill_ && *__begin == _CharT(':')))
358 return __begin;
359
360 if (__parse_fill_align(__begin, __end) && __begin == __end)
361 return __begin;
362
363 if (__fields.__sign_) {
364 if (__parse_sign(__begin) && __begin == __end)
365 return __begin;
366 } else if (std::is_constant_evaluated() && __parse_sign(__begin)) {
367 std::__throw_format_error("The format specification does not allow the sign option");
368 }
369
370 if (__fields.__alternate_form_) {
371 if (__parse_alternate_form(__begin) && __begin == __end)
372 return __begin;
373 } else if (std::is_constant_evaluated() && __parse_alternate_form(__begin)) {
374 std::__throw_format_error("The format specifier does not allow the alternate form option");
375 }
376
377 if (__fields.__zero_padding_) {
378 if (__parse_zero_padding(__begin) && __begin == __end)
379 return __begin;
380 } else if (std::is_constant_evaluated() && __parse_zero_padding(__begin)) {
381 std::__throw_format_error("The format specifier does not allow the zero-padding option");
382 }
383
384 if (__parse_width(__begin, __end, __ctx) && __begin == __end)
385 return __begin;
386
387 if (__fields.__precision_) {
388 if (__parse_precision(__begin, __end, __ctx) && __begin == __end)
389 return __begin;
390 } else if (std::is_constant_evaluated() && __parse_precision(__begin, __end, __ctx)) {
391 std::__throw_format_error("The format specifier does not allow the precision option");
392 }
393
394 if (__fields.__locale_specific_form_) {
395 if (__parse_locale_specific_form(__begin) && __begin == __end)
396 return __begin;
397 } else if (std::is_constant_evaluated() && __parse_locale_specific_form(__begin)) {
398 std::__throw_format_error("The format specifier does not allow the locale-specific form option");
399 }
400
401 if (__fields.__clear_brackets_) {
402 if (__parse_clear_brackets(__begin) && __begin == __end)
403 return __begin;
404 } else if (std::is_constant_evaluated() && __parse_clear_brackets(__begin)) {
405 std::__throw_format_error("The format specifier does not allow the n option");
406 }
407
408 if (__fields.__type_)
409 __parse_type(__begin);
410
411 if (!__fields.__consume_all_)
412 return __begin;
413
414 if (__begin != __end && *__begin != _CharT('}'))
415 std::__throw_format_error("The format specifier should consume the input or end with a '}'");
416
417 return __begin;
418 }
419
420 // Validates the selected the parsed data.
421 //
422 // The valid fields in the parser may depend on the display type
423 // selected. But the type is the last optional field, so by the time
424 // it's known an option can't be used, it already has been parsed.
425 // This does the validation again.
426 //
427 // For example an integral may have a sign, zero-padding, or alternate
428 // form when the type option is not 'c'. So the generic approach is:
429 //
430 // typename _ParseContext::iterator __result = __parser_.__parse(__ctx, __format_spec::__fields_integral);
431 // if (__parser.__type_ == __format_spec::__type::__char) {
432 // __parser.__validate((__format_spec::__fields_bool, "an integer");
433 // ... // more char adjustments
434 // } else {
435 // ... // validate an integral type.
436 // }
437 //
438 // For some types all valid options need a second validation run, like
439 // boolean types.
440 //
441 // Depending on whether the validation is done at compile-time or
442 // run-time the error differs
443 // - run-time the exception is thrown and contains the type of field
444 // being validated.
445 // - at compile-time the line with `std::__throw_format_error` is shown
446 // in the output. In that case it's important for the error to be on one
447 // line.
448 // Note future versions of C++ may allow better compile-time error
449 // reporting.
450 _LIBCPP_HIDE_FROM_ABI constexpr void
451 __validate(__fields __fields, const char* __id, uint32_t __type_mask = -1) const {
452 if (!__fields.__sign_ && __sign_ != __sign::__default) {
453 if (std::is_constant_evaluated())
454 std::__throw_format_error("The format specifier does not allow the sign option");
455 else
456 __format_spec::__throw_invalid_option_format_error(__id, "sign");
457 }
458
459 if (!__fields.__alternate_form_ && __alternate_form_) {
460 if (std::is_constant_evaluated())
461 std::__throw_format_error("The format specifier does not allow the alternate form option");
462 else
463 __format_spec::__throw_invalid_option_format_error(__id, "alternate form");
464 }
465
466 if (!__fields.__zero_padding_ && __alignment_ == __alignment::__zero_padding) {
467 if (std::is_constant_evaluated())
468 std::__throw_format_error("The format specifier does not allow the zero-padding option");
469 else
470 __format_spec::__throw_invalid_option_format_error(__id, "zero-padding");
471 }
472
473 if (!__fields.__precision_ && __precision_ != -1) { // Works both when the precision has a value or an arg-id.
474 if (std::is_constant_evaluated())
475 std::__throw_format_error("The format specifier does not allow the precision option");
476 else
477 __format_spec::__throw_invalid_option_format_error(__id, "precision");
478 }
479
480 if (!__fields.__locale_specific_form_ && __locale_specific_form_) {
481 if (std::is_constant_evaluated())
482 std::__throw_format_error("The format specifier does not allow the locale-specific form option");
483 else
484 __format_spec::__throw_invalid_option_format_error(__id, "locale-specific form");
485 }
486
487 if ((__create_type_mask(__type_) & __type_mask) == 0) {
488 if (std::is_constant_evaluated())
489 std::__throw_format_error("The format specifier uses an invalid value for the type option");
490 else
491 __format_spec::__throw_invalid_type_format_error(__id);
492 }
493 }
494
495 /// \returns the `__parsed_specifications` with the resolved dynamic sizes..
496 _LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const {
497 return __parsed_specifications<_CharT>{
498 .__std_ = __std{.__alignment_ = __alignment_,
499 .__sign_ = __sign_,
500 .__alternate_form_ = __alternate_form_,
501 .__locale_specific_form_ = __locale_specific_form_,
502 .__type_ = __type_},
503 .__width_{__get_width(__ctx)},
504 .__precision_{__get_precision(__ctx)},
505 .__fill_{__fill_}};
506 }
507
508 _LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_chrono_specifications(auto& __ctx) const {
509 return __parsed_specifications<_CharT>{
510 .__chrono_ =
511 __chrono{.__alignment_ = __alignment_,
512 .__locale_specific_form_ = __locale_specific_form_,
513 .__hour_ = __hour_,
514 .__weekday_name_ = __weekday_name_,
515 .__weekday_ = __weekday_,
516 .__day_of_year_ = __day_of_year_,
517 .__week_of_year_ = __week_of_year_,
518 .__month_name_ = __month_name_},
519 .__width_{__get_width(__ctx)},
520 .__precision_{__get_precision(__ctx)},
521 .__fill_{__fill_}};
522 }
523
524 __alignment __alignment_ : 3 {__alignment::__default};
525 __sign __sign_ : 2 {__sign::__default};
526 bool __alternate_form_ : 1 {false};
527 bool __locale_specific_form_ : 1 {false};
528 bool __clear_brackets_ : 1 {false};
529 __type __type_{__type::__default};
530
531 // These flags are only used for formatting chrono. Since the struct has
532 // padding space left it's added to this structure.
533 bool __hour_ : 1 {false};
534
535 bool __weekday_name_ : 1 {false};
536 bool __weekday_ : 1 {false};
537
538 bool __day_of_year_ : 1 {false};
539 bool __week_of_year_ : 1 {false};
540
541 bool __month_name_ : 1 {false};
542
543 uint8_t __reserved_0_ : 2 {0};
544 uint8_t __reserved_1_ : 6 {0};
545 // These two flags are only used internally and not part of the
546 // __parsed_specifications. Therefore put them at the end.
547 bool __width_as_arg_ : 1 {false};
548 bool __precision_as_arg_ : 1 {false};
549
550 /// The requested width, either the value or the arg-id.
551 int32_t __width_{0};
552
553 /// The requested precision, either the value or the arg-id.
554 int32_t __precision_{-1};
555
556 __code_point<_CharT> __fill_{};
557
558private:
559 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) {
560 switch (__c) {
561 case _CharT('<'):
562 __alignment_ = __alignment::__left;
563 return true;
564
565 case _CharT('^'):
566 __alignment_ = __alignment::__center;
567 return true;
568
569 case _CharT('>'):
570 __alignment_ = __alignment::__right;
571 return true;
572 }
573 return false;
574 }
575
576 _LIBCPP_HIDE_FROM_ABI constexpr void __validate_fill_character(_CharT __fill) {
577 // The forbidden fill characters all code points formed from a single code unit, thus the
578 // check can be omitted when more code units are used.
579 if (__fill == _CharT('{'))
580 std::__throw_format_error("The fill option contains an invalid value");
581 }
582
583# if _LIBCPP_HAS_UNICODE
584 // range-fill and tuple-fill are identical
585 template <contiguous_iterator _Iterator>
586 requires same_as<_CharT, char>
587# if _LIBCPP_HAS_WIDE_CHARACTERS
588 || (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2)
589# endif
590 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) {
591 _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
592 __begin != __end,
593 "when called with an empty input the function will cause "
594 "undefined behavior by evaluating data not in the input");
595 __unicode::__code_point_view<_CharT> __view{__begin, __end};
596 __unicode::__consume_result __consumed = __view.__consume();
597 if (__consumed.__status != __unicode::__consume_result::__ok)
598 std::__throw_format_error("The format specifier contains malformed Unicode characters");
599
600 if (__view.__position() < __end && __parse_alignment(*__view.__position())) {
601 ptrdiff_t __code_units = __view.__position() - __begin;
602 if (__code_units == 1)
603 // The forbidden fill characters all are code points encoded
604 // in one code unit, thus the check can be omitted when more
605 // code units are used.
606 __validate_fill_character(*__begin);
607
608 std::copy_n(__begin, __code_units, std::addressof(__fill_.__data[0]));
609 __begin += __code_units + 1;
610 return true;
611 }
612
613 if (!__parse_alignment(*__begin))
614 return false;
615
616 ++__begin;
617 return true;
618 }
619
620# if _LIBCPP_HAS_WIDE_CHARACTERS
621 template <contiguous_iterator _Iterator>
622 requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4)
623 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) {
624 _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
625 __begin != __end,
626 "when called with an empty input the function will cause "
627 "undefined behavior by evaluating data not in the input");
628 if (__begin + 1 != __end && __parse_alignment(*(__begin + 1))) {
629 if (!__unicode::__is_scalar_value(*__begin))
630 std::__throw_format_error("The fill option contains an invalid value");
631
632 __validate_fill_character(*__begin);
633
634 __fill_.__data[0] = *__begin;
635 __begin += 2;
636 return true;
637 }
638
639 if (!__parse_alignment(*__begin))
640 return false;
641
642 ++__begin;
643 return true;
644 }
645
646# endif // _LIBCPP_HAS_WIDE_CHARACTERS
647
648# else // _LIBCPP_HAS_UNICODE
649 // range-fill and tuple-fill are identical
650 template <contiguous_iterator _Iterator>
651 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) {
652 _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
653 __begin != __end,
654 "when called with an empty input the function will cause "
655 "undefined behavior by evaluating data not in the input");
656 if (__begin + 1 != __end) {
657 if (__parse_alignment(*(__begin + 1))) {
658 __validate_fill_character(*__begin);
659
660 __fill_.__data[0] = *__begin;
661 __begin += 2;
662 return true;
663 }
664 }
665
666 if (!__parse_alignment(*__begin))
667 return false;
668
669 ++__begin;
670 return true;
671 }
672
673# endif // _LIBCPP_HAS_UNICODE
674
675 template <contiguous_iterator _Iterator>
676 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(_Iterator& __begin) {
677 switch (*__begin) {
678 case _CharT('-'):
679 __sign_ = __sign::__minus;
680 break;
681 case _CharT('+'):
682 __sign_ = __sign::__plus;
683 break;
684 case _CharT(' '):
685 __sign_ = __sign::__space;
686 break;
687 default:
688 return false;
689 }
690 ++__begin;
691 return true;
692 }
693
694 template <contiguous_iterator _Iterator>
695 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(_Iterator& __begin) {
696 if (*__begin != _CharT('#'))
697 return false;
698
699 __alternate_form_ = true;
700 ++__begin;
701 return true;
702 }
703
704 template <contiguous_iterator _Iterator>
705 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(_Iterator& __begin) {
706 if (*__begin != _CharT('0'))
707 return false;
708
709 if (__alignment_ == __alignment::__default)
710 __alignment_ = __alignment::__zero_padding;
711 ++__begin;
712 return true;
713 }
714
715 template <contiguous_iterator _Iterator>
716 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(_Iterator& __begin, _Iterator __end, auto& __ctx) {
717 if (*__begin == _CharT('0'))
718 std::__throw_format_error("The width option should not have a leading zero");
719
720 if (*__begin == _CharT('{')) {
721 __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __ctx);
722 __width_as_arg_ = true;
723 __width_ = __r.__value;
724 __begin = __r.__last;
725 return true;
726 }
727
728 if (*__begin < _CharT('0') || *__begin > _CharT('9'))
729 return false;
730
731 __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
732 __width_ = __r.__value;
733 _LIBCPP_ASSERT_INTERNAL(__width_ != 0,
734 "A zero value isn't allowed and should be impossible, "
735 "due to validations in this function");
736 __begin = __r.__last;
737 return true;
738 }
739
740 template <contiguous_iterator _Iterator>
741 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(_Iterator& __begin, _Iterator __end, auto& __ctx) {
742 if (*__begin != _CharT('.'))
743 return false;
744
745 ++__begin;
746 if (__begin == __end)
747 std::__throw_format_error("End of input while parsing format specifier precision");
748
749 if (*__begin == _CharT('{')) {
750 __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __ctx);
751 __precision_as_arg_ = true;
752 __precision_ = __arg_id.__value;
753 __begin = __arg_id.__last;
754 return true;
755 }
756
757 if (*__begin < _CharT('0') || *__begin > _CharT('9'))
758 std::__throw_format_error("The precision option does not contain a value or an argument index");
759
760 __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
761 __precision_ = __r.__value;
762 __precision_as_arg_ = false;
763 __begin = __r.__last;
764 return true;
765 }
766
767 template <contiguous_iterator _Iterator>
768 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(_Iterator& __begin) {
769 if (*__begin != _CharT('L'))
770 return false;
771
772 __locale_specific_form_ = true;
773 ++__begin;
774 return true;
775 }
776
777 template <contiguous_iterator _Iterator>
778 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_clear_brackets(_Iterator& __begin) {
779 if (*__begin != _CharT('n'))
780 return false;
781
782 __clear_brackets_ = true;
783 ++__begin;
784 return true;
785 }
786
787 template <contiguous_iterator _Iterator>
788 _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(_Iterator& __begin) {
789 // Determines the type. It does not validate whether the selected type is
790 // valid. Most formatters have optional fields that are only allowed for
791 // certain types. These parsers need to do validation after the type has
792 // been parsed. So its easier to implement the validation for all types in
793 // the specific parse function.
794 switch (*__begin) {
795 case 'A':
796 __type_ = __type::__hexfloat_upper_case;
797 break;
798 case 'B':
799 __type_ = __type::__binary_upper_case;
800 break;
801 case 'E':
802 __type_ = __type::__scientific_upper_case;
803 break;
804 case 'F':
805 __type_ = __type::__fixed_upper_case;
806 break;
807 case 'G':
808 __type_ = __type::__general_upper_case;
809 break;
810 case 'X':
811 __type_ = __type::__hexadecimal_upper_case;
812 break;
813 case 'a':
814 __type_ = __type::__hexfloat_lower_case;
815 break;
816 case 'b':
817 __type_ = __type::__binary_lower_case;
818 break;
819 case 'c':
820 __type_ = __type::__char;
821 break;
822 case 'd':
823 __type_ = __type::__decimal;
824 break;
825 case 'e':
826 __type_ = __type::__scientific_lower_case;
827 break;
828 case 'f':
829 __type_ = __type::__fixed_lower_case;
830 break;
831 case 'g':
832 __type_ = __type::__general_lower_case;
833 break;
834 case 'o':
835 __type_ = __type::__octal;
836 break;
837 case 'p':
838 __type_ = __type::__pointer_lower_case;
839 break;
840 case 'P':
841 __type_ = __type::__pointer_upper_case;
842 break;
843 case 's':
844 __type_ = __type::__string;
845 break;
846 case 'x':
847 __type_ = __type::__hexadecimal_lower_case;
848 break;
849# if _LIBCPP_STD_VER >= 23
850 case '?':
851 __type_ = __type::__debug;
852 break;
853# endif
854 default:
855 return;
856 }
857 ++__begin;
858 }
859
860 _LIBCPP_HIDE_FROM_ABI int32_t __get_width(auto& __ctx) const {
861 if (!__width_as_arg_)
862 return __width_;
863
864 return __format_spec::__substitute_arg_id(__ctx.arg(__width_));
865 }
866
867 _LIBCPP_HIDE_FROM_ABI int32_t __get_precision(auto& __ctx) const {
868 if (!__precision_as_arg_)
869 return __precision_;
870
871 return __format_spec::__substitute_arg_id(__ctx.arg(__precision_));
872 }
873};
874
875// Validates whether the reserved bitfields don't change the size.
876static_assert(sizeof(__parser<char>) == 16);
877# if _LIBCPP_HAS_WIDE_CHARACTERS
878static_assert(sizeof(__parser<wchar_t>) == 16);
879# endif
880
881_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) {
882 switch (__type) {
883 case __format_spec::__type::__default:
884 case __format_spec::__type::__string:
885 case __format_spec::__type::__debug:
886 break;
887
888 default:
889 std::__throw_format_error("The type option contains an invalid value for a string formatting argument");
890 }
891}
892
893template <class _CharT>
894_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser, const char* __id) {
895 __parser.__validate(__format_spec::__fields_bool, __id);
896 if (__parser.__alignment_ == __alignment::__default)
897 __parser.__alignment_ = __alignment::__left;
898}
899
900template <class _CharT>
901_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser, const char* __id) {
902 __format_spec::__process_display_type_bool_string(__parser, __id);
903}
904
905template <class _CharT>
906_LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser, const char* __id) {
907 switch (__parser.__type_) {
908 case __format_spec::__type::__default:
909 case __format_spec::__type::__string:
910 __format_spec::__process_display_type_bool_string(__parser, __id);
911 break;
912
913 case __format_spec::__type::__binary_lower_case:
914 case __format_spec::__type::__binary_upper_case:
915 case __format_spec::__type::__octal:
916 case __format_spec::__type::__decimal:
917 case __format_spec::__type::__hexadecimal_lower_case:
918 case __format_spec::__type::__hexadecimal_upper_case:
919 break;
920
921 default:
922 __format_spec::__throw_invalid_type_format_error(__id);
923 }
924}
925
926template <class _CharT>
927_LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser, const char* __id) {
928 switch (__parser.__type_) {
929 case __format_spec::__type::__default:
930 case __format_spec::__type::__char:
931 case __format_spec::__type::__debug:
932 __format_spec::__process_display_type_char(__parser, __id);
933 break;
934
935 case __format_spec::__type::__binary_lower_case:
936 case __format_spec::__type::__binary_upper_case:
937 case __format_spec::__type::__octal:
938 case __format_spec::__type::__decimal:
939 case __format_spec::__type::__hexadecimal_lower_case:
940 case __format_spec::__type::__hexadecimal_upper_case:
941 break;
942
943 default:
944 __format_spec::__throw_invalid_type_format_error(__id);
945 }
946}
947
948template <class _CharT>
949_LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser, const char* __id) {
950 switch (__parser.__type_) {
951 case __format_spec::__type::__default:
952 case __format_spec::__type::__binary_lower_case:
953 case __format_spec::__type::__binary_upper_case:
954 case __format_spec::__type::__octal:
955 case __format_spec::__type::__decimal:
956 case __format_spec::__type::__hexadecimal_lower_case:
957 case __format_spec::__type::__hexadecimal_upper_case:
958 break;
959
960 case __format_spec::__type::__char:
961 __format_spec::__process_display_type_char(__parser, __id);
962 break;
963
964 default:
965 __format_spec::__throw_invalid_type_format_error(__id);
966 }
967}
968
969template <class _CharT>
970_LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser, const char* __id) {
971 switch (__parser.__type_) {
972 case __format_spec::__type::__default:
973 case __format_spec::__type::__hexfloat_lower_case:
974 case __format_spec::__type::__hexfloat_upper_case:
975 // Precision specific behavior will be handled later.
976 break;
977 case __format_spec::__type::__scientific_lower_case:
978 case __format_spec::__type::__scientific_upper_case:
979 case __format_spec::__type::__fixed_lower_case:
980 case __format_spec::__type::__fixed_upper_case:
981 case __format_spec::__type::__general_lower_case:
982 case __format_spec::__type::__general_upper_case:
983 if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1)
984 // Set the default precision for the call to to_chars.
985 __parser.__precision_ = 6;
986 break;
987
988 default:
989 __format_spec::__throw_invalid_type_format_error(__id);
990 }
991}
992
993_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type, const char* __id) {
994 switch (__type) {
995 case __format_spec::__type::__default:
996 case __format_spec::__type::__pointer_lower_case:
997 case __format_spec::__type::__pointer_upper_case:
998 break;
999
1000 default:
1001 __format_spec::__throw_invalid_type_format_error(__id);
1002 }
1003}
1004
1005template <contiguous_iterator _Iterator>
1006struct __column_width_result {
1007 /// The number of output columns.
1008 size_t __width_;
1009 /// One beyond the last code unit used in the estimation.
1010 ///
1011 /// This limits the original output to fit in the wanted number of columns.
1012 _Iterator __last_;
1013};
1014
1015template <contiguous_iterator _Iterator>
1016__column_width_result(size_t, _Iterator) -> __column_width_result<_Iterator>;
1017
/// Selects what happens when the requested column width can't be met exactly.
///
/// A single element can have a column width of two, so the requested column
/// width may fall in the middle of an element. Which side of the boundary is
/// selected depends on the intended usage.
enum class __column_width_rounding {
  /// Used for a precision: the maximum width may not be exceeded, so the
  /// result is "rounded down" to the previous boundary.
  __down,

  /// Used for a width: the estimation is done once the minimum is reached and
  /// exceeding it is not an issue. Rounding down would be an issue since it
  /// results in writing fill characters, so the result is "rounded up".
  __up
};
1028
1029# if _LIBCPP_HAS_UNICODE
1030
1031namespace __detail {
1032template <contiguous_iterator _Iterator>
1033_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width_grapheme_clustering(
1034 _Iterator __first, _Iterator __last, size_t __maximum, __column_width_rounding __rounding) noexcept {
1035 using _CharT = iter_value_t<_Iterator>;
1036 __unicode::__extended_grapheme_cluster_view<_CharT> __view{__first, __last};
1037
1038 __column_width_result<_Iterator> __result{0, __first};
1039 while (__result.__last_ != __last && __result.__width_ <= __maximum) {
1040 typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume();
1041 int __width = __width_estimation_table::__estimated_width(__cluster.__code_point_);
1042
1043 // When the next entry would exceed the maximum width the previous width
1044 // might be returned. For example when a width of 100 is requested the
1045 // returned width might be 99, since the next code point has an estimated
1046 // column width of 2. This depends on the rounding flag.
1047 // When the maximum is exceeded the loop will abort the next iteration.
1048 if (__rounding == __column_width_rounding::__down && __result.__width_ + __width > __maximum)
1049 return __result;
1050
1051 __result.__width_ += __width;
1052 __result.__last_ = __cluster.__last_;
1053 }
1054
1055 return __result;
1056}
1057
1058} // namespace __detail
1059
1060// Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32.
1061// Depending on format the relation between the number of code units stored and
1062// the number of output columns differs. The first relation is the number of
1063// code units forming a code point. (The text assumes the code units are
1064// unsigned.)
// - UTF-8: The number of code units is between one and four. The first 128
//   Unicode code points match the ASCII character set. When the highest bit is
//   set it means the code point has more than one code unit.
// - UTF-16: The number of code units is between one and two. When the first
//   code unit is in the range [0xd800,0xdc00) it means the code point uses two
//   code units.
1071// - UTF-32: The number of code units is always one.
1072//
1073// The code point to the number of columns is specified in
1074// [format.string.std]/11. This list might change in the future.
1075//
// Another thing to be taken into account is Grapheme clustering. This means
// that in some cases multiple code points are combined into one element in
// the output. For example:
1079// - an ASCII character with a combined diacritical mark
1080// - an emoji with a skin tone modifier
1081// - a group of combined people emoji to create a family
1082// - a combination of flag emoji
1083//
1084// See also:
1085// - [format.string.general]/11
1086// - https://en.wikipedia.org/wiki/UTF-8#Encoding
1087// - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
1088
1089_LIBCPP_HIDE_FROM_ABI constexpr bool __is_ascii(char32_t __c) { return __c < 0x80; }
1090
1091/// Determines the number of output columns needed to render the input.
1092///
1093/// \note When the scanner encounters malformed Unicode it acts as-if every
1094/// code unit is a one column code point. Typically a terminal uses the same
1095/// strategy and replaces every malformed code unit with a one column
1096/// replacement character.
1097///
1098/// \param __first Points to the first element of the input range.
1099/// \param __last Points beyond the last element of the input range.
1100/// \param __maximum The maximum number of output columns. The returned number
1101/// of estimated output columns will not exceed this value.
1102/// \param __rounding Selects the rounding method.
1103/// \c __down result.__width_ <= __maximum
1104/// \c __up result.__width_ <= __maximum + 1
1105template <class _CharT, class _Iterator = typename basic_string_view<_CharT>::const_iterator>
1106_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width(
1107 basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding __rounding) noexcept {
1108 // The width estimation is done in two steps:
1109 // - Quickly process for the ASCII part. ASCII has the following properties
1110 // - One code unit is one code point
1111 // - Every code point has an estimated width of one
1112 // - When needed it will a Unicode Grapheme clustering algorithm to find
1113 // the proper place for truncation.
1114
1115 if (__str.empty() || __maximum == 0)
1116 return {0, __str.begin()};
1117
1118 // ASCII has one caveat; when an ASCII character is followed by a non-ASCII
1119 // character they might be part of an extended grapheme cluster. For example:
1120 // an ASCII letter and a COMBINING ACUTE ACCENT
1121 // The truncate should happen after the COMBINING ACUTE ACCENT. Therefore we
1122 // need to scan one code unit beyond the requested precision. When this code
1123 // unit is non-ASCII we omit the current code unit and let the Grapheme
1124 // clustering algorithm do its work.
1125 auto __it = __str.begin();
1126 if (__format_spec::__is_ascii(*__it)) {
1127 do {
1128 --__maximum;
1129 ++__it;
1130 if (__it == __str.end())
1131 return {__str.size(), __str.end()};
1132
1133 if (__maximum == 0) {
1134 if (__format_spec::__is_ascii(*__it))
1135 return {static_cast<size_t>(__it - __str.begin()), __it};
1136
1137 break;
1138 }
1139 } while (__format_spec::__is_ascii(*__it));
1140 --__it;
1141 ++__maximum;
1142 }
1143
1144 ptrdiff_t __ascii_size = __it - __str.begin();
1145 __column_width_result __result =
1146 __detail::__estimate_column_width_grapheme_clustering(__it, __str.end(), __maximum, __rounding);
1147
1148 __result.__width_ += __ascii_size;
1149 return __result;
1150}
1151# else // _LIBCPP_HAS_UNICODE
1152template <class _CharT>
1153_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<typename basic_string_view<_CharT>::const_iterator>
1154__estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding) noexcept {
1155 // When Unicode isn't supported assume ASCII and every code unit is one code
1156 // point. In ASCII the estimated column width is always one. Thus there's no
1157 // need for rounding.
1158 size_t __width = std::min(__str.size(), __maximum);
1159 return {__width, __str.begin() + __width};
1160}
1161
1162# endif // _LIBCPP_HAS_UNICODE
1163
1164} // namespace __format_spec
1165
1166#endif // _LIBCPP_STD_VER >= 20
1167
1168_LIBCPP_END_NAMESPACE_STD
1169
1170_LIBCPP_POP_MACROS
1171
1172#endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
1173

Warning: This file is not a C or C++ file. It does not have highlighting.

source code of libcxx/include/__format/parser_std_format_spec.h