Warning: This file is not a C or C++ file. It does not have highlighting.
1 | // -*- C++ -*- |
---|---|
2 | //===----------------------------------------------------------------------===// |
3 | // |
4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
5 | // See https://llvm.org/LICENSE.txt for license information. |
6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | |
10 | #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H |
11 | #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H |
12 | |
13 | /// \file Contains the std-format-spec parser. |
14 | /// |
15 | /// Most of the code can be reused in the chrono-format-spec. |
16 | /// This header has some support for the chrono-format-spec since it doesn't |
17 | /// affect the std-format-spec. |
18 | |
19 | #include <__algorithm/copy_n.h> |
20 | #include <__algorithm/min.h> |
21 | #include <__assert> |
22 | #include <__concepts/arithmetic.h> |
23 | #include <__concepts/same_as.h> |
24 | #include <__config> |
25 | #include <__format/format_arg.h> |
26 | #include <__format/format_error.h> |
27 | #include <__format/format_parse_context.h> |
28 | #include <__format/format_string.h> |
29 | #include <__format/unicode.h> |
30 | #include <__format/width_estimation_table.h> |
31 | #include <__iterator/concepts.h> |
32 | #include <__iterator/iterator_traits.h> // iter_value_t |
33 | #include <__memory/addressof.h> |
34 | #include <__type_traits/common_type.h> |
35 | #include <__type_traits/is_constant_evaluated.h> |
36 | #include <__type_traits/is_trivially_copyable.h> |
37 | #include <__variant/monostate.h> |
38 | #include <cstdint> |
39 | #include <string> |
40 | #include <string_view> |
41 | |
42 | #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
43 | # pragma GCC system_header |
44 | #endif |
45 | |
46 | _LIBCPP_PUSH_MACROS |
47 | #include <__undef_macros> |
48 | |
49 | _LIBCPP_BEGIN_NAMESPACE_STD |
50 | |
51 | #if _LIBCPP_STD_VER >= 20 |
52 | |
53 | namespace __format_spec { |
54 | |
55 | [[noreturn]] _LIBCPP_HIDE_FROM_ABI inline void |
56 | __throw_invalid_option_format_error(const char* __id, const char* __option) { |
57 | std::__throw_format_error( |
58 | (string("The format specifier for ") + __id + " does not allow the " + __option + " option").c_str()); |
59 | } |
60 | |
61 | [[noreturn]] _LIBCPP_HIDE_FROM_ABI inline void __throw_invalid_type_format_error(const char* __id) { |
62 | std::__throw_format_error( |
63 | (string("The type option contains an invalid value for ") + __id + " formatting argument").c_str()); |
64 | } |
65 | |
66 | template <contiguous_iterator _Iterator, class _ParseContext> |
67 | _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result<_Iterator> |
68 | __parse_arg_id(_Iterator __begin, _Iterator __end, _ParseContext& __ctx) { |
69 | using _CharT = iter_value_t<_Iterator>; |
70 | // This function is a wrapper to call the real parser. But it does the |
71 | // validation for the pre-conditions and post-conditions. |
72 | if (__begin == __end) |
73 | std::__throw_format_error("End of input while parsing an argument index"); |
74 | |
75 | __format::__parse_number_result __r = __format::__parse_arg_id(__begin, __end, __ctx); |
76 | |
77 | if (__r.__last == __end || *__r.__last != _CharT('}')) |
78 | std::__throw_format_error("The argument index is invalid"); |
79 | |
80 | ++__r.__last; |
81 | return __r; |
82 | } |
83 | |
84 | template <class _Context> |
85 | _LIBCPP_HIDE_FROM_ABI constexpr uint32_t __substitute_arg_id(basic_format_arg<_Context> __format_arg) { |
86 | // [format.string.std]/8 |
87 | // If the corresponding formatting argument is not of integral type... |
88 | // This wording allows char and bool too. LWG-3720 changes the wording to |
89 | // If the corresponding formatting argument is not of standard signed or |
90 | // unsigned integer type, |
91 | // This means the 128-bit will not be valid anymore. |
92 | // TODO FMT Verify this resolution is accepted and add a test to verify |
93 | // 128-bit integrals fail and switch to visit_format_arg. |
94 | return std::__visit_format_arg( |
95 | [](auto __arg) -> uint32_t { |
96 | using _Type = decltype(__arg); |
97 | if constexpr (same_as<_Type, monostate>) |
98 | std::__throw_format_error("The argument index value is too large for the number of arguments supplied"); |
99 | |
100 | // [format.string.std]/8 |
101 | // If { arg-idopt } is used in a width or precision, the value of the |
102 | // corresponding formatting argument is used in its place. If the |
103 | // corresponding formatting argument is not of standard signed or unsigned |
104 | // integer type, or its value is negative for precision or non-positive for |
105 | // width, an exception of type format_error is thrown. |
106 | // |
107 | // When an integral is used in a format function, it is stored as one of |
108 | // the types checked below. Other integral types are promoted. For example, |
109 | // a signed char is stored as an int. |
110 | if constexpr (same_as<_Type, int> || same_as<_Type, unsigned int> || // |
111 | same_as<_Type, long long> || same_as<_Type, unsigned long long>) { |
112 | if constexpr (signed_integral<_Type>) { |
113 | if (__arg < 0) |
114 | std::__throw_format_error("An argument index may not have a negative value"); |
115 | } |
116 | |
117 | using _CT = common_type_t<_Type, decltype(__format::__number_max)>; |
118 | if (static_cast<_CT>(__arg) > static_cast<_CT>(__format::__number_max)) |
119 | std::__throw_format_error("The value of the argument index exceeds its maximum value"); |
120 | |
121 | return __arg; |
122 | } else |
123 | std::__throw_format_error("Replacement argument isn't a standard signed or unsigned integer type"); |
124 | }, |
125 | __format_arg); |
126 | } |
127 | |
128 | /// These fields are a filter for which elements to parse. |
129 | /// |
130 | /// They default to false so when a new field is added it needs to be opted in |
131 | /// explicitly. |
132 | struct _LIBCPP_HIDE_FROM_ABI __fields { |
133 | uint16_t __sign_ : 1 {false}; |
134 | uint16_t __alternate_form_ : 1 {false}; |
135 | uint16_t __zero_padding_ : 1 {false}; |
136 | uint16_t __precision_ : 1 {false}; |
137 | uint16_t __locale_specific_form_ : 1 {false}; |
138 | uint16_t __type_ : 1 {false}; |
139 | // Determines the valid values for fill. |
140 | // |
141 | // Originally the fill could be any character except { and }. Range-based |
142 | // formatters use the colon to mark the beginning of the |
143 | // underlying-format-spec. To avoid parsing ambiguities these formatter |
144 | // specializations prohibit the use of the colon as a fill character. |
145 | uint16_t __use_range_fill_ : 1 {false}; |
146 | uint16_t __clear_brackets_ : 1 {false}; |
147 | uint16_t __consume_all_ : 1 {false}; |
148 | }; |
149 | |
150 | // By not placing this constant in the formatter class it's not duplicated for |
151 | // char and wchar_t. |
152 | inline constexpr __fields __fields_bool{.__locale_specific_form_ = true, .__type_ = true, .__consume_all_ = true}; |
153 | inline constexpr __fields __fields_integral{ |
154 | .__sign_ = true, |
155 | .__alternate_form_ = true, |
156 | .__zero_padding_ = true, |
157 | .__locale_specific_form_ = true, |
158 | .__type_ = true, |
159 | .__consume_all_ = true}; |
160 | inline constexpr __fields __fields_floating_point{ |
161 | .__sign_ = true, |
162 | .__alternate_form_ = true, |
163 | .__zero_padding_ = true, |
164 | .__precision_ = true, |
165 | .__locale_specific_form_ = true, |
166 | .__type_ = true, |
167 | .__consume_all_ = true}; |
168 | inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true, .__consume_all_ = true}; |
169 | inline constexpr __fields __fields_pointer{.__zero_padding_ = true, .__type_ = true, .__consume_all_ = true}; |
170 | |
171 | # if _LIBCPP_STD_VER >= 23 |
172 | inline constexpr __fields __fields_tuple{.__use_range_fill_ = true, .__clear_brackets_ = true}; |
173 | inline constexpr __fields __fields_range{.__use_range_fill_ = true, .__clear_brackets_ = true}; |
174 | inline constexpr __fields __fields_fill_align_width{}; |
175 | # endif |
176 | |
/// The alignment requested in the format-spec.
///
/// The numeric values are spelled out; they match the implicit numbering of
/// the enumerators.
enum class __alignment : uint8_t {
  /// No alignment is set in the format string.
  __default      = 0,
  __left         = 1,
  __center       = 2,
  __right        = 3,
  __zero_padding = 4
};
185 | |
/// The sign option requested in the format-spec.
///
/// The numeric values are spelled out; they match the implicit numbering of
/// the enumerators.
enum class __sign : uint8_t {
  /// No sign is set in the format string.
  ///
  /// The sign isn't allowed for certain format-types. By using this value
  /// it's possible to detect whether or not the user explicitly set the sign
  /// flag. For formatting purposes it behaves the same as \ref __minus.
  __default = 0,
  __minus   = 1,
  __plus    = 2,
  __space   = 3
};
197 | |
/// The display type selected by the type option of the format-spec.
///
/// The numeric values are spelled out; they match the implicit numbering of
/// the enumerators. __create_type_mask uses the value as a bit position in a
/// 32-bit mask.
enum class __type : uint8_t {
  __default                = 0,
  __string                 = 1,
  __binary_lower_case      = 2,
  __binary_upper_case      = 3,
  __octal                  = 4,
  __decimal                = 5,
  __hexadecimal_lower_case = 6,
  __hexadecimal_upper_case = 7,
  __pointer_lower_case     = 8,
  __pointer_upper_case     = 9,
  __char                   = 10,
  __hexfloat_lower_case    = 11,
  __hexfloat_upper_case    = 12,
  __scientific_lower_case  = 13,
  __scientific_upper_case  = 14,
  __fixed_lower_case       = 15,
  __fixed_upper_case       = 16,
  __general_lower_case     = 17,
  __general_upper_case     = 18,
  __debug                  = 19
};
220 | |
221 | _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __create_type_mask(__type __t) { |
222 | uint32_t __shift = static_cast<uint32_t>(__t); |
223 | if (__shift == 0) |
224 | return 1; |
225 | |
226 | if (__shift > 31) |
227 | std::__throw_format_error("The type does not fit in the mask"); |
228 | |
229 | return 1 << __shift; |
230 | } |
231 | |
232 | inline constexpr uint32_t __type_mask_integer = |
233 | __create_type_mask(__type::__binary_lower_case) | // |
234 | __create_type_mask(__type::__binary_upper_case) | // |
235 | __create_type_mask(__type::__decimal) | // |
236 | __create_type_mask(__type::__octal) | // |
237 | __create_type_mask(__type::__hexadecimal_lower_case) | // |
238 | __create_type_mask(__type::__hexadecimal_upper_case); |
239 | |
240 | struct __std { |
241 | __alignment __alignment_ : 3; |
242 | __sign __sign_ : 2; |
243 | bool __alternate_form_ : 1; |
244 | bool __locale_specific_form_ : 1; |
245 | __type __type_; |
246 | }; |
247 | |
248 | struct __chrono { |
249 | __alignment __alignment_ : 3; |
250 | bool __locale_specific_form_ : 1; |
251 | bool __hour_ : 1; |
252 | bool __weekday_name_ : 1; |
253 | bool __weekday_ : 1; |
254 | bool __day_of_year_ : 1; |
255 | bool __week_of_year_ : 1; |
256 | bool __month_name_ : 1; |
257 | }; |
258 | |
// Storage for the fill character, a UCS scalar value.
//
// The data is always an array with 1, 2, or 4 elements, chosen so that the
// structure is always 32 bits in size. The default fill is a space; the
// remaining elements are zero-initialized.
template <class _CharT>
struct __code_point;

// char: four code units.
template <>
struct __code_point<char> {
  char __data[4] = {' '};
};

#  if _LIBCPP_HAS_WIDE_CHARACTERS
// wchar_t: as many code units as fit in four bytes.
template <>
struct __code_point<wchar_t> {
  wchar_t __data[4 / sizeof(wchar_t)] = {L' '};
};
#  endif
277 | |
278 | /// Contains the parsed formatting specifications. |
279 | /// |
280 | /// This contains information for both the std-format-spec and the |
281 | /// chrono-format-spec. This results in some unused members for both |
282 | /// specifications. However these unused members don't increase the size |
283 | /// of the structure. |
284 | /// |
285 | /// This struct doesn't cross ABI boundaries so its layout doesn't need to be |
286 | /// kept stable. |
287 | template <class _CharT> |
288 | struct __parsed_specifications { |
289 | union { |
290 | // The field __alignment_ is the first element in __std_ and __chrono_. |
291 | // This allows the code to always inspect this value regards which member |
292 | // of the union is the active member [class.union.general]/2. |
293 | // |
294 | // This is needed since the generic output routines handle the alignment of |
295 | // the output. |
296 | __alignment __alignment_ : 3; |
297 | __std __std_; |
298 | __chrono __chrono_; |
299 | }; |
300 | |
301 | /// The requested width. |
302 | /// |
303 | /// When the format-spec used an arg-id for this field it has already been |
304 | /// replaced with the value of that arg-id. |
305 | int32_t __width_; |
306 | |
307 | /// The requested precision. |
308 | /// |
309 | /// When the format-spec used an arg-id for this field it has already been |
310 | /// replaced with the value of that arg-id. |
311 | int32_t __precision_; |
312 | |
313 | __code_point<_CharT> __fill_; |
314 | |
315 | _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; } |
316 | |
317 | _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; } |
318 | }; |
319 | |
320 | // Validate the struct is small and cheap to copy since the struct is passed by |
321 | // value in formatting functions. |
322 | static_assert(sizeof(__parsed_specifications<char>) == 16); |
323 | static_assert(is_trivially_copyable_v<__parsed_specifications<char>>); |
324 | # if _LIBCPP_HAS_WIDE_CHARACTERS |
325 | static_assert(sizeof(__parsed_specifications<wchar_t>) == 16); |
326 | static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>); |
327 | # endif |
328 | |
329 | /// The parser for the std-format-spec. |
330 | /// |
331 | /// Note this class is a member of std::formatter specializations. It's |
332 | /// expected developers will create their own formatter specializations that |
333 | /// inherit from the std::formatter specializations. This means this class |
334 | /// must be ABI stable. To aid the stability the unused bits in the class are |
335 | /// set to zero. That way they can be repurposed if a future revision of the |
336 | /// Standards adds new fields to std-format-spec. |
337 | template <class _CharT> |
338 | class __parser { |
339 | public: |
340 | // Parses the format specification. |
341 | // |
342 | // Depending on whether the parsing is done compile-time or run-time |
343 | // the method slightly differs. |
344 | // - Only parses a field when it is in the __fields. Accepting all |
345 | // fields and then validating the valid ones has a performance impact. |
346 | // This is faster but gives slightly worse error messages. |
347 | // - At compile-time when a field is not accepted the parser will still |
348 | // parse it and give an error when it's present. This gives a more |
349 | // accurate error. |
350 | // The idea is that most times the format instead of the vformat |
351 | // functions are used. In that case the error will be detected during |
352 | // compilation and there is no need to pay for the run-time overhead. |
353 | template <class _ParseContext> |
354 | _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator __parse(_ParseContext& __ctx, __fields __fields) { |
355 | auto __begin = __ctx.begin(); |
356 | auto __end = __ctx.end(); |
357 | if (__begin == __end || *__begin == _CharT('}') || (__fields.__use_range_fill_ && *__begin == _CharT(':'))) |
358 | return __begin; |
359 | |
360 | if (__parse_fill_align(__begin, __end) && __begin == __end) |
361 | return __begin; |
362 | |
363 | if (__fields.__sign_) { |
364 | if (__parse_sign(__begin) && __begin == __end) |
365 | return __begin; |
366 | } else if (std::is_constant_evaluated() && __parse_sign(__begin)) { |
367 | std::__throw_format_error("The format specification does not allow the sign option"); |
368 | } |
369 | |
370 | if (__fields.__alternate_form_) { |
371 | if (__parse_alternate_form(__begin) && __begin == __end) |
372 | return __begin; |
373 | } else if (std::is_constant_evaluated() && __parse_alternate_form(__begin)) { |
374 | std::__throw_format_error("The format specifier does not allow the alternate form option"); |
375 | } |
376 | |
377 | if (__fields.__zero_padding_) { |
378 | if (__parse_zero_padding(__begin) && __begin == __end) |
379 | return __begin; |
380 | } else if (std::is_constant_evaluated() && __parse_zero_padding(__begin)) { |
381 | std::__throw_format_error("The format specifier does not allow the zero-padding option"); |
382 | } |
383 | |
384 | if (__parse_width(__begin, __end, __ctx) && __begin == __end) |
385 | return __begin; |
386 | |
387 | if (__fields.__precision_) { |
388 | if (__parse_precision(__begin, __end, __ctx) && __begin == __end) |
389 | return __begin; |
390 | } else if (std::is_constant_evaluated() && __parse_precision(__begin, __end, __ctx)) { |
391 | std::__throw_format_error("The format specifier does not allow the precision option"); |
392 | } |
393 | |
394 | if (__fields.__locale_specific_form_) { |
395 | if (__parse_locale_specific_form(__begin) && __begin == __end) |
396 | return __begin; |
397 | } else if (std::is_constant_evaluated() && __parse_locale_specific_form(__begin)) { |
398 | std::__throw_format_error("The format specifier does not allow the locale-specific form option"); |
399 | } |
400 | |
401 | if (__fields.__clear_brackets_) { |
402 | if (__parse_clear_brackets(__begin) && __begin == __end) |
403 | return __begin; |
404 | } else if (std::is_constant_evaluated() && __parse_clear_brackets(__begin)) { |
405 | std::__throw_format_error("The format specifier does not allow the n option"); |
406 | } |
407 | |
408 | if (__fields.__type_) |
409 | __parse_type(__begin); |
410 | |
411 | if (!__fields.__consume_all_) |
412 | return __begin; |
413 | |
414 | if (__begin != __end && *__begin != _CharT('}')) |
415 | std::__throw_format_error("The format specifier should consume the input or end with a '}'"); |
416 | |
417 | return __begin; |
418 | } |
419 | |
420 | // Validates the parsed data against the given fields. |
421 | // |
422 | // The valid fields in the parser may depend on the display type |
423 | // selected. But the type is the last optional field, so by the time |
424 | // it's known an option can't be used, it already has been parsed. |
425 | // This does the validation again. |
426 | // |
427 | // For example an integral may have a sign, zero-padding, or alternate |
428 | // form when the type option is not 'c'. So the generic approach is: |
429 | // |
430 | // typename _ParseContext::iterator __result = __parser_.__parse(__ctx, __format_spec::__fields_integral); |
431 | // if (__parser.__type_ == __format_spec::__type::__char) { |
432 | // __parser.__validate(__format_spec::__fields_bool, "an integer"); |
433 | // ... // more char adjustments |
434 | // } else { |
435 | // ... // validate an integral type. |
436 | // } |
437 | // |
438 | // For some types all valid options need a second validation run, like |
439 | // boolean types. |
440 | // |
441 | // Depending on whether the validation is done at compile-time or |
442 | // run-time the error differs |
443 | // - run-time the exception is thrown and contains the type of field |
444 | // being validated. |
445 | // - at compile-time the line with `std::__throw_format_error` is shown |
446 | // in the output. In that case it's important for the error to be on one |
447 | // line. |
448 | // Note future versions of C++ may allow better compile-time error |
449 | // reporting. |
450 | _LIBCPP_HIDE_FROM_ABI constexpr void |
451 | __validate(__fields __fields, const char* __id, uint32_t __type_mask = -1) const { |
452 | if (!__fields.__sign_ && __sign_ != __sign::__default) { |
453 | if (std::is_constant_evaluated()) |
454 | std::__throw_format_error("The format specifier does not allow the sign option"); |
455 | else |
456 | __format_spec::__throw_invalid_option_format_error(__id, "sign"); |
457 | } |
458 | |
459 | if (!__fields.__alternate_form_ && __alternate_form_) { |
460 | if (std::is_constant_evaluated()) |
461 | std::__throw_format_error("The format specifier does not allow the alternate form option"); |
462 | else |
463 | __format_spec::__throw_invalid_option_format_error(__id, "alternate form"); |
464 | } |
465 | |
466 | if (!__fields.__zero_padding_ && __alignment_ == __alignment::__zero_padding) { |
467 | if (std::is_constant_evaluated()) |
468 | std::__throw_format_error("The format specifier does not allow the zero-padding option"); |
469 | else |
470 | __format_spec::__throw_invalid_option_format_error(__id, "zero-padding"); |
471 | } |
472 | |
473 | if (!__fields.__precision_ && __precision_ != -1) { // Works both when the precision has a value or an arg-id. |
474 | if (std::is_constant_evaluated()) |
475 | std::__throw_format_error("The format specifier does not allow the precision option"); |
476 | else |
477 | __format_spec::__throw_invalid_option_format_error(__id, "precision"); |
478 | } |
479 | |
480 | if (!__fields.__locale_specific_form_ && __locale_specific_form_) { |
481 | if (std::is_constant_evaluated()) |
482 | std::__throw_format_error("The format specifier does not allow the locale-specific form option"); |
483 | else |
484 | __format_spec::__throw_invalid_option_format_error(__id, "locale-specific form"); |
485 | } |
486 | |
487 | if ((__create_type_mask(__type_) & __type_mask) == 0) { |
488 | if (std::is_constant_evaluated()) |
489 | std::__throw_format_error("The format specifier uses an invalid value for the type option"); |
490 | else |
491 | __format_spec::__throw_invalid_type_format_error(__id); |
492 | } |
493 | } |
494 | |
495 | /// \returns the `__parsed_specifications` with the resolved dynamic sizes. |
496 | _LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const { |
497 | return __parsed_specifications<_CharT>{ |
498 | .__std_ = __std{.__alignment_ = __alignment_, |
499 | .__sign_ = __sign_, |
500 | .__alternate_form_ = __alternate_form_, |
501 | .__locale_specific_form_ = __locale_specific_form_, |
502 | .__type_ = __type_}, |
503 | .__width_{__get_width(__ctx)}, |
504 | .__precision_{__get_precision(__ctx)}, |
505 | .__fill_{__fill_}}; |
506 | } |
507 | |
508 | _LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_chrono_specifications(auto& __ctx) const { |
509 | return __parsed_specifications<_CharT>{ |
510 | .__chrono_ = |
511 | __chrono{.__alignment_ = __alignment_, |
512 | .__locale_specific_form_ = __locale_specific_form_, |
513 | .__hour_ = __hour_, |
514 | .__weekday_name_ = __weekday_name_, |
515 | .__weekday_ = __weekday_, |
516 | .__day_of_year_ = __day_of_year_, |
517 | .__week_of_year_ = __week_of_year_, |
518 | .__month_name_ = __month_name_}, |
519 | .__width_{__get_width(__ctx)}, |
520 | .__precision_{__get_precision(__ctx)}, |
521 | .__fill_{__fill_}}; |
522 | } |
523 | |
524 | __alignment __alignment_ : 3 {__alignment::__default}; |
525 | __sign __sign_ : 2 {__sign::__default}; |
526 | bool __alternate_form_ : 1 {false}; |
527 | bool __locale_specific_form_ : 1 {false}; |
528 | bool __clear_brackets_ : 1 {false}; |
529 | __type __type_{__type::__default}; |
530 | |
531 | // These flags are only used for formatting chrono. Since the struct has |
532 | // padding space left it's added to this structure. |
533 | bool __hour_ : 1 {false}; |
534 | |
535 | bool __weekday_name_ : 1 {false}; |
536 | bool __weekday_ : 1 {false}; |
537 | |
538 | bool __day_of_year_ : 1 {false}; |
539 | bool __week_of_year_ : 1 {false}; |
540 | |
541 | bool __month_name_ : 1 {false}; |
542 | |
543 | uint8_t __reserved_0_ : 2 {0}; |
544 | uint8_t __reserved_1_ : 6 {0}; |
545 | // These two flags are only used internally and not part of the |
546 | // __parsed_specifications. Therefore put them at the end. |
547 | bool __width_as_arg_ : 1 {false}; |
548 | bool __precision_as_arg_ : 1 {false}; |
549 | |
550 | /// The requested width, either the value or the arg-id. |
551 | int32_t __width_{0}; |
552 | |
553 | /// The requested precision, either the value or the arg-id. |
554 | int32_t __precision_{-1}; |
555 | |
556 | __code_point<_CharT> __fill_{}; |
557 | |
558 | private: |
559 | _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) { |
560 | switch (__c) { |
561 | case _CharT('<'): |
562 | __alignment_ = __alignment::__left; |
563 | return true; |
564 | |
565 | case _CharT('^'): |
566 | __alignment_ = __alignment::__center; |
567 | return true; |
568 | |
569 | case _CharT('>'): |
570 | __alignment_ = __alignment::__right; |
571 | return true; |
572 | } |
573 | return false; |
574 | } |
575 | |
576 | _LIBCPP_HIDE_FROM_ABI constexpr void __validate_fill_character(_CharT __fill) { |
577 | // The forbidden fill characters all code points formed from a single code unit, thus the |
578 | // check can be omitted when more code units are used. |
579 | if (__fill == _CharT('{')) |
580 | std::__throw_format_error("The fill option contains an invalid value"); |
581 | } |
582 | |
583 | # if _LIBCPP_HAS_UNICODE |
584 | // range-fill and tuple-fill are identical |
585 | template <contiguous_iterator _Iterator> |
586 | requires same_as<_CharT, char> |
587 | # if _LIBCPP_HAS_WIDE_CHARACTERS |
588 | || (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) |
589 | # endif |
590 | _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) { |
591 | _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( |
592 | __begin != __end, |
593 | "when called with an empty input the function will cause " |
594 | "undefined behavior by evaluating data not in the input"); |
595 | __unicode::__code_point_view<_CharT> __view{__begin, __end}; |
596 | __unicode::__consume_result __consumed = __view.__consume(); |
597 | if (__consumed.__status != __unicode::__consume_result::__ok) |
598 | std::__throw_format_error("The format specifier contains malformed Unicode characters"); |
599 | |
600 | if (__view.__position() < __end && __parse_alignment(*__view.__position())) { |
601 | ptrdiff_t __code_units = __view.__position() - __begin; |
602 | if (__code_units == 1) |
603 | // The forbidden fill characters all are code points encoded |
604 | // in one code unit, thus the check can be omitted when more |
605 | // code units are used. |
606 | __validate_fill_character(*__begin); |
607 | |
608 | std::copy_n(__begin, __code_units, std::addressof(__fill_.__data[0])); |
609 | __begin += __code_units + 1; |
610 | return true; |
611 | } |
612 | |
613 | if (!__parse_alignment(*__begin)) |
614 | return false; |
615 | |
616 | ++__begin; |
617 | return true; |
618 | } |
619 | |
620 | # if _LIBCPP_HAS_WIDE_CHARACTERS |
621 | template <contiguous_iterator _Iterator> |
622 | requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) |
623 | _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) { |
624 | _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( |
625 | __begin != __end, |
626 | "when called with an empty input the function will cause " |
627 | "undefined behavior by evaluating data not in the input"); |
628 | if (__begin + 1 != __end && __parse_alignment(*(__begin + 1))) { |
629 | if (!__unicode::__is_scalar_value(*__begin)) |
630 | std::__throw_format_error("The fill option contains an invalid value"); |
631 | |
632 | __validate_fill_character(*__begin); |
633 | |
634 | __fill_.__data[0] = *__begin; |
635 | __begin += 2; |
636 | return true; |
637 | } |
638 | |
639 | if (!__parse_alignment(*__begin)) |
640 | return false; |
641 | |
642 | ++__begin; |
643 | return true; |
644 | } |
645 | |
646 | # endif // _LIBCPP_HAS_WIDE_CHARACTERS |
647 | |
648 | # else // _LIBCPP_HAS_UNICODE |
649 | // range-fill and tuple-fill are identical |
650 | template <contiguous_iterator _Iterator> |
651 | _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) { |
652 | _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( |
653 | __begin != __end, |
654 | "when called with an empty input the function will cause " |
655 | "undefined behavior by evaluating data not in the input"); |
656 | if (__begin + 1 != __end) { |
657 | if (__parse_alignment(*(__begin + 1))) { |
658 | __validate_fill_character(*__begin); |
659 | |
660 | __fill_.__data[0] = *__begin; |
661 | __begin += 2; |
662 | return true; |
663 | } |
664 | } |
665 | |
666 | if (!__parse_alignment(*__begin)) |
667 | return false; |
668 | |
669 | ++__begin; |
670 | return true; |
671 | } |
672 | |
673 | # endif // _LIBCPP_HAS_UNICODE |
674 | |
675 | template <contiguous_iterator _Iterator> |
676 | _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(_Iterator& __begin) { |
677 | switch (*__begin) { |
678 | case _CharT('-'): |
679 | __sign_ = __sign::__minus; |
680 | break; |
681 | case _CharT('+'): |
682 | __sign_ = __sign::__plus; |
683 | break; |
684 | case _CharT(' '): |
685 | __sign_ = __sign::__space; |
686 | break; |
687 | default: |
688 | return false; |
689 | } |
690 | ++__begin; |
691 | return true; |
692 | } |
693 | |
694 | template <contiguous_iterator _Iterator> |
695 | _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(_Iterator& __begin) { |
696 | if (*__begin != _CharT('#')) |
697 | return false; |
698 | |
699 | __alternate_form_ = true; |
700 | ++__begin; |
701 | return true; |
702 | } |
703 | |
704 | template <contiguous_iterator _Iterator> |
705 | _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(_Iterator& __begin) { |
706 | if (*__begin != _CharT('0')) |
707 | return false; |
708 | |
709 | if (__alignment_ == __alignment::__default) |
710 | __alignment_ = __alignment::__zero_padding; |
711 | ++__begin; |
712 | return true; |
713 | } |
714 | |
715 | template <contiguous_iterator _Iterator> |
716 | _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(_Iterator& __begin, _Iterator __end, auto& __ctx) { |
717 | if (*__begin == _CharT('0')) |
718 | std::__throw_format_error("The width option should not have a leading zero"); |
719 | |
720 | if (*__begin == _CharT('{')) { |
721 | __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __ctx); |
722 | __width_as_arg_ = true; |
723 | __width_ = __r.__value; |
724 | __begin = __r.__last; |
725 | return true; |
726 | } |
727 | |
728 | if (*__begin < _CharT('0') || *__begin > _CharT('9')) |
729 | return false; |
730 | |
731 | __format::__parse_number_result __r = __format::__parse_number(__begin, __end); |
732 | __width_ = __r.__value; |
733 | _LIBCPP_ASSERT_INTERNAL(__width_ != 0, |
734 | "A zero value isn't allowed and should be impossible, " |
735 | "due to validations in this function"); |
736 | __begin = __r.__last; |
737 | return true; |
738 | } |
739 | |
740 | template <contiguous_iterator _Iterator> |
741 | _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(_Iterator& __begin, _Iterator __end, auto& __ctx) { |
742 | if (*__begin != _CharT('.')) |
743 | return false; |
744 | |
745 | ++__begin; |
746 | if (__begin == __end) |
747 | std::__throw_format_error("End of input while parsing format specifier precision"); |
748 | |
749 | if (*__begin == _CharT('{')) { |
750 | __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __ctx); |
751 | __precision_as_arg_ = true; |
752 | __precision_ = __arg_id.__value; |
753 | __begin = __arg_id.__last; |
754 | return true; |
755 | } |
756 | |
757 | if (*__begin < _CharT('0') || *__begin > _CharT('9')) |
758 | std::__throw_format_error("The precision option does not contain a value or an argument index"); |
759 | |
760 | __format::__parse_number_result __r = __format::__parse_number(__begin, __end); |
761 | __precision_ = __r.__value; |
762 | __precision_as_arg_ = false; |
763 | __begin = __r.__last; |
764 | return true; |
765 | } |
766 | |
767 | template <contiguous_iterator _Iterator> |
768 | _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(_Iterator& __begin) { |
769 | if (*__begin != _CharT('L')) |
770 | return false; |
771 | |
772 | __locale_specific_form_ = true; |
773 | ++__begin; |
774 | return true; |
775 | } |
776 | |
777 | template <contiguous_iterator _Iterator> |
778 | _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_clear_brackets(_Iterator& __begin) { |
779 | if (*__begin != _CharT('n')) |
780 | return false; |
781 | |
782 | __clear_brackets_ = true; |
783 | ++__begin; |
784 | return true; |
785 | } |
786 | |
787 | template <contiguous_iterator _Iterator> |
788 | _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(_Iterator& __begin) { |
789 | // Determines the type. It does not validate whether the selected type is |
790 | // valid. Most formatters have optional fields that are only allowed for |
791 | // certain types. These parsers need to do validation after the type has |
792 | // been parsed. So its easier to implement the validation for all types in |
793 | // the specific parse function. |
794 | switch (*__begin) { |
795 | case 'A': |
796 | __type_ = __type::__hexfloat_upper_case; |
797 | break; |
798 | case 'B': |
799 | __type_ = __type::__binary_upper_case; |
800 | break; |
801 | case 'E': |
802 | __type_ = __type::__scientific_upper_case; |
803 | break; |
804 | case 'F': |
805 | __type_ = __type::__fixed_upper_case; |
806 | break; |
807 | case 'G': |
808 | __type_ = __type::__general_upper_case; |
809 | break; |
810 | case 'X': |
811 | __type_ = __type::__hexadecimal_upper_case; |
812 | break; |
813 | case 'a': |
814 | __type_ = __type::__hexfloat_lower_case; |
815 | break; |
816 | case 'b': |
817 | __type_ = __type::__binary_lower_case; |
818 | break; |
819 | case 'c': |
820 | __type_ = __type::__char; |
821 | break; |
822 | case 'd': |
823 | __type_ = __type::__decimal; |
824 | break; |
825 | case 'e': |
826 | __type_ = __type::__scientific_lower_case; |
827 | break; |
828 | case 'f': |
829 | __type_ = __type::__fixed_lower_case; |
830 | break; |
831 | case 'g': |
832 | __type_ = __type::__general_lower_case; |
833 | break; |
834 | case 'o': |
835 | __type_ = __type::__octal; |
836 | break; |
837 | case 'p': |
838 | __type_ = __type::__pointer_lower_case; |
839 | break; |
840 | case 'P': |
841 | __type_ = __type::__pointer_upper_case; |
842 | break; |
843 | case 's': |
844 | __type_ = __type::__string; |
845 | break; |
846 | case 'x': |
847 | __type_ = __type::__hexadecimal_lower_case; |
848 | break; |
849 | # if _LIBCPP_STD_VER >= 23 |
850 | case '?': |
851 | __type_ = __type::__debug; |
852 | break; |
853 | # endif |
854 | default: |
855 | return; |
856 | } |
857 | ++__begin; |
858 | } |
859 | |
860 | _LIBCPP_HIDE_FROM_ABI int32_t __get_width(auto& __ctx) const { |
861 | if (!__width_as_arg_) |
862 | return __width_; |
863 | |
864 | return __format_spec::__substitute_arg_id(__ctx.arg(__width_)); |
865 | } |
866 | |
867 | _LIBCPP_HIDE_FROM_ABI int32_t __get_precision(auto& __ctx) const { |
868 | if (!__precision_as_arg_) |
869 | return __precision_; |
870 | |
871 | return __format_spec::__substitute_arg_id(__ctx.arg(__precision_)); |
872 | } |
873 | }; |
874 | |
875 | // Validates whether the reserved bitfields don't change the size. |
876 | static_assert(sizeof(__parser<char>) == 16); |
877 | # if _LIBCPP_HAS_WIDE_CHARACTERS |
878 | static_assert(sizeof(__parser<wchar_t>) == 16); |
879 | # endif |
880 | |
881 | _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) { |
882 | switch (__type) { |
883 | case __format_spec::__type::__default: |
884 | case __format_spec::__type::__string: |
885 | case __format_spec::__type::__debug: |
886 | break; |
887 | |
888 | default: |
889 | std::__throw_format_error("The type option contains an invalid value for a string formatting argument"); |
890 | } |
891 | } |
892 | |
893 | template <class _CharT> |
894 | _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser, const char* __id) { |
895 | __parser.__validate(__format_spec::__fields_bool, __id); |
896 | if (__parser.__alignment_ == __alignment::__default) |
897 | __parser.__alignment_ = __alignment::__left; |
898 | } |
899 | |
900 | template <class _CharT> |
901 | _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser, const char* __id) { |
902 | __format_spec::__process_display_type_bool_string(__parser, __id); |
903 | } |
904 | |
905 | template <class _CharT> |
906 | _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser, const char* __id) { |
907 | switch (__parser.__type_) { |
908 | case __format_spec::__type::__default: |
909 | case __format_spec::__type::__string: |
910 | __format_spec::__process_display_type_bool_string(__parser, __id); |
911 | break; |
912 | |
913 | case __format_spec::__type::__binary_lower_case: |
914 | case __format_spec::__type::__binary_upper_case: |
915 | case __format_spec::__type::__octal: |
916 | case __format_spec::__type::__decimal: |
917 | case __format_spec::__type::__hexadecimal_lower_case: |
918 | case __format_spec::__type::__hexadecimal_upper_case: |
919 | break; |
920 | |
921 | default: |
922 | __format_spec::__throw_invalid_type_format_error(__id); |
923 | } |
924 | } |
925 | |
926 | template <class _CharT> |
927 | _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser, const char* __id) { |
928 | switch (__parser.__type_) { |
929 | case __format_spec::__type::__default: |
930 | case __format_spec::__type::__char: |
931 | case __format_spec::__type::__debug: |
932 | __format_spec::__process_display_type_char(__parser, __id); |
933 | break; |
934 | |
935 | case __format_spec::__type::__binary_lower_case: |
936 | case __format_spec::__type::__binary_upper_case: |
937 | case __format_spec::__type::__octal: |
938 | case __format_spec::__type::__decimal: |
939 | case __format_spec::__type::__hexadecimal_lower_case: |
940 | case __format_spec::__type::__hexadecimal_upper_case: |
941 | break; |
942 | |
943 | default: |
944 | __format_spec::__throw_invalid_type_format_error(__id); |
945 | } |
946 | } |
947 | |
948 | template <class _CharT> |
949 | _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser, const char* __id) { |
950 | switch (__parser.__type_) { |
951 | case __format_spec::__type::__default: |
952 | case __format_spec::__type::__binary_lower_case: |
953 | case __format_spec::__type::__binary_upper_case: |
954 | case __format_spec::__type::__octal: |
955 | case __format_spec::__type::__decimal: |
956 | case __format_spec::__type::__hexadecimal_lower_case: |
957 | case __format_spec::__type::__hexadecimal_upper_case: |
958 | break; |
959 | |
960 | case __format_spec::__type::__char: |
961 | __format_spec::__process_display_type_char(__parser, __id); |
962 | break; |
963 | |
964 | default: |
965 | __format_spec::__throw_invalid_type_format_error(__id); |
966 | } |
967 | } |
968 | |
969 | template <class _CharT> |
970 | _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser, const char* __id) { |
971 | switch (__parser.__type_) { |
972 | case __format_spec::__type::__default: |
973 | case __format_spec::__type::__hexfloat_lower_case: |
974 | case __format_spec::__type::__hexfloat_upper_case: |
975 | // Precision specific behavior will be handled later. |
976 | break; |
977 | case __format_spec::__type::__scientific_lower_case: |
978 | case __format_spec::__type::__scientific_upper_case: |
979 | case __format_spec::__type::__fixed_lower_case: |
980 | case __format_spec::__type::__fixed_upper_case: |
981 | case __format_spec::__type::__general_lower_case: |
982 | case __format_spec::__type::__general_upper_case: |
983 | if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1) |
984 | // Set the default precision for the call to to_chars. |
985 | __parser.__precision_ = 6; |
986 | break; |
987 | |
988 | default: |
989 | __format_spec::__throw_invalid_type_format_error(__id); |
990 | } |
991 | } |
992 | |
993 | _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type, const char* __id) { |
994 | switch (__type) { |
995 | case __format_spec::__type::__default: |
996 | case __format_spec::__type::__pointer_lower_case: |
997 | case __format_spec::__type::__pointer_upper_case: |
998 | break; |
999 | |
1000 | default: |
1001 | __format_spec::__throw_invalid_type_format_error(__id); |
1002 | } |
1003 | } |
1004 | |
1005 | template <contiguous_iterator _Iterator> |
1006 | struct __column_width_result { |
1007 | /// The number of output columns. |
1008 | size_t __width_; |
1009 | /// One beyond the last code unit used in the estimation. |
1010 | /// |
1011 | /// This limits the original output to fit in the wanted number of columns. |
1012 | _Iterator __last_; |
1013 | }; |
1014 | |
1015 | template <contiguous_iterator _Iterator> |
1016 | __column_width_result(size_t, _Iterator) -> __column_width_result<_Iterator>; |
1017 | |
/// Selects what happens when the requested column width can't be met exactly.
///
/// A single element can occupy two columns, so the estimation may not be able
/// to stop exactly at the requested number of columns. The intended usage
/// determines which side of the boundary is wanted.
enum class __column_width_rounding {
  /// Used for a precision: the maximum may not be exceeded, so the result is
  /// "rounded down" to the previous boundary.
  __down,
  /// Used for a width: the estimation is done once the minimum is reached and
  /// exceeding it is not an issue. Rounding down would result in writing fill
  /// characters, so the result is "rounded up".
  __up
};
1028 | |
1029 | # if _LIBCPP_HAS_UNICODE |
1030 | |
1031 | namespace __detail { |
1032 | template <contiguous_iterator _Iterator> |
1033 | _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width_grapheme_clustering( |
1034 | _Iterator __first, _Iterator __last, size_t __maximum, __column_width_rounding __rounding) noexcept { |
1035 | using _CharT = iter_value_t<_Iterator>; |
1036 | __unicode::__extended_grapheme_cluster_view<_CharT> __view{__first, __last}; |
1037 | |
1038 | __column_width_result<_Iterator> __result{0, __first}; |
1039 | while (__result.__last_ != __last && __result.__width_ <= __maximum) { |
1040 | typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume(); |
1041 | int __width = __width_estimation_table::__estimated_width(__cluster.__code_point_); |
1042 | |
1043 | // When the next entry would exceed the maximum width the previous width |
1044 | // might be returned. For example when a width of 100 is requested the |
1045 | // returned width might be 99, since the next code point has an estimated |
1046 | // column width of 2. This depends on the rounding flag. |
1047 | // When the maximum is exceeded the loop will abort the next iteration. |
1048 | if (__rounding == __column_width_rounding::__down && __result.__width_ + __width > __maximum) |
1049 | return __result; |
1050 | |
1051 | __result.__width_ += __width; |
1052 | __result.__last_ = __cluster.__last_; |
1053 | } |
1054 | |
1055 | return __result; |
1056 | } |
1057 | |
1058 | } // namespace __detail |
1059 | |
1060 | // Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32. |
1061 | // Depending on format the relation between the number of code units stored and |
1062 | // the number of output columns differs. The first relation is the number of |
1063 | // code units forming a code point. (The text assumes the code units are |
1064 | // unsigned.) |
1065 | // - UTF-8: The number of code units is between one and four. The first 128 |
1066 | // Unicode code points match the ASCII character set. When the highest bit is |
1067 | // set it means the code point has more than one code unit. |
1068 | // - UTF-16: The number of code units is between 1 and 2. When the first |
1069 | // code unit is in the range [0xd800,0xdbff] it means the code point uses two |
1070 | // code units. |
1071 | // - UTF-32: The number of code units is always one. |
1072 | // |
1073 | // The code point to the number of columns is specified in |
1074 | // [format.string.std]/11. This list might change in the future. |
1075 | // |
1076 | // Another thing to be taken into account is Grapheme clustering. This means |
1077 | // that in some cases multiple code points are combined into one element in the |
1078 | // output. For example: |
1079 | // - an ASCII character with a combined diacritical mark |
1080 | // - an emoji with a skin tone modifier |
1081 | // - a group of combined people emoji to create a family |
1082 | // - a combination of flag emoji |
1083 | // |
1084 | // See also: |
1085 | // - [format.string.general]/11 |
1086 | // - https://en.wikipedia.org/wiki/UTF-8#Encoding |
1087 | // - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF |
1088 | |
1089 | _LIBCPP_HIDE_FROM_ABI constexpr bool __is_ascii(char32_t __c) { return __c < 0x80; } |
1090 | |
1091 | /// Determines the number of output columns needed to render the input. |
1092 | /// |
1093 | /// \note When the scanner encounters malformed Unicode it acts as-if every |
1094 | /// code unit is a one column code point. Typically a terminal uses the same |
1095 | /// strategy and replaces every malformed code unit with a one column |
1096 | /// replacement character. |
1097 | /// |
1098 | /// \param __first Points to the first element of the input range. |
1099 | /// \param __last Points beyond the last element of the input range. |
1100 | /// \param __maximum The maximum number of output columns. The returned number |
1101 | /// of estimated output columns will not exceed this value. |
1102 | /// \param __rounding Selects the rounding method. |
1103 | /// \c __down result.__width_ <= __maximum |
1104 | /// \c __up result.__width_ <= __maximum + 1 |
1105 | template <class _CharT, class _Iterator = typename basic_string_view<_CharT>::const_iterator> |
1106 | _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width( |
1107 | basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding __rounding) noexcept { |
1108 | // The width estimation is done in two steps: |
1109 | // - Quickly process for the ASCII part. ASCII has the following properties |
1110 | // - One code unit is one code point |
1111 | // - Every code point has an estimated width of one |
1112 | // - When needed it will a Unicode Grapheme clustering algorithm to find |
1113 | // the proper place for truncation. |
1114 | |
1115 | if (__str.empty() || __maximum == 0) |
1116 | return {0, __str.begin()}; |
1117 | |
1118 | // ASCII has one caveat; when an ASCII character is followed by a non-ASCII |
1119 | // character they might be part of an extended grapheme cluster. For example: |
1120 | // an ASCII letter and a COMBINING ACUTE ACCENT |
1121 | // The truncate should happen after the COMBINING ACUTE ACCENT. Therefore we |
1122 | // need to scan one code unit beyond the requested precision. When this code |
1123 | // unit is non-ASCII we omit the current code unit and let the Grapheme |
1124 | // clustering algorithm do its work. |
1125 | auto __it = __str.begin(); |
1126 | if (__format_spec::__is_ascii(*__it)) { |
1127 | do { |
1128 | --__maximum; |
1129 | ++__it; |
1130 | if (__it == __str.end()) |
1131 | return {__str.size(), __str.end()}; |
1132 | |
1133 | if (__maximum == 0) { |
1134 | if (__format_spec::__is_ascii(*__it)) |
1135 | return {static_cast<size_t>(__it - __str.begin()), __it}; |
1136 | |
1137 | break; |
1138 | } |
1139 | } while (__format_spec::__is_ascii(*__it)); |
1140 | --__it; |
1141 | ++__maximum; |
1142 | } |
1143 | |
1144 | ptrdiff_t __ascii_size = __it - __str.begin(); |
1145 | __column_width_result __result = |
1146 | __detail::__estimate_column_width_grapheme_clustering(__it, __str.end(), __maximum, __rounding); |
1147 | |
1148 | __result.__width_ += __ascii_size; |
1149 | return __result; |
1150 | } |
1151 | # else // _LIBCPP_HAS_UNICODE |
1152 | template <class _CharT> |
1153 | _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<typename basic_string_view<_CharT>::const_iterator> |
1154 | __estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding) noexcept { |
1155 | // When Unicode isn't supported assume ASCII and every code unit is one code |
1156 | // point. In ASCII the estimated column width is always one. Thus there's no |
1157 | // need for rounding. |
1158 | size_t __width = std::min(__str.size(), __maximum); |
1159 | return {__width, __str.begin() + __width}; |
1160 | } |
1161 | |
1162 | # endif // _LIBCPP_HAS_UNICODE |
1163 | |
1164 | } // namespace __format_spec |
1165 | |
1166 | #endif // _LIBCPP_STD_VER >= 20 |
1167 | |
1168 | _LIBCPP_END_NAMESPACE_STD |
1169 | |
1170 | _LIBCPP_POP_MACROS |
1171 | |
1172 | #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H |
1173 |
Warning: This file is not a C or C++ file. It does not have highlighting.