parser_std_format_spec.h source code [libcxx/include/__format/parser_std_format_spec.h]

Warning: This file is not a C or C++ file. It does not have highlighting.

1	// -*- C++ -*-
2	//===----------------------------------------------------------------------===//
3	//
4	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5	// See https://llvm.org/LICENSE.txt for license information.
6	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7	//
8	//===----------------------------------------------------------------------===//
9
10	#ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
11	#define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
12
13	/// \file Contains the std-format-spec parser.
14	///
15	/// Most of the code can be reused in the chrono-format-spec.
16	/// This header has some support for the chrono-format-spec since it doesn't
17	/// affect the std-format-spec.
18
19	#include <__algorithm/copy_n.h>
20	#include <__algorithm/min.h>
21	#include <__assert>
22	#include <__concepts/arithmetic.h>
23	#include <__concepts/same_as.h>
24	#include <__config>
25	#include <__format/format_arg.h>
26	#include <__format/format_error.h>
27	#include <__format/format_parse_context.h>
28	#include <__format/format_string.h>
29	#include <__format/unicode.h>
30	#include <__format/width_estimation_table.h>
31	#include <__iterator/concepts.h>
32	#include <__iterator/iterator_traits.h> // iter_value_t
33	#include <__memory/addressof.h>
34	#include <__type_traits/common_type.h>
35	#include <__type_traits/is_constant_evaluated.h>
36	#include <__type_traits/is_trivially_copyable.h>
37	#include <__variant/monostate.h>
38	#include <cstdint>
39	#include <string>
40	#include <string_view>
41
42	#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
43	# pragma GCC system_header
44	#endif
45
46	_LIBCPP_PUSH_MACROS
47	#include <__undef_macros>
48
49	_LIBCPP_BEGIN_NAMESPACE_STD
50
51	#if _LIBCPP_STD_VER >= 20
52
53	namespace __format_spec {
54
55	[[noreturn]] _LIBCPP_HIDE_FROM_ABI inline void
56	__throw_invalid_option_format_error(const char* __id, const char* __option) {
57	std::__throw_format_error(
58	(string("The format specifier for ") + __id + " does not allow the " + __option + " option").c_str());
59	}
60
61	[[noreturn]] _LIBCPP_HIDE_FROM_ABI inline void __throw_invalid_type_format_error(const char* __id) {
62	std::__throw_format_error(
63	(string("The type option contains an invalid value for ") + __id + " formatting argument").c_str());
64	}
65
66	template <contiguous_iterator _Iterator, class _ParseContext>
67	_LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result<_Iterator>
68	__parse_arg_id(_Iterator __begin, _Iterator __end, _ParseContext& __ctx) {
69	using _CharT = iter_value_t<_Iterator>;
70	// This function is a wrapper to call the real parser. But it does the
71	// validation for the pre-conditions and post-conditions.
72	if (__begin == __end)
73	std::__throw_format_error("End of input while parsing an argument index");
74
75	__format::__parse_number_result __r = __format::__parse_arg_id(__begin, __end, __ctx);
76
77	if (__r.__last == __end \|\| *__r.__last != _CharT('}'))
78	std::__throw_format_error("The argument index is invalid");
79
80	++__r.__last;
81	return __r;
82	}
83
84	template <class _Context>
85	_LIBCPP_HIDE_FROM_ABI constexpr uint32_t __substitute_arg_id(basic_format_arg<_Context> __format_arg) {
86	// [format.string.std]/8
87	// If the corresponding formatting argument is not of integral type...
88	// This wording allows char and bool too. LWG-3720 changes the wording to
89	// If the corresponding formatting argument is not of standard signed or
90	// unsigned integer type,
91	// This means the 128-bit will not be valid anymore.
92	// TODO FMT Verify this resolution is accepted and add a test to verify
93	// 128-bit integrals fail and switch to visit_format_arg.
94	return std::__visit_format_arg(
95	[](auto __arg) -> uint32_t {
96	using _Type = decltype(__arg);
97	if constexpr (same_as<_Type, monostate>)
98	std::__throw_format_error("The argument index value is too large for the number of arguments supplied");
99
100	// [format.string.std]/8
101	// If { arg-idopt } is used in a width or precision, the value of the
102	// corresponding formatting argument is used in its place. If the
103	// corresponding formatting argument is not of standard signed or unsigned
104	// integer type, or its value is negative for precision or non-positive for
105	// width, an exception of type format_error is thrown.
106	//
107	// When an integral is used in a format function, it is stored as one of
108	// the types checked below. Other integral types are promoted. For example,
109	// a signed char is stored as an int.
110	if constexpr (same_as<_Type, int> \|\| same_as<_Type, unsigned int> \|\| //
111	same_as<_Type, long long> \|\| same_as<_Type, unsigned long long>) {
112	if constexpr (signed_integral<_Type>) {
113	if (__arg < 0)
114	std::__throw_format_error("An argument index may not have a negative value");
115	}
116
117	using _CT = common_type_t<_Type, decltype(__format::__number_max)>;
118	if (static_cast<_CT>(__arg) > static_cast<_CT>(__format::__number_max))
119	std::__throw_format_error("The value of the argument index exceeds its maximum value");
120
121	return __arg;
122	} else
123	std::__throw_format_error("Replacement argument isn't a standard signed or unsigned integer type");
124	},
125	__format_arg);
126	}
127
128	/// These fields are a filter for which elements to parse.
129	///
130	/// They default to false so when a new field is added it needs to be opted in
131	/// explicitly.
132	struct _LIBCPP_HIDE_FROM_ABI __fields {
133	uint16_t __sign_ : 1 {false};
134	uint16_t __alternate_form_ : 1 {false};
135	uint16_t __zero_padding_ : 1 {false};
136	uint16_t __precision_ : 1 {false};
137	uint16_t __locale_specific_form_ : 1 {false};
138	uint16_t __type_ : 1 {false};
139	// Determines the valid values for fill.
140	//
141	// Originally the fill could be any character except { and }. Range-based
142	// formatters use the colon to mark the beginning of the
143	// underlying-format-spec. To avoid parsing ambiguities these formatter
144	// specializations prohibit the use of the colon as a fill character.
145	uint16_t __use_range_fill_ : 1 {false};
146	uint16_t __clear_brackets_ : 1 {false};
147	uint16_t __consume_all_ : 1 {false};
148	};
149
150	// By not placing this constant in the formatter class it's not duplicated for
151	// char and wchar_t.
152	inline constexpr __fields __fields_bool{.__locale_specific_form_ = true, .__type_ = true, .__consume_all_ = true};
153	inline constexpr __fields __fields_integral{
154	.__sign_ = true,
155	.__alternate_form_ = true,
156	.__zero_padding_ = true,
157	.__locale_specific_form_ = true,
158	.__type_ = true,
159	.__consume_all_ = true};
160	inline constexpr __fields __fields_floating_point{
161	.__sign_ = true,
162	.__alternate_form_ = true,
163	.__zero_padding_ = true,
164	.__precision_ = true,
165	.__locale_specific_form_ = true,
166	.__type_ = true,
167	.__consume_all_ = true};
168	inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true, .__consume_all_ = true};
169	inline constexpr __fields __fields_pointer{.__zero_padding_ = true, .__type_ = true, .__consume_all_ = true};
170
171	# if _LIBCPP_STD_VER >= 23
172	inline constexpr __fields __fields_tuple{.__use_range_fill_ = true, .__clear_brackets_ = true};
173	inline constexpr __fields __fields_range{.__use_range_fill_ = true, .__clear_brackets_ = true};
174	inline constexpr __fields __fields_fill_align_width{};
175	# endif
176
/// The alignment of the formatted output.
///
/// The values are stored in 3-bit bit-fields, so every enumerator needs to fit
/// in three bits.
enum class __alignment : uint8_t {
  /// No alignment is set in the format string.
  __default      = 0,
  __left         = 1,
  __center       = 2,
  __right        = 3,
  __zero_padding = 4
};
185
/// The sign option of the format-spec.
///
/// The values are stored in 2-bit bit-fields, so every enumerator needs to fit
/// in two bits.
enum class __sign : uint8_t {
  /// No sign is set in the format string.
  ///
  /// The sign isn't allowed for certain format-types. By using this value
  /// it's possible to detect whether or not the user explicitly set the sign
  /// flag. For formatting purposes it behaves the same as \ref __minus.
  __default = 0,
  __minus   = 1,
  __plus    = 2,
  __space   = 3
};
197
/// The display type selected by the type option of the format-spec.
///
/// The numeric value of an enumerator is used as the bit position in the
/// masks created by __create_type_mask, so the values need to stay below 32.
enum class __type : uint8_t {
  __default                = 0,
  __string                 = 1,
  __binary_lower_case      = 2,
  __binary_upper_case      = 3,
  __octal                  = 4,
  __decimal                = 5,
  __hexadecimal_lower_case = 6,
  __hexadecimal_upper_case = 7,
  __pointer_lower_case     = 8,
  __pointer_upper_case     = 9,
  __char                   = 10,
  __hexfloat_lower_case    = 11,
  __hexfloat_upper_case    = 12,
  __scientific_lower_case  = 13,
  __scientific_upper_case  = 14,
  __fixed_lower_case       = 15,
  __fixed_upper_case       = 16,
  __general_lower_case     = 17,
  __general_upper_case     = 18,
  __debug                  = 19
};
220
221	_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __create_type_mask(__type __t) {
222	uint32_t __shift = static_cast<uint32_t>(__t);
223	if (__shift == 0)
224	return 1;
225
226	if (__shift > 31)
227	std::__throw_format_error("The type does not fit in the mask");
228
229	return 1 << __shift;
230	}
231
232	inline constexpr uint32_t __type_mask_integer =
233	__create_type_mask(__type::__binary_lower_case) \| //
234	__create_type_mask(__type::__binary_upper_case) \| //
235	__create_type_mask(__type::__decimal) \| //
236	__create_type_mask(__type::__octal) \| //
237	__create_type_mask(__type::__hexadecimal_lower_case) \| //
238	__create_type_mask(__type::__hexadecimal_upper_case);
239
240	struct __std {
241	__alignment __alignment_ : 3;
242	__sign __sign_ : 2;
243	bool __alternate_form_ : 1;
244	bool __locale_specific_form_ : 1;
245	__type __type_;
246	};
247
248	struct __chrono {
249	__alignment __alignment_ : 3;
250	bool __locale_specific_form_ : 1;
251	bool __hour_ : 1;
252	bool __weekday_name_ : 1;
253	bool __weekday_ : 1;
254	bool __day_of_year_ : 1;
255	bool __week_of_year_ : 1;
256	bool __month_name_ : 1;
257	};
258
// Storage for the fill UCS scalar value.
//
// The data is always an array, with 1, 2, or 4 elements depending on the size
// of the character type. The size of the data structure is always 32-bits.
// The default fill is a space.
template <class _CharT>
struct __code_point;

template <>
struct __code_point<char> {
  char __data[4]{' ', '\0', '\0', '\0'};
};

#  if _LIBCPP_HAS_WIDE_CHARACTERS
template <>
struct __code_point<wchar_t> {
  wchar_t __data[4 / sizeof(wchar_t)]{L' '};
};
#  endif
277
278	/// Contains the parsed formatting specifications.
279	///
280	/// This contains information for both the std-format-spec and the
281	/// chrono-format-spec. This results in some unused members for both
282	/// specifications. However these unused members don't increase the size
283	/// of the structure.
284	///
285	/// This struct doesn't cross ABI boundaries so its layout doesn't need to be
286	/// kept stable.
287	template <class _CharT>
288	struct __parsed_specifications {
289	union {
290	// The field __alignment_ is the first element in __std_ and __chrono_.
291	// This allows the code to always inspect this value regards which member
292	// of the union is the active member [class.union.general]/2.
293	//
294	// This is needed since the generic output routines handle the alignment of
295	// the output.
296	__alignment __alignment_ : 3;
297	__std __std_;
298	__chrono __chrono_;
299	};
300
301	/// The requested width.
302	///
303	/// When the format-spec used an arg-id for this field it has already been
304	/// replaced with the value of that arg-id.
305	int32_t __width_;
306
307	/// The requested precision.
308	///
309	/// When the format-spec used an arg-id for this field it has already been
310	/// replaced with the value of that arg-id.
311	int32_t __precision_;
312
313	__code_point<_CharT> __fill_;
314
315	_LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; }
316
317	_LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; }
318	};
319
320	// Validate the struct is small and cheap to copy since the struct is passed by
321	// value in formatting functions.
322	static_assert(sizeof(__parsed_specifications<char>) == 16);
323	static_assert(is_trivially_copyable_v<__parsed_specifications<char>>);
324	# if _LIBCPP_HAS_WIDE_CHARACTERS
325	static_assert(sizeof(__parsed_specifications<wchar_t>) == 16);
326	static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>);
327	# endif
328
329	/// The parser for the std-format-spec.
330	///
331	/// Note this class is a member of std::formatter specializations. It's
332	/// expected developers will create their own formatter specializations that
333	/// inherit from the std::formatter specializations. This means this class
334	/// must be ABI stable. To aid the stability the unused bits in the class are
335	/// set to zero. That way they can be repurposed if a future revision of the
336	/// Standards adds new fields to std-format-spec.
337	template <class _CharT>
338	class __parser {
339	public:
340	// Parses the format specification.
341	//
342	// Depending on whether the parsing is done compile-time or run-time
343	// the method slightly differs.
344	// - Only parses a field when it is in the __fields. Accepting all
345	// fields and then validating the valid ones has a performance impact.
346	// This is faster but gives slighly worse error messages.
347	// - At compile-time when a field is not accepted the parser will still
348	// parse it and give an error when it's present. This gives a more
349	// accurate error.
350	// The idea is that most times the format instead of the vformat
351	// functions are used. In that case the error will be detected during
352	// compilation and there is no need to pay for the run-time overhead.
353	template <class _ParseContext>
354	_LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator __parse(_ParseContext& __ctx, __fields __fields) {
355	auto __begin = __ctx.begin();
356	auto __end = __ctx.end();
357	if (__begin == __end \|\| __begin == _CharT('}') \|\| (__fields.__use_range_fill_ && __begin == _CharT(':')))
358	return __begin;
359
360	if (__parse_fill_align(__begin, __end) && __begin == __end)
361	return __begin;
362
363	if (__fields.__sign_) {
364	if (__parse_sign(__begin) && __begin == __end)
365	return __begin;
366	} else if (std::is_constant_evaluated() && __parse_sign(__begin)) {
367	std::__throw_format_error("The format specification does not allow the sign option");
368	}
369
370	if (__fields.__alternate_form_) {
371	if (__parse_alternate_form(__begin) && __begin == __end)
372	return __begin;
373	} else if (std::is_constant_evaluated() && __parse_alternate_form(__begin)) {
374	std::__throw_format_error("The format specifier does not allow the alternate form option");
375	}
376
377	if (__fields.__zero_padding_) {
378	if (__parse_zero_padding(__begin) && __begin == __end)
379	return __begin;
380	} else if (std::is_constant_evaluated() && __parse_zero_padding(__begin)) {
381	std::__throw_format_error("The format specifier does not allow the zero-padding option");
382	}
383
384	if (__parse_width(__begin, __end, __ctx) && __begin == __end)
385	return __begin;
386
387	if (__fields.__precision_) {
388	if (__parse_precision(__begin, __end, __ctx) && __begin == __end)
389	return __begin;
390	} else if (std::is_constant_evaluated() && __parse_precision(__begin, __end, __ctx)) {
391	std::__throw_format_error("The format specifier does not allow the precision option");
392	}
393
394	if (__fields.__locale_specific_form_) {
395	if (__parse_locale_specific_form(__begin) && __begin == __end)
396	return __begin;
397	} else if (std::is_constant_evaluated() && __parse_locale_specific_form(__begin)) {
398	std::__throw_format_error("The format specifier does not allow the locale-specific form option");
399	}
400
401	if (__fields.__clear_brackets_) {
402	if (__parse_clear_brackets(__begin) && __begin == __end)
403	return __begin;
404	} else if (std::is_constant_evaluated() && __parse_clear_brackets(__begin)) {
405	std::__throw_format_error("The format specifier does not allow the n option");
406	}
407
408	if (__fields.__type_)
409	__parse_type(__begin);
410
411	if (!__fields.__consume_all_)
412	return __begin;
413
414	if (__begin != __end && *__begin != _CharT('}'))
415	std::__throw_format_error("The format specifier should consume the input or end with a '}'");
416
417	return __begin;
418	}
419
420	// Validates the selected the parsed data.
421	//
422	// The valid fields in the parser may depend on the display type
423	// selected. But the type is the last optional field, so by the time
424	// it's known an option can't be used, it already has been parsed.
425	// This does the validation again.
426	//
427	// For example an integral may have a sign, zero-padding, or alternate
428	// form when the type option is not 'c'. So the generic approach is:
429	//
430	// typename _ParseContext::iterator __result = __parser_.__parse(__ctx, __format_spec::__fields_integral);
431	// if (__parser.__type_ == __format_spec::__type::__char) {
432	// __parser.__validate((__format_spec::__fields_bool, "an integer");
433	// ... // more char adjustments
434	// } else {
435	// ... // validate an integral type.
436	// }
437	//
438	// For some types all valid options need a second validation run, like
439	// boolean types.
440	//
441	// Depending on whether the validation is done at compile-time or
442	// run-time the error differs
443	// - run-time the exception is thrown and contains the type of field
444	// being validated.
445	// - at compile-time the line with `std::__throw_format_error` is shown
446	// in the output. In that case it's important for the error to be on one
447	// line.
448	// Note future versions of C++ may allow better compile-time error
449	// reporting.
450	_LIBCPP_HIDE_FROM_ABI constexpr void
451	__validate(__fields __fields, const char* __id, uint32_t __type_mask = -1) const {
452	if (!__fields.__sign_ && __sign_ != __sign::__default) {
453	if (std::is_constant_evaluated())
454	std::__throw_format_error("The format specifier does not allow the sign option");
455	else
456	__format_spec::__throw_invalid_option_format_error(__id, "sign");
457	}
458
459	if (!__fields.__alternate_form_ && __alternate_form_) {
460	if (std::is_constant_evaluated())
461	std::__throw_format_error("The format specifier does not allow the alternate form option");
462	else
463	__format_spec::__throw_invalid_option_format_error(__id, "alternate form");
464	}
465
466	if (!__fields.__zero_padding_ && __alignment_ == __alignment::__zero_padding) {
467	if (std::is_constant_evaluated())
468	std::__throw_format_error("The format specifier does not allow the zero-padding option");
469	else
470	__format_spec::__throw_invalid_option_format_error(__id, "zero-padding");
471	}
472
473	if (!__fields.__precision_ && __precision_ != -1) { // Works both when the precision has a value or an arg-id.
474	if (std::is_constant_evaluated())
475	std::__throw_format_error("The format specifier does not allow the precision option");
476	else
477	__format_spec::__throw_invalid_option_format_error(__id, "precision");
478	}
479
480	if (!__fields.__locale_specific_form_ && __locale_specific_form_) {
481	if (std::is_constant_evaluated())
482	std::__throw_format_error("The format specifier does not allow the locale-specific form option");
483	else
484	__format_spec::__throw_invalid_option_format_error(__id, "locale-specific form");
485	}
486
487	if ((__create_type_mask(__type_) & __type_mask) == 0) {
488	if (std::is_constant_evaluated())
489	std::__throw_format_error("The format specifier uses an invalid value for the type option");
490	else
491	__format_spec::__throw_invalid_type_format_error(__id);
492	}
493	}
494
495	/// \returns the `__parsed_specifications` with the resolved dynamic sizes..
496	_LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const {
497	return __parsed_specifications<_CharT>{
498	.__std_ = __std{.__alignment_ = __alignment_,
499	.__sign_ = __sign_,
500	.__alternate_form_ = __alternate_form_,
501	.__locale_specific_form_ = __locale_specific_form_,
502	.__type_ = __type_},
503	.__width_{__get_width(__ctx)},
504	.__precision_{__get_precision(__ctx)},
505	.__fill_{__fill_}};
506	}
507
508	_LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_chrono_specifications(auto& __ctx) const {
509	return __parsed_specifications<_CharT>{
510	.__chrono_ =
511	__chrono{.__alignment_ = __alignment_,
512	.__locale_specific_form_ = __locale_specific_form_,
513	.__hour_ = __hour_,
514	.__weekday_name_ = __weekday_name_,
515	.__weekday_ = __weekday_,
516	.__day_of_year_ = __day_of_year_,
517	.__week_of_year_ = __week_of_year_,
518	.__month_name_ = __month_name_},
519	.__width_{__get_width(__ctx)},
520	.__precision_{__get_precision(__ctx)},
521	.__fill_{__fill_}};
522	}
523
524	__alignment __alignment_ : 3 {__alignment::__default};
525	__sign __sign_ : 2 {__sign::__default};
526	bool __alternate_form_ : 1 {false};
527	bool __locale_specific_form_ : 1 {false};
528	bool __clear_brackets_ : 1 {false};
529	__type __type_{__type::__default};
530
531	// These flags are only used for formatting chrono. Since the struct has
532	// padding space left it's added to this structure.
533	bool __hour_ : 1 {false};
534
535	bool __weekday_name_ : 1 {false};
536	bool __weekday_ : 1 {false};
537
538	bool __day_of_year_ : 1 {false};
539	bool __week_of_year_ : 1 {false};
540
541	bool __month_name_ : 1 {false};
542
543	uint8_t __reserved_0_ : 2 {0};
544	uint8_t __reserved_1_ : 6 {0};
545	// These two flags are only used internally and not part of the
546	// __parsed_specifications. Therefore put them at the end.
547	bool __width_as_arg_ : 1 {false};
548	bool __precision_as_arg_ : 1 {false};
549
550	/// The requested width, either the value or the arg-id.
551	int32_t __width_{0};
552
553	/// The requested precision, either the value or the arg-id.
554	int32_t __precision_{-1};
555
556	__code_point<_CharT> __fill_{};
557
558	private:
559	_LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) {
560	switch (__c) {
561	case _CharT('<'):
562	__alignment_ = __alignment::__left;
563	return true;
564
565	case _CharT('^'):
566	__alignment_ = __alignment::__center;
567	return true;
568
569	case _CharT('>'):
570	__alignment_ = __alignment::__right;
571	return true;
572	}
573	return false;
574	}
575
576	_LIBCPP_HIDE_FROM_ABI constexpr void __validate_fill_character(_CharT __fill) {
577	// The forbidden fill characters all code points formed from a single code unit, thus the
578	// check can be omitted when more code units are used.
579	if (__fill == _CharT('{'))
580	std::__throw_format_error("The fill option contains an invalid value");
581	}
582
583	# if _LIBCPP_HAS_UNICODE
584	// range-fill and tuple-fill are identical
585	template <contiguous_iterator _Iterator>
586	requires same_as<_CharT, char>
587	# if _LIBCPP_HAS_WIDE_CHARACTERS
588	\|\| (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2)
589	# endif
590	_LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) {
591	_LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
592	__begin != __end,
593	"when called with an empty input the function will cause "
594	"undefined behavior by evaluating data not in the input");
595	__unicode::__code_point_view<_CharT> __view{__begin, __end};
596	__unicode::__consume_result __consumed = __view.__consume();
597	if (__consumed.__status != __unicode::__consume_result::__ok)
598	std::__throw_format_error("The format specifier contains malformed Unicode characters");
599
600	if (__view.__position() < __end && __parse_alignment(*__view.__position())) {
601	ptrdiff_t __code_units = __view.__position() - __begin;
602	if (__code_units == 1)
603	// The forbidden fill characters all are code points encoded
604	// in one code unit, thus the check can be omitted when more
605	// code units are used.
606	__validate_fill_character(*__begin);
607
608	std::copy_n(__begin, __code_units, std::addressof(__fill_.__data[0]));
609	__begin += __code_units + 1;
610	return true;
611	}
612
613	if (!__parse_alignment(*__begin))
614	return false;
615
616	++__begin;
617	return true;
618	}
619
620	# if _LIBCPP_HAS_WIDE_CHARACTERS
621	template <contiguous_iterator _Iterator>
622	requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4)
623	_LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) {
624	_LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
625	__begin != __end,
626	"when called with an empty input the function will cause "
627	"undefined behavior by evaluating data not in the input");
628	if (__begin + 1 != __end && __parse_alignment(*(__begin + 1))) {
629	if (!__unicode::__is_scalar_value(*__begin))
630	std::__throw_format_error("The fill option contains an invalid value");
631
632	__validate_fill_character(*__begin);
633
634	__fill_.__data[0] = *__begin;
635	__begin += 2;
636	return true;
637	}
638
639	if (!__parse_alignment(*__begin))
640	return false;
641
642	++__begin;
643	return true;
644	}
645
646	# endif // _LIBCPP_HAS_WIDE_CHARACTERS
647
648	# else // _LIBCPP_HAS_UNICODE
649	// range-fill and tuple-fill are identical
650	template <contiguous_iterator _Iterator>
651	_LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) {
652	_LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(
653	__begin != __end,
654	"when called with an empty input the function will cause "
655	"undefined behavior by evaluating data not in the input");
656	if (__begin + 1 != __end) {
657	if (__parse_alignment(*(__begin + 1))) {
658	__validate_fill_character(*__begin);
659
660	__fill_.__data[0] = *__begin;
661	__begin += 2;
662	return true;
663	}
664	}
665
666	if (!__parse_alignment(*__begin))
667	return false;
668
669	++__begin;
670	return true;
671	}
672
673	# endif // _LIBCPP_HAS_UNICODE
674
675	template <contiguous_iterator _Iterator>
676	_LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(_Iterator& __begin) {
677	switch (*__begin) {
678	case _CharT('-'):
679	__sign_ = __sign::__minus;
680	break;
681	case _CharT('+'):
682	__sign_ = __sign::__plus;
683	break;
684	case _CharT(' '):
685	__sign_ = __sign::__space;
686	break;
687	default:
688	return false;
689	}
690	++__begin;
691	return true;
692	}
693
694	template <contiguous_iterator _Iterator>
695	_LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(_Iterator& __begin) {
696	if (*__begin != _CharT('#'))
697	return false;
698
699	__alternate_form_ = true;
700	++__begin;
701	return true;
702	}
703
704	template <contiguous_iterator _Iterator>
705	_LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(_Iterator& __begin) {
706	if (*__begin != _CharT('0'))
707	return false;
708
709	if (__alignment_ == __alignment::__default)
710	__alignment_ = __alignment::__zero_padding;
711	++__begin;
712	return true;
713	}
714
715	template <contiguous_iterator _Iterator>
716	_LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(_Iterator& __begin, _Iterator __end, auto& __ctx) {
717	if (*__begin == _CharT('0'))
718	std::__throw_format_error("The width option should not have a leading zero");
719
720	if (*__begin == _CharT('{')) {
721	__format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __ctx);
722	__width_as_arg_ = true;
723	__width_ = __r.__value;
724	__begin = __r.__last;
725	return true;
726	}
727
728	if (__begin < _CharT('0') \|\| __begin > _CharT('9'))
729	return false;
730
731	__format::__parse_number_result __r = __format::__parse_number(__begin, __end);
732	__width_ = __r.__value;
733	_LIBCPP_ASSERT_INTERNAL(__width_ != 0,
734	"A zero value isn't allowed and should be impossible, "
735	"due to validations in this function");
736	__begin = __r.__last;
737	return true;
738	}
739
740	template <contiguous_iterator _Iterator>
741	_LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(_Iterator& __begin, _Iterator __end, auto& __ctx) {
742	if (*__begin != _CharT('.'))
743	return false;
744
745	++__begin;
746	if (__begin == __end)
747	std::__throw_format_error("End of input while parsing format specifier precision");
748
749	if (*__begin == _CharT('{')) {
750	__format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __ctx);
751	__precision_as_arg_ = true;
752	__precision_ = __arg_id.__value;
753	__begin = __arg_id.__last;
754	return true;
755	}
756
757	if (__begin < _CharT('0') \|\| __begin > _CharT('9'))
758	std::__throw_format_error("The precision option does not contain a value or an argument index");
759
760	__format::__parse_number_result __r = __format::__parse_number(__begin, __end);
761	__precision_ = __r.__value;
762	__precision_as_arg_ = false;
763	__begin = __r.__last;
764	return true;
765	}
766
767	template <contiguous_iterator _Iterator>
768	_LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(_Iterator& __begin) {
769	if (*__begin != _CharT('L'))
770	return false;
771
772	__locale_specific_form_ = true;
773	++__begin;
774	return true;
775	}
776
777	template <contiguous_iterator _Iterator>
778	_LIBCPP_HIDE_FROM_ABI constexpr bool __parse_clear_brackets(_Iterator& __begin) {
779	if (*__begin != _CharT('n'))
780	return false;
781
782	__clear_brackets_ = true;
783	++__begin;
784	return true;
785	}
786
787	template <contiguous_iterator _Iterator>
788	_LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(_Iterator& __begin) {
789	// Determines the type. It does not validate whether the selected type is
790	// valid. Most formatters have optional fields that are only allowed for
791	// certain types. These parsers need to do validation after the type has
792	// been parsed. So its easier to implement the validation for all types in
793	// the specific parse function.
794	switch (*__begin) {
795	case 'A':
796	__type_ = __type::__hexfloat_upper_case;
797	break;
798	case 'B':
799	__type_ = __type::__binary_upper_case;
800	break;
801	case 'E':
802	__type_ = __type::__scientific_upper_case;
803	break;
804	case 'F':
805	__type_ = __type::__fixed_upper_case;
806	break;
807	case 'G':
808	__type_ = __type::__general_upper_case;
809	break;
810	case 'X':
811	__type_ = __type::__hexadecimal_upper_case;
812	break;
813	case 'a':
814	__type_ = __type::__hexfloat_lower_case;
815	break;
816	case 'b':
817	__type_ = __type::__binary_lower_case;
818	break;
819	case 'c':
820	__type_ = __type::__char;
821	break;
822	case 'd':
823	__type_ = __type::__decimal;
824	break;
825	case 'e':
826	__type_ = __type::__scientific_lower_case;
827	break;
828	case 'f':
829	__type_ = __type::__fixed_lower_case;
830	break;
831	case 'g':
832	__type_ = __type::__general_lower_case;
833	break;
834	case 'o':
835	__type_ = __type::__octal;
836	break;
837	case 'p':
838	__type_ = __type::__pointer_lower_case;
839	break;
840	case 'P':
841	__type_ = __type::__pointer_upper_case;
842	break;
843	case 's':
844	__type_ = __type::__string;
845	break;
846	case 'x':
847	__type_ = __type::__hexadecimal_lower_case;
848	break;
849	# if _LIBCPP_STD_VER >= 23
850	case '?':
851	__type_ = __type::__debug;
852	break;
853	# endif
854	default:
855	return;
856	}
857	++__begin;
858	}
859
860	_LIBCPP_HIDE_FROM_ABI int32_t __get_width(auto& __ctx) const {
861	if (!__width_as_arg_)
862	return __width_;
863
864	return __format_spec::__substitute_arg_id(__ctx.arg(__width_));
865	}
866
867	_LIBCPP_HIDE_FROM_ABI int32_t __get_precision(auto& __ctx) const {
868	if (!__precision_as_arg_)
869	return __precision_;
870
871	return __format_spec::__substitute_arg_id(__ctx.arg(__precision_));
872	}
873	};
874
875	// Validates whether the reserved bitfields don't change the size.
876	static_assert(sizeof(__parser<char>) == 16);
877	# if _LIBCPP_HAS_WIDE_CHARACTERS
878	static_assert(sizeof(__parser<wchar_t>) == 16);
879	# endif
880
881	_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) {
882	switch (__type) {
883	case __format_spec::__type::__default:
884	case __format_spec::__type::__string:
885	case __format_spec::__type::__debug:
886	break;
887
888	default:
889	std::__throw_format_error("The type option contains an invalid value for a string formatting argument");
890	}
891	}
892
893	template <class _CharT>
894	_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser, const char* __id) {
895	__parser.__validate(__format_spec::__fields_bool, __id);
896	if (__parser.__alignment_ == __alignment::__default)
897	__parser.__alignment_ = __alignment::__left;
898	}
899
900	template <class _CharT>
901	_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser, const char* __id) {
902	__format_spec::__process_display_type_bool_string(__parser, __id);
903	}
904
905	template <class _CharT>
906	_LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser, const char* __id) {
907	switch (__parser.__type_) {
908	case __format_spec::__type::__default:
909	case __format_spec::__type::__string:
910	__format_spec::__process_display_type_bool_string(__parser, __id);
911	break;
912
913	case __format_spec::__type::__binary_lower_case:
914	case __format_spec::__type::__binary_upper_case:
915	case __format_spec::__type::__octal:
916	case __format_spec::__type::__decimal:
917	case __format_spec::__type::__hexadecimal_lower_case:
918	case __format_spec::__type::__hexadecimal_upper_case:
919	break;
920
921	default:
922	__format_spec::__throw_invalid_type_format_error(__id);
923	}
924	}
925
926	template <class _CharT>
927	_LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser, const char* __id) {
928	switch (__parser.__type_) {
929	case __format_spec::__type::__default:
930	case __format_spec::__type::__char:
931	case __format_spec::__type::__debug:
932	__format_spec::__process_display_type_char(__parser, __id);
933	break;
934
935	case __format_spec::__type::__binary_lower_case:
936	case __format_spec::__type::__binary_upper_case:
937	case __format_spec::__type::__octal:
938	case __format_spec::__type::__decimal:
939	case __format_spec::__type::__hexadecimal_lower_case:
940	case __format_spec::__type::__hexadecimal_upper_case:
941	break;
942
943	default:
944	__format_spec::__throw_invalid_type_format_error(__id);
945	}
946	}
947
948	template <class _CharT>
949	_LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser, const char* __id) {
950	switch (__parser.__type_) {
951	case __format_spec::__type::__default:
952	case __format_spec::__type::__binary_lower_case:
953	case __format_spec::__type::__binary_upper_case:
954	case __format_spec::__type::__octal:
955	case __format_spec::__type::__decimal:
956	case __format_spec::__type::__hexadecimal_lower_case:
957	case __format_spec::__type::__hexadecimal_upper_case:
958	break;
959
960	case __format_spec::__type::__char:
961	__format_spec::__process_display_type_char(__parser, __id);
962	break;
963
964	default:
965	__format_spec::__throw_invalid_type_format_error(__id);
966	}
967	}
968
969	template <class _CharT>
970	_LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser, const char* __id) {
971	switch (__parser.__type_) {
972	case __format_spec::__type::__default:
973	case __format_spec::__type::__hexfloat_lower_case:
974	case __format_spec::__type::__hexfloat_upper_case:
975	// Precision specific behavior will be handled later.
976	break;
977	case __format_spec::__type::__scientific_lower_case:
978	case __format_spec::__type::__scientific_upper_case:
979	case __format_spec::__type::__fixed_lower_case:
980	case __format_spec::__type::__fixed_upper_case:
981	case __format_spec::__type::__general_lower_case:
982	case __format_spec::__type::__general_upper_case:
983	if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1)
984	// Set the default precision for the call to to_chars.
985	__parser.__precision_ = 6;
986	break;
987
988	default:
989	__format_spec::__throw_invalid_type_format_error(__id);
990	}
991	}
992
993	_LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type, const char* __id) {
994	switch (__type) {
995	case __format_spec::__type::__default:
996	case __format_spec::__type::__pointer_lower_case:
997	case __format_spec::__type::__pointer_upper_case:
998	break;
999
1000	default:
1001	__format_spec::__throw_invalid_type_format_error(__id);
1002	}
1003	}
1004
/// The result of a column width estimation: the estimated width and the
/// position in the input where the estimation stopped.
1005	template <contiguous_iterator _Iterator>
1006	struct __column_width_result {
1007	/// The estimated number of output columns.
1008	size_t __width_;
1009	/// One beyond the last code unit used in the estimation.
1010	///
1011	/// This limits the original output to fit in the wanted number of columns.
1012	_Iterator __last_;
1013	};
1014
/// Deduction guide: the iterator type of the result is deduced from the second
/// initializer.
1015	template <contiguous_iterator _Iterator>
1016	__column_width_result(size_t, _Iterator) -> __column_width_result<_Iterator>;
1017
1018	/// Rounding policy for the column width estimation. Since a column width can
1019	/// be two it's possible that the requested column width can't be achieved.
1020	/// Depending on the intended usage the policy can be selected.
1021	/// - \c __down: When used as precision the maximum width may not be exceeded
1022	/// and the result should be "rounded down" to the previous boundary.
1023	/// - \c __up: When used as a width we're done once the minimum is reached, but
1024	/// exceeding is not an issue. Rounding down is an issue since that will
1025	/// result in writing fill characters. Therefore the result needs to be
1026	/// "rounded up".
1027	enum class __column_width_rounding { __down, __up };
1028
1029	# if _LIBCPP_HAS_UNICODE
1030
1031	namespace __detail {
1032	template <contiguous_iterator _Iterator>
1033	_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width_grapheme_clustering(
1034	_Iterator __first, _Iterator __last, size_t __maximum, __column_width_rounding __rounding) noexcept {
1035	using _CharT = iter_value_t<_Iterator>;
1036	__unicode::__extended_grapheme_cluster_view<_CharT> __view{__first, __last};
1037
1038	__column_width_result<_Iterator> __result{0, __first};
1039	while (__result.__last_ != __last && __result.__width_ <= __maximum) {
1040	typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume();
1041	int __width = __width_estimation_table::__estimated_width(__cluster.__code_point_);
1042
1043	// When the next entry would exceed the maximum width the previous width
1044	// might be returned. For example when a width of 100 is requested the
1045	// returned width might be 99, since the next code point has an estimated
1046	// column width of 2. This depends on the rounding flag.
1047	// When the maximum is exceeded the loop will abort the next iteration.
1048	if (__rounding == __column_width_rounding::__down && __result.__width_ + __width > __maximum)
1049	return __result;
1050
1051	__result.__width_ += __width;
1052	__result.__last_ = __cluster.__last_;
1053	}
1054
1055	return __result;
1056	}
1057
1058	} // namespace __detail
1059
1060	// Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32.
1061	// Depending on format the relation between the number of code units stored and
1062	// the number of output columns differs. The first relation is the number of
1063	// code units forming a code point. (The text assumes the code units are
1064	// unsigned.)
1065	// - UTF-8: The number of code units is between one and four. The first 128
1066	// Unicode code points match the ASCII character set. When the highest bit is
1067	// set it means the code point has more than one code unit.
1068	// - UTF-16: The number of code units is between 1 and 2. When the first
1069	// code unit is in the range [0xd800,0xdbff] it means the code point uses two
1070	// code units.
1071	// - UTF-32: The number of code units is always one.
1072	//
1073	// The code point to the number of columns is specified in
1074	// [format.string.std]/11. This list might change in the future.
1075	//
1076	// Another thing to be taken into account is Grapheme clustering. This means
1077	// that in some cases multiple code points are combined into one element in the
1078	// output. For example:
1079	// - an ASCII character with a combined diacritical mark
1080	// - an emoji with a skin tone modifier
1081	// - a group of combined people emoji to create a family
1082	// - a combination of flag emoji
1083	//
1084	// See also:
1085	// - [format.string.general]/11
1086	// - https://en.wikipedia.org/wiki/UTF-8#Encoding
1087	// - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
1088
1089	_LIBCPP_HIDE_FROM_ABI constexpr bool __is_ascii(char32_t __c) { return __c < 0x80; }
1090
1091	/// Determines the number of output columns needed to render the input.
1092	///
1093	/// \note When the scanner encounters malformed Unicode it acts as-if every
1094	/// code unit is a one column code point. Typically a terminal uses the same
1095	/// strategy and replaces every malformed code unit with a one column
1096	/// replacement character.
1097	///
1098	/// \param __first Points to the first element of the input range.
1099	/// \param __last Points beyond the last element of the input range.
1100	/// \param __maximum The maximum number of output columns. The returned number
1101	/// of estimated output columns will not exceed this value.
1102	/// \param __rounding Selects the rounding method.
1103	/// \c __down result.__width_ <= __maximum
1104	/// \c __up result.__width_ <= __maximum + 1
1105	template <class _CharT, class _Iterator = typename basic_string_view<_CharT>::const_iterator>
1106	_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width(
1107	basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding __rounding) noexcept {
1108	// The width estimation is done in two steps:
1109	// - Quickly process for the ASCII part. ASCII has the following properties
1110	// - One code unit is one code point
1111	// - Every code point has an estimated width of one
1112	// - When needed it will a Unicode Grapheme clustering algorithm to find
1113	// the proper place for truncation.
1114
1115	if (__str.empty() \|\| __maximum == 0)
1116	return {0, __str.begin()};
1117
1118	// ASCII has one caveat; when an ASCII character is followed by a non-ASCII
1119	// character they might be part of an extended grapheme cluster. For example:
1120	// an ASCII letter and a COMBINING ACUTE ACCENT
1121	// The truncate should happen after the COMBINING ACUTE ACCENT. Therefore we
1122	// need to scan one code unit beyond the requested precision. When this code
1123	// unit is non-ASCII we omit the current code unit and let the Grapheme
1124	// clustering algorithm do its work.
1125	auto __it = __str.begin();
1126	if (__format_spec::__is_ascii(*__it)) {
1127	do {
1128	--__maximum;
1129	++__it;
1130	if (__it == __str.end())
1131	return {__str.size(), __str.end()};
1132
1133	if (__maximum == 0) {
1134	if (__format_spec::__is_ascii(*__it))
1135	return {static_cast<size_t>(__it - __str.begin()), __it};
1136
1137	break;
1138	}
1139	} while (__format_spec::__is_ascii(*__it));
1140	--__it;
1141	++__maximum;
1142	}
1143
1144	ptrdiff_t __ascii_size = __it - __str.begin();
1145	__column_width_result __result =
1146	__detail::__estimate_column_width_grapheme_clustering(__it, __str.end(), __maximum, __rounding);
1147
1148	__result.__width_ += __ascii_size;
1149	return __result;
1150	}
1151	# else // _LIBCPP_HAS_UNICODE
1152	template <class _CharT>
1153	_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<typename basic_string_view<_CharT>::const_iterator>
1154	__estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding) noexcept {
1155	// When Unicode isn't supported assume ASCII and every code unit is one code
1156	// point. In ASCII the estimated column width is always one. Thus there's no
1157	// need for rounding.
1158	size_t __width = std::min(__str.size(), __maximum);
1159	return {__width, __str.begin() + __width};
1160	}
1161
1162	# endif // _LIBCPP_HAS_UNICODE
1163
1164	} // namespace __format_spec
1165
1166	#endif // _LIBCPP_STD_VER >= 20
1167
1168	_LIBCPP_END_NAMESPACE_STD
1169
1170	_LIBCPP_POP_MACROS
1171
1172	#endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
1173

Warning: This file is not a C or C++ file. It does not have highlighting.

source code of libcxx/include/__format/parser_std_format_spec.h