codecvt_unicode.pass.cpp source code [libcxx/test/std/localization/codecvt_unicode.pass.cpp]

1	//===----------------------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT
10
11	// Requires the fix in 390840f.
12	// XFAIL: using-built-library-before-llvm-18
13
14	#include <algorithm>
15	#include <cassert>
16	#include <codecvt>
17	#include <locale>
18
19	#include "test_macros.h"
20
// One test case for a conversion that is expected to succeed completely.
struct test_offsets_ok {
  size_t in_size;  // number of input elements handed to the conversion
  size_t out_size; // number of output elements expected (also used as the output capacity)
};
// One test case for a conversion that is expected to stop early with
// codecvt_base::partial.
struct test_offsets_partial {
  size_t in_size;           // number of input elements handed to the conversion
  size_t out_size;          // capacity of the output range
  size_t expected_in_next;  // offset from the input start where the conversion must stop
  size_t expected_out_next; // offset from the output start where writing must stop
};
31
// One test case for a conversion that is expected to fail with
// codecvt_base::error after one input element has been overwritten.
// CharT is the type of the replacement element.
template <class CharT>
struct test_offsets_error {
  size_t in_size;           // number of input elements handed to the conversion
  size_t out_size;          // capacity of the output range
  size_t expected_in_next;  // offset from the input start where the conversion must stop
  size_t expected_out_next; // offset from the output start where writing must stop
  CharT replace_char;       // value written over the input element at replace_pos
  size_t replace_pos;       // index of the input element that gets overwritten
};
41
// Number of elements in the built-in array `x`. Usable in constant
// expressions (array bounds, static_assert). Must not be given a pointer.
#define array_size(x) (sizeof(x) / sizeof((x)[0]))
43
// Names used unqualified throughout the tests below.
using std::begin;
using std::char_traits;
using std::codecvt_base;
using std::copy;
using std::end;
49
50	template <class InternT, class ExternT>
51	void utf8_to_utf32_in_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
52	// UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
53	const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
54	const char32_t expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0x10AAAA`, `0`};
55	static_assert(array_size(input) == `11`, "");
56	static_assert(array_size(expected) == `5`, "");
57
58	ExternT in[array_size(input)];
59	InternT exp[array_size(expected)];
60	copy(begin(input), end(input), begin(in));
61	copy(begin(expected), end(expected), begin(exp));
62	assert(char_traits<ExternT>::length(in) == `10`);
63	assert(char_traits<InternT>::length(exp) == `4`);
64	test_offsets_ok offsets[] = {{.in_size: `0`, .out_size: `0`}, {.in_size: `1`, .out_size: `1`}, {.in_size: `3`, .out_size: `2`}, {.in_size: `6`, .out_size: `3`}, {.in_size: `10`, .out_size: `4`}};
65	for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
66	test_offsets_ok t = *it;
67	InternT out[array_size(exp) - `1`] = {};
68	assert(t.in_size <= array_size(in));
69	assert(t.out_size <= array_size(out));
70	mbstate_t state = {};
71	const ExternT* in_next = nullptr;
72	InternT* out_next = nullptr;
73	codecvt_base::result res = codecvt_base::ok;
74
75	res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
76	assert(res == cvt.ok);
77	assert(in_next == in + t.in_size);
78	assert(out_next == out + t.out_size);
79	assert(char_traits<InternT>::compare(out, exp, t.out_size) == `0`);
80	if (t.out_size < array_size(out))
81	assert(out[t.out_size] == `0`);
82
83	state = mbstate_t ();
84	int len = cvt.length(state, in, in + t.in_size, t.out_size);
85	assert(len >= `0`);
86	assert(static_cast<size_t>(len) == t.in_size);
87	}
88
89	for (test_offsets_ok* it = begin(offsets); it != end(offsets); ++it) {
90	test_offsets_ok t = *it;
91	InternT out[array_size(exp)] = {};
92	assert(t.in_size <= array_size(in));
93	assert(t.out_size <= array_size(out));
94	mbstate_t state = {};
95	const ExternT* in_next = nullptr;
96	InternT* out_next = nullptr;
97	codecvt_base::result res = codecvt_base::ok;
98
99	res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
100	assert(res == cvt.ok);
101	assert(in_next == in + t.in_size);
102	assert(out_next == out + t.out_size);
103	assert(char_traits<InternT>::compare(out, exp, t.out_size) == `0`);
104	if (t.out_size < array_size(out))
105	assert(out[t.out_size] == `0`);
106
107	state = mbstate_t ();
108	int len = cvt.length(state, in, in + t.in_size, array_size(out));
109	assert(len >= `0`);
110	assert(static_cast<size_t>(len) == t.in_size);
111	}
112	}
113
114	template <class InternT, class ExternT>
115	void utf8_to_utf32_in_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
116	// UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
117	const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
118	const char32_t expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0x10AAAA`, `0`};
119	static_assert(array_size(input) == `11`, "");
120	static_assert(array_size(expected) == `5`, "");
121
122	ExternT in[array_size(input)];
123	InternT exp[array_size(expected)];
124	copy(begin(arr: input), end(arr: input), begin(in));
125	copy(begin(arr: expected), end(arr: expected), begin(exp));
126	assert(char_traits<ExternT>::length(in) == `10`);
127	assert(char_traits<InternT>::length(exp) == `4`);
128
129	test_offsets_partial offsets[] = {
130	{.in_size: `1`, .out_size: `0`, .expected_in_next: `0`, .expected_out_next: `0`}, // no space for first CP
131
132	{.in_size: `3`, .out_size: `1`, .expected_in_next: `1`, .expected_out_next: `1`}, // no space for second CP
133	{.in_size: `2`, .out_size: `2`, .expected_in_next: `1`, .expected_out_next: `1`}, // incomplete second CP
134	{.in_size: `2`, .out_size: `1`, .expected_in_next: `1`, .expected_out_next: `1`}, // incomplete second CP, and no space for it
135
136	{.in_size: `6`, .out_size: `2`, .expected_in_next: `3`, .expected_out_next: `2`}, // no space for third CP
137	{.in_size: `4`, .out_size: `3`, .expected_in_next: `3`, .expected_out_next: `2`}, // incomplete third CP
138	{.in_size: `5`, .out_size: `3`, .expected_in_next: `3`, .expected_out_next: `2`}, // incomplete third CP
139	{.in_size: `4`, .out_size: `2`, .expected_in_next: `3`, .expected_out_next: `2`}, // incomplete third CP, and no space for it
140	{.in_size: `5`, .out_size: `2`, .expected_in_next: `3`, .expected_out_next: `2`}, // incomplete third CP, and no space for it
141
142	{.in_size: `10`, .out_size: `3`, .expected_in_next: `6`, .expected_out_next: `3`}, // no space for fourth CP
143	{.in_size: `7`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP
144	{.in_size: `8`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP
145	{.in_size: `9`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP
146	{.in_size: `7`, .out_size: `3`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP, and no space for it
147	{.in_size: `8`, .out_size: `3`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP, and no space for it
148	{.in_size: `9`, .out_size: `3`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP, and no space for it
149	};
150
151	for (test_offsets_partial* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
152	test_offsets_partial t = *it;
153	InternT out[array_size(exp) - `1`] = {};
154	assert(t.in_size <= array_size(in));
155	assert(t.out_size <= array_size(out));
156	assert(t.expected_in_next <= t.in_size);
157	assert(t.expected_out_next <= t.out_size);
158	mbstate_t state = {};
159	const ExternT* in_next = nullptr;
160	InternT* out_next = nullptr;
161	codecvt_base::result res = codecvt_base::ok;
162
163	res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
164	assert(res == cvt.partial);
165	assert(in_next == in + t.expected_in_next);
166	assert(out_next == out + t.expected_out_next);
167	assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == `0`);
168	if (t.expected_out_next < array_size(out))
169	assert(out[t.expected_out_next] == `0`);
170
171	state = mbstate_t ();
172	int len = cvt.length(state, in, in + t.in_size, t.out_size);
173	assert(len >= `0`);
174	assert(static_cast<size_t>(len) == t.expected_in_next);
175	}
176	}
177
178	template <class InternT, class ExternT>
179	void utf8_to_utf32_in_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
180	// UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP, 4-byte CP
181	const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
182	const char32_t expected[] = {`'b'`, `0x0448`, `0xD700`, `0x10AAAA`, `0`};
183	static_assert(array_size(input) == `11`, "");
184	static_assert(array_size(expected) == `5`, "");
185
186	ExternT in[array_size(input)];
187	InternT exp[array_size(expected)];
188	copy(begin(arr: input), end(arr: input), begin(in));
189	copy(begin(arr: expected), end(arr: expected), begin(exp));
190	assert(char_traits<ExternT>::length(in) == `10`);
191	assert(char_traits<InternT>::length(exp) == `4`);
192
193	// There are 5 classes of errors in UTF-8 decoding
194	// 1. Missing leading byte
195	// 2. Missing trailing byte
196	// 3. Surrogate CP
197	// 4. Overlong sequence
198	// 5. CP out of Unicode range
199	test_offsets_error<unsigned char> offsets[] = {
200
201	// 1. Missing leading byte. We will replace the leading byte with
202	// non-leading byte, such as a byte that is always invalid or a trailing
203	// byte.
204
205	// replace leading byte with invalid byte
206	{.in_size: `1`, .out_size: `4`, .expected_in_next: `0`, .expected_out_next: `0`, .replace_char: `0xFF`, .replace_pos: `0`},
207	{.in_size: `3`, .out_size: `4`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `0xFF`, .replace_pos: `1`},
208	{.in_size: `6`, .out_size: `4`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0xFF`, .replace_pos: `3`},
209	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `6`},
210
211	// replace leading byte with trailing byte
212	{.in_size: `1`, .out_size: `4`, .expected_in_next: `0`, .expected_out_next: `0`, .replace_char: `0b10101010`, .replace_pos: `0`},
213	{.in_size: `3`, .out_size: `4`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `0b10101010`, .replace_pos: `1`},
214	{.in_size: `6`, .out_size: `4`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b10101010`, .replace_pos: `3`},
215	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0b10101010`, .replace_pos: `6`},
216
217	// 2. Missing trailing byte. We will replace the trailing byte with
218	// non-trailing byte, such as a byte that is always invalid or a leading
219	// byte (simple ASCII byte in our case).
220
221	// replace first trailing byte with ASCII byte
222	{.in_size: `3`, .out_size: `4`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `'z'`, .replace_pos: `2`},
223	{.in_size: `6`, .out_size: `4`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `'z'`, .replace_pos: `4`},
224	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `7`},
225
226	// replace first trailing byte with invalid byte
227	{.in_size: `3`, .out_size: `4`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `0xFF`, .replace_pos: `2`},
228	{.in_size: `6`, .out_size: `4`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0xFF`, .replace_pos: `4`},
229	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `7`},
230
231	// replace second trailing byte with ASCII byte
232	{.in_size: `6`, .out_size: `4`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `'z'`, .replace_pos: `5`},
233	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `8`},
234
235	// replace second trailing byte with invalid byte
236	{.in_size: `6`, .out_size: `4`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0xFF`, .replace_pos: `5`},
237	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `8`},
238
239	// replace third trailing byte
240	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `9`},
241	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `9`},
242
243	// 2.1 The following test-cases raise doubt whether error or partial should
244	// be returned. For example, we have 4-byte sequence with valid leading
245	// byte. If we hide the last byte we need to return partial. But, if the
246	// second or third byte, which are visible to the call to codecvt, are
247	// malformed then error should be returned.
248
249	// replace first trailing byte with ASCII byte, also incomplete at end
250	{.in_size: `5`, .out_size: `4`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `'z'`, .replace_pos: `4`},
251	{.in_size: `8`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `7`},
252	{.in_size: `9`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `7`},
253
254	// replace first trailing byte with invalid byte, also incomplete at end
255	{.in_size: `5`, .out_size: `4`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0xFF`, .replace_pos: `4`},
256	{.in_size: `8`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `7`},
257	{.in_size: `9`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `7`},
258
259	// replace second trailing byte with ASCII byte, also incomplete at end
260	{.in_size: `9`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `8`},
261
262	// replace second trailing byte with invalid byte, also incomplete at end
263	{.in_size: `9`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `8`},
264
265	// 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
266	// CP U+D700
267	{.in_size: `6`, .out_size: `4`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b10100000`, .replace_pos: `4`}, // turn U+D700 into U+D800
268	{.in_size: `6`, .out_size: `4`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b10101100`, .replace_pos: `4`}, // turn U+D700 into U+DB00
269	{.in_size: `6`, .out_size: `4`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b10110000`, .replace_pos: `4`}, // turn U+D700 into U+DC00
270	{.in_size: `6`, .out_size: `4`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b10111100`, .replace_pos: `4`}, // turn U+D700 into U+DF00
271
272	// 4. Overlong sequence. The CPs in the input are chosen such as modifying
273	// just the leading byte is enough to make them overlong, i.e. for the
274	// 3-byte and 4-byte CP the second byte (first trailing) has enough leading
275	// zeroes.
276	{.in_size: `3`, .out_size: `4`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `0b11000000`, .replace_pos: `1`}, // make the 2-byte CP overlong
277	{.in_size: `3`, .out_size: `4`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `0b11000001`, .replace_pos: `1`}, // make the 2-byte CP overlong
278	{.in_size: `6`, .out_size: `4`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b11100000`, .replace_pos: `3`}, // make the 3-byte CP overlong
279	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0b11110000`, .replace_pos: `6`}, // make the 4-byte CP overlong
280
281	// 5. CP above range
282	// turn U+10AAAA into U+14AAAA by changing its leading byte
283	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0b11110101`, .replace_pos: `6`},
284	// turn U+10AAAA into U+11AAAA by changing its 2nd byte
285	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0b10011010`, .replace_pos: `7`},
286	};
287	for (test_offsets_error<unsigned char>* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
288	test_offsets_error<unsigned char> t = *it;
289	InternT out[array_size(exp) - `1`] = {};
290	assert(t.in_size <= array_size(in));
291	assert(t.out_size <= array_size(out));
292	assert(t.expected_in_next <= t.in_size);
293	assert(t.expected_out_next <= t.out_size);
294	ExternT old_char = in[t.replace_pos];
295	in[t.replace_pos] = t.replace_char;
296
297	mbstate_t state = {};
298	const ExternT* in_next = nullptr;
299	InternT* out_next = nullptr;
300	codecvt_base::result res = codecvt_base::ok;
301
302	res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
303	assert(res == cvt.error);
304	assert(in_next == in + t.expected_in_next);
305	assert(out_next == out + t.expected_out_next);
306	assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == `0`);
307	if (t.expected_out_next < array_size(out))
308	assert(out[t.expected_out_next] == `0`);
309
310	state = mbstate_t ();
311	int len = cvt.length(state, in, in + t.in_size, t.out_size);
312	assert(len >= `0`);
313	assert(static_cast<size_t>(len) == t.expected_in_next);
314
315	in[t.replace_pos] = old_char;
316	}
317	}
318
// Runs the ok, partial and error UTF-8 -> UTF-32 decoding checks against cvt.
template <class InternT, class ExternT>
void utf8_to_utf32_in(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_utf32_in_ok(cvt);
  utf8_to_utf32_in_partial(cvt);
  utf8_to_utf32_in_error(cvt);
}
325
326	template <class InternT, class ExternT>
327	void utf32_to_utf8_out_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
328	// UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
329	const char32_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0x10AAAA`, `0`};
330	const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
331	static_assert(array_size(input) == `5`, "");
332	static_assert(array_size(expected) == `11`, "");
333
334	InternT in[array_size(input)];
335	ExternT exp[array_size(expected)];
336	copy(begin(arr: input), end(arr: input), begin(in));
337	copy(begin(arr: expected), end(arr: expected), begin(exp));
338	assert(char_traits<InternT>::length(in) == `4`);
339	assert(char_traits<ExternT>::length(exp) == `10`);
340
341	test_offsets_ok offsets[] = {{.in_size: `0`, .out_size: `0`}, {.in_size: `1`, .out_size: `1`}, {.in_size: `2`, .out_size: `3`}, {.in_size: `3`, .out_size: `6`}, {.in_size: `4`, .out_size: `10`}};
342	for (test_offsets_ok* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
343	test_offsets_ok t = *it;
344	ExternT out[array_size(exp) - `1`] = {};
345	assert(t.in_size <= array_size(in));
346	assert(t.out_size <= array_size(out));
347	mbstate_t state = {};
348	const InternT* in_next = nullptr;
349	ExternT* out_next = nullptr;
350	codecvt_base::result res = codecvt_base::ok;
351
352	res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
353	assert(res == cvt.ok);
354	assert(in_next == in + t.in_size);
355	assert(out_next == out + t.out_size);
356	assert(char_traits<ExternT>::compare(out, exp, t.out_size) == `0`);
357	if (t.out_size < array_size(out))
358	assert(out[t.out_size] == `0`);
359	}
360	}
361
362	template <class InternT, class ExternT>
363	void utf32_to_utf8_out_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
364	// UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
365	const char32_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0x10AAAA`, `0`};
366	const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
367	static_assert(array_size(input) == `5`, "");
368	static_assert(array_size(expected) == `11`, "");
369
370	InternT in[array_size(input)];
371	ExternT exp[array_size(expected)];
372	copy(begin(arr: input), end(arr: input), begin(in));
373	copy(begin(arr: expected), end(arr: expected), begin(exp));
374	assert(char_traits<InternT>::length(in) == `4`);
375	assert(char_traits<ExternT>::length(exp) == `10`);
376
377	test_offsets_partial offsets[] = {
378	{.in_size: `1`, .out_size: `0`, .expected_in_next: `0`, .expected_out_next: `0`}, // no space for first CP
379
380	{.in_size: `2`, .out_size: `1`, .expected_in_next: `1`, .expected_out_next: `1`}, // no space for second CP
381	{.in_size: `2`, .out_size: `2`, .expected_in_next: `1`, .expected_out_next: `1`}, // no space for second CP
382
383	{.in_size: `3`, .out_size: `3`, .expected_in_next: `2`, .expected_out_next: `3`}, // no space for third CP
384	{.in_size: `3`, .out_size: `4`, .expected_in_next: `2`, .expected_out_next: `3`}, // no space for third CP
385	{.in_size: `3`, .out_size: `5`, .expected_in_next: `2`, .expected_out_next: `3`}, // no space for third CP
386
387	{.in_size: `4`, .out_size: `6`, .expected_in_next: `3`, .expected_out_next: `6`}, // no space for fourth CP
388	{.in_size: `4`, .out_size: `7`, .expected_in_next: `3`, .expected_out_next: `6`}, // no space for fourth CP
389	{.in_size: `4`, .out_size: `8`, .expected_in_next: `3`, .expected_out_next: `6`}, // no space for fourth CP
390	{.in_size: `4`, .out_size: `9`, .expected_in_next: `3`, .expected_out_next: `6`}, // no space for fourth CP
391	};
392	for (test_offsets_partial* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
393	test_offsets_partial t = *it;
394	ExternT out[array_size(exp) - `1`] = {};
395	assert(t.in_size <= array_size(in));
396	assert(t.out_size <= array_size(out));
397	assert(t.expected_in_next <= t.in_size);
398	assert(t.expected_out_next <= t.out_size);
399	mbstate_t state = {};
400	const InternT* in_next = nullptr;
401	ExternT* out_next = nullptr;
402	codecvt_base::result res = codecvt_base::ok;
403
404	res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
405	assert(res == cvt.partial);
406	assert(in_next == in + t.expected_in_next);
407	assert(out_next == out + t.expected_out_next);
408	assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == `0`);
409	if (t.expected_out_next < array_size(out))
410	assert(out[t.expected_out_next] == `0`);
411	}
412	}
413
414	template <class InternT, class ExternT>
415	void utf32_to_utf8_out_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
416	// UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
417	const char32_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0x10AAAA`, `0`};
418	const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
419	static_assert(array_size(input) == `5`, "");
420	static_assert(array_size(expected) == `11`, "");
421
422	InternT in[array_size(input)];
423	ExternT exp[array_size(expected)];
424	copy(begin(arr: input), end(arr: input), begin(in));
425	copy(begin(arr: expected), end(arr: expected), begin(exp));
426	assert(char_traits<InternT>::length(in) == `4`);
427	assert(char_traits<ExternT>::length(exp) == `10`);
428
429	test_offsets_error<InternT> offsets[] = {
430
431	// Surrogate CP
432	{`4`, `10`, `0`, `0`, `0xD800`, `0`},
433	{`4`, `10`, `1`, `1`, `0xDBFF`, `1`},
434	{`4`, `10`, `2`, `3`, `0xDC00`, `2`},
435	{`4`, `10`, `3`, `6`, `0xDFFF`, `3`},
436
437	// CP out of range
438	{`4`, `10`, `0`, `0`, `0x00110000`, `0`},
439	{`4`, `10`, `1`, `1`, `0x00110000`, `1`},
440	{`4`, `10`, `2`, `3`, `0x00110000`, `2`},
441	{`4`, `10`, `3`, `6`, `0x00110000`, `3`}};
442
443	for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
444	test_offsets_error<InternT> t = *it;
445	ExternT out[array_size(exp) - `1`] = {};
446	assert(t.in_size <= array_size(in));
447	assert(t.out_size <= array_size(out));
448	assert(t.expected_in_next <= t.in_size);
449	assert(t.expected_out_next <= t.out_size);
450	InternT old_char = in[t.replace_pos];
451	in[t.replace_pos] = t.replace_char;
452
453	mbstate_t state = {};
454	const InternT* in_next = nullptr;
455	ExternT* out_next = nullptr;
456	codecvt_base::result res = codecvt_base::ok;
457
458	res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
459	assert(res == cvt.error);
460	assert(in_next == in + t.expected_in_next);
461	assert(out_next == out + t.expected_out_next);
462	assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == `0`);
463	if (t.expected_out_next < array_size(out))
464	assert(out[t.expected_out_next] == `0`);
465
466	in[t.replace_pos] = old_char;
467	}
468	}
469
// Runs the ok, partial and error UTF-32 -> UTF-8 encoding checks against cvt.
template <class InternT, class ExternT>
void utf32_to_utf8_out(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf32_to_utf8_out_ok(cvt);
  utf32_to_utf8_out_partial(cvt);
  utf32_to_utf8_out_error(cvt);
}
476
// Exercises both directions of a UTF-8 <-> UTF-32 codecvt facet: decoding via
// in()/length() and encoding via out().
template <class InternT, class ExternT>
void test_utf8_utf32_cvt(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_utf32_in(cvt);
  utf32_to_utf8_out(cvt);
}
482
483	template <class InternT, class ExternT>
484	void utf8_to_utf16_in_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
485	// UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
486	const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
487	const InternT expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0xDBEA`, `0xDEAA`, `0`};
488	static_assert(array_size(input) == `11`, "");
489	static_assert(array_size(expected) == `6`, "");
490
491	ExternT in[array_size(input)];
492	InternT exp[array_size(expected)];
493	copy(begin(arr: input), end(arr: input), begin(in));
494	copy(begin(expected), end(expected), begin(exp));
495	assert(char_traits<ExternT>::length(in) == `10`);
496	assert(char_traits<InternT>::length(exp) == `5`);
497
498	test_offsets_ok offsets[] = {{.in_size: `0`, .out_size: `0`}, {.in_size: `1`, .out_size: `1`}, {.in_size: `3`, .out_size: `2`}, {.in_size: `6`, .out_size: `3`}, {.in_size: `10`, .out_size: `5`}};
499	for (test_offsets_ok* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
500	test_offsets_ok t = *it;
501	InternT out[array_size(exp) - `1`] = {};
502	assert(t.in_size <= array_size(in));
503	assert(t.out_size <= array_size(out));
504	mbstate_t state = {};
505	const ExternT* in_next = nullptr;
506	InternT* out_next = nullptr;
507	codecvt_base::result res = codecvt_base::ok;
508
509	res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
510	assert(res == cvt.ok);
511	assert(in_next == in + t.in_size);
512	assert(out_next == out + t.out_size);
513	assert(char_traits<InternT>::compare(out, exp, t.out_size) == `0`);
514	if (t.out_size < array_size(out))
515	assert(out[t.out_size] == `0`);
516
517	state = mbstate_t ();
518	int len = cvt.length(state, in, in + t.in_size, t.out_size);
519	assert(len >= `0`);
520	assert(static_cast<size_t>(len) == t.in_size);
521	}
522
523	for (test_offsets_ok* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
524	test_offsets_ok t = *it;
525	InternT out[array_size(exp)] = {};
526	assert(t.in_size <= array_size(in));
527	assert(t.out_size <= array_size(out));
528	mbstate_t state = {};
529	const ExternT* in_next = nullptr;
530	InternT* out_next = nullptr;
531	codecvt_base::result res = codecvt_base::ok;
532
533	res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
534	assert(res == cvt.ok);
535	assert(in_next == in + t.in_size);
536	assert(out_next == out + t.out_size);
537	assert(char_traits<InternT>::compare(out, exp, t.out_size) == `0`);
538	if (t.out_size < array_size(out))
539	assert(out[t.out_size] == `0`);
540
541	state = mbstate_t ();
542	int len = cvt.length(state, in, in + t.in_size, array_size(out));
543	assert(len >= `0`);
544	assert(static_cast<size_t>(len) == t.in_size);
545	}
546	}
547
548	template <class InternT, class ExternT>
549	void utf8_to_utf16_in_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
550	// UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
551	const unsigned char input[] = "b\u0448\uAAAA\U0010AAAA";
552	const InternT expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0xDBEA`, `0xDEAA`, `0`};
553	static_assert(array_size(input) == `11`, "");
554	static_assert(array_size(expected) == `6`, "");
555
556	ExternT in[array_size(input)];
557	InternT exp[array_size(expected)];
558	copy(begin(arr: input), end(arr: input), begin(in));
559	copy(begin(expected), end(expected), begin(exp));
560	assert(char_traits<ExternT>::length(in) == `10`);
561	assert(char_traits<InternT>::length(exp) == `5`);
562
563	test_offsets_partial offsets[] = {
564	{.in_size: `1`, .out_size: `0`, .expected_in_next: `0`, .expected_out_next: `0`}, // no space for first CP
565
566	{.in_size: `3`, .out_size: `1`, .expected_in_next: `1`, .expected_out_next: `1`}, // no space for second CP
567	{.in_size: `2`, .out_size: `2`, .expected_in_next: `1`, .expected_out_next: `1`}, // incomplete second CP
568	{.in_size: `2`, .out_size: `1`, .expected_in_next: `1`, .expected_out_next: `1`}, // incomplete second CP, and no space for it
569
570	{.in_size: `6`, .out_size: `2`, .expected_in_next: `3`, .expected_out_next: `2`}, // no space for third CP
571	{.in_size: `4`, .out_size: `3`, .expected_in_next: `3`, .expected_out_next: `2`}, // incomplete third CP
572	{.in_size: `5`, .out_size: `3`, .expected_in_next: `3`, .expected_out_next: `2`}, // incomplete third CP
573	{.in_size: `4`, .out_size: `2`, .expected_in_next: `3`, .expected_out_next: `2`}, // incomplete third CP, and no space for it
574	{.in_size: `5`, .out_size: `2`, .expected_in_next: `3`, .expected_out_next: `2`}, // incomplete third CP, and no space for it
575
576	{.in_size: `10`, .out_size: `3`, .expected_in_next: `6`, .expected_out_next: `3`}, // no space for fourth CP
577	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`}, // no space for fourth CP
578	{.in_size: `7`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP
579	{.in_size: `8`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP
580	{.in_size: `9`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP
581	{.in_size: `7`, .out_size: `3`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP, and no space for it
582	{.in_size: `8`, .out_size: `3`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP, and no space for it
583	{.in_size: `9`, .out_size: `3`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP, and no space for it
584	{.in_size: `7`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP, and no space for it
585	{.in_size: `8`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP, and no space for it
586	{.in_size: `9`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP, and no space for it
587
588	};
589
590	for (test_offsets_partial* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
591	test_offsets_partial t = *it;
592	InternT out[array_size(exp) - `1`] = {};
593	assert(t.in_size <= array_size(in));
594	assert(t.out_size <= array_size(out));
595	assert(t.expected_in_next <= t.in_size);
596	assert(t.expected_out_next <= t.out_size);
597	mbstate_t state = {};
598	const ExternT* in_next = nullptr;
599	InternT* out_next = nullptr;
600	codecvt_base::result res = codecvt_base::ok;
601
602	res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
603	assert(res == cvt.partial);
604	assert(in_next == in + t.expected_in_next);
605	assert(out_next == out + t.expected_out_next);
606	assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == `0`);
607	if (t.expected_out_next < array_size(out))
608	assert(out[t.expected_out_next] == `0`);
609
610	state = mbstate_t ();
611	int len = cvt.length(state, in, in + t.in_size, t.out_size);
612	assert(len >= `0`);
613	assert(static_cast<size_t>(len) == t.expected_in_next);
614	}
615	}
616
617	template <class InternT, class ExternT>
618	void utf8_to_utf16_in_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
619	// UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP, 4-byte CP
620	const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
621	const InternT expected[] = {`'b'`, `0x0448`, `0xD700`, `0xDBEA`, `0xDEAA`, `0`};
622	static_assert(array_size(input) == `11`, "");
623	static_assert(array_size(expected) == `6`, "");
624
625	ExternT in[array_size(input)];
626	InternT exp[array_size(expected)];
627	copy(begin(arr: input), end(arr: input), begin(in));
628	copy(begin(expected), end(expected), begin(exp));
629	assert(char_traits<ExternT>::length(in) == `10`);
630	assert(char_traits<InternT>::length(exp) == `5`);
631
632	// There are 5 classes of errors in UTF-8 decoding
633	// 1. Missing leading byte
634	// 2. Missing trailing byte
635	// 3. Surrogate CP
636	// 4. Overlong sequence
637	// 5. CP out of Unicode range
638	test_offsets_error<unsigned char> offsets[] = {
639
640	// 1. Missing leading byte. We will replace the leading byte with
641	// non-leading byte, such as a byte that is always invalid or a trailing
642	// byte.
643
644	// replace leading byte with invalid byte
645	{.in_size: `1`, .out_size: `5`, .expected_in_next: `0`, .expected_out_next: `0`, .replace_char: `0xFF`, .replace_pos: `0`},
646	{.in_size: `3`, .out_size: `5`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `0xFF`, .replace_pos: `1`},
647	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0xFF`, .replace_pos: `3`},
648	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `6`},
649
650	// replace leading byte with trailing byte
651	{.in_size: `1`, .out_size: `5`, .expected_in_next: `0`, .expected_out_next: `0`, .replace_char: `0b10101010`, .replace_pos: `0`},
652	{.in_size: `3`, .out_size: `5`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `0b10101010`, .replace_pos: `1`},
653	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b10101010`, .replace_pos: `3`},
654	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0b10101010`, .replace_pos: `6`},
655
656	// 2. Missing trailing byte. We will replace the trailing byte with
657	// non-trailing byte, such as a byte that is always invalid or a leading
658	// byte (simple ASCII byte in our case).
659
660	// replace first trailing byte with ASCII byte
661	{.in_size: `3`, .out_size: `5`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `'z'`, .replace_pos: `2`},
662	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `'z'`, .replace_pos: `4`},
663	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `7`},
664
665	// replace first trailing byte with invalid byte
666	{.in_size: `3`, .out_size: `5`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `0xFF`, .replace_pos: `2`},
667	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0xFF`, .replace_pos: `4`},
668	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `7`},
669
670	// replace second trailing byte with ASCII byte
671	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `'z'`, .replace_pos: `5`},
672	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `8`},
673
674	// replace second trailing byte with invalid byte
675	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0xFF`, .replace_pos: `5`},
676	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `8`},
677
678	// replace third trailing byte
679	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `9`},
680	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `9`},
681
682	// 2.1 The following test-cases raise doubt whether error or partial should
683	// be returned. For example, we have 4-byte sequence with valid leading
684	// byte. If we hide the last byte we need to return partial. But, if the
685	// second or third byte, which are visible to the call to codecvt, are
686	// malformed then error should be returned.
687
688	// replace first trailing byte with ASCII byte, also incomplete at end
689	{.in_size: `5`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `'z'`, .replace_pos: `4`},
690	{.in_size: `8`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `7`},
691	{.in_size: `9`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `7`},
692
693	// replace first trailing byte with invalid byte, also incomplete at end
694	{.in_size: `5`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0xFF`, .replace_pos: `4`},
695	{.in_size: `8`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `7`},
696	{.in_size: `9`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `7`},
697
698	// replace second trailing byte with ASCII byte, also incomplete at end
699	{.in_size: `9`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `8`},
700
701	// replace second trailing byte with invalid byte, also incomplete at end
702	{.in_size: `9`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `8`},
703
704	// 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
705	// CP U+D700
706	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b10100000`, .replace_pos: `4`}, // turn U+D700 into U+D800
707	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b10101100`, .replace_pos: `4`}, // turn U+D700 into U+DB00
708	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b10110000`, .replace_pos: `4`}, // turn U+D700 into U+DC00
709	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b10111100`, .replace_pos: `4`}, // turn U+D700 into U+DF00
710
711	// 4. Overlong sequence. The CPs in the input are chosen such as modifying
712	// just the leading byte is enough to make them overlong, i.e. for the
713	// 3-byte and 4-byte CP the second byte (first trailing) has enough leading
714	// zeroes.
715	{.in_size: `3`, .out_size: `5`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `0b11000000`, .replace_pos: `1`}, // make the 2-byte CP overlong
716	{.in_size: `3`, .out_size: `5`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `0b11000001`, .replace_pos: `1`}, // make the 2-byte CP overlong
717	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b11100000`, .replace_pos: `3`}, // make the 3-byte CP overlong
718	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0b11110000`, .replace_pos: `6`}, // make the 4-byte CP overlong
719
720	// 5. CP above range
721	// turn U+10AAAA into U+14AAAA by changing its leading byte
722	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0b11110101`, .replace_pos: `6`},
723	// turn U+10AAAA into U+11AAAA by changing its 2nd byte
724	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0b10011010`, .replace_pos: `7`},
725	};
726	for (test_offsets_error<unsigned char>* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
727	test_offsets_error<unsigned char> t = *it;
728	InternT out[array_size(exp) - `1`] = {};
729	assert(t.in_size <= array_size(in));
730	assert(t.out_size <= array_size(out));
731	assert(t.expected_in_next <= t.in_size);
732	assert(t.expected_out_next <= t.out_size);
733	ExternT old_char = in[t.replace_pos];
734	in[t.replace_pos] = t.replace_char;
735
736	mbstate_t state = {};
737	const ExternT* in_next = nullptr;
738	InternT* out_next = nullptr;
739	codecvt_base::result res = codecvt_base::ok;
740
741	res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
742	assert(res == cvt.error);
743	assert(in_next == in + t.expected_in_next);
744	assert(out_next == out + t.expected_out_next);
745	assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == `0`);
746	if (t.expected_out_next < array_size(out))
747	assert(out[t.expected_out_next] == `0`);
748
749	state = mbstate_t ();
750	int len = cvt.length(state, in, in + t.in_size, t.out_size);
751	assert(len >= `0`);
752	assert(static_cast<size_t>(len) == t.expected_in_next);
753
754	in[t.replace_pos] = old_char;
755	}
756	}
757
// Entry point for the cvt.in() checks of a UTF-8 <-> UTF-16 facet: runs the
// successful, the partial and the error scenarios, in that order.
template <class InternT, class ExternT>
void utf8_to_utf16_in(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_utf16_in_ok(cvt);      // well-formed input, enough output space
  utf8_to_utf16_in_partial(cvt); // truncated input and/or too little space
  utf8_to_utf16_in_error(cvt);   // ill-formed input
}
764
765	template <class InternT, class ExternT>
766	void utf16_to_utf8_out_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
767	// UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
768	const InternT input[] = {`'b'`, `0x0448`, `0xAAAA`, `0xDBEA`, `0xDEAA`, `0`};
769	const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
770	static_assert(array_size(input) == `6`, "");
771	static_assert(array_size(expected) == `11`, "");
772
773	InternT in[array_size(input)];
774	ExternT exp[array_size(expected)];
775	copy(begin(input), end(input), begin(in));
776	copy(begin(arr: expected), end(arr: expected), begin(exp));
777	assert(char_traits<InternT>::length(in) == `5`);
778	assert(char_traits<ExternT>::length(exp) == `10`);
779
780	test_offsets_ok offsets[] = {{.in_size: `0`, .out_size: `0`}, {.in_size: `1`, .out_size: `1`}, {.in_size: `2`, .out_size: `3`}, {.in_size: `3`, .out_size: `6`}, {.in_size: `5`, .out_size: `10`}};
781	for (test_offsets_ok* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
782	test_offsets_ok t = *it;
783	ExternT out[array_size(exp) - `1`] = {};
784	assert(t.in_size <= array_size(in));
785	assert(t.out_size <= array_size(out));
786	mbstate_t state = {};
787	const InternT* in_next = nullptr;
788	ExternT* out_next = nullptr;
789	codecvt_base::result res = codecvt_base::ok;
790
791	res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
792	assert(res == cvt.ok);
793	assert(in_next == in + t.in_size);
794	assert(out_next == out + t.out_size);
795	assert(char_traits<ExternT>::compare(out, exp, t.out_size) == `0`);
796	if (t.out_size < array_size(out))
797	assert(out[t.out_size] == `0`);
798	}
799	}
800
801	template <class InternT, class ExternT>
802	void utf16_to_utf8_out_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
803	// UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
804	const InternT input[] = {`'b'`, `0x0448`, `0xAAAA`, `0xDBEA`, `0xDEAA`, `0`};
805	const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
806	static_assert(array_size(input) == `6`, "");
807	static_assert(array_size(expected) == `11`, "");
808
809	InternT in[array_size(input)];
810	ExternT exp[array_size(expected)];
811	copy(begin(input), end(input), begin(in));
812	copy(begin(arr: expected), end(arr: expected), begin(exp));
813	assert(char_traits<InternT>::length(in) == `5`);
814	assert(char_traits<ExternT>::length(exp) == `10`);
815
816	test_offsets_partial offsets[] = {
817	{.in_size: `1`, .out_size: `0`, .expected_in_next: `0`, .expected_out_next: `0`}, // no space for first CP
818
819	{.in_size: `2`, .out_size: `1`, .expected_in_next: `1`, .expected_out_next: `1`}, // no space for second CP
820	{.in_size: `2`, .out_size: `2`, .expected_in_next: `1`, .expected_out_next: `1`}, // no space for second CP
821
822	{.in_size: `3`, .out_size: `3`, .expected_in_next: `2`, .expected_out_next: `3`}, // no space for third CP
823	{.in_size: `3`, .out_size: `4`, .expected_in_next: `2`, .expected_out_next: `3`}, // no space for third CP
824	{.in_size: `3`, .out_size: `5`, .expected_in_next: `2`, .expected_out_next: `3`}, // no space for third CP
825
826	{.in_size: `5`, .out_size: `6`, .expected_in_next: `3`, .expected_out_next: `6`}, // no space for fourth CP
827	{.in_size: `5`, .out_size: `7`, .expected_in_next: `3`, .expected_out_next: `6`}, // no space for fourth CP
828	{.in_size: `5`, .out_size: `8`, .expected_in_next: `3`, .expected_out_next: `6`}, // no space for fourth CP
829	{.in_size: `5`, .out_size: `9`, .expected_in_next: `3`, .expected_out_next: `6`}, // no space for fourth CP
830
831	{.in_size: `4`, .out_size: `10`, .expected_in_next: `3`, .expected_out_next: `6`}, // incomplete fourth CP
832
833	{.in_size: `4`, .out_size: `6`, .expected_in_next: `3`, .expected_out_next: `6`}, // incomplete fourth CP, and no space for it
834	{.in_size: `4`, .out_size: `7`, .expected_in_next: `3`, .expected_out_next: `6`}, // incomplete fourth CP, and no space for it
835	{.in_size: `4`, .out_size: `8`, .expected_in_next: `3`, .expected_out_next: `6`}, // incomplete fourth CP, and no space for it
836	{.in_size: `4`, .out_size: `9`, .expected_in_next: `3`, .expected_out_next: `6`}, // incomplete fourth CP, and no space for it
837	};
838	for (test_offsets_partial* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
839	test_offsets_partial t = *it;
840	ExternT out[array_size(exp) - `1`] = {};
841	assert(t.in_size <= array_size(in));
842	assert(t.out_size <= array_size(out));
843	assert(t.expected_in_next <= t.in_size);
844	assert(t.expected_out_next <= t.out_size);
845	mbstate_t state = {};
846	const InternT* in_next = nullptr;
847	ExternT* out_next = nullptr;
848	codecvt_base::result res = codecvt_base::ok;
849
850	res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
851	assert(res == cvt.partial);
852	assert(in_next == in + t.expected_in_next);
853	assert(out_next == out + t.expected_out_next);
854	assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == `0`);
855	if (t.expected_out_next < array_size(out))
856	assert(out[t.expected_out_next] == `0`);
857	}
858	}
859
860	template <class InternT, class ExternT>
861	void utf16_to_utf8_out_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
862	// UTF-8 string of 1-byte CP, 2-byte CP, 3-byte CP and 4-byte CP
863	const InternT input[] = {`'b'`, `0x0448`, `0xAAAA`, `0xDBEA`, `0xDEAA`, `0`};
864	const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
865	static_assert(array_size(input) == `6`, "");
866	static_assert(array_size(expected) == `11`, "");
867
868	InternT in[array_size(input)];
869	ExternT exp[array_size(expected)];
870	copy(begin(input), end(input), begin(in));
871	copy(begin(arr: expected), end(arr: expected), begin(exp));
872	assert(char_traits<InternT>::length(in) == `5`);
873	assert(char_traits<ExternT>::length(exp) == `10`);
874
875	// The only possible error in UTF-16 is unpaired surrogate code units.
876	// So we replace valid code points (scalar values) with lone surrogate CU.
877	test_offsets_error<InternT> offsets[] = {
878	{`5`, `10`, `0`, `0`, `0xD800`, `0`},
879	{`5`, `10`, `0`, `0`, `0xDBFF`, `0`},
880	{`5`, `10`, `0`, `0`, `0xDC00`, `0`},
881	{`5`, `10`, `0`, `0`, `0xDFFF`, `0`},
882
883	{`5`, `10`, `1`, `1`, `0xD800`, `1`},
884	{`5`, `10`, `1`, `1`, `0xDBFF`, `1`},
885	{`5`, `10`, `1`, `1`, `0xDC00`, `1`},
886	{`5`, `10`, `1`, `1`, `0xDFFF`, `1`},
887
888	{`5`, `10`, `2`, `3`, `0xD800`, `2`},
889	{`5`, `10`, `2`, `3`, `0xDBFF`, `2`},
890	{`5`, `10`, `2`, `3`, `0xDC00`, `2`},
891	{`5`, `10`, `2`, `3`, `0xDFFF`, `2`},
892
893	// make the leading surrogate a trailing one
894	{`5`, `10`, `3`, `6`, `0xDC00`, `3`},
895	{`5`, `10`, `3`, `6`, `0xDFFF`, `3`},
896
897	// make the trailing surrogate a leading one
898	{`5`, `10`, `3`, `6`, `0xD800`, `4`},
899	{`5`, `10`, `3`, `6`, `0xDBFF`, `4`},
900
901	// make the trailing surrogate a BMP char
902	{`5`, `10`, `3`, `6`, `'z'`, `4`},
903	};
904
905	for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
906	test_offsets_error<InternT> t = *it;
907	ExternT out[array_size(exp) - `1`] = {};
908	assert(t.in_size <= array_size(in));
909	assert(t.out_size <= array_size(out));
910	assert(t.expected_in_next <= t.in_size);
911	assert(t.expected_out_next <= t.out_size);
912	InternT old_char = in[t.replace_pos];
913	in[t.replace_pos] = t.replace_char;
914
915	mbstate_t state = {};
916	const InternT* in_next = nullptr;
917	ExternT* out_next = nullptr;
918	codecvt_base::result res = codecvt_base::ok;
919
920	res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
921	assert(res == cvt.error);
922	assert(in_next == in + t.expected_in_next);
923	assert(out_next == out + t.expected_out_next);
924	assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == `0`);
925	if (t.expected_out_next < array_size(out))
926	assert(out[t.expected_out_next] == `0`);
927
928	in[t.replace_pos] = old_char;
929	}
930	}
931
// Entry point for the cvt.out() checks of a UTF-8 <-> UTF-16 facet: runs the
// successful, the partial and the error scenarios, in that order.
template <class InternT, class ExternT>
void utf16_to_utf8_out(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf16_to_utf8_out_ok(cvt);      // well-formed input, enough output space
  utf16_to_utf8_out_partial(cvt); // too little space or a cut surrogate pair
  utf16_to_utf8_out_error(cvt);   // unpaired surrogates
}
938
// Exercises a UTF-8 <-> UTF-16 facet in both directions: first the in() side
// (external to internal), then the out() side (internal to external).
template <class InternT, class ExternT>
void test_utf8_utf16_cvt(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_utf16_in(cvt);
  utf16_to_utf8_out(cvt);
}
944
945	template <class InternT, class ExternT>
946	void utf8_to_ucs2_in_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
947	// UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
948	const unsigned char input[] = "b\u0448\uAAAA";
949	const char16_t expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0`};
950	static_assert(array_size(input) == `7`, "");
951	static_assert(array_size(expected) == `4`, "");
952
953	ExternT in[array_size(input)];
954	InternT exp[array_size(expected)];
955	copy(begin(arr: input), end(arr: input), begin(in));
956	copy(begin(arr: expected), end(arr: expected), begin(exp));
957	assert(char_traits<ExternT>::length(in) == `6`);
958	assert(char_traits<InternT>::length(exp) == `3`);
959
960	test_offsets_ok offsets[] = {{.in_size: `0`, .out_size: `0`}, {.in_size: `1`, .out_size: `1`}, {.in_size: `3`, .out_size: `2`}, {.in_size: `6`, .out_size: `3`}};
961	for (test_offsets_ok* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
962	test_offsets_ok t = *it;
963	InternT out[array_size(exp) - `1`] = {};
964	assert(t.in_size <= array_size(in));
965	assert(t.out_size <= array_size(out));
966	mbstate_t state = {};
967	const ExternT* in_next = nullptr;
968	InternT* out_next = nullptr;
969	codecvt_base::result res = codecvt_base::ok;
970
971	res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
972	assert(res == cvt.ok);
973	assert(in_next == in + t.in_size);
974	assert(out_next == out + t.out_size);
975	assert(char_traits<InternT>::compare(out, exp, t.out_size) == `0`);
976	if (t.out_size < array_size(out))
977	assert(out[t.out_size] == `0`);
978
979	state = mbstate_t ();
980	int len = cvt.length(state, in, in + t.in_size, t.out_size);
981	assert(len >= `0`);
982	assert(static_cast<size_t>(len) == t.in_size);
983	}
984
985	for (test_offsets_ok* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
986	test_offsets_ok t = *it;
987	InternT out[array_size(exp)] = {};
988	assert(t.in_size <= array_size(in));
989	assert(t.out_size <= array_size(out));
990	mbstate_t state = {};
991	const ExternT* in_next = nullptr;
992	InternT* out_next = nullptr;
993	codecvt_base::result res = codecvt_base::ok;
994
995	res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
996	assert(res == cvt.ok);
997	assert(in_next == in + t.in_size);
998	assert(out_next == out + t.out_size);
999	assert(char_traits<InternT>::compare(out, exp, t.out_size) == `0`);
1000	if (t.out_size < array_size(out))
1001	assert(out[t.out_size] == `0`);
1002
1003	state = mbstate_t ();
1004	int len = cvt.length(state, in, in + t.in_size, array_size(out));
1005	assert(len >= `0`);
1006	assert(static_cast<size_t>(len) == t.in_size);
1007	}
1008	}
1009
1010	template <class InternT, class ExternT>
1011	void utf8_to_ucs2_in_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1012	// UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
1013	const unsigned char input[] = "b\u0448\uAAAA";
1014	const char16_t expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0`};
1015	static_assert(array_size(input) == `7`, "");
1016	static_assert(array_size(expected) == `4`, "");
1017
1018	ExternT in[array_size(input)];
1019	InternT exp[array_size(expected)];
1020	copy(begin(arr: input), end(arr: input), begin(in));
1021	copy(begin(arr: expected), end(arr: expected), begin(exp));
1022	assert(char_traits<ExternT>::length(in) == `6`);
1023	assert(char_traits<InternT>::length(exp) == `3`);
1024
1025	test_offsets_partial offsets[] = {
1026	{.in_size: `1`, .out_size: `0`, .expected_in_next: `0`, .expected_out_next: `0`}, // no space for first CP
1027
1028	{.in_size: `3`, .out_size: `1`, .expected_in_next: `1`, .expected_out_next: `1`}, // no space for second CP
1029	{.in_size: `2`, .out_size: `2`, .expected_in_next: `1`, .expected_out_next: `1`}, // incomplete second CP
1030	{.in_size: `2`, .out_size: `1`, .expected_in_next: `1`, .expected_out_next: `1`}, // incomplete second CP, and no space for it
1031
1032	{.in_size: `6`, .out_size: `2`, .expected_in_next: `3`, .expected_out_next: `2`}, // no space for third CP
1033	{.in_size: `4`, .out_size: `3`, .expected_in_next: `3`, .expected_out_next: `2`}, // incomplete third CP
1034	{.in_size: `5`, .out_size: `3`, .expected_in_next: `3`, .expected_out_next: `2`}, // incomplete third CP
1035	{.in_size: `4`, .out_size: `2`, .expected_in_next: `3`, .expected_out_next: `2`}, // incomplete third CP, and no space for it
1036	{.in_size: `5`, .out_size: `2`, .expected_in_next: `3`, .expected_out_next: `2`}, // incomplete third CP, and no space for it
1037	};
1038
1039	for (test_offsets_partial* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
1040	test_offsets_partial t = *it;
1041	InternT out[array_size(exp) - `1`] = {};
1042	assert(t.in_size <= array_size(in));
1043	assert(t.out_size <= array_size(out));
1044	assert(t.expected_in_next <= t.in_size);
1045	assert(t.expected_out_next <= t.out_size);
1046	mbstate_t state = {};
1047	const ExternT* in_next = nullptr;
1048	InternT* out_next = nullptr;
1049	codecvt_base::result res = codecvt_base::ok;
1050
1051	res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1052	assert(res == cvt.partial);
1053	assert(in_next == in + t.expected_in_next);
1054	assert(out_next == out + t.expected_out_next);
1055	assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == `0`);
1056	if (t.expected_out_next < array_size(out))
1057	assert(out[t.expected_out_next] == `0`);
1058
1059	state = mbstate_t ();
1060	int len = cvt.length(state, in, in + t.in_size, t.out_size);
1061	assert(len >= `0`);
1062	assert(static_cast<size_t>(len) == t.expected_in_next);
1063	}
1064	}
1065
1066	template <class InternT, class ExternT>
1067	void utf8_to_ucs2_in_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1068	const unsigned char input[] = "b\u0448\uD700\U0010AAAA";
1069	const char16_t expected[] = {`'b'`, `0x0448`, `0xD700`, `0xDBEA`, `0xDEAA`, `0`};
1070	static_assert(array_size(input) == `11`, "");
1071	static_assert(array_size(expected) == `6`, "");
1072
1073	ExternT in[array_size(input)];
1074	InternT exp[array_size(expected)];
1075	copy(begin(arr: input), end(arr: input), begin(in));
1076	copy(begin(arr: expected), end(arr: expected), begin(exp));
1077	assert(char_traits<ExternT>::length(in) == `10`);
1078	assert(char_traits<InternT>::length(exp) == `5`);
1079
1080	// There are 5 classes of errors in UTF-8 decoding
1081	// 1. Missing leading byte
1082	// 2. Missing trailing byte
1083	// 3. Surrogate CP
1084	// 4. Overlong sequence
1085	// 5. CP out of Unicode range
1086	test_offsets_error<unsigned char> offsets[] = {
1087
1088	// 1. Missing leading byte. We will replace the leading byte with
1089	// non-leading byte, such as a byte that is always invalid or a trailing
1090	// byte.
1091
1092	// replace leading byte with invalid byte
1093	{.in_size: `1`, .out_size: `5`, .expected_in_next: `0`, .expected_out_next: `0`, .replace_char: `0xFF`, .replace_pos: `0`},
1094	{.in_size: `3`, .out_size: `5`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `0xFF`, .replace_pos: `1`},
1095	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0xFF`, .replace_pos: `3`},
1096	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `6`},
1097
1098	// replace leading byte with trailing byte
1099	{.in_size: `1`, .out_size: `5`, .expected_in_next: `0`, .expected_out_next: `0`, .replace_char: `0b10101010`, .replace_pos: `0`},
1100	{.in_size: `3`, .out_size: `5`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `0b10101010`, .replace_pos: `1`},
1101	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b10101010`, .replace_pos: `3`},
1102	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0b10101010`, .replace_pos: `6`},
1103
1104	// 2. Missing trailing byte. We will replace the trailing byte with
1105	// non-trailing byte, such as a byte that is always invalid or a leading
1106	// byte (simple ASCII byte in our case).
1107
1108	// replace first trailing byte with ASCII byte
1109	{.in_size: `3`, .out_size: `5`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `'z'`, .replace_pos: `2`},
1110	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `'z'`, .replace_pos: `4`},
1111	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `7`},
1112
1113	// replace first trailing byte with invalid byte
1114	{.in_size: `3`, .out_size: `5`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `0xFF`, .replace_pos: `2`},
1115	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0xFF`, .replace_pos: `4`},
1116	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `7`},
1117
1118	// replace second trailing byte with ASCII byte
1119	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `'z'`, .replace_pos: `5`},
1120	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `8`},
1121
1122	// replace second trailing byte with invalid byte
1123	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0xFF`, .replace_pos: `5`},
1124	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `8`},
1125
1126	// replace third trailing byte
1127	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `9`},
1128	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `9`},
1129
1130	// 2.1 The following test-cases raise doubt whether error or partial should
1131	// be returned. For example, we have 4-byte sequence with valid leading
1132	// byte. If we hide the last byte we need to return partial. But, if the
1133	// second or third byte, which are visible to the call to codecvt, are
1134	// malformed then error should be returned.
1135
1136	// replace first trailing byte with ASCII byte, also incomplete at end
1137	{.in_size: `5`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `'z'`, .replace_pos: `4`},
1138	{.in_size: `8`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `7`},
1139	{.in_size: `9`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `7`},
1140
1141	// replace first trailing byte with invalid byte, also incomplete at end
1142	{.in_size: `5`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0xFF`, .replace_pos: `4`},
1143	{.in_size: `8`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `7`},
1144	{.in_size: `9`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `7`},
1145
1146	// replace second trailing byte with ASCII byte, also incomplete at end
1147	{.in_size: `9`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `8`},
1148
1149	// replace second trailing byte with invalid byte, also incomplete at end
1150	{.in_size: `9`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xFF`, .replace_pos: `8`},
1151
1152	// 3. Surrogate CP. We modify the second byte (first trailing) of the 3-byte
1153	// CP U+D700
1154	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b10100000`, .replace_pos: `4`}, // turn U+D700 into U+D800
1155	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b10101100`, .replace_pos: `4`}, // turn U+D700 into U+DB00
1156	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b10110000`, .replace_pos: `4`}, // turn U+D700 into U+DC00
1157	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b10111100`, .replace_pos: `4`}, // turn U+D700 into U+DF00
1158
1159	// 4. Overlong sequence. The CPs in the input are chosen such as modifying
1160	// just the leading byte is enough to make them overlong, i.e. for the
1161	// 3-byte and 4-byte CP the second byte (first trailing) has enough leading
1162	// zeroes.
1163	{.in_size: `3`, .out_size: `5`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `0b11000000`, .replace_pos: `1`}, // make the 2-byte CP overlong
1164	{.in_size: `3`, .out_size: `5`, .expected_in_next: `1`, .expected_out_next: `1`, .replace_char: `0b11000001`, .replace_pos: `1`}, // make the 2-byte CP overlong
1165	{.in_size: `6`, .out_size: `5`, .expected_in_next: `3`, .expected_out_next: `2`, .replace_char: `0b11100000`, .replace_pos: `3`}, // make the 3-byte CP overlong
1166	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0b11110000`, .replace_pos: `6`}, // make the 4-byte CP overlong
1167
1168	// 5. CP above range
1169	// turn U+10AAAA into U+14AAAA by changing its leading byte
1170	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0b11110101`, .replace_pos: `6`},
1171	// turn U+10AAAA into U+11AAAA by changing its 2nd byte
1172	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0b10011010`, .replace_pos: `7`},
1173	// Don't replace anything, show full 4-byte CP U+10AAAA
1174	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'b'`, .replace_pos: `0`},
1175	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'b'`, .replace_pos: `0`},
1176	// Don't replace anything, show incomplete 4-byte CP at the end. It's still
1177	// out of UCS2 range just by seeing the first byte.
1178	{.in_size: `7`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'b'`, .replace_pos: `0`}, // incomplete fourth CP
1179	{.in_size: `8`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'b'`, .replace_pos: `0`}, // incomplete fourth CP
1180	{.in_size: `9`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'b'`, .replace_pos: `0`}, // incomplete fourth CP
1181	{.in_size: `7`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'b'`, .replace_pos: `0`}, // incomplete fourth CP
1182	{.in_size: `8`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'b'`, .replace_pos: `0`}, // incomplete fourth CP
1183	{.in_size: `9`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'b'`, .replace_pos: `0`}, // incomplete fourth CP
1184	};
1185	for (test_offsets_error<unsigned char>* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
1186	test_offsets_error<unsigned char> t = *it;
1187	InternT out[array_size(exp) - `1`] = {};
1188	assert(t.in_size <= array_size(in));
1189	assert(t.out_size <= array_size(out));
1190	assert(t.expected_in_next <= t.in_size);
1191	assert(t.expected_out_next <= t.out_size);
1192	ExternT old_char = in[t.replace_pos];
1193	in[t.replace_pos] = t.replace_char;
1194
1195	mbstate_t state = {};
1196	const ExternT* in_next = nullptr;
1197	InternT* out_next = nullptr;
1198	codecvt_base::result res = codecvt_base::ok;
1199
1200	res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1201	assert(res == cvt.error);
1202	assert(in_next == in + t.expected_in_next);
1203	assert(out_next == out + t.expected_out_next);
1204	assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == `0`);
1205	if (t.expected_out_next < array_size(out))
1206	assert(out[t.expected_out_next] == `0`);
1207
1208	state = mbstate_t ();
1209	int len = cvt.length(state, in, in + t.in_size, t.out_size);
1210	assert(len >= `0`);
1211	assert(static_cast<size_t>(len) == t.expected_in_next);
1212
1213	in[t.replace_pos] = old_char;
1214	}
1215	}
1216
// Driver for the utf8_to_ucs2_in_* checks: runs the "ok", "partial" and
// "error" scenarios, in that order, against the same facet `cvt`.
template <class InternT, class ExternT>
void utf8_to_ucs2_in(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_ucs2_in_ok(cvt);      // conversions expected to succeed
  utf8_to_ucs2_in_partial(cvt); // conversions expected to report partial
  utf8_to_ucs2_in_error(cvt);   // conversions expected to report error
}
1223
1224	template <class InternT, class ExternT>
1225	void ucs2_to_utf8_out_ok(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1226	// UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
1227	const char16_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0`};
1228	const unsigned char expected[] = "b\u0448\uAAAA";
1229	static_assert(array_size(input) == `4`, "");
1230	static_assert(array_size(expected) == `7`, "");
1231
1232	InternT in[array_size(input)];
1233	ExternT exp[array_size(expected)];
1234	copy(begin(arr: input), end(arr: input), begin(in));
1235	copy(begin(arr: expected), end(arr: expected), begin(exp));
1236	assert(char_traits<InternT>::length(in) == `3`);
1237	assert(char_traits<ExternT>::length(exp) == `6`);
1238
1239	test_offsets_ok offsets[] = {{.in_size: `0`, .out_size: `0`}, {.in_size: `1`, .out_size: `1`}, {.in_size: `2`, .out_size: `3`}, {.in_size: `3`, .out_size: `6`}};
1240	for (test_offsets_ok* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
1241	test_offsets_ok t = *it;
1242	ExternT out[array_size(exp) - `1`] = {};
1243	assert(t.in_size <= array_size(in));
1244	assert(t.out_size <= array_size(out));
1245	mbstate_t state = {};
1246	const InternT* in_next = nullptr;
1247	ExternT* out_next = nullptr;
1248	codecvt_base::result res = codecvt_base::ok;
1249
1250	res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1251	assert(res == cvt.ok);
1252	assert(in_next == in + t.in_size);
1253	assert(out_next == out + t.out_size);
1254	assert(char_traits<ExternT>::compare(out, exp, t.out_size) == `0`);
1255	if (t.out_size < array_size(out))
1256	assert(out[t.out_size] == `0`);
1257	}
1258	}
1259
1260	template <class InternT, class ExternT>
1261	void ucs2_to_utf8_out_partial(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1262	// UTF-8 string of 1-byte CP, 2-byte CP and 3-byte CP
1263	const char16_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0`};
1264	const unsigned char expected[] = "b\u0448\uAAAA";
1265	static_assert(array_size(input) == `4`, "");
1266	static_assert(array_size(expected) == `7`, "");
1267
1268	InternT in[array_size(input)];
1269	ExternT exp[array_size(expected)];
1270	copy(begin(arr: input), end(arr: input), begin(in));
1271	copy(begin(arr: expected), end(arr: expected), begin(exp));
1272	assert(char_traits<InternT>::length(in) == `3`);
1273	assert(char_traits<ExternT>::length(exp) == `6`);
1274
1275	test_offsets_partial offsets[] = {
1276	{.in_size: `1`, .out_size: `0`, .expected_in_next: `0`, .expected_out_next: `0`}, // no space for first CP
1277
1278	{.in_size: `2`, .out_size: `1`, .expected_in_next: `1`, .expected_out_next: `1`}, // no space for second CP
1279	{.in_size: `2`, .out_size: `2`, .expected_in_next: `1`, .expected_out_next: `1`}, // no space for second CP
1280
1281	{.in_size: `3`, .out_size: `3`, .expected_in_next: `2`, .expected_out_next: `3`}, // no space for third CP
1282	{.in_size: `3`, .out_size: `4`, .expected_in_next: `2`, .expected_out_next: `3`}, // no space for third CP
1283	{.in_size: `3`, .out_size: `5`, .expected_in_next: `2`, .expected_out_next: `3`}, // no space for third CP
1284	};
1285	for (test_offsets_partial* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
1286	test_offsets_partial t = *it;
1287	ExternT out[array_size(exp) - `1`] = {};
1288	assert(t.in_size <= array_size(in));
1289	assert(t.out_size <= array_size(out));
1290	assert(t.expected_in_next <= t.in_size);
1291	assert(t.expected_out_next <= t.out_size);
1292	mbstate_t state = {};
1293	const InternT* in_next = nullptr;
1294	ExternT* out_next = nullptr;
1295	codecvt_base::result res = codecvt_base::ok;
1296
1297	res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1298	assert(res == cvt.partial);
1299	assert(in_next == in + t.expected_in_next);
1300	assert(out_next == out + t.expected_out_next);
1301	assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == `0`);
1302	if (t.expected_out_next < array_size(out))
1303	assert(out[t.expected_out_next] == `0`);
1304	}
1305	}
1306
1307	template <class InternT, class ExternT>
1308	void ucs2_to_utf8_out_error(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
1309	const char16_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0xDBEA`, `0xDEAA`, `0`};
1310	const unsigned char expected[] = "b\u0448\uAAAA\U0010AAAA";
1311	static_assert(array_size(input) == `6`, "");
1312	static_assert(array_size(expected) == `11`, "");
1313
1314	InternT in[array_size(input)];
1315	ExternT exp[array_size(expected)];
1316	copy(begin(arr: input), end(arr: input), begin(in));
1317	copy(begin(arr: expected), end(arr: expected), begin(exp));
1318	assert(char_traits<InternT>::length(in) == `5`);
1319	assert(char_traits<ExternT>::length(exp) == `10`);
1320
1321	test_offsets_error<InternT> offsets[] = {
1322	{`3`, `6`, `0`, `0`, `0xD800`, `0`},
1323	{`3`, `6`, `0`, `0`, `0xDBFF`, `0`},
1324	{`3`, `6`, `0`, `0`, `0xDC00`, `0`},
1325	{`3`, `6`, `0`, `0`, `0xDFFF`, `0`},
1326
1327	{`3`, `6`, `1`, `1`, `0xD800`, `1`},
1328	{`3`, `6`, `1`, `1`, `0xDBFF`, `1`},
1329	{`3`, `6`, `1`, `1`, `0xDC00`, `1`},
1330	{`3`, `6`, `1`, `1`, `0xDFFF`, `1`},
1331
1332	{`3`, `6`, `2`, `3`, `0xD800`, `2`},
1333	{`3`, `6`, `2`, `3`, `0xDBFF`, `2`},
1334	{`3`, `6`, `2`, `3`, `0xDC00`, `2`},
1335	{`3`, `6`, `2`, `3`, `0xDFFF`, `2`},
1336
1337	// make the leading surrogate a trailing one
1338	{`5`, `10`, `3`, `6`, `0xDC00`, `3`},
1339	{`5`, `10`, `3`, `6`, `0xDFFF`, `3`},
1340
1341	// make the trailing surrogate a leading one
1342	{`5`, `10`, `3`, `6`, `0xD800`, `4`},
1343	{`5`, `10`, `3`, `6`, `0xDBFF`, `4`},
1344
1345	// make the trailing surrogate a BMP char
1346	{`5`, `10`, `3`, `6`, `'z'`, `4`},
1347
1348	// don't replace anything in the test cases bellow, just show the surrogate
1349	// pair (fourth CP) fully or partially
1350	{`5`, `10`, `3`, `6`, `'b'`, `0`},
1351	{`5`, `7`, `3`, `6`, `'b'`, `0`}, // no space for fourth CP
1352	{`5`, `8`, `3`, `6`, `'b'`, `0`}, // no space for fourth CP
1353	{`5`, `9`, `3`, `6`, `'b'`, `0`}, // no space for fourth CP
1354
1355	{`4`, `10`, `3`, `6`, `'b'`, `0`}, // incomplete fourth CP
1356	{`4`, `7`, `3`, `6`, `'b'`, `0`}, // incomplete fourth CP, and no space for it
1357	{`4`, `8`, `3`, `6`, `'b'`, `0`}, // incomplete fourth CP, and no space for it
1358	{`4`, `9`, `3`, `6`, `'b'`, `0`}, // incomplete fourth CP, and no space for it
1359	};
1360
1361	for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
1362	test_offsets_error<InternT> t = *it;
1363	ExternT out[array_size(exp) - `1`] = {};
1364	assert(t.in_size <= array_size(in));
1365	assert(t.out_size <= array_size(out));
1366	assert(t.expected_in_next <= t.in_size);
1367	assert(t.expected_out_next <= t.out_size);
1368	InternT old_char = in[t.replace_pos];
1369	in[t.replace_pos] = t.replace_char;
1370
1371	mbstate_t state = {};
1372	const InternT* in_next = nullptr;
1373	ExternT* out_next = nullptr;
1374	codecvt_base::result res = codecvt_base::ok;
1375
1376	res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1377	assert(res == cvt.error);
1378	assert(in_next == in + t.expected_in_next);
1379	assert(out_next == out + t.expected_out_next);
1380	assert(char_traits<ExternT>::compare(out, exp, t.expected_out_next) == `0`);
1381	if (t.expected_out_next < array_size(out))
1382	assert(out[t.expected_out_next] == `0`);
1383
1384	in[t.replace_pos] = old_char;
1385	}
1386	}
1387
// Driver for the ucs2_to_utf8_out_* checks: runs the "ok", "partial" and
// "error" scenarios, in that order, against the same facet `cvt`.
template <class InternT, class ExternT>
void ucs2_to_utf8_out(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  ucs2_to_utf8_out_ok(cvt);      // conversions expected to succeed
  ucs2_to_utf8_out_partial(cvt); // conversions expected to report partial
  ucs2_to_utf8_out_error(cvt);   // conversions expected to report error
}
1394
// Entry point for a UTF-8 <-> UCS-2 facet: exercises the in() direction first
// (utf8_to_ucs2_in), then the out() direction (ucs2_to_utf8_out).
template <class InternT, class ExternT>
void test_utf8_ucs2_cvt(const std::codecvt<InternT, ExternT, mbstate_t>& cvt) {
  utf8_to_ucs2_in(cvt);  // external -> internal
  ucs2_to_utf8_out(cvt); // internal -> external
}
1400
// Byte order used when serializing UTF-16 code units with utf16_to_bytes().
enum utf16_endianess { utf16_big_endian, utf16_little_endian };

// Serializes the 16-bit code units in [f, l) into bytes written through `o`,
// two bytes per code unit.
//
//   f, l  input range of code units
//   o     output iterator receiving the bytes
//   e     utf16_big_endian    -> high byte first, then low byte
//         any other value     -> low byte first, then high byte
//
// Returns the output iterator advanced past the last byte written.
template <class Iter1, class Iter2>
Iter2 utf16_to_bytes(Iter1 f, Iter1 l, Iter2 o, utf16_endianess e) {
  if (e == utf16_big_endian)
    for (; f != l; ++f) {
      *o++ = (*f >> 8) & 0xFF;
      *o++ = *f & 0xFF;
    }
  else
    for (; f != l; ++f) {
      *o++ = *f & 0xFF;
      *o++ = (*f >> 8) & 0xFF;
    }
  return o;
}
1417
1418	template <class InternT>
1419	void utf16_to_utf32_in_ok(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1420	const char16_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0xDBEA`, `0xDEAA`, `0`};
1421	const char32_t expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0x10AAAA`, `0`};
1422	static_assert(array_size(input) == `6`, "");
1423	static_assert(array_size(expected) == `5`, "");
1424
1425	char in[array_size(input) * `2`];
1426	InternT exp[array_size(expected)];
1427	utf16_to_bytes(f: begin(arr: input), l: end(arr: input), o: begin(arr&: in), e: endianess);
1428	copy(begin(arr: expected), end(arr: expected), begin(exp));
1429
1430	test_offsets_ok offsets[] = {{.in_size: `0`, .out_size: `0`}, {.in_size: `2`, .out_size: `1`}, {.in_size: `4`, .out_size: `2`}, {.in_size: `6`, .out_size: `3`}, {.in_size: `10`, .out_size: `4`}};
1431	for (test_offsets_ok* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
1432	test_offsets_ok t = *it;
1433	InternT out[array_size(exp) - `1`] = {};
1434	assert(t.in_size <= array_size(in));
1435	assert(t.out_size <= array_size(out));
1436	mbstate_t state = {};
1437	const char* in_next = nullptr;
1438	InternT* out_next = nullptr;
1439	codecvt_base::result res = codecvt_base::ok;
1440
1441	res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1442	assert(res == cvt.ok);
1443	assert(in_next == in + t.in_size);
1444	assert(out_next == out + t.out_size);
1445	assert(char_traits<InternT>::compare(out, exp, t.out_size) == `0`);
1446	if (t.out_size < array_size(out))
1447	assert(out[t.out_size] == `0`);
1448
1449	state = mbstate_t ();
1450	int len = cvt.length(state, in, in + t.in_size, t.out_size);
1451	assert(len >= `0`);
1452	assert(static_cast<size_t>(len) == t.in_size);
1453	}
1454
1455	for (test_offsets_ok* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
1456	test_offsets_ok t = *it;
1457	InternT out[array_size(exp)] = {};
1458	assert(t.in_size <= array_size(in));
1459	assert(t.out_size <= array_size(out));
1460	mbstate_t state = {};
1461	const char* in_next = nullptr;
1462	InternT* out_next = nullptr;
1463	codecvt_base::result res = codecvt_base::ok;
1464
1465	res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
1466	assert(res == cvt.ok);
1467	assert(in_next == in + t.in_size);
1468	assert(out_next == out + t.out_size);
1469	assert(char_traits<InternT>::compare(out, exp, t.out_size) == `0`);
1470	if (t.out_size < array_size(out))
1471	assert(out[t.out_size] == `0`);
1472
1473	state = mbstate_t ();
1474	int len = cvt.length(state, in, in + t.in_size, array_size(out));
1475	assert(len >= `0`);
1476	assert(static_cast<size_t>(len) == t.in_size);
1477	}
1478	}
1479
1480	template <class InternT>
1481	void utf16_to_utf32_in_partial(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1482	const char16_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0xDBEA`, `0xDEAA`, `0`};
1483	const char32_t expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0x10AAAA`, `0`};
1484	static_assert(array_size(input) == `6`, "");
1485	static_assert(array_size(expected) == `5`, "");
1486
1487	char in[array_size(input) * `2`];
1488	InternT exp[array_size(expected)];
1489	utf16_to_bytes(f: begin(arr: input), l: end(arr: input), o: begin(arr&: in), e: endianess);
1490	copy(begin(arr: expected), end(arr: expected), begin(exp));
1491
1492	test_offsets_partial offsets[] = {
1493	{.in_size: `2`, .out_size: `0`, .expected_in_next: `0`, .expected_out_next: `0`}, // no space for first CP
1494	{.in_size: `1`, .out_size: `1`, .expected_in_next: `0`, .expected_out_next: `0`}, // incomplete first CP
1495	{.in_size: `1`, .out_size: `0`, .expected_in_next: `0`, .expected_out_next: `0`}, // incomplete first CP, and no space for it
1496
1497	{.in_size: `4`, .out_size: `1`, .expected_in_next: `2`, .expected_out_next: `1`}, // no space for second CP
1498	{.in_size: `3`, .out_size: `2`, .expected_in_next: `2`, .expected_out_next: `1`}, // incomplete second CP
1499	{.in_size: `3`, .out_size: `1`, .expected_in_next: `2`, .expected_out_next: `1`}, // incomplete second CP, and no space for it
1500
1501	{.in_size: `6`, .out_size: `2`, .expected_in_next: `4`, .expected_out_next: `2`}, // no space for third CP
1502	{.in_size: `5`, .out_size: `3`, .expected_in_next: `4`, .expected_out_next: `2`}, // incomplete third CP
1503	{.in_size: `5`, .out_size: `2`, .expected_in_next: `4`, .expected_out_next: `2`}, // incomplete third CP, and no space for it
1504
1505	{.in_size: `10`, .out_size: `3`, .expected_in_next: `6`, .expected_out_next: `3`}, // no space for fourth CP
1506	{.in_size: `7`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP
1507	{.in_size: `8`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP
1508	{.in_size: `9`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP
1509	{.in_size: `7`, .out_size: `3`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP, and no space for it
1510	{.in_size: `8`, .out_size: `3`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP, and no space for it
1511	{.in_size: `9`, .out_size: `3`, .expected_in_next: `6`, .expected_out_next: `3`}, // incomplete fourth CP, and no space for it
1512	};
1513
1514	for (test_offsets_partial* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
1515	test_offsets_partial t = *it;
1516	InternT out[array_size(exp) - `1`] = {};
1517	assert(t.in_size <= array_size(in));
1518	assert(t.out_size <= array_size(out));
1519	assert(t.expected_in_next <= t.in_size);
1520	assert(t.expected_out_next <= t.out_size);
1521	mbstate_t state = {};
1522	const char* in_next = nullptr;
1523	InternT* out_next = nullptr;
1524	codecvt_base::result res = codecvt_base::ok;
1525
1526	res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1527	assert(res == cvt.partial);
1528	assert(in_next == in + t.expected_in_next);
1529	assert(out_next == out + t.expected_out_next);
1530	assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == `0`);
1531	if (t.expected_out_next < array_size(out))
1532	assert(out[t.expected_out_next] == `0`);
1533
1534	state = mbstate_t ();
1535	int len = cvt.length(state, in, in + t.in_size, t.out_size);
1536	assert(len >= `0`);
1537	assert(static_cast<size_t>(len) == t.expected_in_next);
1538	}
1539	}
1540
1541	template <class InternT>
1542	void utf16_to_utf32_in_error(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1543	char16_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0xDBEA`, `0xDEAA`, `0`};
1544	const char32_t expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0x10AAAA`, `0`};
1545	static_assert(array_size(input) == `6`, "");
1546	static_assert(array_size(expected) == `5`, "");
1547
1548	InternT exp[array_size(expected)];
1549	copy(begin(arr: expected), end(arr: expected), begin(exp));
1550
1551	// The only possible error in UTF-16 is unpaired surrogate code units.
1552	// So we replace valid code points (scalar values) with lone surrogate CU.
1553	test_offsets_error<char16_t> offsets[] = {
1554	{.in_size: `10`, .out_size: `4`, .expected_in_next: `0`, .expected_out_next: `0`, .replace_char: `0xD800`, .replace_pos: `0`},
1555	{.in_size: `10`, .out_size: `4`, .expected_in_next: `0`, .expected_out_next: `0`, .replace_char: `0xDBFF`, .replace_pos: `0`},
1556	{.in_size: `10`, .out_size: `4`, .expected_in_next: `0`, .expected_out_next: `0`, .replace_char: `0xDC00`, .replace_pos: `0`},
1557	{.in_size: `10`, .out_size: `4`, .expected_in_next: `0`, .expected_out_next: `0`, .replace_char: `0xDFFF`, .replace_pos: `0`},
1558
1559	{.in_size: `10`, .out_size: `4`, .expected_in_next: `2`, .expected_out_next: `1`, .replace_char: `0xD800`, .replace_pos: `1`},
1560	{.in_size: `10`, .out_size: `4`, .expected_in_next: `2`, .expected_out_next: `1`, .replace_char: `0xDBFF`, .replace_pos: `1`},
1561	{.in_size: `10`, .out_size: `4`, .expected_in_next: `2`, .expected_out_next: `1`, .replace_char: `0xDC00`, .replace_pos: `1`},
1562	{.in_size: `10`, .out_size: `4`, .expected_in_next: `2`, .expected_out_next: `1`, .replace_char: `0xDFFF`, .replace_pos: `1`},
1563
1564	{.in_size: `10`, .out_size: `4`, .expected_in_next: `4`, .expected_out_next: `2`, .replace_char: `0xD800`, .replace_pos: `2`},
1565	{.in_size: `10`, .out_size: `4`, .expected_in_next: `4`, .expected_out_next: `2`, .replace_char: `0xDBFF`, .replace_pos: `2`},
1566	{.in_size: `10`, .out_size: `4`, .expected_in_next: `4`, .expected_out_next: `2`, .replace_char: `0xDC00`, .replace_pos: `2`},
1567	{.in_size: `10`, .out_size: `4`, .expected_in_next: `4`, .expected_out_next: `2`, .replace_char: `0xDFFF`, .replace_pos: `2`},
1568
1569	// make the leading surrogate a trailing one
1570	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xDC00`, .replace_pos: `3`},
1571	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xDFFF`, .replace_pos: `3`},
1572
1573	// make the trailing surrogate a leading one
1574	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xD800`, .replace_pos: `4`},
1575	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xDBFF`, .replace_pos: `4`},
1576
1577	// make the trailing surrogate a BMP char
1578	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `4`},
1579	};
1580
1581	for (test_offsets_error<char16_t>* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
1582	test_offsets_error<char16_t> t = *it;
1583	char in[array_size(input) * `2`];
1584	InternT out[array_size(exp) - `1`] = {};
1585	assert(t.in_size <= array_size(in));
1586	assert(t.out_size <= array_size(out));
1587	assert(t.expected_in_next <= t.in_size);
1588	assert(t.expected_out_next <= t.out_size);
1589	char16_t old_char = input[t.replace_pos];
1590	input[t.replace_pos] = t.replace_char; // replace in input, not in in
1591	utf16_to_bytes(f: begin(arr&: input), l: end(arr&: input), o: begin(arr&: in), e: endianess);
1592
1593	mbstate_t state = {};
1594	const char* in_next = nullptr;
1595	InternT* out_next = nullptr;
1596	codecvt_base::result res = codecvt_base::ok;
1597
1598	res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1599	assert(res == cvt.error);
1600	assert(in_next == in + t.expected_in_next);
1601	assert(out_next == out + t.expected_out_next);
1602	assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == `0`);
1603	if (t.expected_out_next < array_size(out))
1604	assert(out[t.expected_out_next] == `0`);
1605
1606	state = mbstate_t ();
1607	int len = cvt.length(state, in, in + t.in_size, t.out_size);
1608	assert(len >= `0`);
1609	assert(static_cast<size_t>(len) == t.expected_in_next);
1610
1611	input[t.replace_pos] = old_char;
1612	}
1613	}
1614
1615	template <class InternT>
1616	void utf32_to_utf16_out_ok(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1617	const char32_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0x10AAAA`, `0`};
1618	const char16_t expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0xDBEA`, `0xDEAA`, `0`};
1619	static_assert(array_size(input) == `5`, "");
1620	static_assert(array_size(expected) == `6`, "");
1621
1622	InternT in[array_size(input)];
1623	char exp[array_size(expected) * `2`];
1624	copy(begin(arr: input), end(arr: input), begin(in));
1625	utf16_to_bytes(f: begin(arr: expected), l: end(arr: expected), o: begin(arr&: exp), e: endianess);
1626
1627	test_offsets_ok offsets[] = {{.in_size: `0`, .out_size: `0`}, {.in_size: `1`, .out_size: `2`}, {.in_size: `2`, .out_size: `4`}, {.in_size: `3`, .out_size: `6`}, {.in_size: `4`, .out_size: `10`}};
1628	for (test_offsets_ok* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
1629	test_offsets_ok t = *it;
1630	char out[array_size(exp) - `2`] = {};
1631	assert(t.in_size <= array_size(in));
1632	assert(t.out_size <= array_size(out));
1633	mbstate_t state = {};
1634	const InternT* in_next = nullptr;
1635	char* out_next = nullptr;
1636	codecvt_base::result res = codecvt_base::ok;
1637
1638	res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1639	assert(res == cvt.ok);
1640	assert(in_next == in + t.in_size);
1641	assert(out_next == out + t.out_size);
1642	assert(char_traits<char>::compare(out, exp, t.out_size) == `0`);
1643	if (t.out_size < array_size(out))
1644	assert(out[t.out_size] == `0`);
1645	}
1646	}
1647
1648	template <class InternT>
1649	void utf32_to_utf16_out_partial(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1650	const char32_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0x10AAAA`, `0`};
1651	const char16_t expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0xDBEA`, `0xDEAA`, `0`};
1652	static_assert(array_size(input) == `5`, "");
1653	static_assert(array_size(expected) == `6`, "");
1654
1655	InternT in[array_size(input)];
1656	char exp[array_size(expected) * `2`];
1657	copy(begin(arr: input), end(arr: input), begin(in));
1658	utf16_to_bytes(f: begin(arr: expected), l: end(arr: expected), o: begin(arr&: exp), e: endianess);
1659
1660	test_offsets_partial offsets[] = {
1661	{.in_size: `1`, .out_size: `0`, .expected_in_next: `0`, .expected_out_next: `0`}, // no space for first CP
1662	{.in_size: `1`, .out_size: `1`, .expected_in_next: `0`, .expected_out_next: `0`}, // no space for first CP
1663
1664	{.in_size: `2`, .out_size: `2`, .expected_in_next: `1`, .expected_out_next: `2`}, // no space for second CP
1665	{.in_size: `2`, .out_size: `3`, .expected_in_next: `1`, .expected_out_next: `2`}, // no space for second CP
1666
1667	{.in_size: `3`, .out_size: `4`, .expected_in_next: `2`, .expected_out_next: `4`}, // no space for third CP
1668	{.in_size: `3`, .out_size: `5`, .expected_in_next: `2`, .expected_out_next: `4`}, // no space for third CP
1669
1670	{.in_size: `4`, .out_size: `6`, .expected_in_next: `3`, .expected_out_next: `6`}, // no space for fourth CP
1671	{.in_size: `4`, .out_size: `7`, .expected_in_next: `3`, .expected_out_next: `6`}, // no space for fourth CP
1672	{.in_size: `4`, .out_size: `8`, .expected_in_next: `3`, .expected_out_next: `6`}, // no space for fourth CP
1673	{.in_size: `4`, .out_size: `9`, .expected_in_next: `3`, .expected_out_next: `6`}, // no space for fourth CP
1674	};
1675	for (test_offsets_partial* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
1676	test_offsets_partial t = *it;
1677	char out[array_size(exp) - `2`] = {};
1678	assert(t.in_size <= array_size(in));
1679	assert(t.out_size <= array_size(out));
1680	assert(t.expected_in_next <= t.in_size);
1681	assert(t.expected_out_next <= t.out_size);
1682	mbstate_t state = {};
1683	const InternT* in_next = nullptr;
1684	char* out_next = nullptr;
1685	codecvt_base::result res = codecvt_base::ok;
1686
1687	res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1688	assert(res == cvt.partial);
1689	assert(in_next == in + t.expected_in_next);
1690	assert(out_next == out + t.expected_out_next);
1691	assert(char_traits<char>::compare(out, exp, t.expected_out_next) == `0`);
1692	if (t.expected_out_next < array_size(out))
1693	assert(out[t.expected_out_next] == `0`);
1694	}
1695	}
1696
1697	template <class InternT>
1698	void utf32_to_utf16_out_error(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1699	const char32_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0x10AAAA`, `0`};
1700	const char16_t expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0xDBEA`, `0xDEAA`, `0`};
1701	static_assert(array_size(input) == `5`, "");
1702	static_assert(array_size(expected) == `6`, "");
1703
1704	InternT in[array_size(input)];
1705	char exp[array_size(expected) * `2`];
1706	copy(begin(arr: input), end(arr: input), begin(in));
1707	utf16_to_bytes(f: begin(arr: expected), l: end(arr: expected), o: begin(arr&: exp), e: endianess);
1708
1709	test_offsets_error<InternT> offsets[] = {
1710
1711	// Surrogate CP
1712	{`4`, `10`, `0`, `0`, `0xD800`, `0`},
1713	{`4`, `10`, `1`, `2`, `0xDBFF`, `1`},
1714	{`4`, `10`, `2`, `4`, `0xDC00`, `2`},
1715	{`4`, `10`, `3`, `6`, `0xDFFF`, `3`},
1716
1717	// CP out of range
1718	{`4`, `10`, `0`, `0`, `0x00110000`, `0`},
1719	{`4`, `10`, `1`, `2`, `0x00110000`, `1`},
1720	{`4`, `10`, `2`, `4`, `0x00110000`, `2`},
1721	{`4`, `10`, `3`, `6`, `0x00110000`, `3`}};
1722
1723	for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
1724	test_offsets_error<InternT> t = *it;
1725	char out[array_size(exp) - `2`] = {};
1726	assert(t.in_size <= array_size(in));
1727	assert(t.out_size <= array_size(out));
1728	assert(t.expected_in_next <= t.in_size);
1729	assert(t.expected_out_next <= t.out_size);
1730	InternT old_char = in[t.replace_pos];
1731	in[t.replace_pos] = t.replace_char;
1732
1733	mbstate_t state = {};
1734	const InternT* in_next = nullptr;
1735	char* out_next = nullptr;
1736	codecvt_base::result res = codecvt_base::ok;
1737
1738	res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1739	assert(res == cvt.error);
1740	assert(in_next == in + t.expected_in_next);
1741	assert(out_next == out + t.expected_out_next);
1742	assert(char_traits<char>::compare(out, exp, t.expected_out_next) == `0`);
1743	if (t.expected_out_next < array_size(out))
1744	assert(out[t.expected_out_next] == `0`);
1745
1746	in[t.replace_pos] = old_char;
1747	}
1748	}
1749
1750	template <class InternT>
1751	void test_utf16_utf32_cvt(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1752	utf16_to_utf32_in_ok(cvt, endianess);
1753	utf16_to_utf32_in_partial(cvt, endianess);
1754	utf16_to_utf32_in_error(cvt, endianess);
1755	utf32_to_utf16_out_ok(cvt, endianess);
1756	utf32_to_utf16_out_partial(cvt, endianess);
1757	utf32_to_utf16_out_error(cvt, endianess);
1758	}
1759
1760	template <class InternT>
1761	void utf16_to_ucs2_in_ok(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1762	const char16_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0`};
1763	const char16_t expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0`};
1764	static_assert(array_size(input) == `4`, "");
1765	static_assert(array_size(expected) == `4`, "");
1766
1767	char in[array_size(input) * `2`];
1768	InternT exp[array_size(expected)];
1769	utf16_to_bytes(f: begin(arr: input), l: end(arr: input), o: begin(arr&: in), e: endianess);
1770	copy(begin(arr: expected), end(arr: expected), begin(exp));
1771
1772	test_offsets_ok offsets[] = {{.in_size: `0`, .out_size: `0`}, {.in_size: `2`, .out_size: `1`}, {.in_size: `4`, .out_size: `2`}, {.in_size: `6`, .out_size: `3`}};
1773	for (test_offsets_ok* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
1774	test_offsets_ok t = *it;
1775	InternT out[array_size(exp) - `1`] = {};
1776	assert(t.in_size <= array_size(in));
1777	assert(t.out_size <= array_size(out));
1778	mbstate_t state = {};
1779	const char* in_next = nullptr;
1780	InternT* out_next = nullptr;
1781	codecvt_base::result res = codecvt_base::ok;
1782
1783	res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1784	assert(res == cvt.ok);
1785	assert(in_next == in + t.in_size);
1786	assert(out_next == out + t.out_size);
1787	assert(char_traits<InternT>::compare(out, exp, t.out_size) == `0`);
1788	if (t.out_size < array_size(out))
1789	assert(out[t.out_size] == `0`);
1790
1791	state = mbstate_t ();
1792	int len = cvt.length(state, in, in + t.in_size, t.out_size);
1793	assert(len >= `0`);
1794	assert(static_cast<size_t>(len) == t.in_size);
1795	}
1796
1797	for (test_offsets_ok* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
1798	test_offsets_ok t = *it;
1799	InternT out[array_size(exp)] = {};
1800	assert(t.in_size <= array_size(in));
1801	assert(t.out_size <= array_size(out));
1802	mbstate_t state = {};
1803	const char* in_next = nullptr;
1804	InternT* out_next = nullptr;
1805	codecvt_base::result res = codecvt_base::ok;
1806
1807	res = cvt.in(state, in, in + t.in_size, in_next, out, end(out), out_next);
1808	assert(res == cvt.ok);
1809	assert(in_next == in + t.in_size);
1810	assert(out_next == out + t.out_size);
1811	assert(char_traits<InternT>::compare(out, exp, t.out_size) == `0`);
1812	if (t.out_size < array_size(out))
1813	assert(out[t.out_size] == `0`);
1814
1815	state = mbstate_t ();
1816	int len = cvt.length(state, in, in + t.in_size, array_size(out));
1817	assert(len >= `0`);
1818	assert(static_cast<size_t>(len) == t.in_size);
1819	}
1820	}
1821
1822	template <class InternT>
1823	void utf16_to_ucs2_in_partial(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1824	const char16_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0`};
1825	const char16_t expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0`};
1826	static_assert(array_size(input) == `4`, "");
1827	static_assert(array_size(expected) == `4`, "");
1828
1829	char in[array_size(input) * `2`];
1830	InternT exp[array_size(expected)];
1831	utf16_to_bytes(f: begin(arr: input), l: end(arr: input), o: begin(arr&: in), e: endianess);
1832	copy(begin(arr: expected), end(arr: expected), begin(exp));
1833
1834	test_offsets_partial offsets[] = {
1835	{.in_size: `2`, .out_size: `0`, .expected_in_next: `0`, .expected_out_next: `0`}, // no space for first CP
1836	{.in_size: `1`, .out_size: `1`, .expected_in_next: `0`, .expected_out_next: `0`}, // incomplete first CP
1837	{.in_size: `1`, .out_size: `0`, .expected_in_next: `0`, .expected_out_next: `0`}, // incomplete first CP, and no space for it
1838
1839	{.in_size: `4`, .out_size: `1`, .expected_in_next: `2`, .expected_out_next: `1`}, // no space for second CP
1840	{.in_size: `3`, .out_size: `2`, .expected_in_next: `2`, .expected_out_next: `1`}, // incomplete second CP
1841	{.in_size: `3`, .out_size: `1`, .expected_in_next: `2`, .expected_out_next: `1`}, // incomplete second CP, and no space for it
1842
1843	{.in_size: `6`, .out_size: `2`, .expected_in_next: `4`, .expected_out_next: `2`}, // no space for third CP
1844	{.in_size: `5`, .out_size: `3`, .expected_in_next: `4`, .expected_out_next: `2`}, // incomplete third CP
1845	{.in_size: `5`, .out_size: `2`, .expected_in_next: `4`, .expected_out_next: `2`}, // incomplete third CP, and no space for it
1846	};
1847
1848	for (test_offsets_partial* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
1849	test_offsets_partial t = *it;
1850	InternT out[array_size(exp) - `1`] = {};
1851	assert(t.in_size <= array_size(in));
1852	assert(t.out_size <= array_size(out));
1853	assert(t.expected_in_next <= t.in_size);
1854	assert(t.expected_out_next <= t.out_size);
1855	mbstate_t state = {};
1856	const char* in_next = nullptr;
1857	InternT* out_next = nullptr;
1858	codecvt_base::result res = codecvt_base::ok;
1859
1860	res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1861	assert(res == cvt.partial);
1862	assert(in_next == in + t.expected_in_next);
1863	assert(out_next == out + t.expected_out_next);
1864	assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == `0`);
1865	if (t.expected_out_next < array_size(out))
1866	assert(out[t.expected_out_next] == `0`);
1867
1868	state = mbstate_t ();
1869	int len = cvt.length(state, in, in + t.in_size, t.out_size);
1870	assert(len >= `0`);
1871	assert(static_cast<size_t>(len) == t.expected_in_next);
1872	}
1873	}
1874
1875	template <class InternT>
1876	void utf16_to_ucs2_in_error(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1877	char16_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0xDBEA`, `0xDEAA`, `0`};
1878	const char16_t expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0xDBEA`, `0xDEAA`, `0`};
1879	static_assert(array_size(input) == `6`, "");
1880	static_assert(array_size(expected) == `6`, "");
1881
1882	InternT exp[array_size(expected)];
1883	copy(begin(arr: expected), end(arr: expected), begin(exp));
1884
1885	// The only possible error in UTF-16 is unpaired surrogate code units.
1886	// Additionally, because the target encoding is UCS-2, a proper pair of
1887	// surrogates is also error. Simply, any surrogate CU is error.
1888	test_offsets_error<char16_t> offsets[] = {
1889	{.in_size: `6`, .out_size: `3`, .expected_in_next: `0`, .expected_out_next: `0`, .replace_char: `0xD800`, .replace_pos: `0`},
1890	{.in_size: `6`, .out_size: `3`, .expected_in_next: `0`, .expected_out_next: `0`, .replace_char: `0xDBFF`, .replace_pos: `0`},
1891	{.in_size: `6`, .out_size: `3`, .expected_in_next: `0`, .expected_out_next: `0`, .replace_char: `0xDC00`, .replace_pos: `0`},
1892	{.in_size: `6`, .out_size: `3`, .expected_in_next: `0`, .expected_out_next: `0`, .replace_char: `0xDFFF`, .replace_pos: `0`},
1893
1894	{.in_size: `6`, .out_size: `3`, .expected_in_next: `2`, .expected_out_next: `1`, .replace_char: `0xD800`, .replace_pos: `1`},
1895	{.in_size: `6`, .out_size: `3`, .expected_in_next: `2`, .expected_out_next: `1`, .replace_char: `0xDBFF`, .replace_pos: `1`},
1896	{.in_size: `6`, .out_size: `3`, .expected_in_next: `2`, .expected_out_next: `1`, .replace_char: `0xDC00`, .replace_pos: `1`},
1897	{.in_size: `6`, .out_size: `3`, .expected_in_next: `2`, .expected_out_next: `1`, .replace_char: `0xDFFF`, .replace_pos: `1`},
1898
1899	{.in_size: `6`, .out_size: `3`, .expected_in_next: `4`, .expected_out_next: `2`, .replace_char: `0xD800`, .replace_pos: `2`},
1900	{.in_size: `6`, .out_size: `3`, .expected_in_next: `4`, .expected_out_next: `2`, .replace_char: `0xDBFF`, .replace_pos: `2`},
1901	{.in_size: `6`, .out_size: `3`, .expected_in_next: `4`, .expected_out_next: `2`, .replace_char: `0xDC00`, .replace_pos: `2`},
1902	{.in_size: `6`, .out_size: `3`, .expected_in_next: `4`, .expected_out_next: `2`, .replace_char: `0xDFFF`, .replace_pos: `2`},
1903
1904	// make the leading surrogate a trailing one
1905	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xDC00`, .replace_pos: `3`},
1906	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xDFFF`, .replace_pos: `3`},
1907
1908	// make the trailing surrogate a leading one
1909	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xD800`, .replace_pos: `4`},
1910	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `0xDBFF`, .replace_pos: `4`},
1911
1912	// make the trailing surrogate a BMP char
1913	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'z'`, .replace_pos: `4`},
1914
1915	// don't replace anything in the test cases bellow, just show the surrogate
1916	// pair (fourth CP) fully or partially (just the first surrogate)
1917	{.in_size: `10`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'b'`, .replace_pos: `0`},
1918	{.in_size: `8`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'b'`, .replace_pos: `0`},
1919	{.in_size: `9`, .out_size: `5`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'b'`, .replace_pos: `0`},
1920
1921	{.in_size: `10`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'b'`, .replace_pos: `0`},
1922	{.in_size: `8`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'b'`, .replace_pos: `0`},
1923	{.in_size: `9`, .out_size: `4`, .expected_in_next: `6`, .expected_out_next: `3`, .replace_char: `'b'`, .replace_pos: `0`},
1924	};
1925
1926	for (test_offsets_error<char16_t>* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
1927	test_offsets_error<char16_t> t = *it;
1928	char in[array_size(input) * `2`];
1929	InternT out[array_size(exp) - `1`] = {};
1930	assert(t.in_size <= array_size(in));
1931	assert(t.out_size <= array_size(out));
1932	assert(t.expected_in_next <= t.in_size);
1933	assert(t.expected_out_next <= t.out_size);
1934	char16_t old_char = input[t.replace_pos];
1935	input[t.replace_pos] = t.replace_char; // replace in input, not in in
1936	utf16_to_bytes(f: begin(arr&: input), l: end(arr&: input), o: begin(arr&: in), e: endianess);
1937
1938	mbstate_t state = {};
1939	const char* in_next = nullptr;
1940	InternT* out_next = nullptr;
1941	codecvt_base::result res = codecvt_base::ok;
1942
1943	res = cvt.in(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1944	assert(res == cvt.error);
1945	assert(in_next == in + t.expected_in_next);
1946	assert(out_next == out + t.expected_out_next);
1947	assert(char_traits<InternT>::compare(out, exp, t.expected_out_next) == `0`);
1948	if (t.expected_out_next < array_size(out))
1949	assert(out[t.expected_out_next] == `0`);
1950
1951	state = mbstate_t ();
1952	int len = cvt.length(state, in, in + t.in_size, t.out_size);
1953	assert(len >= `0`);
1954	assert(static_cast<size_t>(len) == t.expected_in_next);
1955
1956	input[t.replace_pos] = old_char;
1957	}
1958	}
1959
1960	template <class InternT>
1961	void ucs2_to_utf16_out_ok(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1962	const char16_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0`};
1963	const char16_t expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0`};
1964	static_assert(array_size(input) == `4`, "");
1965	static_assert(array_size(expected) == `4`, "");
1966
1967	InternT in[array_size(input)];
1968	char exp[array_size(expected) * `2`];
1969	copy(begin(arr: input), end(arr: input), begin(in));
1970	utf16_to_bytes(f: begin(arr: expected), l: end(arr: expected), o: begin(arr&: exp), e: endianess);
1971
1972	test_offsets_ok offsets[] = {{.in_size: `0`, .out_size: `0`}, {.in_size: `1`, .out_size: `2`}, {.in_size: `2`, .out_size: `4`}, {.in_size: `3`, .out_size: `6`}};
1973	for (test_offsets_ok* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
1974	test_offsets_ok t = *it;
1975	char out[array_size(exp) - `2`] = {};
1976	assert(t.in_size <= array_size(in));
1977	assert(t.out_size <= array_size(out));
1978	mbstate_t state = {};
1979	const InternT* in_next = nullptr;
1980	char* out_next = nullptr;
1981	codecvt_base::result res = codecvt_base::ok;
1982
1983	res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
1984	assert(res == cvt.ok);
1985	assert(in_next == in + t.in_size);
1986	assert(out_next == out + t.out_size);
1987	assert(char_traits<char>::compare(out, exp, t.out_size) == `0`);
1988	if (t.out_size < array_size(out))
1989	assert(out[t.out_size] == `0`);
1990	}
1991	}
1992
1993	template <class InternT>
1994	void ucs2_to_utf16_out_partial(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
1995	const char16_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0`};
1996	const char16_t expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0`};
1997	static_assert(array_size(input) == `4`, "");
1998	static_assert(array_size(expected) == `4`, "");
1999
2000	InternT in[array_size(input)];
2001	char exp[array_size(expected) * `2`];
2002	copy(begin(arr: input), end(arr: input), begin(in));
2003	utf16_to_bytes(f: begin(arr: expected), l: end(arr: expected), o: begin(arr&: exp), e: endianess);
2004
2005	test_offsets_partial offsets[] = {
2006	{.in_size: `1`, .out_size: `0`, .expected_in_next: `0`, .expected_out_next: `0`}, // no space for first CP
2007	{.in_size: `1`, .out_size: `1`, .expected_in_next: `0`, .expected_out_next: `0`}, // no space for first CP
2008
2009	{.in_size: `2`, .out_size: `2`, .expected_in_next: `1`, .expected_out_next: `2`}, // no space for second CP
2010	{.in_size: `2`, .out_size: `3`, .expected_in_next: `1`, .expected_out_next: `2`}, // no space for second CP
2011
2012	{.in_size: `3`, .out_size: `4`, .expected_in_next: `2`, .expected_out_next: `4`}, // no space for third CP
2013	{.in_size: `3`, .out_size: `5`, .expected_in_next: `2`, .expected_out_next: `4`}, // no space for third CP
2014	};
2015	for (test_offsets_partial* it = begin(arr&: offsets); it != end(arr&: offsets); ++it) {
2016	test_offsets_partial t = *it;
2017	char out[array_size(exp) - `2`] = {};
2018	assert(t.in_size <= array_size(in));
2019	assert(t.out_size <= array_size(out));
2020	assert(t.expected_in_next <= t.in_size);
2021	assert(t.expected_out_next <= t.out_size);
2022	mbstate_t state = {};
2023	const InternT* in_next = nullptr;
2024	char* out_next = nullptr;
2025	codecvt_base::result res = codecvt_base::ok;
2026
2027	res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
2028	assert(res == cvt.partial);
2029	assert(in_next == in + t.expected_in_next);
2030	assert(out_next == out + t.expected_out_next);
2031	assert(char_traits<char>::compare(out, exp, t.expected_out_next) == `0`);
2032	if (t.expected_out_next < array_size(out))
2033	assert(out[t.expected_out_next] == `0`);
2034	}
2035	}
2036
2037	template <class InternT>
2038	void ucs2_to_utf16_out_error(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
2039	const char16_t input[] = {`'b'`, `0x0448`, `0xAAAA`, `0xDBEA`, `0xDEAA`, `0`};
2040	const char16_t expected[] = {`'b'`, `0x0448`, `0xAAAA`, `0xDBEA`, `0xDEAA`, `0`};
2041	static_assert(array_size(input) == `6`, "");
2042	static_assert(array_size(expected) == `6`, "");
2043
2044	InternT in[array_size(input)];
2045	char exp[array_size(expected) * `2`];
2046	copy(begin(arr: input), end(arr: input), begin(in));
2047	utf16_to_bytes(f: begin(arr: expected), l: end(arr: expected), o: begin(arr&: exp), e: endianess);
2048
2049	test_offsets_error<InternT> offsets[] = {
2050	{`3`, `6`, `0`, `0`, `0xD800`, `0`},
2051	{`3`, `6`, `0`, `0`, `0xDBFF`, `0`},
2052	{`3`, `6`, `0`, `0`, `0xDC00`, `0`},
2053	{`3`, `6`, `0`, `0`, `0xDFFF`, `0`},
2054
2055	{`3`, `6`, `1`, `2`, `0xD800`, `1`},
2056	{`3`, `6`, `1`, `2`, `0xDBFF`, `1`},
2057	{`3`, `6`, `1`, `2`, `0xDC00`, `1`},
2058	{`3`, `6`, `1`, `2`, `0xDFFF`, `1`},
2059
2060	{`3`, `6`, `2`, `4`, `0xD800`, `2`},
2061	{`3`, `6`, `2`, `4`, `0xDBFF`, `2`},
2062	{`3`, `6`, `2`, `4`, `0xDC00`, `2`},
2063	{`3`, `6`, `2`, `4`, `0xDFFF`, `2`},
2064
2065	// make the leading surrogate a trailing one
2066	{`5`, `10`, `3`, `6`, `0xDC00`, `3`},
2067	{`5`, `10`, `3`, `6`, `0xDFFF`, `3`},
2068
2069	// make the trailing surrogate a leading one
2070	{`5`, `10`, `3`, `6`, `0xD800`, `4`},
2071	{`5`, `10`, `3`, `6`, `0xDBFF`, `4`},
2072
2073	// make the trailing surrogate a BMP char
2074	{`5`, `10`, `3`, `6`, `'z'`, `4`},
2075
2076	// don't replace anything in the test cases bellow, just show the surrogate
2077	// pair (fourth CP) fully or partially (just the first surrogate)
2078	{`5`, `10`, `3`, `6`, `'b'`, `0`},
2079	{`5`, `8`, `3`, `6`, `'b'`, `0`},
2080	{`5`, `9`, `3`, `6`, `'b'`, `0`},
2081
2082	{`4`, `10`, `3`, `6`, `'b'`, `0`},
2083	{`4`, `8`, `3`, `6`, `'b'`, `0`},
2084	{`4`, `9`, `3`, `6`, `'b'`, `0`},
2085	};
2086
2087	for (test_offsets_error<InternT>* it = begin(offsets); it != end(offsets); ++it) {
2088	test_offsets_error<InternT> t = *it;
2089	char out[array_size(exp) - `2`] = {};
2090	assert(t.in_size <= array_size(in));
2091	assert(t.out_size <= array_size(out));
2092	assert(t.expected_in_next <= t.in_size);
2093	assert(t.expected_out_next <= t.out_size);
2094	InternT old_char = in[t.replace_pos];
2095	in[t.replace_pos] = t.replace_char;
2096
2097	mbstate_t state = {};
2098	const InternT* in_next = nullptr;
2099	char* out_next = nullptr;
2100	codecvt_base::result res = codecvt_base::ok;
2101
2102	res = cvt.out(state, in, in + t.in_size, in_next, out, out + t.out_size, out_next);
2103	assert(res == cvt.error);
2104	assert(in_next == in + t.expected_in_next);
2105	assert(out_next == out + t.expected_out_next);
2106	assert(char_traits<char>::compare(out, exp, t.expected_out_next) == `0`);
2107	if (t.expected_out_next < array_size(out))
2108	assert(out[t.expected_out_next] == `0`);
2109
2110	in[t.replace_pos] = old_char;
2111	}
2112	}
2113
2114	template <class InternT>
2115	void test_utf16_ucs2_cvt(const std::codecvt<InternT, char, mbstate_t>& cvt, utf16_endianess endianess) {
2116	utf16_to_ucs2_in_ok(cvt, endianess);
2117	utf16_to_ucs2_in_partial(cvt, endianess);
2118	utf16_to_ucs2_in_error(cvt, endianess);
2119	ucs2_to_utf16_out_ok(cvt, endianess);
2120	ucs2_to_utf16_out_partial(cvt, endianess);
2121	ucs2_to_utf16_out_error(cvt, endianess);
2122	}
2123
2124	using std::codecvt;
2125	using std::codecvt_utf16;
2126	using std::codecvt_utf8;
2127	using std::codecvt_utf8_utf16;
2128	using std::has_facet;
2129	using std::locale;
2130	using std::use_facet;
2131
2132	void test_utf8_utf32_codecvts() {
2133	typedef codecvt<char32_t, char, mbstate_t> codecvt_c32;
2134	const locale& loc_c = locale::classic();
2135	assert(has_facet<codecvt_c32>(loc_c));
2136
2137	const codecvt_c32& cvt = use_facet<codecvt_c32>(loc: loc_c);
2138	test_utf8_utf32_cvt(cvt);
2139
2140	codecvt_utf8<char32_t> cvt2;
2141	test_utf8_utf32_cvt(cvt: cvt2);
2142
2143	#if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && !defined(TEST_SHORT_WCHAR)
2144	codecvt_utf8<wchar_t> cvt3;
2145	test_utf8_utf32_cvt(cvt: cvt3);
2146	#endif
2147
2148	#ifndef TEST_HAS_NO_CHAR8_T
2149	typedef codecvt<char32_t, char8_t, mbstate_t> codecvt_c32_c8;
2150	assert(has_facet<codecvt_c32_c8>(loc_c));
2151	const codecvt_c32_c8& cvt4 = use_facet<codecvt_c32_c8>(loc: loc_c);
2152	test_utf8_utf32_cvt(cvt4);
2153	#endif
2154	}
2155
2156	void test_utf8_utf16_codecvts() {
2157	typedef codecvt<char16_t, char, mbstate_t> codecvt_c16;
2158	const locale& loc_c = locale::classic();
2159	assert(has_facet<codecvt_c16>(loc_c));
2160
2161	const codecvt_c16& cvt = use_facet<codecvt_c16>(loc: loc_c);
2162	test_utf8_utf16_cvt(cvt);
2163
2164	codecvt_utf8_utf16<char16_t> cvt2;
2165	test_utf8_utf16_cvt(cvt: cvt2);
2166
2167	codecvt_utf8_utf16<char32_t> cvt3;
2168	test_utf8_utf16_cvt(cvt: cvt3);
2169
2170	#ifndef TEST_HAS_NO_WIDE_CHARACTERS
2171	codecvt_utf8_utf16<wchar_t> cvt4;
2172	test_utf8_utf16_cvt(cvt: cvt4);
2173	#endif
2174
2175	#ifndef TEST_HAS_NO_CHAR8_T
2176	typedef codecvt<char16_t, char8_t, mbstate_t> codecvt_c16_c8;
2177	assert(has_facet<codecvt_c16_c8>(loc_c));
2178	const codecvt_c16_c8& cvt5 = use_facet<codecvt_c16_c8>(loc: loc_c);
2179	test_utf8_utf16_cvt(cvt5);
2180	#endif
2181	}
2182
2183	void test_utf8_ucs2_codecvts() {
2184	codecvt_utf8<char16_t> cvt;
2185	test_utf8_ucs2_cvt(cvt);
2186
2187	#if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && defined(TEST_SHORT_WCHAR)
2188	codecvt_utf8<wchar_t> cvt2;
2189	test_utf8_ucs2_cvt(cvt2);
2190	#endif
2191	}
2192
2193	void test_utf16_utf32_codecvts() {
2194	codecvt_utf16<char32_t> cvt;
2195	test_utf16_utf32_cvt(cvt, endianess: utf16_big_endian);
2196
2197	codecvt_utf16<char32_t, `0x10FFFF`, std::little_endian> cvt2;
2198	test_utf16_utf32_cvt(cvt: cvt2, endianess: utf16_little_endian);
2199
2200	#if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && !defined(TEST_SHORT_WCHAR)
2201	codecvt_utf16<wchar_t> cvt3;
2202	test_utf16_utf32_cvt(cvt: cvt3, endianess: utf16_big_endian);
2203
2204	codecvt_utf16<wchar_t, `0x10FFFF`, std::little_endian> cvt4;
2205	test_utf16_utf32_cvt(cvt: cvt4, endianess: utf16_little_endian);
2206	#endif
2207	}
2208
2209	void test_utf16_ucs2_codecvts() {
2210	codecvt_utf16<char16_t> cvt;
2211	test_utf16_ucs2_cvt(cvt, endianess: utf16_big_endian);
2212
2213	codecvt_utf16<char16_t, `0x10FFFF`, std::little_endian> cvt2;
2214	test_utf16_ucs2_cvt(cvt: cvt2, endianess: utf16_little_endian);
2215
2216	#if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && defined(TEST_SHORT_WCHAR)
2217	codecvt_utf16<wchar_t> cvt3;
2218	test_utf16_ucs2_cvt(cvt3, utf16_big_endian);
2219
2220	codecvt_utf16<wchar_t, `0x10FFFF`, std::little_endian> cvt4;
2221	test_utf16_ucs2_cvt(cvt4, utf16_little_endian);
2222	#endif
2223	}
2224
2225	int main() {
2226	test_utf8_utf32_codecvts();
2227	test_utf8_utf16_codecvts();
2228	test_utf8_ucs2_codecvts();
2229	test_utf16_utf32_codecvts();
2230	test_utf16_ucs2_codecvts();
2231	}
2232

Provided by KDAB

Definitions

test_offsets_ok
test_offsets_partial
test_offsets_error
utf8_to_utf32_in_ok
utf8_to_utf32_in_partial
utf8_to_utf32_in_error
utf8_to_utf32_in
utf32_to_utf8_out_ok
utf32_to_utf8_out_partial
utf32_to_utf8_out_error
utf32_to_utf8_out
test_utf8_utf32_cvt
utf8_to_utf16_in_ok
utf8_to_utf16_in_partial
utf8_to_utf16_in_error
utf8_to_utf16_in
utf16_to_utf8_out_ok
utf16_to_utf8_out_partial
utf16_to_utf8_out_error
utf16_to_utf8_out
test_utf8_utf16_cvt
utf8_to_ucs2_in_ok
utf8_to_ucs2_in_partial
utf8_to_ucs2_in_error
utf8_to_ucs2_in
ucs2_to_utf8_out_ok
ucs2_to_utf8_out_partial
ucs2_to_utf8_out_error
ucs2_to_utf8_out
test_utf8_ucs2_cvt
utf16_endianess
utf16_to_bytes
utf16_to_utf32_in_ok
utf16_to_utf32_in_partial
utf16_to_utf32_in_error
utf32_to_utf16_out_ok
utf32_to_utf16_out_partial
utf32_to_utf16_out_error
test_utf16_utf32_cvt
utf16_to_ucs2_in_ok
utf16_to_ucs2_in_partial
utf16_to_ucs2_in_error
ucs2_to_utf16_out_ok
ucs2_to_utf16_out_partial
ucs2_to_utf16_out_error
test_utf16_ucs2_cvt
test_utf8_utf32_codecvts
test_utf8_utf16_codecvts
test_utf8_ucs2_codecvts
test_utf16_utf32_codecvts
test_utf16_ucs2_codecvts

Learn to use CMake with our Intro Training

Find out more

Definitions

source code of libcxx/test/std/localization/codecvt_unicode.pass.cpp