path.charconv.pass.cpp source code [libcxx/test/std/input.output/filesystems/class.path/path.member/path.charconv.pass.cpp]

1	//===----------------------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	// UNSUPPORTED: no-localization
10	// UNSUPPORTED: c++03, c++11, c++14
11	// UNSUPPORTED: availability-filesystem-missing
12	// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
13
14	// <filesystem>
15
16	// class path
17
18	// Test constructors, accessors and modifiers that convert from/to various
19	// character encodings. Constructors and modifiers (append, concat,
20	// operator/=, operator+=) accept inputs with various character encodings,
21	// and accessors (string(), string<>(), u8string()) export the string with
22	// various encodings.
23	//
24	// Some encodings are standardized; char16_t, char32_t and the u8string
25	// accessor and u8path constructor (and normal functions taking char8_t in
26	// C++20) convert from/to UTF-16, UTF-32 and UTF-8. wchar_t can be either
27	// UTF-16 or UTF-32 depending on the size of the wchar_t type, or can be
28	// left unimplemented.
29	//
30	// Plain char is implicitly UTF-8 on POSIX systems. On Windows, plain char
31	// is supposed to be in the same encoding that the platform's native file
32	// system APIs consume in the functions that take narrow strings as path
33	// names.
34
35	#include <filesystem>
36	#include <type_traits>
37	#include <cassert>
38
39	#include "test_macros.h"
40
41	#ifdef _WIN32
42	# include <windows.h> // SetFileApisToANSI & friends
43	#endif
44	namespace fs = std::filesystem;
45
46	// Test conversion with strings that fit within the latin1 charset, that fit
47	// within one code point in UTF-16, and that can be expressible in certain
48	// one-byte code pages.
49	static void test_latin_unicode()
50	{
51	const char16_t u16str[] = { `0xe5`, `0xe4`, `0xf6`, `0x00` };
52	const char32_t u32str[] = { `0xe5`, `0xe4`, `0xf6`, `0x00` };
53	const char str[] = { char(`0xc3`), char(`0xa5`), char(`0xc3`), char(`0xa4`), char(`0xc3`), char(`0xb6`), `0x00` }; // UTF8, in a regular char string
54	#if TEST_STD_VER > 17 && defined(__cpp_lib_char8_t)
55	const char8_t u8str[] = { `0xc3`, `0xa5`, `0xc3`, `0xa4`, `0xc3`, `0xb6`, `0x00` };
56	#else
57	const char u8str[] = { char(`0xc3`), char(`0xa5`), char(`0xc3`), char(`0xa4`), char(`0xc3`), char(`0xb6`), `0x00` };
58	#endif
59	#ifndef TEST_HAS_NO_WIDE_CHARACTERS
60	const wchar_t wstr[] = { `0xe5`, `0xe4`, `0xf6`, `0x00` };
61	#endif
62
63	// Test well-defined conversion between UTF-8, UTF-16 and UTF-32
64	{
65	const fs::path p(u16str);
66	assert(p.u8string() == u8str);
67	assert(p.u16string() == u16str);
68	assert(p.u32string() == u32str);
69	assert(p.string<char16_t>() == u16str);
70	assert(p.string<char32_t>() == u32str);
71	}
72	{
73	const fs::path p(u32str);
74	assert(p.u8string() == u8str);
75	assert(p.u16string() == u16str);
76	assert(p.u32string() == u32str);
77	assert(p.string<char16_t>() == u16str);
78	assert(p.string<char32_t>() == u32str);
79	}
80	{
81	const fs::path p = fs::u8path(str);
82	assert(p.u8string() == u8str);
83	assert(p.u16string() == u16str);
84	assert(p.u32string() == u32str);
85	assert(p.string<char16_t>() == u16str);
86	assert(p.string<char32_t>() == u32str);
87	}
88	#if TEST_STD_VER > 17 && defined(__cpp_lib_char8_t)
89	{
90	// In C++20, the path constructor can unambiguously handle UTF-8 input,
91	// even if the plain char constructor would treat it as something else.
92	const fs::path p(u8str);
93	assert(p.u8string() == u8str);
94	assert(p.u16string() == u16str);
95	assert(p.u32string() == u32str);
96	assert(p.string<char8_t>() == u8str);
97	assert(p.string<char16_t>() == u16str);
98	assert(p.string<char32_t>() == u32str);
99	}
100	// Check reading various inputs with string<char8_t>()
101	{
102	const fs::path p(u16str);
103	assert(p.string<char8_t>() == u8str);
104	}
105	{
106	const fs::path p(u32str);
107	assert(p.string<char8_t>() == u8str);
108	}
109	{
110	const fs::path p = fs::u8path(str);
111	assert(p.string<char8_t>() == u8str);
112	}
113	#endif
114	#ifndef TEST_HAS_NO_WIDE_CHARACTERS
115	// Test conversion to/from wchar_t.
116	{
117	const fs::path p(u16str);
118	assert(p.wstring() == wstr);
119	assert(p.string<wchar_t>() == wstr);
120	}
121	{
122	const fs::path p = fs::u8path(str);
123	assert(p.wstring() == wstr);
124	assert(p.string<wchar_t>() == wstr);
125	}
126	{
127	const fs::path p(wstr);
128	assert(p.wstring() == wstr);
129	assert(p.u8string() == u8str);
130	assert(p.u16string() == u16str);
131	assert(p.u32string() == u32str);
132	assert(p.string<wchar_t>() == wstr);
133	}
134	#endif // TEST_HAS_NO_WIDE_CHARACTERS
135	#ifndef _WIN32
136	// Test conversion to/from regular char-based string. On POSIX, this
137	// is implied to convert to/from UTF-8.
138	{
139	const fs::path p(str);
140	assert(p.string() == str);
141	assert(p.u16string() == u16str);
142	assert(p.string<char>() == str);
143	}
144	{
145	const fs::path p(u16str);
146	assert(p.string() == str);
147	assert(p.string<char>() == str);
148	}
149	#else
150	// On windows, the narrow char-based input/output is supposed to be
151	// in the charset that narrow file IO APIs use. This can either be the
152	// current active code page (ACP) or the OEM code page, exposed by
153	// the AreFileApisANSI() function, and settable with SetFileApisToANSI() and
154	// SetFileApisToOEM(). We can't set which codepage is active within
155	// the process, but for some specific known ones, we can check if they
156	// behave as expected.
157	SetFileApisToANSI();
158	if (GetACP() == `1252`) {
159	const char latin1[] = { char(`0xe5`), char(`0xe4`), char(`0xf6`), `0x00` };
160	{
161	const fs::path p(wstr);
162	assert(p.string() == latin1);
163	assert(p.string<char>() == latin1);
164	}
165	{
166	const fs::path p(latin1);
167	assert(p.string() == latin1);
168	assert(p.wstring() == wstr);
169	assert(p.u8string() == u8str);
170	assert(p.u16string() == u16str);
171	assert(p.string<char>() == latin1);
172	assert(p.string<wchar_t>() == wstr);
173	}
174	}
175	SetFileApisToOEM();
176	if (GetOEMCP() == `850` \|\| GetOEMCP() == `437`) {
177	// These chars are identical in both CP 850 and 437
178	const char cp850[] = { char(`0x86`), char(`0x84`), char(`0x94`), `0x00` };
179	{
180	const fs::path p(wstr);
181	assert(p.string() == cp850);
182	assert(p.string<char>() == cp850);
183	}
184	{
185	const fs::path p(cp850);
186	assert(p.string() == cp850);
187	assert(p.wstring() == wstr);
188	assert(p.u8string() == u8str);
189	assert(p.u16string() == u16str);
190	assert(p.string<char>() == cp850);
191	assert(p.string<wchar_t>() == wstr);
192	}
193	}
194	#endif
195	}
196
197	// Test conversion with strings that don't fit within one UTF-16 code point.
198	// Here, wchar_t can be either UTF-16 or UTF-32 depending on the size on the
199	// particular platform.
200	static void test_wide_unicode()
201	{
202	const char16_t u16str[] = { `0xd801`, `0xdc37`, `0x00` };
203	const char32_t u32str[] = { `0x10437`, `0x00` };
204	#if TEST_STD_VER > 17 && defined(__cpp_lib_char8_t)
205	const char8_t u8str[] = { `0xf0`, `0x90`, `0x90`, `0xb7`, `0x00` };
206	#else
207	const char u8str[] = { char(`0xf0`), char(`0x90`), char(`0x90`), char(`0xb7`), `0x00` };
208	#endif
209	const char str[] = { char(`0xf0`), char(`0x90`), char(`0x90`), char(`0xb7`), `0x00` };
210	{
211	const fs::path p = fs::u8path(source: str);
212	assert(p.u8string() == u8str);
213	assert(p.u16string() == u16str);
214	assert(p.u32string() == u32str);
215	}
216	{
217	const fs::path p(u16str);
218	assert(p.u8string() == u8str);
219	assert(p.u16string() == u16str);
220	assert(p.u32string() == u32str);
221	}
222	{
223	const fs::path p(u32str);
224	assert(p.u8string() == u8str);
225	assert(p.u16string() == u16str);
226	assert(p.u32string() == u32str);
227	}
228	#if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && defined(__SIZEOF_WCHAR_T__)
229	# if __SIZEOF_WCHAR_T__ == 2
230	const wchar_t wstr[] = { `0xd801`, `0xdc37`, `0x00` };
231	# else
232	const wchar_t wstr[] = { `0x10437`, `0x00` };
233	# endif
234	// Test conversion to/from wchar_t.
235	{
236	const fs::path p = fs::u8path(source: str);
237	assert(p.wstring() == wstr);
238	}
239	{
240	const fs::path p(u16str);
241	assert(p.wstring() == wstr);
242	}
243	{
244	const fs::path p(u32str);
245	assert(p.wstring() == wstr);
246	}
247	{
248	const fs::path p(wstr);
249	assert(p.u8string() == u8str);
250	assert(p.u16string() == u16str);
251	assert(p.u32string() == u32str);
252	assert(p.wstring() == wstr);
253	}
254	#endif // !defined(TEST_HAS_NO_WIDE_CHARACTERS) && defined(__SIZEOF_WCHAR_T__)
255	}
256
257	// Test appending paths in different encodings.
258	static void test_append()
259	{
260	const char16_t u16str[] = { `0xd801`, `0xdc37`, `0x00` };
261	const char32_t u32str[] = { `0x10437`, `0x00` };
262	const char32_t u32ref[] = { `0x10437`, fs::path::preferred_separator, `0x10437`, fs::path::preferred_separator, `0x10437`, `0x00` };
263	const char str[] = { char(`0xf0`), char(`0x90`), char(`0x90`), char(`0xb7`), `0x00` };
264	{
265	fs::path p = fs::u8path(source: str) / u16str / u32str;
266	assert(p.u32string() == u32ref);
267	p = fs::u8path(source: str).append(source: u16str).append(source: u32str);
268	assert(p.u32string() == u32ref);
269	p = fs::u8path(source: str);
270	p /= u16str;
271	p /= u32str;
272	assert(p.u32string() == u32ref);
273	}
274	#if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && defined(__SIZEOF_WCHAR_T__)
275	# if __SIZEOF_WCHAR_T__ == 2
276	const wchar_t wstr[] = { `0xd801`, `0xdc37`, `0x00` };
277	# else
278	const wchar_t wstr[] = { `0x10437`, `0x00` };
279	# endif
280	// Test conversion from wchar_t.
281	{
282	fs::path p = fs::path (u16str) / wstr / u32str;
283	assert(p.u32string() == u32ref);
284	p = fs::path (u16str).append(source: wstr).append(source: u32str);
285	assert(p.u32string() == u32ref);
286	p = fs::path (u16str);
287	p /= wstr;
288	p /= u32str;
289	assert(p.u32string() == u32ref);
290	}
291	#endif // !defined(TEST_HAS_NO_WIDE_CHARACTERS) && defined(__SIZEOF_WCHAR_T__)
292	}
293
294	static void test_concat()
295	{
296	const char16_t u16str[] = { `0xd801`, `0xdc37`, `0x00` };
297	const char32_t u32str[] = { `0x10437`, `0x00` };
298	const char32_t u32ref[] = { `0x10437`, `0x10437`, `0x10437`, `0x00` };
299	const char str[] = { char(`0xf0`), char(`0x90`), char(`0x90`), char(`0xb7`), `0x00` };
300	{
301	fs::path p = fs::u8path(source: str);
302	p += u16str;
303	p += u32str;
304	assert(p.u32string() == u32ref);
305	p = fs::u8path(source: str).concat(x: u16str).concat(x: u32str);
306	assert(p.u32string() == u32ref);
307	}
308	#if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && defined(__SIZEOF_WCHAR_T__)
309	# if __SIZEOF_WCHAR_T__ == 2
310	const wchar_t wstr[] = { `0xd801`, `0xdc37`, `0x00` };
311	# else
312	const wchar_t wstr[] = { `0x10437`, `0x00` };
313	# endif
314	// Test conversion from wchar_t.
315	{
316	fs::path p = fs::path (u16str);
317	p += wstr;
318	p += u32str;
319	assert(p.u32string() == u32ref);
320	p = fs::path (u16str).concat(x: wstr).concat(x: u32str);
321	assert(p.u32string() == u32ref);
322	}
323	#endif // !defined(TEST_HAS_NO_WIDE_CHARACTERS) && defined(__SIZEOF_WCHAR_T__)
324	}
325
// Test appending and concatenating narrow (char / char8_t) strings onto a
// path that was constructed from UTF-16.
//
// The code point used is U+00E5. Each variant of append (operator/, append(),
// operator/=) and concat (concat(), operator+=) is checked against a UTF-32
// reference, with and without fs::path::preferred_separator respectively.
static void test_append_concat_narrow()
{
  const char16_t u16str[] = { 0xe5, 0x00 };
  const char32_t u32ref_append[] = { 0xe5, fs::path::preferred_separator, 0xe5, 0x00 };
  const char32_t u32ref_concat[] = { 0xe5, 0xe5, 0x00 };

#if TEST_STD_VER > 17 && defined(__cpp_lib_char8_t)
  {
    const char8_t u8str[] = { 0xc3, 0xa5, 0x00 };
    // In C++20, appends of a char8_t string is unambiguously treated as
    // UTF-8.
    fs::path p = fs::path(u16str) / u8str;
    assert(p.u32string() == u32ref_append);
    p = fs::path(u16str).append(u8str);
    assert(p.u32string() == u32ref_append);
    p = fs::path(u16str);
    p /= u8str;
    assert(p.u32string() == u32ref_append);
    p = fs::path(u16str).concat(u8str);
    assert(p.u32string() == u32ref_concat);
    p = fs::path(u16str);
    p += u8str;
    assert(p.u32string() == u32ref_concat);
  }
#endif
#ifndef _WIN32
  // Test appending a regular char-based string. On POSIX, this
  // is implied to convert to/from UTF-8.
  {
    const char str[] = { char(0xc3), char(0xa5), 0x00 }; // UTF8, in a regular char string
    fs::path p = fs::path(u16str) / str;
    assert(p.u32string() == u32ref_append);
    p = fs::path(u16str).append(str);
    assert(p.u32string() == u32ref_append);
    p = fs::path(u16str);
    p /= str;
    assert(p.u32string() == u32ref_append);
    p = fs::path(u16str).concat(str);
    assert(p.u32string() == u32ref_concat);
    p = fs::path(u16str);
    p += str;
    assert(p.u32string() == u32ref_concat);
  }
#else
  // On Windows the narrow input is checked only when the relevant code page
  // is one of the known ones tested for below.
  SetFileApisToANSI();
  if (GetACP() == 1252) {
    const char latin1[] = { char(0xe5), 0x00 };
    fs::path p = fs::path(u16str) / latin1;
    assert(p.u32string() == u32ref_append);
    p = fs::path(u16str).append(latin1);
    assert(p.u32string() == u32ref_append);
    p = fs::path(u16str);
    p /= latin1;
    assert(p.u32string() == u32ref_append);
    p = fs::path(u16str).concat(latin1);
    assert(p.u32string() == u32ref_concat);
    p = fs::path(u16str);
    p += latin1;
    assert(p.u32string() == u32ref_concat);
  }
  SetFileApisToOEM();
  if (GetOEMCP() == 850 || GetOEMCP() == 437) {
    // This char is identical in both CP 850 and 437
    const char cp850[] = { char(0x86), 0x00 };
    fs::path p = fs::path(u16str) / cp850;
    assert(p.u32string() == u32ref_append);
    p = fs::path(u16str).append(cp850);
    assert(p.u32string() == u32ref_append);
    p = fs::path(u16str);
    p /= cp850;
    assert(p.u32string() == u32ref_append);
    p = fs::path(u16str).concat(cp850);
    assert(p.u32string() == u32ref_concat);
    p = fs::path(u16str);
    p += cp850;
    assert(p.u32string() == u32ref_concat);
  }
#endif
}
405
406	int main(int, char**)
407	{
408	test_latin_unicode();
409	test_wide_unicode();
410	test_append();
411	test_concat();
412	test_append_concat_narrow();
413
414	return `0`;
415	}
416

source code of libcxx/test/std/input.output/filesystems/class.path/path.member/path.charconv.pass.cpp