1 | //===----------------------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | // UNSUPPORTED: c++03, c++11, c++14, c++17 |
10 | |
11 | // Some basic examples of how split_view might be used in the wild. This is a general |
12 | // collection of sample algorithms and functions that try to mock general usage of |
13 | // this view. |
14 | |
// These tests check the output `split_view` produces for a variety of inputs, including many corner cases, with no
// restrictions on which member functions can be called.
17 | |
18 | #include <algorithm> |
19 | #include <array> |
20 | #include <cassert> |
21 | #include <concepts> |
22 | #include <map> |
23 | #include <ranges> |
24 | #include <string> |
25 | #include <string_view> |
26 | #include <utility> |
27 | #include <vector> |
28 | |
29 | #include "test_macros.h" |
30 | |
31 | template <std::ranges::view View, std::ranges::range Expected> |
32 | constexpr bool is_equal(View& view, const Expected& expected) { |
33 | return std::ranges::equal(view, expected, std::ranges::equal); |
34 | } |
35 | |
36 | template <class T, class Separator, class U, std::size_t M> |
37 | constexpr bool test_function_call(T&& input, Separator&& separator, std::array<U, M> expected) { |
38 | std::ranges::split_view v(input, separator); |
39 | return is_equal(v, expected); |
40 | } |
41 | |
42 | template <class T, class Separator, class U, std::size_t M> |
43 | constexpr bool test_with_piping(T&& input, Separator&& separator, std::array<U, M> expected) { |
44 | auto expected_it = expected.begin(); |
45 | for (auto e : input | std::ranges::views::split(separator)) { |
46 | if (expected_it == expected.end()) |
47 | return false; |
48 | if (!std::ranges::equal(e, *expected_it)) |
49 | return false; |
50 | |
51 | ++expected_it; |
52 | } |
53 | |
54 | return expected_it == expected.end(); |
55 | } |
56 | |
57 | constexpr bool test_l_r_values() { |
58 | using namespace std::string_view_literals; |
59 | |
60 | // Both lvalues and rvalues can be used as input. |
61 | { |
62 | // Lvalues. |
63 | { |
64 | auto input = "abc"sv ; |
65 | auto sep = " "sv ; |
66 | [[maybe_unused]] std::ranges::split_view v(input, sep); |
67 | } |
68 | |
69 | // Const lvalues. |
70 | { |
71 | const auto input = "abc"sv ; |
72 | const auto sep = " "sv ; |
73 | [[maybe_unused]] std::ranges::split_view v(input, sep); |
74 | } |
75 | |
76 | // Rvalues. |
77 | { |
78 | auto input = "abc"sv ; |
79 | auto sep = " "sv ; |
80 | [[maybe_unused]] std::ranges::split_view v(std::move(input), std::move(sep)); |
81 | } |
82 | |
83 | // Const rvalues. |
84 | { |
85 | const auto input = "abc"sv ; |
86 | const auto sep = " "sv ; |
87 | [[maybe_unused]] std::ranges::split_view v(std::move(input), std::move(sep)); |
88 | } |
89 | } |
90 | |
91 | return true; |
92 | } |
93 | |
94 | constexpr bool test_string_literal_separator() { |
95 | using namespace std::string_view_literals; |
96 | |
97 | // Splitting works as expected when the separator is a single character literal. |
98 | { |
99 | std::ranges::split_view v("abc def"sv , ' '); |
100 | assert(is_equal(v, std::array{"abc"sv , "def"sv })); |
101 | } |
102 | |
103 | // Counterintuitively, a seemingly equivalent separator expressed as a string literal doesn't match anything. This is |
104 | // because of the implicit terminating null in the literal. |
105 | { |
106 | std::ranges::split_view v("abc def"sv , " " ); |
107 | assert(is_equal(v, std::array{"abc def"sv })); |
108 | } |
109 | |
110 | // To illustrate the previous point further, the separator is actually a two-character string literal: `{' ', '\0'}`. |
111 | // Should the input string contain that two-character sequence, the separator would match. |
112 | { |
113 | std::ranges::split_view v("abc \0def"sv , " " ); |
114 | assert(is_equal(v, std::array{"abc"sv , "def"sv })); |
115 | } |
116 | |
117 | return true; |
118 | } |
119 | |
// Converts a string-like argument into a `std::string_view`. Useful for checking that a string literal and the
// equivalent `string_view` produce the same results (which isn't always the case for the raw literal, because the
// literal's array type includes the terminating null while the `string_view` built from it stops before the null).
template <class T>
constexpr std::string_view sv(T&& str) {
  return std::string_view(str);
}
126 | |
// Runs one split scenario through both construction styles -- the direct `split_view` constructor and the
// `views::split` pipe syntax -- and asserts that each produces `expected`.
template <class T, class Separator, class U, std::size_t M>
constexpr void test_one(T&& input, Separator&& separator, std::array<U, M> expected) {
  [[maybe_unused]] const bool constructor_ok = test_function_call(input, separator, expected);
  assert(constructor_ok);

  [[maybe_unused]] const bool piping_ok = test_with_piping(input, separator, expected);
  assert(piping_ok);
}
132 | |
133 | constexpr bool test_string_literals() { |
134 | // These tests show characteristic examples of how using string literals with `split_view` produces unexpected |
135 | // results due to the implicit terminating null that is treated as part of the range. |
136 | |
137 | using namespace std::string_view_literals; |
138 | |
139 | char short_sep = ' '; |
140 | auto long_sep = "12"sv ; |
141 | |
142 | // When splitting a string literal, only the last segment will be null-terminated (getting the terminating null from |
143 | // the original range). |
144 | { |
145 | std::array expected = {"abc"sv , std::string_view("def" , sizeof("def" ))}; |
146 | |
147 | assert(test_function_call("abc def" , short_sep, expected)); |
148 | assert(test_with_piping("abc def" , short_sep, expected)); |
149 | assert(test_function_call("abc12def" , long_sep, expected)); |
150 | assert(test_with_piping("abc12def" , long_sep, expected)); |
151 | } |
152 | |
153 | // Empty string. |
154 | { |
155 | // Because an empty string literal contains an implicit terminating null, the output will contain one segment. |
156 | std::array expected = {std::string_view("" , 1)}; |
157 | |
158 | assert(test_function_call("" , short_sep, expected)); |
159 | assert(test_with_piping("" , short_sep, expected)); |
160 | assert(test_function_call("" , long_sep, expected)); |
161 | assert(test_with_piping("" , long_sep, expected)); |
162 | } |
163 | |
164 | // Terminating null in the separator -- the character literal `' '` and the seemingly equivalent string literal `" "` |
165 | // are treated differently due to the presence of an implicit `\0` in the latter. |
166 | { |
167 | const char input[] = "abc def" ; |
168 | std::array expected_unsplit = {std::string_view(input, sizeof(input))}; |
169 | std::array expected_split = {"abc"sv , std::string_view("def" , sizeof("def" ))}; |
170 | |
171 | assert(test_function_call(input, " " , expected_unsplit)); |
172 | assert(test_function_call("abc \0def" , " " , expected_split)); |
173 | // Note: string literals don't work with piping because arrays decay to pointers, and pointers don't model `range`. |
174 | } |
175 | |
176 | // Empty separator. |
177 | { |
178 | auto empty_sep = ""sv ; |
179 | std::array expected = {"a"sv , "b"sv , "c"sv , "\0"sv }; |
180 | |
181 | assert(test_function_call("abc" , empty_sep, expected)); |
182 | assert(test_with_piping("abc" , empty_sep, expected)); |
183 | } |
184 | |
185 | return true; |
186 | } |
187 | |
188 | bool test_nontrivial_characters() { |
189 | // Try a deliberately heavyweight "character" type to see if it triggers any corner cases. |
190 | |
191 | using Map = std::map<std::string, int>; |
192 | using Vec = std::vector<Map>; |
193 | |
194 | Map sep = {{"yyy" , 999}}; |
195 | Map m1 = { |
196 | {"a" , 1}, |
197 | {"bc" , 2}, |
198 | }; |
199 | Map m2 = { |
200 | {"def" , 3}, |
201 | }; |
202 | Map m3 = { |
203 | {"g" , 4}, |
204 | {"hijk" , 5}, |
205 | }; |
206 | |
207 | Vec expected1 = {m1, m2}; |
208 | Vec expected2 = {m3}; |
209 | |
210 | std::ranges::split_view v(Vec{m1, m2, sep, m3}, sep); |
211 | |
212 | // Segment 1: {m1, m2} |
213 | auto outer = v.begin(); |
214 | assert(outer != v.end()); |
215 | auto inner = (*outer).begin(); |
216 | assert(*inner++ == m1); |
217 | assert(*inner++ == m2); |
218 | assert(inner == (*outer).end()); |
219 | |
220 | // Segment 2: {m3} |
221 | ++outer; |
222 | assert(outer != v.end()); |
223 | inner = (*outer).begin(); |
224 | assert(*inner++ == m3); |
225 | assert(inner == (*outer).end()); |
226 | |
227 | ++outer; |
228 | assert(outer == v.end()); |
229 | |
230 | return true; |
231 | } |
232 | |
233 | constexpr bool main_test() { |
234 | using namespace std::string_view_literals; |
235 | |
236 | char short_sep = ' '; |
237 | auto long_sep = "12"sv ; |
238 | |
239 | // One separator. |
240 | { |
241 | std::array expected = {"abc"sv , "def"sv }; |
242 | test_one(input: "abc def"sv , separator&: short_sep, expected); |
243 | test_one(input: "abc12def"sv , separator&: long_sep, expected); |
244 | } |
245 | |
246 | // Several separators in a row. |
247 | { |
248 | std::array expected = {"abc"sv , ""sv , ""sv , ""sv , "def"sv }; |
249 | test_one(input: "abc def"sv , separator&: short_sep, expected); |
250 | test_one(input: "abc12121212def"sv , separator&: long_sep, expected); |
251 | } |
252 | |
253 | // Trailing separator. |
254 | { |
255 | std::array expected = {"abc"sv , "def"sv , ""sv }; |
256 | test_one(input: "abc def "sv , separator&: short_sep, expected); |
257 | test_one(input: "abc12def12"sv , separator&: long_sep, expected); |
258 | } |
259 | |
260 | // Leading separator. |
261 | { |
262 | std::array expected = {""sv , "abc"sv , "def"sv }; |
263 | test_one(input: " abc def"sv , separator&: short_sep, expected); |
264 | test_one(input: "12abc12def"sv , separator&: long_sep, expected); |
265 | } |
266 | |
267 | // No separator. |
268 | { |
269 | std::array expected = {"abc"sv }; |
270 | test_one(input: "abc"sv , separator&: short_sep, expected); |
271 | test_one(input: "abc"sv , separator&: long_sep, expected); |
272 | } |
273 | |
274 | // Input consisting of a single separator. |
275 | { |
276 | std::array expected = {""sv , ""sv }; |
277 | test_one(input: " "sv , separator&: short_sep, expected); |
278 | test_one(input: "12"sv , separator&: long_sep, expected); |
279 | } |
280 | |
281 | // Input consisting of only separators. |
282 | { |
283 | std::array expected = {""sv , ""sv , ""sv , ""sv }; |
284 | test_one(input: " "sv , separator&: short_sep, expected); |
285 | test_one(input: "121212"sv , separator&: long_sep, expected); |
286 | } |
287 | |
288 | // The separator and the string use the same character only. |
289 | { |
290 | auto overlapping_sep = "aaa"sv ; |
291 | std::array expected = {""sv , "aa"sv }; |
292 | test_one(input: "aaaaa"sv , separator&: overlapping_sep, expected); |
293 | } |
294 | |
295 | // Many redundant separators. |
296 | { |
297 | std::array expected = {""sv , ""sv , "abc"sv , ""sv , ""sv , "def"sv , ""sv , ""sv }; |
298 | test_one(input: " abc def "sv , separator&: short_sep, expected); |
299 | test_one(input: "1212abc121212def1212"sv , separator&: long_sep, expected); |
300 | } |
301 | |
302 | // Separators after every character. |
303 | { |
304 | std::array expected = {""sv , "a"sv , "b"sv , "c"sv , ""sv }; |
305 | test_one(input: " a b c "sv , separator&: short_sep, expected); |
306 | test_one(input: "12a12b12c12"sv , separator&: long_sep, expected); |
307 | } |
308 | |
309 | // Overlap between the separator and the string (see https://wg21.link/lwg3505). |
310 | { |
311 | auto overlapping_sep = "ab"sv ; |
312 | std::array expected = {"a"sv , "aa"sv , ""sv , "b"sv }; |
313 | test_one(input: "aabaaababb"sv , separator&: overlapping_sep, expected); |
314 | } |
315 | |
316 | // Empty input. |
317 | { |
318 | std::array<std::string_view, 0> expected = {}; |
319 | test_one(input: ""sv , separator&: short_sep, expected); |
320 | test_one(input: ""sv , separator&: long_sep, expected); |
321 | } |
322 | |
323 | // Empty separator. |
324 | { |
325 | auto empty_sep = ""sv ; |
326 | std::array expected = {"a"sv , "b"sv , "c"sv }; |
327 | test_one(input: "abc"sv , separator&: empty_sep, expected); |
328 | test_one(input: "abc"sv , separator&: empty_sep, expected); |
329 | } |
330 | |
331 | // Terminating null as a separator. |
332 | { |
333 | std::array expected = {"abc"sv , "def"sv }; |
334 | test_one(input: "abc\0def"sv , separator: '\0', expected); |
335 | test_one(input: "abc\0\0def"sv , separator: "\0\0"sv , expected); |
336 | } |
337 | |
338 | // Different character types. |
339 | { |
340 | // `char`. |
341 | test_function_call("abc def" , ' ', std::array{"abc"sv , "def"sv }); |
342 | #ifndef TEST_HAS_NO_WIDE_CHARACTERS |
343 | // `wchar_t`. |
344 | test_function_call(L"abc def" , L' ', std::array{L"abc"sv , L"def"sv }); |
345 | #endif |
346 | // `char8_t`. |
347 | test_function_call(u8"abc def" , u8' ', std::array{u8"abc"sv , u8"def"sv }); |
348 | // `char16_t`. |
349 | test_function_call(u"abc def" , u' ', std::array{u"abc"sv , u"def"sv }); |
350 | // `char32_t`. |
351 | test_function_call(U"abc def" , U' ', std::array{U"abc"sv , U"def"sv }); |
352 | } |
353 | |
354 | // Non-character input. |
355 | { |
356 | std::array expected = {std::array{1, 2, 3}, std::array{4, 5, 6}}; |
357 | test_one(input: std::array{1, 2, 3, 0, 4, 5, 6}, separator: 0, expected); |
358 | test_one(input: std::array{1, 2, 3, 0, 0, 0, 4, 5, 6}, separator: std::array{0, 0, 0}, expected); |
359 | } |
360 | |
361 | return true; |
362 | } |
363 | |
364 | constexpr bool example_test() { |
365 | // example code in the spec |
366 | std::string str{"the quick brown fox" }; |
367 | std::vector<std::string_view> result; |
368 | for (auto r : std::views::split(str, ' ')) { |
369 | result.emplace_back(r.begin(), r.end()); |
370 | } |
371 | using namespace std::string_view_literals; |
372 | auto expected = {"the"sv , "quick"sv , "brown"sv , "fox"sv }; |
373 | assert(std::ranges::equal(result, expected)); |
374 | |
375 | return true; |
376 | } |
377 | |
378 | int main(int, char**) { |
379 | example_test(); |
380 | static_assert(example_test()); |
381 | |
382 | test_string_literals(); |
383 | static_assert(test_string_literals()); |
384 | |
385 | test_l_r_values(); |
386 | static_assert(test_l_r_values()); |
387 | |
388 | test_string_literal_separator(); |
389 | static_assert(test_string_literal_separator()); |
390 | |
391 | // Note: map is not `constexpr`, so this test is runtime-only. |
392 | test_nontrivial_characters(); |
393 | |
394 | return 0; |
395 | } |
396 | |