1//
2// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3// Copyright (c) 2021-2023 Alexander Grund
4//
5// Distributed under the Boost Software License, Version 1.0.
6// https://www.boost.org/LICENSE_1_0.txt
7
8#include <boost/locale.hpp>
9#include <boost/locale/conversion.hpp>
10#include "../src/boost/locale/win32/lcid.hpp"
11#include "boostLocale/test/tools.hpp"
12#include "boostLocale/test/unit_test.hpp"
13#include <boost/assert.hpp>
14#include <boost/core/ignore_unused.hpp>
15#include <algorithm>
16#include <iomanip>
17#include <locale>
18#include <sstream>
19#include <string>
20#include <vector>
21#ifdef BOOST_LOCALE_WITH_ICU
22# include <unicode/uversion.h>
23# define BOOST_LOCALE_ICU_VERSION (U_ICU_VERSION_MAJOR_NUM * 100 + U_ICU_VERSION_MINOR_NUM)
24#else
25# define BOOST_LOCALE_ICU_VERSION 0
26#endif
27
28namespace boost { namespace locale { namespace test {
29 template<class Facet>
30 BOOST_NOINLINE bool is_facet(const std::locale::facet* facet)
31 {
32 return dynamic_cast<const Facet*>(facet) != nullptr;
33 }
34
35 template<class Facet>
36 bool has_facet(const std::locale& l)
37 {
38 return std::has_facet<Facet>(l) && is_facet<Facet>(&std::use_facet<Facet>(l));
39 }
40
41 template<class Facet>
42 bool has_not_facet(const std::locale& l)
43 {
44 const Facet* f;
45 try {
46 f = &std::use_facet<Facet>(l);
47 } catch(const std::bad_cast&) {
48 return !std::has_facet<Facet>(l);
49 }
50 // This mustn't be reached, checks for debugging
51 TEST(is_facet<Facet>(f)); // LCOV_EXCL_LINE
52 TEST(!std::has_facet<Facet>(l)); // LCOV_EXCL_LINE
53 return false; // LCOV_EXCL_LINE
54 }
55}}} // namespace boost::locale::test
56namespace blt = boost::locale::test;
57
58bool has_message(const std::locale& l)
59{
60 return blt::has_facet<boost::locale::message_format<char>>(l);
61}
62
// Minimal custom facet used to detect whether a generated locale
// still contains the facets of the locale it was based on.
struct test_facet : std::locale::facet {
    // Unique identifier required for every facet type
    static std::locale::id id;

    // A reference count argument of 0 is passed on to the base facet
    test_facet() : std::locale::facet(0) {}
};

std::locale::id test_facet::id;
69
// Single-parameter alias for the std::codecvt facet converting between `CharT` and `char`,
// so it can be passed to macros/templates expecting a `facet<CharType>` template.
template<class CharT>
using codecvt_by_char_type = std::codecvt<CharT, char, std::mbstate_t>;
72
73namespace bl = boost::locale;
74
75bool hasLocaleForBackend(const std::string& locale_name, const std::string& backendName)
76{
77 if(backendName == "winapi")
78 return has_win_locale(locale_name);
79 else if(backendName == "std")
80 return has_std_locale(name: locale_name.c_str());
81 else if(backendName == "posix")
82 return has_posix_locale(name: locale_name);
83 else {
84 BOOST_ASSERT(backendName == "icu");
85 return BOOST_LOCALE_ICU_VERSION >= 5901; // First version to use (correct) CLDR data
86 }
87}
88
89void test_special_locales()
90{
91 bl::localization_backend_manager backend = bl::localization_backend_manager::global();
92 for(const std::string& backendName : backend.get_all_backends()) {
93 std::cout << "Backend: " << backendName << std::endl;
94 backend.select(backend_name: backendName);
95 bl::localization_backend_manager::global(backend);
96
97 {
98 const auto utf8LocaleName = bl::util::get_system_locale(use_utf8_on_windows: true);
99 // The WinAPI backend only supports UTF-8 encoding and hence always returns the UTF-8 locale
100 const auto ansiLocaleName = (backendName == "winapi") ? utf8LocaleName : bl::util::get_system_locale(use_utf8_on_windows: false);
101 bl::generator g;
102 g.use_ansi_encoding(enc: true);
103 std::locale l = g("");
104 TEST_EQ(std::use_facet<bl::info>(l).name(), ansiLocaleName);
105 g.use_ansi_encoding(enc: false);
106 l = g("");
107 TEST_EQ(std::use_facet<bl::info>(l).name(), utf8LocaleName);
108 g.use_ansi_encoding(enc: true);
109 l = g("");
110 TEST_EQ(std::use_facet<bl::info>(l).name(), ansiLocaleName);
111 }
112
113 bl::generator g;
114
115 namespace as = bl::as;
116 constexpr time_t datetime = 60 * 60 * 24 * (31 + 4) // Feb 5th
117 + (15 * 60 + 42) * 60; // 15:42
118
119 const std::string enWorldName = "en_001.UTF-8";
120 if(!hasLocaleForBackend(locale_name: enWorldName, backendName))
121 std::cout << "\tSkipping due to missing locale " << enWorldName << std::endl;
122 else {
123 auto l = g(enWorldName);
124 const auto& info = std::use_facet<bl::info>(loc: l);
125 TEST_EQ(info.language(), "en");
126 TEST_EQ(info.country(), "001");
127 TEST(info.utf8());
128 TEST_EQ(info.encoding(), "UTF-8");
129
130 std::ostringstream os;
131 os.imbue(loc: l);
132 os << as::time << as::gmt << as::time_short;
133 os << datetime;
134 TEST_EQ(os.str().substr(0, 4), "3:42"); // 3:42 pm
135 }
136 const std::string enEuropeName = "en_150.UTF-8";
137 if(!hasLocaleForBackend(locale_name: enEuropeName, backendName))
138 std::cout << "\tSkipping due to missing locale " << enEuropeName << std::endl;
139 else {
140 auto l = g(enEuropeName);
141 const auto& info = std::use_facet<bl::info>(loc: l);
142 TEST_EQ(info.language(), "en");
143 TEST_EQ(info.country(), "150");
144 TEST(info.utf8());
145 TEST_EQ(info.encoding(), "UTF-8");
146
147 std::ostringstream os;
148
149 std::string expectedTimeFormat = "15:42";
150 // The std locale may not fully support the 150 region and use a different format
151 if(backendName == "std") {
152 os.imbue(loc: std::locale(os.getloc(), new std::time_put_byname<char>(enEuropeName)));
153 empty_stream(s&: os) << std::put_time(tmb: gmtime_wrap(time: &datetime), fmt: "%X");
154 expectedTimeFormat = os.str();
155 }
156
157 os.imbue(loc: l);
158 empty_stream(s&: os) << as::time << as::gmt << as::time_short;
159 os << datetime;
160 TEST_EQ(os.str().substr(0, expectedTimeFormat.size()), expectedTimeFormat);
161 }
162 }
163}
164
// Returns true if the standard library can uppercase a non-ASCII wide character
// using a UTF-8 variant of the classic locale ("C.UTF-8" or "C.utf8").
// If none of those names can be constructed the plain classic locale is used for the check.
bool has_unicode_classic_locale()
{
    std::locale candidate = std::locale::classic();
    const char* const unicodeClassicNames[] = {"C.UTF-8", "C.utf8"};
    for(const char* const localeName : unicodeClassicNames) {
        try {
            candidate = std::locale(localeName);
        } catch(...) {
            continue; // Not available under this name, try the next one
        }
        break; // Found a usable locale
    }
    const wchar_t smallDelta = L'\u03B4';
    // Check that the Unicode character is handled
    const auto& ctypeFacet = std::use_facet<std::ctype<wchar_t>>(candidate);
    return ctypeFacet.toupper(smallDelta) != smallDelta;
}
179
180// For a non-existing locale the C locale will be used as a fallback
181// If UTF-8 is requested/reported then UTF-8 will still be used as much as possible
182void test_invalid_locale()
183{
184 std::ostringstream classicStream;
185 classicStream.imbue(loc: std::locale::classic());
186 const boost::locale::util::locale_data systemLocale(boost::locale::util::get_system_locale());
187
188 bl::localization_backend_manager tmp_backend = bl::localization_backend_manager::global();
189 tmp_backend.select(backend_name: "std");
190 bl::localization_backend_manager::global(tmp_backend);
191 bl::generator g;
192 std::locale nonExistingLocale = g("noLang_noCountry." + systemLocale.encoding());
193 const auto& info = std::use_facet<bl::info>(loc: nonExistingLocale);
194 TEST_EQ(info.language(), "nolang");
195 TEST_EQ(info.country(), "NOCOUNTRY");
196 std::ostringstream os;
197 os.imbue(loc: nonExistingLocale);
198 os << boost::locale::as::number << 123456789 << " " << 1234567.89;
199 classicStream << 123456789 << " " << 1234567.89;
200 TEST_EQ(os.str(), classicStream.str());
201
202 // Request UTF-8 explicitly and check that it is used even when the locale doesn't exist
203 if(!info.utf8())
204 nonExistingLocale = g("noLang_noCountry.UTF-8");
205 if(has_unicode_classic_locale()) {
206 // Case conversion works only if the backend supports classic locale with Unicode
207 TEST_EQ(boost::locale::to_upper("δ", nonExistingLocale), "Δ");
208 }
209 // The codecvt facet always supports UTF-8
210 {
211 auto& cvt = std::use_facet<std::codecvt<wchar_t, char, std::mbstate_t>>(loc: nonExistingLocale);
212 // String with Unicode chars from different cultures
213 const std::wstring wide_str = L"\U0001D49E-\u043F\u0440\u0438\u0432\u0435\u0442-\u3084\u3042";
214
215 std::mbstate_t state{};
216 const wchar_t* from_next = wide_str.c_str();
217 const wchar_t* from_end = from_next + wide_str.size();
218 char out_str[32]{};
219 char* to_next = out_str;
220 TEST_EQ(cvt.out(state, from_next, from_end, from_next, out_str, out_str + sizeof(out_str), to_next), cvt.ok);
221 const std::string utf8_str = boost::locale::conv::utf_to_utf<char>(str: wide_str);
222 TEST_EQ(out_str, utf8_str);
223 }
224}
225
226void test_install_chartype(const std::string& backendName)
227{
228 // Use ASCII and UTF-8 encoding
229 for(const std::string localeName : {"C", "en_US.UTF-8"}) {
230 std::cout << "--- Locale: " << localeName << std::endl;
231 const std::locale origLocale = bl::generator{}(localeName);
232 const auto backend = bl::localization_backend_manager::global().create();
233 backend->set_option(name: "locale", value: localeName);
234 for(auto category = bl::per_character_facet_first; category <= bl::per_character_facet_last; ++category) {
235 std::cout << "---- Testing category " << static_cast<unsigned>(category) << '\n';
236 // This should modify the locale
237 const std::locale newLocale_char = backend->install(base: origLocale, category, type: bl::char_facet_t::char_f);
238 // This should not
239 const std::locale newLocale_nochar = backend->install(base: origLocale, category, type: bl::char_facet_t::nochar);
240 // But the boundary facet is only implemented in ICU, so for all else the locale is still unchanged
241 if(category != bl::category_t::boundary || backendName == "icu")
242 TEST(origLocale != newLocale_char);
243 else
244 TEST(origLocale == newLocale_char);
245 TEST(origLocale == newLocale_nochar);
246 }
247 }
248}
249
// Marker facet: Behaves exactly like std::collate<CharT> but has a distinct
// dynamic type, which allows detecting whether a std::collate facet got replaced.
template<class CharT>
struct dummy_collate : public std::collate<CharT> {};
252
253template<typename Char>
254bool has_dummy_collate(const std::locale& l)
255{
256 const auto& col = std::use_facet<std::collate<Char>>(l); // Implicitely require existance of std::collate
257 return blt::is_facet<dummy_collate<Char>>(&col);
258}
259
260void test_std_collate_replaced(const std::string& /*backendName*/)
261{
262 std::locale origLocale = std::locale::classic();
263 origLocale = std::locale(origLocale, new dummy_collate<char>);
264 origLocale = std::locale(origLocale, new dummy_collate<wchar_t>);
265#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
266 origLocale = std::locale(origLocale, new dummy_collate<char16_t>);
267#endif
268#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
269 origLocale = std::locale(origLocale, new dummy_collate<char32_t>);
270#endif
271
272 // Use ASCII and UTF-8 encoding
273 for(const std::string localeName : {"C", "en_US.UTF-8"}) {
274 std::cout << "--- Locale: " << localeName << std::endl;
275 bl::generator g;
276 g.categories(cats: boost::locale::category_t::collation);
277 const std::locale l = g.generate(base: origLocale, id: localeName);
278 TEST(has_dummy_collate<char>(origLocale));
279 TEST(!has_dummy_collate<char>(l));
280 TEST(has_dummy_collate<wchar_t>(origLocale));
281 TEST(!has_dummy_collate<wchar_t>(l));
282#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
283 TEST(has_dummy_collate<char16_t>(origLocale));
284 TEST(!has_dummy_collate<char16_t>(l));
285#endif
286#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
287 TEST(has_dummy_collate<char32_t>(origLocale));
288 TEST(!has_dummy_collate<char32_t>(l));
289#endif
290 }
291}
292
293void test_main(int /*argc*/, char** /*argv*/)
294{
295 {
296 std::vector<std::string> backends;
297#ifdef BOOST_LOCALE_WITH_ICU
298 backends.push_back(x: "icu");
299#endif
300#ifndef BOOST_LOCALE_NO_STD_BACKEND
301 backends.push_back(x: "std");
302#endif
303#ifndef BOOST_LOCALE_NO_WINAPI_BACKEND
304 backends.push_back("winapi");
305#endif
306#ifndef BOOST_LOCALE_NO_POSIX_BACKEND
307 backends.push_back(x: "posix");
308#endif
309 std::sort(first: backends.begin(), last: backends.end());
310
311 std::vector<std::string> all_backends = bl::localization_backend_manager::global().get_all_backends();
312 std::sort(first: all_backends.begin(), last: all_backends.end());
313 TEST_EQ(all_backends, backends);
314 }
315
316 const bl::localization_backend_manager orig_backend = bl::localization_backend_manager::global();
317 for(const std::string& backendName : orig_backend.get_all_backends()) {
318 std::cout << "Backend: " << backendName << std::endl;
319 bl::localization_backend_manager tmp_backend = bl::localization_backend_manager::global();
320 tmp_backend.select(backend_name: backendName);
321 bl::localization_backend_manager::global(tmp_backend);
322 bl::generator g;
323 for(const std::string localeName : {"", "C", "en_US.UTF-8", "en_US.ISO8859-1", "tr_TR.windows1254"}) {
324 std::cout << "-- Locale: " << localeName << std::endl;
325 const std::locale l = g(localeName);
326#ifdef __cpp_char8_t
327# define TEST_FOR_CHAR8(check) TEST(check)
328#else
329# define TEST_FOR_CHAR8(check) (void)0
330#endif
331#ifndef BOOST_LOCALE_NO_CXX20_STRING8
332# define TEST_FOR_STRING8(check) TEST(check)
333#else
334# define TEST_FOR_STRING8(check) (void)0
335#endif
336#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
337# define TEST_FOR_CHAR16(check) TEST(check)
338#else
339# define TEST_FOR_CHAR16(check) (void)0
340#endif
341#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
342# define TEST_FOR_CHAR32(check) TEST(check)
343#else
344# define TEST_FOR_CHAR32(check) (void)0
345#endif
346#define TEST_HAS_FACET_CHAR8(facet, l) TEST_FOR_CHAR8(blt::has_facet<facet<char8_t>>(l))
347#define TEST_HAS_FACET_CHAR16(facet, l) TEST_FOR_CHAR16(blt::has_facet<facet<char16_t>>(l))
348#define TEST_HAS_FACET_CHAR32(facet, l) TEST_FOR_CHAR32(blt::has_facet<facet<char32_t>>(l))
349#define TEST_HAS_FACET_STRING8(facet, l) TEST_FOR_STRING8(blt::has_facet<facet<char8_t>>(l))
350
351#define TEST_HAS_FACETS(facet, l) \
352 do { \
353 TEST(blt::has_facet<facet<char>>(l)); \
354 TEST(blt::has_facet<facet<wchar_t>>(l)); \
355 TEST_HAS_FACET_CHAR16(facet, l); \
356 TEST_HAS_FACET_CHAR32(facet, l); \
357 } while(false)
358
359 // Convert
360 TEST_HAS_FACETS(bl::converter, l);
361 TEST_HAS_FACET_STRING8(bl::converter, l);
362 // Collator
363 TEST_HAS_FACETS(std::collate, l);
364 if(backendName == "icu" || (backendName == "winapi" && std::use_facet<bl::info>(loc: l).utf8())) {
365 TEST_HAS_FACETS(bl::collator, l);
366 TEST_HAS_FACET_STRING8(bl::collator, l);
367 } else {
368 TEST(blt::has_not_facet<bl::collator<char>>(l));
369 TEST(blt::has_not_facet<bl::collator<wchar_t>>(l));
370 TEST_FOR_STRING8(blt::has_not_facet<bl::collator<char8_t>>(l));
371 TEST_FOR_CHAR16(blt::has_not_facet<bl::collator<char16_t>>(l));
372 TEST_FOR_CHAR32(blt::has_not_facet<bl::collator<char32_t>>(l));
373 }
374 // Formatting
375 TEST_HAS_FACETS(std::num_put, l);
376 TEST_HAS_FACETS(std::time_put, l);
377 TEST_HAS_FACETS(std::numpunct, l);
378 TEST_HAS_FACETS(std::moneypunct, l);
379 // Parsing
380 TEST_HAS_FACETS(std::num_get, l);
381 // Message
382 TEST_HAS_FACETS(bl::message_format, l);
383 TEST_HAS_FACET_STRING8(bl::message_format, l);
384 // Codepage
385 TEST_HAS_FACETS(codecvt_by_char_type, l);
386 // Boundary
387 if(backendName == "icu") {
388 TEST_HAS_FACETS(bl::boundary::boundary_indexing, l);
389 TEST_HAS_FACET_CHAR8(bl::boundary::boundary_indexing, l);
390 }
391 // calendar
392 TEST(blt::has_facet<bl::calendar_facet>(l));
393 // information
394 TEST(blt::has_facet<bl::info>(l));
395 }
396
397 std::locale l = g("en_US.UTF-8");
398 TEST(has_message(l));
399 g.categories(cats: g.categories() ^ bl::category_t::message);
400 g.locale_cache_enabled(on: true);
401 g("en_US.UTF-8");
402 g.categories(cats: g.categories() | bl::category_t::message);
403 l = g("en_US.UTF-8");
404 TEST(!has_message(l));
405 g.clear_cache();
406 g.locale_cache_enabled(on: false);
407 l = g("en_US.UTF-8");
408 TEST(has_message(l));
409 g.characters(chars: g.characters() ^ bl::char_facet_t::char_f);
410 l = g("en_US.UTF-8");
411 TEST(!has_message(l));
412 g.characters(chars: g.characters() | bl::char_facet_t::char_f);
413 l = g("en_US.UTF-8");
414 TEST(has_message(l));
415
416 l = g("en_US.ISO8859-1");
417 {
418 const auto& info = std::use_facet<bl::info>(loc: l);
419 TEST_EQ(info.language(), "en");
420 TEST_EQ(info.country(), "US");
421 TEST(!info.utf8());
422 TEST_EQ(info.variant(), "");
423 TEST_EQ(info.encoding(), "ISO8859-1");
424 }
425 l = g("en_US.UTF-8");
426 {
427 const auto& info = std::use_facet<bl::info>(loc: l);
428 TEST_EQ(info.language(), "en");
429 TEST_EQ(info.country(), "US");
430 TEST(info.utf8());
431 TEST_EQ(info.variant(), "");
432 TEST_EQ(info.encoding(), "UTF-8");
433 }
434 l = g("da_DK.ISO8859-15@euro");
435 {
436 const auto& info = std::use_facet<bl::info>(loc: l);
437 TEST_EQ(info.language(), "da");
438 TEST_EQ(info.country(), "DK");
439 TEST(!info.utf8());
440 TEST_EQ(info.variant(), "euro");
441 TEST_EQ(info.encoding(), "ISO8859-15");
442 }
443 l = g("en_US.ISO8859-1");
444 {
445 const auto& info = std::use_facet<bl::info>(loc: l);
446 TEST_EQ(info.language(), "en");
447 TEST_EQ(info.country(), "US");
448 TEST(!info.utf8());
449 TEST_EQ(info.variant(), "");
450 TEST_EQ(info.encoding(), "ISO8859-1");
451 }
452
453 // Check that generate() extends the given locale, not replaces it
454 std::locale l_wt(std::locale::classic(), new test_facet);
455 TEST(blt::has_facet<test_facet>(g.generate(l_wt, "en_US.UTF-8")));
456 TEST(!blt::has_facet<test_facet>(g.generate("en_US.UTF-8")));
457 TEST(blt::has_facet<test_facet>(g.generate(l_wt, "en_US.ISO8859-1")));
458 TEST(!blt::has_facet<test_facet>(g.generate("en_US.ISO8859-1")));
459
460 // Check caching works
461 g.locale_cache_enabled(on: true);
462 // Generate a locale with a specific facet which is then cached
463 g.generate(base: l_wt, id: "en_US.UTF-8");
464 g.generate(base: l_wt, id: "en_US.ISO8859-1");
465 // Cached locale is returned -> facet is still there
466 TEST(blt::has_facet<test_facet>(g("en_US.UTF-8")));
467 TEST(blt::has_facet<test_facet>(g("en_US.ISO8859-1")));
468 // Check a property to verify it doesn't simply return the same locale for each call
469 TEST(std::use_facet<bl::info>(g("en_US.UTF-8")).utf8());
470 TEST(!std::use_facet<bl::info>(g("en_US.ISO8859-1")).utf8());
471
472 test_install_chartype(backendName);
473 test_std_collate_replaced(backendName);
474 }
475 std::cout << "Test special locales" << std::endl;
476 test_special_locales();
477 test_invalid_locale();
478}
479

source code of boost/libs/locale/test/test_generator.cpp