| 1 | // |
| 2 | // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) |
| 3 | // |
| 4 | // Distributed under the Boost Software License, Version 1.0. |
| 5 | // https://www.boost.org/LICENSE_1_0.txt |
| 6 | |
| 7 | #include <boost/locale/encoding.hpp> |
| 8 | #include "boost/locale/std/all_generator.hpp" |
| 9 | #include <boost/assert.hpp> |
| 10 | #include <ios> |
| 11 | #include <locale> |
| 12 | #include <string> |
| 13 | #include <type_traits> |
| 14 | |
| 15 | namespace boost { namespace locale { namespace impl_std { |
| 16 | |
| 17 | class utf8_collator_from_wide : public std::collate<char> { |
| 18 | public: |
| 19 | typedef std::collate<wchar_t> wfacet; |
| 20 | utf8_collator_from_wide(const std::string& locale_name) : |
| 21 | base_(std::locale::classic(), new std::collate_byname<wchar_t>(locale_name)) |
| 22 | {} |
| 23 | int do_compare(const char* lb, const char* le, const char* rb, const char* re) const override |
| 24 | { |
| 25 | const std::wstring l = conv::utf_to_utf<wchar_t>(begin: lb, end: le); |
| 26 | const std::wstring r = conv::utf_to_utf<wchar_t>(begin: rb, end: re); |
| 27 | return std::use_facet<wfacet>(loc: base_).compare(lo1: l.c_str(), |
| 28 | hi1: l.c_str() + l.size(), |
| 29 | lo2: r.c_str(), |
| 30 | hi2: r.c_str() + r.size()); |
| 31 | } |
| 32 | long do_hash(const char* b, const char* e) const override |
| 33 | { |
| 34 | const std::wstring tmp = conv::utf_to_utf<wchar_t>(begin: b, end: e); |
| 35 | return std::use_facet<wfacet>(loc: base_).hash(lo: tmp.c_str(), hi: tmp.c_str() + tmp.size()); |
| 36 | } |
| 37 | std::string do_transform(const char* b, const char* e) const override |
| 38 | { |
| 39 | const std::wstring tmp = conv::utf_to_utf<wchar_t>(begin: b, end: e); |
| 40 | const std::wstring wkey = std::use_facet<wfacet>(loc: base_).transform(lo: tmp.c_str(), hi: tmp.c_str() + tmp.size()); |
| 41 | // wkey is only for lexicographical sorting, so may no be valid UTF |
| 42 | // --> Convert to char array in big endian order so sorting stays the same |
| 43 | std::string key; |
| 44 | key.reserve(res_arg: wkey.size() * sizeof(wchar_t)); |
| 45 | for(const wchar_t c : wkey) { |
| 46 | const auto tv = static_cast<std::make_unsigned<wchar_t>::type>(c); |
| 47 | for(unsigned i = 1; i <= sizeof(tv); ++i) |
| 48 | key += char((tv >> (sizeof(tv) - i) * 8) & 0xFF); |
| 49 | } |
| 50 | return key; |
| 51 | } |
| 52 | |
| 53 | private: |
| 54 | std::locale base_; |
| 55 | }; |
| 56 | |
// Workaround for a bug in the C++ or C standard library so far observed on the Appveyor VS2017 image.
//
// Probes the std::collate<char> facet of locale l with the strings "a" and "b".
// Returns true when transform() does not throw, the locale's comparison of
// (a, b) and (b, a) disagree as expected, and the comparison result matches the
// ordering of the transformed keys. Returns false if a std::exception is thrown.
bool collation_works(const std::locale& l)
{
    const auto& col = std::use_facet<std::collate<char>>(l);
    const std::string a = "a";
    const std::string b = "b";
    try {
        // On some broken system libs transform throws an exception
        const auto ta = col.transform(a.c_str(), a.c_str() + a.size());
        const auto tb = col.transform(b.c_str(), b.c_str() + b.size());
        // This should always be true but on some broken system libs `l(a,b) == !l(b,a) == false`
        return l(a, b) == !l(b, a) && (l(a, b) == (ta < tb));
    } catch(const std::exception&) { // LCOV_EXCL_LINE
        return false;                // LCOV_EXCL_LINE
    }
}
| 73 | |
| 74 | std::locale |
| 75 | create_collate(const std::locale& in, const std::string& locale_name, char_facet_t type, utf8_support utf) |
| 76 | { |
| 77 | switch(type) { |
| 78 | case char_facet_t::nochar: break; |
| 79 | case char_facet_t::char_f: |
| 80 | if(utf == utf8_support::from_wide) |
| 81 | return std::locale(in, new utf8_collator_from_wide(locale_name)); |
| 82 | else { |
| 83 | std::locale res = std::locale(in, new std::collate_byname<char>(locale_name)); |
| 84 | if(utf != utf8_support::none && !collation_works(l: res)) { |
| 85 | res = std::locale(res, new utf8_collator_from_wide(locale_name)); // LCOV_EXCL_LINE |
| 86 | } |
| 87 | BOOST_ASSERT_MSG(collation_works(res), "Broken collation" ); |
| 88 | return res; |
| 89 | } |
| 90 | |
| 91 | case char_facet_t::wchar_f: return std::locale(in, new std::collate_byname<wchar_t>(locale_name)); |
| 92 | |
| 93 | #ifdef __cpp_char8_t |
| 94 | case char_facet_t::char8_f: break; // std-facet not available (yet) |
| 95 | #endif |
| 96 | #ifdef BOOST_LOCALE_ENABLE_CHAR16_T |
| 97 | case char_facet_t::char16_f: return std::locale(in, new std::collate_byname<char16_t>(locale_name)); |
| 98 | #endif |
| 99 | #ifdef BOOST_LOCALE_ENABLE_CHAR32_T |
| 100 | case char_facet_t::char32_f: return std::locale(in, new std::collate_byname<char32_t>(locale_name)); |
| 101 | #endif |
| 102 | } |
| 103 | return in; |
| 104 | } |
| 105 | |
| 106 | }}} // namespace boost::locale::impl_std |
| 107 | |