//
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt
6
7#include <boost/locale/collator.hpp>
8#include <boost/locale/generator.hpp>
9#include "boost/locale/icu/all_generator.hpp"
10#include "boost/locale/icu/cdata.hpp"
11#include "boost/locale/icu/icu_util.hpp"
12#include "boost/locale/icu/uconv.hpp"
13#include "boost/locale/shared/mo_hash.hpp"
14#include "boost/locale/shared/std_collate_adapter.hpp"
15#include <boost/thread.hpp>
16#include <limits>
17#include <memory>
18#include <unicode/coll.h>
19#include <vector>
20#if BOOST_LOCALE_ICU_VERSION >= 402
21# define BOOST_LOCALE_WITH_STRINGPIECE 1
22# include <unicode/stringpiece.h>
23#else
24# define BOOST_LOCALE_WITH_STRINGPIECE 0
25#endif
26
27#ifdef BOOST_MSVC
28# pragma warning(disable : 4244) // 'argument' : conversion from 'int'
29# pragma warning(disable : 4267) // 'argument' : conversion from 'size_t'
30#endif
31
32namespace boost { namespace locale { namespace impl_icu {
33 template<typename CharType>
34 class collate_impl : public collator<CharType> {
35 public:
36 int level_to_int(collate_level level) const
37 {
38 const auto res = static_cast<int>(level);
39 if(res < 0)
40 return 0;
41 if(res >= level_count)
42 return level_count - 1;
43 return res;
44 }
45
46#if BOOST_LOCALE_WITH_STRINGPIECE
47 int do_utf8_compare(collate_level level,
48 const char* b1,
49 const char* e1,
50 const char* b2,
51 const char* e2,
52 UErrorCode& status) const
53 {
54 icu::StringPiece left(b1, e1 - b1);
55 icu::StringPiece right(b2, e2 - b2);
56 return get_collator(level).compareUTF8(left, right, status);
57 }
58#endif
59
60 int do_ustring_compare(collate_level level,
61 const CharType* b1,
62 const CharType* e1,
63 const CharType* b2,
64 const CharType* e2,
65 UErrorCode& status) const
66 {
67 icu::UnicodeString left = cvt_.icu(b1, e1);
68 icu::UnicodeString right = cvt_.icu(b2, e2);
69 return get_collator(level).compare(left, right, status);
70 }
71
72 int do_real_compare(collate_level level,
73 const CharType* b1,
74 const CharType* e1,
75 const CharType* b2,
76 const CharType* e2,
77 UErrorCode& status) const
78 {
79 return do_ustring_compare(level, b1, e1, b2, e2, status);
80 }
81
82 int do_compare(collate_level level,
83 const CharType* b1,
84 const CharType* e1,
85 const CharType* b2,
86 const CharType* e2) const override
87 {
88 UErrorCode status = U_ZERO_ERROR;
89
90 int res = do_real_compare(level, b1, e1, b2, e2, status);
91
92 if(U_FAILURE(code: status))
93 throw std::runtime_error(std::string("Collation failed:") + u_errorName(code: status));
94 if(res < 0)
95 return -1;
96 else if(res > 0)
97 return 1;
98 return 0;
99 }
100
101 std::vector<uint8_t> do_basic_transform(collate_level level, const CharType* b, const CharType* e) const
102 {
103 icu::UnicodeString str = cvt_.icu(b, e);
104 std::vector<uint8_t> tmp;
105 tmp.resize(new_size: str.length() + 1u);
106 icu::Collator& collate = get_collator(level);
107 const int len = collate.getSortKey(source: str, result: tmp.data(), resultLength: tmp.size());
108 if(len > int(tmp.size())) {
109 tmp.resize(new_size: len);
110 collate.getSortKey(source: str, result: tmp.data(), resultLength: tmp.size());
111 } else
112 tmp.resize(new_size: len);
113 return tmp;
114 }
115 std::basic_string<CharType>
116 do_transform(collate_level level, const CharType* b, const CharType* e) const override
117 {
118 std::vector<uint8_t> tmp = do_basic_transform(level, b, e);
119 return std::basic_string<CharType>(tmp.begin(), tmp.end());
120 }
121
122 long do_hash(collate_level level, const CharType* b, const CharType* e) const override
123 {
124 std::vector<uint8_t> tmp = do_basic_transform(level, b, e);
125 tmp.push_back(x: 0);
126 return gnu_gettext::pj_winberger_hash_function(ptr: reinterpret_cast<char*>(tmp.data()));
127 }
128
129 collate_impl(const cdata& d) : cvt_(d.encoding()), locale_(d.locale()), is_utf8_(d.is_utf8()) {}
130
131 icu::Collator& get_collator(collate_level level) const
132 {
133 const int lvl_idx = level_to_int(level);
134 constexpr icu::Collator::ECollationStrength levels[level_count] = {icu::Collator::PRIMARY,
135 icu::Collator::SECONDARY,
136 icu::Collator::TERTIARY,
137 icu::Collator::QUATERNARY,
138 icu::Collator::IDENTICAL};
139
140 icu::Collator* col = collates_[lvl_idx].get();
141 if(!col) {
142 UErrorCode status = U_ZERO_ERROR;
143 std::unique_ptr<icu::Collator> tmp_col(icu::Collator::createInstance(loc: locale_, err&: status));
144 if(U_FAILURE(code: status))
145 throw std::runtime_error(std::string("Creation of collate failed:") + u_errorName(code: status));
146
147 tmp_col->setStrength(levels[lvl_idx]);
148 col = tmp_col.release();
149 collates_[lvl_idx].reset(new_value: col);
150 }
151 return *col;
152 }
153
154 private:
155 static constexpr int level_count = static_cast<int>(collate_level::identical) + 1;
156 icu_std_converter<CharType> cvt_;
157 icu::Locale locale_;
158 mutable boost::thread_specific_ptr<icu::Collator> collates_[level_count];
159 bool is_utf8_;
160 };
161
#if BOOST_LOCALE_WITH_STRINGPIECE
/// `char` specialization of the comparison hook: when the narrow encoding is
/// UTF-8 the ranges go straight to do_utf8_compare(); for any other encoding
/// they take the icu::UnicodeString route via do_ustring_compare().
template<>
int collate_impl<char>::do_real_compare(collate_level level,
                                        const char* b1,
                                        const char* e1,
                                        const char* b2,
                                        const char* e2,
                                        UErrorCode& status) const
{
    return is_utf8_ ? do_utf8_compare(level, b1, e1, b2, e2, status)
                    : do_ustring_compare(level, b1, e1, b2, e2, status);
}
#endif
177 std::locale create_collate(const std::locale& in, const cdata& cd, char_facet_t type)
178 {
179 switch(type) {
180 case char_facet_t::nochar: break;
181 case char_facet_t::char_f: return impl::create_collators<char, collate_impl>(in, args: cd);
182 case char_facet_t::wchar_f: return impl::create_collators<wchar_t, collate_impl>(in, args: cd);
183#ifdef __cpp_char8_t
184 case char_facet_t::char8_f:
185 return std::locale(in, new collate_impl<char8_t>(cd)); // std-facet not available (yet)
186#endif
187#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
188 case char_facet_t::char16_f: return impl::create_collators<char16_t, collate_impl>(in, cd);
189#endif
190#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
191 case char_facet_t::char32_f: return impl::create_collators<char32_t, collate_impl>(in, cd);
192#endif
193 }
194 return in;
195 }
196
197}}} // namespace boost::locale::impl_icu
198

// source code of boost/libs/locale/src/boost/locale/icu/collator.cpp