1//
2// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3// Copyright (c) 2022-2023 Alexander Grund
4//
5// Distributed under the Boost Software License, Version 1.0.
6// https://www.boost.org/LICENSE_1_0.txt
7
8#include <boost/locale/conversion.hpp>
9#include "boost/locale/icu/all_generator.hpp"
10#include "boost/locale/icu/cdata.hpp"
11#include "boost/locale/icu/icu_util.hpp"
12#include "boost/locale/icu/uconv.hpp"
13#include <limits>
14#include <unicode/locid.h>
15#include <unicode/normlzr.h>
16#include <unicode/ustring.h>
17#if BOOST_LOCALE_ICU_VERSION >= 308
18# include <unicode/ucasemap.h>
19# define BOOST_LOCALE_WITH_CASEMAP
20#endif
21#include <vector>
22
23namespace boost { namespace locale { namespace impl_icu {
24
25 namespace {
26 void normalize_string(icu::UnicodeString& str, int flags)
27 {
28 UErrorCode code = U_ZERO_ERROR;
29 UNormalizationMode mode = UNORM_DEFAULT;
30 switch(flags) {
31 case norm_nfd: mode = UNORM_NFD; break;
32 case norm_nfc: mode = UNORM_NFC; break;
33 case norm_nfkd: mode = UNORM_NFKD; break;
34 case norm_nfkc: mode = UNORM_NFKC; break;
35 }
36 icu::UnicodeString tmp;
37 icu::Normalizer::normalize(source: str, mode, options: 0, result&: tmp, status&: code);
38
39 check_and_throw_icu_error(err: code);
40
41 str = tmp;
42 }
43 } // namespace
44
45 template<typename CharType>
46 class converter_impl : public converter<CharType> {
47 public:
48 typedef std::basic_string<CharType> string_type;
49
50 converter_impl(const cdata& d) : locale_(d.locale()), encoding_(d.encoding()) {}
51
52 string_type convert(converter_base::conversion_type how,
53 const CharType* begin,
54 const CharType* end,
55 int flags = 0) const override
56 {
57 icu_std_converter<CharType> cvt(encoding_);
58 icu::UnicodeString str = cvt.icu(begin, end);
59 using conversion_type = converter_base::conversion_type;
60 switch(how) {
61 case conversion_type::normalization: normalize_string(str, flags); break;
62 case conversion_type::upper_case: str.toUpper(locale: locale_); break;
63 case conversion_type::lower_case: str.toLower(locale: locale_); break;
64 case conversion_type::title_case: str.toTitle(titleIter: nullptr, locale: locale_); break;
65 case conversion_type::case_folding: str.foldCase(); break;
66 }
67 return cvt.std(str);
68 }
69
70 private:
71 icu::Locale locale_;
72 std::string encoding_;
73 }; // converter_impl
74
75#ifdef BOOST_LOCALE_WITH_CASEMAP
76 template<typename T>
77 struct get_casemap_size_type;
78
79 template<typename TRes, typename TCaseMap, typename TSize>
80 struct get_casemap_size_type<TRes (*)(TCaseMap*, char*, TSize, const char*, TSize, UErrorCode*)> {
81 using type = TSize;
82 };
83
/// Tells whether a case-mapping function takes its first argument (the case map)
/// as pointer-to-const: `value` is true if so, false otherwise.
///
/// Only defined for function types whose first parameter is a pointer,
/// and for pointers to such types.
template<typename Signature>
struct is_casemap_func_const;

// Function type: the answer is the const-ness of what the first parameter points to
template<typename Result, typename CaseMap, typename... Rest>
struct is_casemap_func_const<Result(CaseMap*, Rest...)> : std::is_const<CaseMap> {};

// Pointer: strip one level and defer to the pointee
template<typename Pointee>
struct is_casemap_func_const<Pointee*> : is_casemap_func_const<Pointee> {};
92
93 template<typename U8Char>
94 class raii_casemap {
95 public:
96 static_assert(sizeof(U8Char) == sizeof(char), "Not an UTF-8 char type");
97 using string_type = std::basic_string<U8Char>;
98
99 raii_casemap(const raii_casemap&) = delete;
100 void operator=(const raii_casemap&) = delete;
101
102 raii_casemap(const std::string& locale_id) : map_(nullptr)
103 {
104 UErrorCode err = U_ZERO_ERROR;
105 map_ = ucasemap_open(locale: locale_id.c_str(), options: 0, pErrorCode: &err);
106 check_and_throw_icu_error(err);
107 if(!map_)
108 throw std::runtime_error("Failed to create UCaseMap"); // LCOV_EXCL_LINE
109 }
110 ~raii_casemap() { ucasemap_close(csm: map_); }
111
112 template<typename Conv>
113 typename std::enable_if<!is_casemap_func_const<Conv>::value, string_type>::type
114 convert(Conv func, const U8Char* begin, const U8Char* end)
115 {
116 return do_convert(func, begin, end);
117 }
118 template<typename Conv>
119 typename std::enable_if<is_casemap_func_const<Conv>::value, string_type>::type
120 convert(Conv func, const U8Char* begin, const U8Char* end) const
121 {
122 return do_convert(func, begin, end);
123 }
124
125 private:
126 template<typename Conv>
127 string_type do_convert(Conv func, const U8Char* begin, const U8Char* end) const
128 {
129 using size_type = typename get_casemap_size_type<Conv>::type;
130 if((end - begin) >= std::numeric_limits<std::ptrdiff_t>::max() / 11)
131 throw std::range_error("String to long to be converted by ICU"); // LCOV_EXCL_LINE
132 const auto max_converted_size = (end - begin) * 11 / 10 + 1;
133 if(max_converted_size >= std::numeric_limits<size_type>::max())
134 throw std::range_error("String to long to be converted by ICU"); // LCOV_EXCL_LINE
135 std::vector<U8Char> buf(max_converted_size);
136 UErrorCode err = U_ZERO_ERROR;
137 auto size = func(map_,
138 reinterpret_cast<char*>(buf.data()),
139 static_cast<size_type>(buf.size()),
140 reinterpret_cast<const char*>(begin),
141 static_cast<size_type>(end - begin),
142 &err);
143 if(err == U_BUFFER_OVERFLOW_ERROR) {
144 err = U_ZERO_ERROR;
145 buf.resize(size + 1);
146 size = func(map_,
147 reinterpret_cast<char*>(buf.data()),
148 static_cast<size_type>(buf.size()),
149 reinterpret_cast<const char*>(begin),
150 static_cast<size_type>(end - begin),
151 &err);
152 }
153 check_and_throw_icu_error(err);
154 return string_type(buf.data(), size);
155 }
156
157 private:
158 UCaseMap* map_;
159 };
160
161 template<typename U8Char>
162 class utf8_converter_impl : public converter<U8Char> {
163 public:
164 static_assert(sizeof(U8Char) == sizeof(char), "Not an UTF-8 char type");
165 utf8_converter_impl(const cdata& d) : locale_id_(d.locale().getName()), map_(locale_id_) {}
166
167 std::basic_string<U8Char> convert(converter_base::conversion_type how,
168 const U8Char* begin,
169 const U8Char* end,
170 int flags = 0) const override
171 {
172 switch(how) {
173 case converter_base::upper_case: return map_.convert(ucasemap_utf8ToUpper, begin, end);
174 case converter_base::lower_case: return map_.convert(ucasemap_utf8ToLower, begin, end);
175 case converter_base::title_case: {
176 // Non-const method, so need to create a separate map
177 raii_casemap<U8Char> map(locale_id_);
178 return map.convert(ucasemap_utf8ToTitle, begin, end);
179 }
180 case converter_base::case_folding: return map_.convert(ucasemap_utf8FoldCase, begin, end);
181 case converter_base::normalization: {
182 icu_std_converter<U8Char> cvt("UTF-8");
183 icu::UnicodeString str = cvt.icu(begin, end);
184 normalize_string(str, flags);
185 return cvt.std(str);
186 }
187 }
188 return std::basic_string<U8Char>(begin, end - begin); // LCOV_EXCL_LINE
189 }
190
191 private:
192 std::string locale_id_;
193 raii_casemap<U8Char> map_;
194 }; // converter_impl
195
196#endif // BOOST_LOCALE_WITH_CASEMAP
197
198 std::locale create_convert(const std::locale& in, const cdata& cd, char_facet_t type)
199 {
200 switch(type) {
201 case char_facet_t::nochar: break;
202 case char_facet_t::char_f:
203#ifdef BOOST_LOCALE_WITH_CASEMAP
204 if(cd.is_utf8())
205 return std::locale(in, new utf8_converter_impl<char>(cd));
206#endif
207 return std::locale(in, new converter_impl<char>(cd));
208 case char_facet_t::wchar_f: return std::locale(in, new converter_impl<wchar_t>(cd));
209#ifndef BOOST_LOCALE_NO_CXX20_STRING8
210 case char_facet_t::char8_f:
211# if defined(BOOST_LOCALE_WITH_CASEMAP)
212 return std::locale(in, new utf8_converter_impl<char8_t>(cd));
213# else
214 return std::locale(in, new converter_impl<char8_t>(cd));
215# endif
216#elif defined(__cpp_char8_t)
217 case char_facet_t::char8_f: break;
218#endif
219#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
220 case char_facet_t::char16_f: return std::locale(in, new converter_impl<char16_t>(cd));
221#endif
222#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
223 case char_facet_t::char32_f: return std::locale(in, new converter_impl<char32_t>(cd));
224#endif
225 }
226 return in;
227 }
228
229}}} // namespace boost::locale::impl_icu
230

// Source code of boost/libs/locale/src/boost/locale/icu/conversion.cpp