1//
2// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3//
4// Distributed under the Boost Software License, Version 1.0.
5// https://www.boost.org/LICENSE_1_0.txt
6
7#ifndef BOOST_LOCALE_ENCODING_HPP_INCLUDED
8#define BOOST_LOCALE_ENCODING_HPP_INCLUDED
9
10#include <boost/locale/config.hpp>
11#include <boost/locale/detail/encoding.hpp>
12#include <boost/locale/encoding_errors.hpp>
13#include <boost/locale/encoding_utf.hpp>
14#include <boost/locale/info.hpp>
15#include <boost/locale/util/string.hpp>
16#include <memory>
17
18#ifdef BOOST_MSVC
19# pragma warning(push)
20# pragma warning(disable : 4275 4251 4231 4660)
21#endif
22
23namespace boost { namespace locale {
24
25 /// \brief Namespace that contains all functions related to character set conversion
26 namespace conv {
27
28 /// \defgroup Charset conversion functions
29 ///
30 /// @{
31
32 /// convert text in range [begin,end) encoded with \a charset to UTF according to policy \a how
33 ///
34 /// \throws invalid_charset_error: Character set is not supported
35 /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
36 /// encoded or decoded)
37 template<typename CharType>
38 BOOST_LOCALE_DECL std::basic_string<CharType>
39 to_utf(const char* begin, const char* end, const std::string& charset, method_type how = default_method);
40
41 /// convert UTF text in range [begin,end) to text encoded with \a charset according to policy \a how
42 ///
43 /// \throws invalid_charset_error: Character set is not supported
44 /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
45 /// encoded or decoded)
46 template<typename CharType>
47 BOOST_LOCALE_DECL std::string from_utf(const CharType* begin,
48 const CharType* end,
49 const std::string& charset,
50 method_type how = default_method);
51
52 /// convert \a text encoded with \a charset to UTF according to policy \a how
53 ///
54 /// \throws invalid_charset_error: Character set is not supported
55 /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
56 /// encoded or decoded)
57 template<typename CharType>
58 std::basic_string<CharType>
59 to_utf(const std::string& text, const std::string& charset, method_type how = default_method)
60 {
61 return to_utf<CharType>(text.c_str(), text.c_str() + text.size(), charset, how);
62 }
63
64 /// Convert \a text encoded with \a charset to UTF according to policy \a how
65 ///
66 /// \throws invalid_charset_error: Character set is not supported
67 /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
68 /// encoded or decoded)
69 template<typename CharType>
70 std::basic_string<CharType>
71 to_utf(const char* text, const std::string& charset, method_type how = default_method)
72 {
73 return to_utf<CharType>(text, util::str_end(str: text), charset, how);
74 }
75
76 /// convert text in range [begin,end) in locale encoding given by \a loc to UTF according to
77 /// policy \a how
78 ///
79 /// \throws std::bad_cast: \a loc does not have \ref info facet installed
80 /// \throws invalid_charset_error: Character set is not supported
81 /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
82 /// encoded or decoded)
83 template<typename CharType>
84 std::basic_string<CharType>
85 to_utf(const char* begin, const char* end, const std::locale& loc, method_type how = default_method)
86 {
87 return to_utf<CharType>(begin, end, std::use_facet<info>(loc: loc).encoding(), how);
88 }
89
90 /// Convert \a text in locale encoding given by \a loc to UTF according to policy \a how
91 ///
92 /// \throws std::bad_cast: \a loc does not have \ref info facet installed
93 /// \throws invalid_charset_error: Character set is not supported
94 /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
95 /// encoded or decoded)
96 template<typename CharType>
97 std::basic_string<CharType>
98 to_utf(const std::string& text, const std::locale& loc, method_type how = default_method)
99 {
100 return to_utf<CharType>(text, std::use_facet<info>(loc: loc).encoding(), how);
101 }
102
103 /// Convert \a text in locale encoding given by \a loc to UTF according to policy \a how
104 ///
105 /// \throws std::bad_cast: \a loc does not have \ref info facet installed
106 /// \throws invalid_charset_error: Character set is not supported
107 /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
108 /// encoded or decoded)
109 template<typename CharType>
110 std::basic_string<CharType> to_utf(const char* text, const std::locale& loc, method_type how = default_method)
111 {
112 return to_utf<CharType>(text, std::use_facet<info>(loc: loc).encoding(), how);
113 }
114
115 /// convert \a text from UTF to text encoded with \a charset according to policy \a how
116 ///
117 /// \throws invalid_charset_error: Character set is not supported
118 /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
119 /// encoded or decoded)
120 template<typename CharType>
121 std::string
122 from_utf(const std::basic_string<CharType>& text, const std::string& charset, method_type how = default_method)
123 {
124 return from_utf(text.c_str(), text.c_str() + text.size(), charset, how);
125 }
126
127 /// Convert \a text from UTF to \a charset according to policy \a how
128 ///
129 /// \throws invalid_charset_error: Character set is not supported
130 /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
131 /// encoded or decoded)
132 template<typename CharType>
133 std::string from_utf(const CharType* text, const std::string& charset, method_type how = default_method)
134 {
135 return from_utf(text, util::str_end(text), charset, how);
136 }
137
138 /// Convert UTF text in range [begin,end) to text in locale encoding given by \a loc according to policy \a how
139 ///
140 /// \throws std::bad_cast: \a loc does not have \ref info facet installed
141 /// \throws invalid_charset_error: Character set is not supported
142 /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
143 /// encoded or decoded)
144 template<typename CharType>
145 std::string
146 from_utf(const CharType* begin, const CharType* end, const std::locale& loc, method_type how = default_method)
147 {
148 return from_utf(begin, end, std::use_facet<info>(loc: loc).encoding(), how);
149 }
150
151 /// Convert \a text from UTF to locale encoding given by \a loc according to policy \a how
152 ///
153 /// \throws std::bad_cast: \a loc does not have \ref info facet installed
154 /// \throws invalid_charset_error: Character set is not supported
155 /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
156 /// encoded or decoded)
157 template<typename CharType>
158 std::string
159 from_utf(const std::basic_string<CharType>& text, const std::locale& loc, method_type how = default_method)
160 {
161 return from_utf(text, std::use_facet<info>(loc: loc).encoding(), how);
162 }
163
164 /// Convert \a text from UTF to locale encoding given by \a loc according to policy \a how
165 ///
166 /// \throws std::bad_cast: \a loc does not have \ref info facet installed
167 /// \throws invalid_charset_error: Character set is not supported
168 /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
169 /// encoded or decoded)
170 template<typename CharType>
171 std::string from_utf(const CharType* text, const std::locale& loc, method_type how = default_method)
172 {
173 return from_utf(text, std::use_facet<info>(loc: loc).encoding(), how);
174 }
175
176 /// Convert a text in range [begin,end) to \a to_encoding from \a from_encoding according to
177 /// policy \a how
178 ///
179 /// \throws invalid_charset_error: Either character set is not supported
180 /// \throws conversion_error: when the conversion fails (e.g. \a how is \c stop and any character cannot be
181 /// encoded or decoded)
182 BOOST_LOCALE_DECL
183 std::string between(const char* begin,
184 const char* end,
185 const std::string& to_encoding,
186 const std::string& from_encoding,
187 method_type how = default_method);
188
189 /// Convert \a text to \a to_encoding from \a from_encoding according to
190 /// policy \a how
191 ///
192 /// \throws invalid_charset_error: Either character set is not supported
193 /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
194 /// encoded or decoded)
195 inline std::string between(const char* text,
196 const std::string& to_encoding,
197 const std::string& from_encoding,
198 method_type how = default_method)
199 {
200 return between(begin: text, end: util::str_end(str: text), to_encoding, from_encoding, how);
201 }
202
203 /// Convert \a text to \a to_encoding from \a from_encoding according to
204 /// policy \a how
205 ///
206 /// \throws invalid_charset_error: Either character set is not supported
207 /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
208 /// encoded or decoded)
209 inline std::string between(const std::string& text,
210 const std::string& to_encoding,
211 const std::string& from_encoding,
212 method_type how = default_method)
213 {
214 return between(begin: text.c_str(), end: text.c_str() + text.size(), to_encoding, from_encoding, how);
215 }
216
217 /// @}
218
219 /// Converter class to decode a narrow string using a local encoding and encode it with UTF
220 template<typename CharType>
221 class utf_encoder {
222 std::unique_ptr<detail::utf_encoder<CharType>> impl_;
223
224 public:
225 using char_type = CharType;
226 using string_type = std::basic_string<CharType>;
227
228 /// Create an instance to convert text encoded with \a charset to UTF according to policy \a how
229 ///
230 /// Note: When converting only a single text \ref to_utf is likely faster.
231 /// \throws invalid_charset_error: Character set is not supported
232 utf_encoder(const std::string& charset, method_type how = default_method) :
233 impl_(detail::make_utf_encoder<CharType>(charset, how))
234 {}
235
236 /// Convert text in range [begin,end) to UTF
237 ///
238 /// \throws conversion_error: Conversion failed
239 string_type convert(const char* begin, const char* end) const { return impl_->convert(begin, end); }
240 /// Convert \a text to UTF
241 ///
242 /// \throws conversion_error: Conversion failed
243 string_type convert(const boost::string_view& text) const { return impl_->convert(text); }
244 /// Convert \a text to UTF
245 ///
246 /// \throws conversion_error: Conversion failed
247 string_type operator()(const boost::string_view& text) const { return convert(text); }
248 };
249
250 /// Converter class to decode an UTF string and encode it using a local encoding
251 template<typename CharType>
252 class utf_decoder {
253 std::unique_ptr<detail::utf_decoder<CharType>> impl_;
254
255 public:
256 using char_type = CharType;
257 using stringview_type = boost::basic_string_view<CharType>;
258
259 /// Create an instance to convert UTF text to text encoded with \a charset according to policy \a how
260 ///
261 /// Note: When converting only a single text \ref from_utf is likely faster.
262 /// \throws invalid_charset_error: Character set is not supported
263 utf_decoder(const std::string& charset, method_type how = default_method) :
264 impl_(detail::make_utf_decoder<CharType>(charset, how))
265 {}
266
267 /// Convert UTF text in range [begin,end) to local encoding
268 ///
269 /// \throws conversion_error: Conversion failed
270 std::string convert(const CharType* begin, const CharType* end) const { return impl_->convert(begin, end); }
271 /// Convert \a text from UTF to local encoding
272 ///
273 /// \throws conversion_error: Conversion failed
274 std::string convert(const stringview_type& text) const { return impl_->convert(text); }
275 /// Convert \a text from UTF to local encoding
276 ///
277 /// \throws conversion_error: Conversion failed
278 std::string operator()(const stringview_type& text) const { return convert(text); }
279 };
280
281 class narrow_converter {
282 std::unique_ptr<detail::narrow_converter> impl_;
283
284 public:
285 /// Create converter to convert text from \a src_encoding to \a target_encoding according to policy \a how
286 ///
287 /// \throws invalid_charset_error: Either character set is not supported
288 narrow_converter(const std::string& src_encoding,
289 const std::string& target_encoding,
290 method_type how = default_method) :
291 impl_(detail::make_narrow_converter(src_encoding, target_encoding, how))
292 {}
293
294 /// Convert text in range [begin,end)
295 ///
296 /// \throws conversion_error: Conversion failed
297 std::string convert(const char* begin, const char* end) const { return impl_->convert(begin, end); }
298 /// Convert \a text
299 ///
300 /// \throws conversion_error: Conversion failed
301 std::string convert(const boost::string_view& text) const { return impl_->convert(text); }
302 /// Convert \a text
303 ///
304 /// \throws conversion_error: Conversion failed
305 std::string operator()(const boost::string_view& text) const { return convert(text); }
306 };
307 } // namespace conv
308}} // namespace boost::locale
309
310#ifdef BOOST_MSVC
311# pragma warning(pop)
312#endif
313
314#endif
315

// source code of boost/libs/locale/include/boost/locale/encoding.hpp