1 | // |
2 | // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) |
3 | // |
4 | // Distributed under the Boost Software License, Version 1.0. |
5 | // https://www.boost.org/LICENSE_1_0.txt |
6 | |
7 | #ifndef BOOST_LOCALE_ENCODING_HPP_INCLUDED |
8 | #define BOOST_LOCALE_ENCODING_HPP_INCLUDED |
9 | |
10 | #include <boost/locale/config.hpp> |
11 | #include <boost/locale/detail/encoding.hpp> |
12 | #include <boost/locale/encoding_errors.hpp> |
13 | #include <boost/locale/encoding_utf.hpp> |
14 | #include <boost/locale/info.hpp> |
15 | #include <boost/locale/util/string.hpp> |
16 | #include <memory> |
17 | |
18 | #ifdef BOOST_MSVC |
19 | # pragma warning(push) |
20 | # pragma warning(disable : 4275 4251 4231 4660) |
21 | #endif |
22 | |
23 | namespace boost { namespace locale { |
24 | |
25 | /// \brief Namespace that contains all functions related to character set conversion |
26 | namespace conv { |
27 | |
28 | /// \defgroup Charset conversion functions |
29 | /// |
30 | /// @{ |
31 | |
32 | /// convert text in range [begin,end) encoded with \a charset to UTF according to policy \a how |
33 | /// |
34 | /// \throws invalid_charset_error: Character set is not supported |
35 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
36 | /// encoded or decoded) |
37 | template<typename CharType> |
38 | BOOST_LOCALE_DECL std::basic_string<CharType> |
39 | to_utf(const char* begin, const char* end, const std::string& charset, method_type how = default_method); |
40 | |
41 | /// convert UTF text in range [begin,end) to text encoded with \a charset according to policy \a how |
42 | /// |
43 | /// \throws invalid_charset_error: Character set is not supported |
44 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
45 | /// encoded or decoded) |
46 | template<typename CharType> |
47 | BOOST_LOCALE_DECL std::string from_utf(const CharType* begin, |
48 | const CharType* end, |
49 | const std::string& charset, |
50 | method_type how = default_method); |
51 | |
52 | /// convert \a text encoded with \a charset to UTF according to policy \a how |
53 | /// |
54 | /// \throws invalid_charset_error: Character set is not supported |
55 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
56 | /// encoded or decoded) |
57 | template<typename CharType> |
58 | std::basic_string<CharType> |
59 | to_utf(const std::string& text, const std::string& charset, method_type how = default_method) |
60 | { |
61 | return to_utf<CharType>(text.c_str(), text.c_str() + text.size(), charset, how); |
62 | } |
63 | |
64 | /// Convert \a text encoded with \a charset to UTF according to policy \a how |
65 | /// |
66 | /// \throws invalid_charset_error: Character set is not supported |
67 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
68 | /// encoded or decoded) |
69 | template<typename CharType> |
70 | std::basic_string<CharType> |
71 | to_utf(const char* text, const std::string& charset, method_type how = default_method) |
72 | { |
73 | return to_utf<CharType>(text, util::str_end(str: text), charset, how); |
74 | } |
75 | |
76 | /// convert text in range [begin,end) in locale encoding given by \a loc to UTF according to |
77 | /// policy \a how |
78 | /// |
79 | /// \throws std::bad_cast: \a loc does not have \ref info facet installed |
80 | /// \throws invalid_charset_error: Character set is not supported |
81 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
82 | /// encoded or decoded) |
83 | template<typename CharType> |
84 | std::basic_string<CharType> |
85 | to_utf(const char* begin, const char* end, const std::locale& loc, method_type how = default_method) |
86 | { |
87 | return to_utf<CharType>(begin, end, std::use_facet<info>(loc: loc).encoding(), how); |
88 | } |
89 | |
90 | /// Convert \a text in locale encoding given by \a loc to UTF according to policy \a how |
91 | /// |
92 | /// \throws std::bad_cast: \a loc does not have \ref info facet installed |
93 | /// \throws invalid_charset_error: Character set is not supported |
94 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
95 | /// encoded or decoded) |
96 | template<typename CharType> |
97 | std::basic_string<CharType> |
98 | to_utf(const std::string& text, const std::locale& loc, method_type how = default_method) |
99 | { |
100 | return to_utf<CharType>(text, std::use_facet<info>(loc: loc).encoding(), how); |
101 | } |
102 | |
103 | /// Convert \a text in locale encoding given by \a loc to UTF according to policy \a how |
104 | /// |
105 | /// \throws std::bad_cast: \a loc does not have \ref info facet installed |
106 | /// \throws invalid_charset_error: Character set is not supported |
107 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
108 | /// encoded or decoded) |
109 | template<typename CharType> |
110 | std::basic_string<CharType> to_utf(const char* text, const std::locale& loc, method_type how = default_method) |
111 | { |
112 | return to_utf<CharType>(text, std::use_facet<info>(loc: loc).encoding(), how); |
113 | } |
114 | |
115 | /// convert \a text from UTF to text encoded with \a charset according to policy \a how |
116 | /// |
117 | /// \throws invalid_charset_error: Character set is not supported |
118 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
119 | /// encoded or decoded) |
120 | template<typename CharType> |
121 | std::string |
122 | from_utf(const std::basic_string<CharType>& text, const std::string& charset, method_type how = default_method) |
123 | { |
124 | return from_utf(text.c_str(), text.c_str() + text.size(), charset, how); |
125 | } |
126 | |
127 | /// Convert \a text from UTF to \a charset according to policy \a how |
128 | /// |
129 | /// \throws invalid_charset_error: Character set is not supported |
130 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
131 | /// encoded or decoded) |
132 | template<typename CharType> |
133 | std::string from_utf(const CharType* text, const std::string& charset, method_type how = default_method) |
134 | { |
135 | return from_utf(text, util::str_end(text), charset, how); |
136 | } |
137 | |
138 | /// Convert UTF text in range [begin,end) to text in locale encoding given by \a loc according to policy \a how |
139 | /// |
140 | /// \throws std::bad_cast: \a loc does not have \ref info facet installed |
141 | /// \throws invalid_charset_error: Character set is not supported |
142 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
143 | /// encoded or decoded) |
144 | template<typename CharType> |
145 | std::string |
146 | from_utf(const CharType* begin, const CharType* end, const std::locale& loc, method_type how = default_method) |
147 | { |
148 | return from_utf(begin, end, std::use_facet<info>(loc: loc).encoding(), how); |
149 | } |
150 | |
151 | /// Convert \a text from UTF to locale encoding given by \a loc according to policy \a how |
152 | /// |
153 | /// \throws std::bad_cast: \a loc does not have \ref info facet installed |
154 | /// \throws invalid_charset_error: Character set is not supported |
155 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
156 | /// encoded or decoded) |
157 | template<typename CharType> |
158 | std::string |
159 | from_utf(const std::basic_string<CharType>& text, const std::locale& loc, method_type how = default_method) |
160 | { |
161 | return from_utf(text, std::use_facet<info>(loc: loc).encoding(), how); |
162 | } |
163 | |
164 | /// Convert \a text from UTF to locale encoding given by \a loc according to policy \a how |
165 | /// |
166 | /// \throws std::bad_cast: \a loc does not have \ref info facet installed |
167 | /// \throws invalid_charset_error: Character set is not supported |
168 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
169 | /// encoded or decoded) |
170 | template<typename CharType> |
171 | std::string from_utf(const CharType* text, const std::locale& loc, method_type how = default_method) |
172 | { |
173 | return from_utf(text, std::use_facet<info>(loc: loc).encoding(), how); |
174 | } |
175 | |
176 | /// Convert a text in range [begin,end) to \a to_encoding from \a from_encoding according to |
177 | /// policy \a how |
178 | /// |
179 | /// \throws invalid_charset_error: Either character set is not supported |
180 | /// \throws conversion_error: when the conversion fails (e.g. \a how is \c stop and any character cannot be |
181 | /// encoded or decoded) |
182 | BOOST_LOCALE_DECL |
183 | std::string between(const char* begin, |
184 | const char* end, |
185 | const std::string& to_encoding, |
186 | const std::string& from_encoding, |
187 | method_type how = default_method); |
188 | |
189 | /// Convert \a text to \a to_encoding from \a from_encoding according to |
190 | /// policy \a how |
191 | /// |
192 | /// \throws invalid_charset_error: Either character set is not supported |
193 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
194 | /// encoded or decoded) |
195 | inline std::string between(const char* text, |
196 | const std::string& to_encoding, |
197 | const std::string& from_encoding, |
198 | method_type how = default_method) |
199 | { |
200 | return between(begin: text, end: util::str_end(str: text), to_encoding, from_encoding, how); |
201 | } |
202 | |
203 | /// Convert \a text to \a to_encoding from \a from_encoding according to |
204 | /// policy \a how |
205 | /// |
206 | /// \throws invalid_charset_error: Either character set is not supported |
207 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
208 | /// encoded or decoded) |
209 | inline std::string between(const std::string& text, |
210 | const std::string& to_encoding, |
211 | const std::string& from_encoding, |
212 | method_type how = default_method) |
213 | { |
214 | return between(begin: text.c_str(), end: text.c_str() + text.size(), to_encoding, from_encoding, how); |
215 | } |
216 | |
217 | /// @} |
218 | |
219 | /// Converter class to decode a narrow string using a local encoding and encode it with UTF |
220 | template<typename CharType> |
221 | class utf_encoder { |
222 | std::unique_ptr<detail::utf_encoder<CharType>> impl_; |
223 | |
224 | public: |
225 | using char_type = CharType; |
226 | using string_type = std::basic_string<CharType>; |
227 | |
228 | /// Create an instance to convert text encoded with \a charset to UTF according to policy \a how |
229 | /// |
230 | /// Note: When converting only a single text \ref to_utf is likely faster. |
231 | /// \throws invalid_charset_error: Character set is not supported |
232 | utf_encoder(const std::string& charset, method_type how = default_method) : |
233 | impl_(detail::make_utf_encoder<CharType>(charset, how)) |
234 | {} |
235 | |
236 | /// Convert text in range [begin,end) to UTF |
237 | /// |
238 | /// \throws conversion_error: Conversion failed |
239 | string_type convert(const char* begin, const char* end) const { return impl_->convert(begin, end); } |
240 | /// Convert \a text to UTF |
241 | /// |
242 | /// \throws conversion_error: Conversion failed |
243 | string_type convert(const boost::string_view& text) const { return impl_->convert(text); } |
244 | /// Convert \a text to UTF |
245 | /// |
246 | /// \throws conversion_error: Conversion failed |
247 | string_type operator()(const boost::string_view& text) const { return convert(text); } |
248 | }; |
249 | |
250 | /// Converter class to decode an UTF string and encode it using a local encoding |
251 | template<typename CharType> |
252 | class utf_decoder { |
253 | std::unique_ptr<detail::utf_decoder<CharType>> impl_; |
254 | |
255 | public: |
256 | using char_type = CharType; |
257 | using stringview_type = boost::basic_string_view<CharType>; |
258 | |
259 | /// Create an instance to convert UTF text to text encoded with \a charset according to policy \a how |
260 | /// |
261 | /// Note: When converting only a single text \ref from_utf is likely faster. |
262 | /// \throws invalid_charset_error: Character set is not supported |
263 | utf_decoder(const std::string& charset, method_type how = default_method) : |
264 | impl_(detail::make_utf_decoder<CharType>(charset, how)) |
265 | {} |
266 | |
267 | /// Convert UTF text in range [begin,end) to local encoding |
268 | /// |
269 | /// \throws conversion_error: Conversion failed |
270 | std::string convert(const CharType* begin, const CharType* end) const { return impl_->convert(begin, end); } |
271 | /// Convert \a text from UTF to local encoding |
272 | /// |
273 | /// \throws conversion_error: Conversion failed |
274 | std::string convert(const stringview_type& text) const { return impl_->convert(text); } |
275 | /// Convert \a text from UTF to local encoding |
276 | /// |
277 | /// \throws conversion_error: Conversion failed |
278 | std::string operator()(const stringview_type& text) const { return convert(text); } |
279 | }; |
280 | |
281 | class narrow_converter { |
282 | std::unique_ptr<detail::narrow_converter> impl_; |
283 | |
284 | public: |
285 | /// Create converter to convert text from \a src_encoding to \a target_encoding according to policy \a how |
286 | /// |
287 | /// \throws invalid_charset_error: Either character set is not supported |
288 | narrow_converter(const std::string& src_encoding, |
289 | const std::string& target_encoding, |
290 | method_type how = default_method) : |
291 | impl_(detail::make_narrow_converter(src_encoding, target_encoding, how)) |
292 | {} |
293 | |
294 | /// Convert text in range [begin,end) |
295 | /// |
296 | /// \throws conversion_error: Conversion failed |
297 | std::string convert(const char* begin, const char* end) const { return impl_->convert(begin, end); } |
298 | /// Convert \a text |
299 | /// |
300 | /// \throws conversion_error: Conversion failed |
301 | std::string convert(const boost::string_view& text) const { return impl_->convert(text); } |
302 | /// Convert \a text |
303 | /// |
304 | /// \throws conversion_error: Conversion failed |
305 | std::string operator()(const boost::string_view& text) const { return convert(text); } |
306 | }; |
307 | } // namespace conv |
308 | }} // namespace boost::locale |
309 | |
310 | #ifdef BOOST_MSVC |
311 | # pragma warning(pop) |
312 | #endif |
313 | |
314 | #endif |
315 | |