| 1 | // |
| 2 | // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) |
| 3 | // |
| 4 | // Distributed under the Boost Software License, Version 1.0. |
| 5 | // https://www.boost.org/LICENSE_1_0.txt |
| 6 | |
| 7 | #ifndef BOOST_LOCALE_ENCODING_HPP_INCLUDED |
| 8 | #define BOOST_LOCALE_ENCODING_HPP_INCLUDED |
| 9 | |
| 10 | #include <boost/locale/config.hpp> |
| 11 | #include <boost/locale/detail/encoding.hpp> |
| 12 | #include <boost/locale/encoding_errors.hpp> |
| 13 | #include <boost/locale/encoding_utf.hpp> |
| 14 | #include <boost/locale/info.hpp> |
| 15 | #include <boost/locale/util/string.hpp> |
| 16 | #include <memory> |
| 17 | |
| 18 | #ifdef BOOST_MSVC |
| 19 | # pragma warning(push) |
| 20 | # pragma warning(disable : 4275 4251 4231 4660) |
| 21 | #endif |
| 22 | |
| 23 | namespace boost { namespace locale { |
| 24 | |
| 25 | /// \brief Namespace that contains all functions related to character set conversion |
| 26 | namespace conv { |
| 27 | |
| 28 | /// \defgroup Charset conversion functions |
| 29 | /// |
| 30 | /// @{ |
| 31 | |
| 32 | /// convert text in range [begin,end) encoded with \a charset to UTF according to policy \a how |
| 33 | /// |
| 34 | /// \throws invalid_charset_error: Character set is not supported |
| 35 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
| 36 | /// encoded or decoded) |
| 37 | template<typename CharType> |
| 38 | BOOST_LOCALE_DECL std::basic_string<CharType> |
| 39 | to_utf(const char* begin, const char* end, const std::string& charset, method_type how = default_method); |
| 40 | |
| 41 | /// convert UTF text in range [begin,end) to text encoded with \a charset according to policy \a how |
| 42 | /// |
| 43 | /// \throws invalid_charset_error: Character set is not supported |
| 44 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
| 45 | /// encoded or decoded) |
| 46 | template<typename CharType> |
| 47 | BOOST_LOCALE_DECL std::string from_utf(const CharType* begin, |
| 48 | const CharType* end, |
| 49 | const std::string& charset, |
| 50 | method_type how = default_method); |
| 51 | |
| 52 | /// convert \a text encoded with \a charset to UTF according to policy \a how |
| 53 | /// |
| 54 | /// \throws invalid_charset_error: Character set is not supported |
| 55 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
| 56 | /// encoded or decoded) |
| 57 | template<typename CharType> |
| 58 | std::basic_string<CharType> |
| 59 | to_utf(const std::string& text, const std::string& charset, method_type how = default_method) |
| 60 | { |
| 61 | return to_utf<CharType>(text.c_str(), text.c_str() + text.size(), charset, how); |
| 62 | } |
| 63 | |
| 64 | /// Convert \a text encoded with \a charset to UTF according to policy \a how |
| 65 | /// |
| 66 | /// \throws invalid_charset_error: Character set is not supported |
| 67 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
| 68 | /// encoded or decoded) |
| 69 | template<typename CharType> |
| 70 | std::basic_string<CharType> |
| 71 | to_utf(const char* text, const std::string& charset, method_type how = default_method) |
| 72 | { |
| 73 | return to_utf<CharType>(text, util::str_end(str: text), charset, how); |
| 74 | } |
| 75 | |
| 76 | /// convert text in range [begin,end) in locale encoding given by \a loc to UTF according to |
| 77 | /// policy \a how |
| 78 | /// |
| 79 | /// \throws std::bad_cast: \a loc does not have \ref info facet installed |
| 80 | /// \throws invalid_charset_error: Character set is not supported |
| 81 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
| 82 | /// encoded or decoded) |
| 83 | template<typename CharType> |
| 84 | std::basic_string<CharType> |
| 85 | to_utf(const char* begin, const char* end, const std::locale& loc, method_type how = default_method) |
| 86 | { |
| 87 | return to_utf<CharType>(begin, end, std::use_facet<info>(loc: loc).encoding(), how); |
| 88 | } |
| 89 | |
| 90 | /// Convert \a text in locale encoding given by \a loc to UTF according to policy \a how |
| 91 | /// |
| 92 | /// \throws std::bad_cast: \a loc does not have \ref info facet installed |
| 93 | /// \throws invalid_charset_error: Character set is not supported |
| 94 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
| 95 | /// encoded or decoded) |
| 96 | template<typename CharType> |
| 97 | std::basic_string<CharType> |
| 98 | to_utf(const std::string& text, const std::locale& loc, method_type how = default_method) |
| 99 | { |
| 100 | return to_utf<CharType>(text, std::use_facet<info>(loc: loc).encoding(), how); |
| 101 | } |
| 102 | |
| 103 | /// Convert \a text in locale encoding given by \a loc to UTF according to policy \a how |
| 104 | /// |
| 105 | /// \throws std::bad_cast: \a loc does not have \ref info facet installed |
| 106 | /// \throws invalid_charset_error: Character set is not supported |
| 107 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
| 108 | /// encoded or decoded) |
| 109 | template<typename CharType> |
| 110 | std::basic_string<CharType> to_utf(const char* text, const std::locale& loc, method_type how = default_method) |
| 111 | { |
| 112 | return to_utf<CharType>(text, std::use_facet<info>(loc: loc).encoding(), how); |
| 113 | } |
| 114 | |
| 115 | /// convert \a text from UTF to text encoded with \a charset according to policy \a how |
| 116 | /// |
| 117 | /// \throws invalid_charset_error: Character set is not supported |
| 118 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
| 119 | /// encoded or decoded) |
| 120 | template<typename CharType> |
| 121 | std::string |
| 122 | from_utf(const std::basic_string<CharType>& text, const std::string& charset, method_type how = default_method) |
| 123 | { |
| 124 | return from_utf(text.c_str(), text.c_str() + text.size(), charset, how); |
| 125 | } |
| 126 | |
| 127 | /// Convert \a text from UTF to \a charset according to policy \a how |
| 128 | /// |
| 129 | /// \throws invalid_charset_error: Character set is not supported |
| 130 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
| 131 | /// encoded or decoded) |
| 132 | template<typename CharType> |
| 133 | std::string from_utf(const CharType* text, const std::string& charset, method_type how = default_method) |
| 134 | { |
| 135 | return from_utf(text, util::str_end(text), charset, how); |
| 136 | } |
| 137 | |
| 138 | /// Convert UTF text in range [begin,end) to text in locale encoding given by \a loc according to policy \a how |
| 139 | /// |
| 140 | /// \throws std::bad_cast: \a loc does not have \ref info facet installed |
| 141 | /// \throws invalid_charset_error: Character set is not supported |
| 142 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
| 143 | /// encoded or decoded) |
| 144 | template<typename CharType> |
| 145 | std::string |
| 146 | from_utf(const CharType* begin, const CharType* end, const std::locale& loc, method_type how = default_method) |
| 147 | { |
| 148 | return from_utf(begin, end, std::use_facet<info>(loc: loc).encoding(), how); |
| 149 | } |
| 150 | |
| 151 | /// Convert \a text from UTF to locale encoding given by \a loc according to policy \a how |
| 152 | /// |
| 153 | /// \throws std::bad_cast: \a loc does not have \ref info facet installed |
| 154 | /// \throws invalid_charset_error: Character set is not supported |
| 155 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
| 156 | /// encoded or decoded) |
| 157 | template<typename CharType> |
| 158 | std::string |
| 159 | from_utf(const std::basic_string<CharType>& text, const std::locale& loc, method_type how = default_method) |
| 160 | { |
| 161 | return from_utf(text, std::use_facet<info>(loc: loc).encoding(), how); |
| 162 | } |
| 163 | |
| 164 | /// Convert \a text from UTF to locale encoding given by \a loc according to policy \a how |
| 165 | /// |
| 166 | /// \throws std::bad_cast: \a loc does not have \ref info facet installed |
| 167 | /// \throws invalid_charset_error: Character set is not supported |
| 168 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
| 169 | /// encoded or decoded) |
| 170 | template<typename CharType> |
| 171 | std::string from_utf(const CharType* text, const std::locale& loc, method_type how = default_method) |
| 172 | { |
| 173 | return from_utf(text, std::use_facet<info>(loc: loc).encoding(), how); |
| 174 | } |
| 175 | |
| 176 | /// Convert a text in range [begin,end) to \a to_encoding from \a from_encoding according to |
| 177 | /// policy \a how |
| 178 | /// |
| 179 | /// \throws invalid_charset_error: Either character set is not supported |
| 180 | /// \throws conversion_error: when the conversion fails (e.g. \a how is \c stop and any character cannot be |
| 181 | /// encoded or decoded) |
| 182 | BOOST_LOCALE_DECL |
| 183 | std::string between(const char* begin, |
| 184 | const char* end, |
| 185 | const std::string& to_encoding, |
| 186 | const std::string& from_encoding, |
| 187 | method_type how = default_method); |
| 188 | |
| 189 | /// Convert \a text to \a to_encoding from \a from_encoding according to |
| 190 | /// policy \a how |
| 191 | /// |
| 192 | /// \throws invalid_charset_error: Either character set is not supported |
| 193 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
| 194 | /// encoded or decoded) |
| 195 | inline std::string between(const char* text, |
| 196 | const std::string& to_encoding, |
| 197 | const std::string& from_encoding, |
| 198 | method_type how = default_method) |
| 199 | { |
| 200 | return between(begin: text, end: util::str_end(str: text), to_encoding, from_encoding, how); |
| 201 | } |
| 202 | |
| 203 | /// Convert \a text to \a to_encoding from \a from_encoding according to |
| 204 | /// policy \a how |
| 205 | /// |
| 206 | /// \throws invalid_charset_error: Either character set is not supported |
| 207 | /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be |
| 208 | /// encoded or decoded) |
| 209 | inline std::string between(const std::string& text, |
| 210 | const std::string& to_encoding, |
| 211 | const std::string& from_encoding, |
| 212 | method_type how = default_method) |
| 213 | { |
| 214 | return between(begin: text.c_str(), end: text.c_str() + text.size(), to_encoding, from_encoding, how); |
| 215 | } |
| 216 | |
| 217 | /// @} |
| 218 | |
| 219 | /// Converter class to decode a narrow string using a local encoding and encode it with UTF |
| 220 | template<typename CharType> |
| 221 | class utf_encoder { |
| 222 | std::unique_ptr<detail::utf_encoder<CharType>> impl_; |
| 223 | |
| 224 | public: |
| 225 | using char_type = CharType; |
| 226 | using string_type = std::basic_string<CharType>; |
| 227 | |
| 228 | /// Create an instance to convert text encoded with \a charset to UTF according to policy \a how |
| 229 | /// |
| 230 | /// Note: When converting only a single text \ref to_utf is likely faster. |
| 231 | /// \throws invalid_charset_error: Character set is not supported |
| 232 | utf_encoder(const std::string& charset, method_type how = default_method) : |
| 233 | impl_(detail::make_utf_encoder<CharType>(charset, how)) |
| 234 | {} |
| 235 | |
| 236 | /// Convert text in range [begin,end) to UTF |
| 237 | /// |
| 238 | /// \throws conversion_error: Conversion failed |
| 239 | string_type convert(const char* begin, const char* end) const { return impl_->convert(begin, end); } |
| 240 | /// Convert \a text to UTF |
| 241 | /// |
| 242 | /// \throws conversion_error: Conversion failed |
| 243 | string_type convert(const boost::string_view& text) const { return impl_->convert(text); } |
| 244 | /// Convert \a text to UTF |
| 245 | /// |
| 246 | /// \throws conversion_error: Conversion failed |
| 247 | string_type operator()(const boost::string_view& text) const { return convert(text); } |
| 248 | }; |
| 249 | |
| 250 | /// Converter class to decode an UTF string and encode it using a local encoding |
| 251 | template<typename CharType> |
| 252 | class utf_decoder { |
| 253 | std::unique_ptr<detail::utf_decoder<CharType>> impl_; |
| 254 | |
| 255 | public: |
| 256 | using char_type = CharType; |
| 257 | using stringview_type = boost::basic_string_view<CharType>; |
| 258 | |
| 259 | /// Create an instance to convert UTF text to text encoded with \a charset according to policy \a how |
| 260 | /// |
| 261 | /// Note: When converting only a single text \ref from_utf is likely faster. |
| 262 | /// \throws invalid_charset_error: Character set is not supported |
| 263 | utf_decoder(const std::string& charset, method_type how = default_method) : |
| 264 | impl_(detail::make_utf_decoder<CharType>(charset, how)) |
| 265 | {} |
| 266 | |
| 267 | /// Convert UTF text in range [begin,end) to local encoding |
| 268 | /// |
| 269 | /// \throws conversion_error: Conversion failed |
| 270 | std::string convert(const CharType* begin, const CharType* end) const { return impl_->convert(begin, end); } |
| 271 | /// Convert \a text from UTF to local encoding |
| 272 | /// |
| 273 | /// \throws conversion_error: Conversion failed |
| 274 | std::string convert(const stringview_type& text) const { return impl_->convert(text); } |
| 275 | /// Convert \a text from UTF to local encoding |
| 276 | /// |
| 277 | /// \throws conversion_error: Conversion failed |
| 278 | std::string operator()(const stringview_type& text) const { return convert(text); } |
| 279 | }; |
| 280 | |
| 281 | class narrow_converter { |
| 282 | std::unique_ptr<detail::narrow_converter> impl_; |
| 283 | |
| 284 | public: |
| 285 | /// Create converter to convert text from \a src_encoding to \a target_encoding according to policy \a how |
| 286 | /// |
| 287 | /// \throws invalid_charset_error: Either character set is not supported |
| 288 | narrow_converter(const std::string& src_encoding, |
| 289 | const std::string& target_encoding, |
| 290 | method_type how = default_method) : |
| 291 | impl_(detail::make_narrow_converter(src_encoding, target_encoding, how)) |
| 292 | {} |
| 293 | |
| 294 | /// Convert text in range [begin,end) |
| 295 | /// |
| 296 | /// \throws conversion_error: Conversion failed |
| 297 | std::string convert(const char* begin, const char* end) const { return impl_->convert(begin, end); } |
| 298 | /// Convert \a text |
| 299 | /// |
| 300 | /// \throws conversion_error: Conversion failed |
| 301 | std::string convert(const boost::string_view& text) const { return impl_->convert(text); } |
| 302 | /// Convert \a text |
| 303 | /// |
| 304 | /// \throws conversion_error: Conversion failed |
| 305 | std::string operator()(const boost::string_view& text) const { return convert(text); } |
| 306 | }; |
| 307 | } // namespace conv |
| 308 | }} // namespace boost::locale |
| 309 | |
| 310 | #ifdef BOOST_MSVC |
| 311 | # pragma warning(pop) |
| 312 | #endif |
| 313 | |
| 314 | #endif |
| 315 | |