| 1 | // |
| 2 | // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) |
| 3 | // Copyright (c) 2022-2023 Alexander Grund |
| 4 | // |
| 5 | // Distributed under the Boost Software License, Version 1.0. |
| 6 | // https://www.boost.org/LICENSE_1_0.txt |
| 7 | |
| 8 | #include "boost/locale/icu/codecvt.hpp" |
| 9 | #include <boost/locale/encoding.hpp> |
| 10 | #include <boost/locale/encoding_errors.hpp> |
| 11 | #include <boost/locale/hold_ptr.hpp> |
| 12 | #include <boost/locale/util.hpp> |
| 13 | #include "boost/locale/icu/all_generator.hpp" |
| 14 | #include "boost/locale/icu/icu_util.hpp" |
| 15 | #include "boost/locale/icu/uconv.hpp" |
| 16 | #include "boost/locale/util/encoding.hpp" |
| 17 | #include "boost/locale/util/make_std_unique.hpp" |
| 18 | #include <unicode/ucnv.h> |
| 19 | #include <unicode/ucnv_err.h> |
| 20 | |
| 21 | #ifdef BOOST_MSVC |
| 22 | # pragma warning(disable : 4244) // loose data |
| 23 | #endif |
| 24 | |
| 25 | namespace boost { namespace locale { namespace impl_icu { |
| 26 | class uconv_converter : public util::base_converter { |
| 27 | public: |
| 28 | uconv_converter(const std::string& encoding) : encoding_(encoding), cvt_(encoding, cpcvt_type::stop) {} |
| 29 | |
| 30 | bool is_thread_safe() const override { return false; } |
| 31 | |
| 32 | uconv_converter* clone() const override { return new uconv_converter(encoding_); } |
| 33 | |
| 34 | utf::code_point to_unicode(const char*& begin, const char* end) override |
| 35 | { |
| 36 | UErrorCode err = U_ZERO_ERROR; |
| 37 | const char* tmp = begin; |
| 38 | const UChar32 c = ucnv_getNextUChar(converter: cvt_.cvt(), source: &tmp, sourceLimit: end, err: &err); |
| 39 | ucnv_reset(converter: cvt_.cvt()); |
| 40 | if(err == U_TRUNCATED_CHAR_FOUND) |
| 41 | return incomplete; |
| 42 | if(U_FAILURE(code: err)) |
| 43 | return illegal; |
| 44 | |
| 45 | begin = tmp; |
| 46 | return c; |
| 47 | } |
| 48 | |
| 49 | utf::len_or_error from_unicode(utf::code_point u, char* begin, const char* end) override |
| 50 | { |
| 51 | UChar code_point[2] = {0}; |
| 52 | int len; |
| 53 | if(u <= 0xFFFF) { |
| 54 | if(0xD800 <= u && u <= 0xDFFF) // No surrogates |
| 55 | return illegal; |
| 56 | code_point[0] = u; |
| 57 | len = 1; |
| 58 | } else { |
| 59 | u -= 0x10000; |
| 60 | code_point[0] = 0xD800 | (u >> 10); |
| 61 | code_point[1] = 0xDC00 | (u & 0x3FF); |
| 62 | len = 2; |
| 63 | } |
| 64 | UErrorCode err = U_ZERO_ERROR; |
| 65 | const auto olen = ucnv_fromUChars(cnv: cvt_.cvt(), dest: begin, destCapacity: end - begin, src: code_point, srcLength: len, pErrorCode: &err); |
| 66 | ucnv_reset(converter: cvt_.cvt()); |
| 67 | if(err == U_BUFFER_OVERFLOW_ERROR) |
| 68 | return incomplete; |
| 69 | if(U_FAILURE(code: err)) |
| 70 | return illegal; |
| 71 | return olen; |
| 72 | } |
| 73 | |
| 74 | int max_len() const override { return cvt_.max_char_size(); } |
| 75 | |
| 76 | private: |
| 77 | std::string encoding_; |
| 78 | uconv cvt_; |
| 79 | }; |
| 80 | |
| 81 | std::unique_ptr<util::base_converter> create_uconv_converter(const std::string& encoding) |
| 82 | { |
| 83 | try { |
| 84 | return make_std_unique<uconv_converter>(args: encoding); |
| 85 | } catch(const std::exception& /*e*/) { |
| 86 | return nullptr; |
| 87 | } |
| 88 | } |
| 89 | |
| 90 | std::locale create_codecvt(const std::locale& in, const std::string& encoding, char_facet_t type) |
| 91 | { |
| 92 | if(util::normalize_encoding(encoding) == "utf8" ) |
| 93 | return util::create_utf8_codecvt(in, type); |
| 94 | |
| 95 | try { |
| 96 | return util::create_simple_codecvt(in, encoding, type); |
| 97 | } catch(const boost::locale::conv::invalid_charset_error&) { |
| 98 | return util::create_codecvt(in, cvt: create_uconv_converter(encoding), type); |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | }}} // namespace boost::locale::impl_icu |
| 103 | |