1 | // |
2 | // Copyright (c) 2015 Artyom Beilis (Tonkikh) |
3 | // |
4 | // Distributed under the Boost Software License, Version 1.0. |
5 | // https://www.boost.org/LICENSE_1_0.txt |
6 | |
7 | #ifndef BOOST_LOCALE_UTF8_CODECVT_HPP |
8 | #define BOOST_LOCALE_UTF8_CODECVT_HPP |
9 | |
10 | #include <boost/locale/generic_codecvt.hpp> |
11 | #include <boost/locale/utf.hpp> |
12 | #include <boost/assert.hpp> |
13 | #include <cstdint> |
14 | #include <locale> |
15 | |
16 | namespace boost { namespace locale { |
17 | |
18 | /// \brief Generic utf8 codecvt facet, it allows to convert UTF-8 strings to UTF-16 and UTF-32 using wchar_t, |
19 | /// char32_t and char16_t |
20 | template<typename CharType> |
21 | class utf8_codecvt : public generic_codecvt<CharType, utf8_codecvt<CharType>> { |
22 | public: |
23 | struct state_type {}; |
24 | |
25 | utf8_codecvt(size_t refs = 0) : generic_codecvt<CharType, utf8_codecvt<CharType>>(refs) {} |
26 | |
27 | static int max_encoding_length() { return 4; } |
28 | |
29 | static state_type initial_state(generic_codecvt_base::initial_convertion_state /* unused */) |
30 | { |
31 | return state_type(); |
32 | } |
33 | static utf::code_point to_unicode(state_type&, const char*& begin, const char* end) |
34 | { |
35 | const char* p = begin; |
36 | |
37 | utf::code_point c = utf::utf_traits<char>::decode(p, e: end); |
38 | if(c != utf::illegal && c != utf::incomplete) |
39 | begin = p; |
40 | return c; |
41 | } |
42 | |
43 | static utf::len_or_error from_unicode(state_type&, utf::code_point u, char* begin, const char* end) |
44 | { |
45 | BOOST_ASSERT(utf::is_valid_codepoint(u)); |
46 | const auto width = utf::utf_traits<char>::width(value: u); |
47 | if(width > end - begin) |
48 | return utf::incomplete; |
49 | utf::utf_traits<char>::encode(value: u, out: begin); |
50 | return width; |
51 | } |
52 | }; |
53 | |
54 | }} // namespace boost::locale |
55 | |
56 | #endif |
57 | |