1/*=============================================================================
2 Copyright (c) 2001-2014 Joel de Guzman
3 Copyright (c) 2023 Nikita Kniazev
4
5 Distributed under the Boost Software License, Version 1.0. (See accompanying
6 file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7==============================================================================*/
8#if !defined(BOOST_SPIRIT_X3_UC_TYPES_NOVEMBER_23_2008_0840PM)
9#define BOOST_SPIRIT_X3_UC_TYPES_NOVEMBER_23_2008_0840PM
10
11#include <boost/config.hpp>
12#include <type_traits>
13#include <string>
14
15namespace boost { namespace spirit { namespace x3
16{
17 typedef char32_t ucs4_char;
18 typedef char utf8_char;
19 typedef std::basic_string<ucs4_char> ucs4_string;
20 typedef std::basic_string<utf8_char> utf8_string;
21
22namespace detail {
23 inline void utf8_put_encode(utf8_string& out, ucs4_char x)
24 {
25 // https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf D90
26 if (BOOST_UNLIKELY(x > 0x10FFFFul || (0xD7FFul < x && x < 0xE000ul)))
27 x = 0xFFFDul;
28
29 // Table 3-6. UTF-8 Bit Distribution
30 if (x < 0x80ul) {
31 out.push_back(c: static_cast<unsigned char>(x));
32 }
33 else if (x < 0x800ul) {
34 out.push_back(c: static_cast<unsigned char>(0xC0ul + (x >> 6)));
35 out.push_back(c: static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
36 }
37 else if (x < 0x10000ul) {
38 out.push_back(c: static_cast<unsigned char>(0xE0ul + (x >> 12)));
39 out.push_back(c: static_cast<unsigned char>(0x80ul + ((x >> 6) & 0x3Ful)));
40 out.push_back(c: static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
41 }
42 else {
43 out.push_back(c: static_cast<unsigned char>(0xF0ul + (x >> 18)));
44 out.push_back(c: static_cast<unsigned char>(0x80ul + ((x >> 12) & 0x3Ful)));
45 out.push_back(c: static_cast<unsigned char>(0x80ul + ((x >> 6) & 0x3Ful)));
46 out.push_back(c: static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
47 }
48 }
49}
50
51 template <typename Char>
52 inline utf8_string to_utf8(Char value)
53 {
54 utf8_string result;
55 typedef typename std::make_unsigned<Char>::type UChar;
56 detail::utf8_put_encode(out&: result, x: static_cast<UChar>(value));
57 return result;
58 }
59
60 template <typename Char>
61 inline utf8_string to_utf8(Char const* str)
62 {
63 utf8_string result;
64 typedef typename std::make_unsigned<Char>::type UChar;
65 while (*str)
66 detail::utf8_put_encode(out&: result, x: static_cast<UChar>(*str++));
67 return result;
68 }
69
70 template <typename Char, typename Traits, typename Allocator>
71 inline utf8_string
72 to_utf8(std::basic_string<Char, Traits, Allocator> const& str)
73 {
74 utf8_string result;
75 typedef typename std::make_unsigned<Char>::type UChar;
76 for (Char ch : str)
77 detail::utf8_put_encode(out&: result, x: static_cast<UChar>(ch));
78 return result;
79 }
80
81 // Assume wchar_t content is UTF-16 on MSVC, or mingw/wineg++ with -fshort-wchar
82#if defined(_MSC_VER) || defined(__SIZEOF_WCHAR_T__) && __SIZEOF_WCHAR_T__ == 2
83 inline utf8_string to_utf8(wchar_t value)
84 {
85 utf8_string result;
86 detail::utf8_put_encode(result, static_cast<std::make_unsigned<wchar_t>::type>(value));
87 return result;
88 }
89
90namespace detail {
91 inline ucs4_char decode_utf16(wchar_t const*& s)
92 {
93 typedef std::make_unsigned<wchar_t>::type uwchar_t;
94
95 uwchar_t x(*s);
96 if (x < 0xD800ul || x > 0xDFFFul)
97 return x;
98
99 // expected high-surrogate
100 if (BOOST_UNLIKELY((x >> 10) != 0b110110ul))
101 return 0xFFFDul;
102
103 uwchar_t y(*++s);
104 // expected low-surrogate
105 if (BOOST_UNLIKELY((y >> 10) != 0b110111ul))
106 return 0xFFFDul;
107
108 return ((x & 0x3FFul) << 10) + (y & 0x3FFul) + 0x10000ul;
109 }
110}
111
112 inline utf8_string to_utf8(wchar_t const* str)
113 {
114 utf8_string result;
115 for (ucs4_char c; (c = detail::decode_utf16(str)) != ucs4_char(); ++str)
116 detail::utf8_put_encode(result, c);
117 return result;
118 }
119
120 template <typename Traits, typename Allocator>
121 inline utf8_string
122 to_utf8(std::basic_string<wchar_t, Traits, Allocator> const& str)
123 {
124 return to_utf8(str.c_str());
125 }
126#endif
127}}}
128
129#endif
130

// Source: boost/libs/spirit/include/boost/spirit/home/x3/support/utility/utf8.hpp