/*=============================================================================
    Copyright (c) 2001-2011 Joel de Guzman
    Copyright (c) 2023 Nikita Kniazev

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
8#if !defined(BOOST_SPIRIT_UC_TYPES_NOVEMBER_23_2008_0840PM)
9#define BOOST_SPIRIT_UC_TYPES_NOVEMBER_23_2008_0840PM
10
11#if defined(_MSC_VER)
12#pragma once
13#endif
14
15#include <boost/config.hpp>
16#include <boost/cstdint.hpp>
17#include <boost/type_traits/make_unsigned.hpp>
18#include <string>
19
20namespace boost { namespace spirit
21{
22 typedef ::boost::uint32_t ucs4_char;
23 typedef char utf8_char;
24 typedef std::basic_string<ucs4_char> ucs4_string;
25 typedef std::basic_string<utf8_char> utf8_string;
26
27namespace detail {
28 inline void utf8_put_encode(utf8_string& out, ucs4_char x)
29 {
30 // https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf D90
31 if (BOOST_UNLIKELY(x > 0x10FFFFul || (0xD7FFul < x && x < 0xE000ul)))
32 x = 0xFFFDul;
33
34 // Table 3-6. UTF-8 Bit Distribution
35 if (x < 0x80ul) {
36 out.push_back(c: static_cast<unsigned char>(x));
37 }
38 else if (x < 0x800ul) {
39 out.push_back(c: static_cast<unsigned char>(0xC0ul + (x >> 6)));
40 out.push_back(c: static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
41 }
42 else if (x < 0x10000ul) {
43 out.push_back(c: static_cast<unsigned char>(0xE0ul + (x >> 12)));
44 out.push_back(c: static_cast<unsigned char>(0x80ul + ((x >> 6) & 0x3Ful)));
45 out.push_back(c: static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
46 }
47 else {
48 out.push_back(c: static_cast<unsigned char>(0xF0ul + (x >> 18)));
49 out.push_back(c: static_cast<unsigned char>(0x80ul + ((x >> 12) & 0x3Ful)));
50 out.push_back(c: static_cast<unsigned char>(0x80ul + ((x >> 6) & 0x3Ful)));
51 out.push_back(c: static_cast<unsigned char>(0x80ul + (x & 0x3Ful)));
52 }
53 }
54}
55
56 template <typename Char>
57 inline utf8_string to_utf8(Char value)
58 {
59 utf8_string result;
60 typedef typename make_unsigned<Char>::type UChar;
61 detail::utf8_put_encode(out&: result, x: static_cast<UChar>(value));
62 return result;
63 }
64
65 template <typename Char>
66 inline utf8_string to_utf8(Char const* str)
67 {
68 utf8_string result;
69 typedef typename make_unsigned<Char>::type UChar;
70 while (*str)
71 detail::utf8_put_encode(out&: result, x: static_cast<UChar>(*str++));
72 return result;
73 }
74
75 template <typename Char, typename Traits, typename Allocator>
76 inline utf8_string
77 to_utf8(std::basic_string<Char, Traits, Allocator> const& str)
78 {
79 utf8_string result;
80 typedef typename make_unsigned<Char>::type UChar;
81 for (Char const* ptr = str.data(),
82 * end = ptr + str.size(); ptr < end; ++ptr)
83 detail::utf8_put_encode(out&: result, x: static_cast<UChar>(*ptr));
84 return result;
85 }
86
// Assume wchar_t content is UTF-16 on MSVC, or mingw/wineg++ with -fshort-wchar
88#if defined(_MSC_VER) || defined(__SIZEOF_WCHAR_T__) && __SIZEOF_WCHAR_T__ == 2
89 inline utf8_string to_utf8(wchar_t value)
90 {
91 utf8_string result;
92 detail::utf8_put_encode(result, static_cast<make_unsigned<wchar_t>::type>(value));
93 return result;
94 }
95
96namespace detail {
97 inline ucs4_char decode_utf16(wchar_t const*& s)
98 {
99 typedef make_unsigned<wchar_t>::type uwchar_t;
100
101 uwchar_t x(*s);
102 if (x < 0xD800ul || x > 0xDFFFul)
103 return x;
104
105 // expected high-surrogate
106 if (BOOST_UNLIKELY((x >> 10) != 0x36ul))
107 return 0xFFFDul;
108
109 uwchar_t y(*++s);
110 // expected low-surrogate
111 if (BOOST_UNLIKELY((y >> 10) != 0x37ul))
112 return 0xFFFDul;
113
114 return ((x & 0x3FFul) << 10) + (y & 0x3FFul) + 0x10000ul;
115 }
116}
117
118 inline utf8_string to_utf8(wchar_t const* str)
119 {
120 utf8_string result;
121 for (ucs4_char c; (c = detail::decode_utf16(str)) != ucs4_char(); ++str)
122 detail::utf8_put_encode(result, c);
123 return result;
124 }
125
126 template <typename Traits, typename Allocator>
127 inline utf8_string
128 to_utf8(std::basic_string<wchar_t, Traits, Allocator> const& str)
129 {
130 return to_utf8(str.c_str());
131 }
132#endif
133}}
134
135#endif
136

// Source: boost/libs/spirit/include/boost/spirit/home/support/utf8.hpp