1 | #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_NARROW_ENCODING_HPP |
2 | #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_NARROW_ENCODING_HPP |
3 | |
4 | #include <boost/assert.hpp> |
5 | #include <boost/range/iterator_range_core.hpp> |
6 | |
7 | #include <utility> |
8 | |
9 | namespace boost { namespace property_tree { |
10 | namespace json_parser { namespace detail |
11 | { |
12 | |
// Character-classification policy for external (input) text whose encoding
// is a superset of ASCII and is stored in plain `char`.
//
// Every predicate compares its argument against a single ASCII literal (or a
// small literal range), so none of them depends on locale or object state.
// All member functions are const so the policy can be used through a const
// object or const reference.
struct external_ascii_superset_encoding
{
    typedef char external_char;

    // Whitespace.
    bool is_nl(char c) const { return c == '\n'; }
    bool is_ws(char c) const {
        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
    }

    // Characters that make up numbers.
    bool is_minus(char c) const { return c == '-'; }
    bool is_plusminus(char c) const { return c == '+' || c == '-'; }
    bool is_dot(char c) const { return c == '.'; }
    bool is_eE(char c) const { return c == 'e' || c == 'E'; }
    bool is_0(char c) const { return c == '0'; }
    bool is_digit(char c) const { return c >= '0' && c <= '9'; }
    // Despite the name, this matches the non-zero digits '1'..'9' only.
    bool is_digit0(char c) const { return c >= '1' && c <= '9'; }

    // String delimiters and escape introducers.
    bool is_quote(char c) const { return c == '"'; }
    bool is_backslash(char c) const { return c == '\\'; }
    bool is_slash(char c) const { return c == '/'; }

    // Structural punctuation.
    bool is_comma(char c) const { return c == ','; }
    bool is_open_bracket(char c) const { return c == '['; }
    bool is_close_bracket(char c) const { return c == ']'; }
    bool is_colon(char c) const { return c == ':'; }
    bool is_open_brace(char c) const { return c == '{'; }
    bool is_close_brace(char c) const { return c == '}'; }

    // Individual lowercase letters.
    bool is_a(char c) const { return c == 'a'; }
    bool is_b(char c) const { return c == 'b'; }
    bool is_e(char c) const { return c == 'e'; }
    bool is_f(char c) const { return c == 'f'; }
    bool is_l(char c) const { return c == 'l'; }
    bool is_n(char c) const { return c == 'n'; }
    bool is_r(char c) const { return c == 'r'; }
    bool is_s(char c) const { return c == 's'; }
    bool is_t(char c) const { return c == 't'; }
    bool is_u(char c) const { return c == 'u'; }

    // Returns the numeric value (0..15) of a hexadecimal digit, accepting
    // both upper- and lowercase letters, or -1 if c is not a hex digit.
    // Relies on '0'..'9', 'A'..'F' and 'a'..'f' being contiguous, which
    // holds for any ASCII superset.
    int decode_hexdigit(char c) const {
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        return -1;
    }
};
59 | |
60 | struct utf8_utf8_encoding : external_ascii_superset_encoding |
61 | { |
62 | typedef char internal_char; |
63 | |
64 | template <typename Iterator> |
65 | boost::iterator_range<Iterator> |
66 | to_internal(Iterator first, Iterator last) const { |
67 | return boost::make_iterator_range(first, last); |
68 | } |
69 | |
70 | char to_internal_trivial(char c) const { |
71 | BOOST_ASSERT(static_cast<unsigned char>(c) <= 0x7f); |
72 | return c; |
73 | } |
74 | |
75 | template <typename Iterator, typename Sentinel, |
76 | typename EncodingErrorFn> |
77 | void skip_codepoint(Iterator& cur, Sentinel end, |
78 | EncodingErrorFn error_fn) const { |
79 | transcode_codepoint(cur, end, DoNothing(), error_fn); |
80 | } |
81 | |
82 | template <typename Iterator, typename Sentinel, typename TranscodedFn, |
83 | typename EncodingErrorFn> |
84 | void transcode_codepoint(Iterator& cur, Sentinel end, |
85 | TranscodedFn transcoded_fn, EncodingErrorFn error_fn) const { |
86 | unsigned char c = *cur; |
87 | ++cur; |
88 | if (c <= 0x7f) { |
89 | // Solo byte, filter out disallowed codepoints. |
90 | if (c < 0x20) { |
91 | error_fn(); |
92 | } |
93 | transcoded_fn(c); |
94 | return; |
95 | } |
96 | int trailing = trail_table(c); |
97 | if (trailing == -1) { |
98 | // Standalone trailing byte or overly long sequence. |
99 | error_fn(); |
100 | } |
101 | transcoded_fn(c); |
102 | for (int i = 0; i < trailing; ++i) { |
103 | if (cur == end || !is_trail(c: *cur)) { |
104 | error_fn(); |
105 | } |
106 | transcoded_fn(*cur); |
107 | ++cur; |
108 | } |
109 | } |
110 | |
111 | template <typename TranscodedFn> |
112 | void feed_codepoint(unsigned codepoint, |
113 | TranscodedFn transcoded_fn) const { |
114 | if (codepoint <= 0x7f) { |
115 | transcoded_fn(static_cast<char>(codepoint)); |
116 | } else if (codepoint <= 0x7ff) { |
117 | transcoded_fn(static_cast<char>(0xc0 | (codepoint >> 6))); |
118 | transcoded_fn(trail(unmasked: codepoint)); |
119 | } else if (codepoint <= 0xffff) { |
120 | transcoded_fn(static_cast<char>(0xe0 | (codepoint >> 12))); |
121 | transcoded_fn(trail(unmasked: codepoint >> 6)); |
122 | transcoded_fn(trail(unmasked: codepoint)); |
123 | } else if (codepoint <= 0x10ffff) { |
124 | transcoded_fn(static_cast<char>(0xf0 | (codepoint >> 18))); |
125 | transcoded_fn(trail(unmasked: codepoint >> 12)); |
126 | transcoded_fn(trail(unmasked: codepoint >> 6)); |
127 | transcoded_fn(trail(unmasked: codepoint)); |
128 | } |
129 | } |
130 | |
131 | template <typename Iterator, typename Sentinel> |
132 | void skip_introduction(Iterator& cur, Sentinel end) const { |
133 | if (cur != end && static_cast<unsigned char>(*cur) == 0xef) { |
134 | if (++cur == end) return; |
135 | if (++cur == end) return; |
136 | if (++cur == end) return; |
137 | } |
138 | } |
139 | |
140 | private: |
141 | struct DoNothing { |
142 | void operator ()(char) const {} |
143 | }; |
144 | |
145 | bool is_trail(unsigned char c) const { |
146 | return (c & 0xc0) == 0x80; |
147 | } |
148 | |
149 | int trail_table(unsigned char c) const { |
150 | static const signed char table[] = { |
151 | /* not a lead byte */ |
152 | /* 0x10???sss */ -1, -1, -1, -1, -1, -1, -1, -1, |
153 | /* 0x110??sss */ 1, 1, 1, 1, /* 1 trailing byte */ |
154 | /* 0x1110?sss */ 2, 2, /* 2 trailing bytes */ |
155 | /* 0x11110sss */ 3, /* 3 trailing bytes */ |
156 | /* 0x11111sss */ -1 /* 4 or 5 trailing bytes, disallowed */ |
157 | }; |
158 | return table[(c & 0x7f) >> 3]; |
159 | } |
160 | |
161 | char trail(unsigned unmasked) const { |
162 | return static_cast<char>(0x80 | (unmasked & 0x3f)); |
163 | } |
164 | }; |
165 | |
166 | }}}} |
167 | |
168 | #endif |
169 | |