1 | // |
2 | // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) |
3 | // |
4 | // Distributed under the Boost Software License, Version 1.0. |
5 | // https://www.boost.org/LICENSE_1_0.txt |
6 | |
7 | #include <boost/locale/util.hpp> |
8 | #ifdef BOOST_LOCALE_WITH_ICU |
9 | # include "../src/boost/locale/icu/codecvt.hpp" |
10 | #endif |
11 | #include "../src/boost/locale/shared/iconv_codecvt.hpp" |
12 | |
13 | #include <cstring> |
14 | #include <iostream> |
15 | |
16 | #include "boostLocale/test/tools.hpp" |
17 | #include "boostLocale/test/unit_test.hpp" |
18 | |
19 | constexpr auto illegal = boost::locale::util::base_converter::illegal; |
20 | constexpr auto incomplete = boost::locale::util::base_converter::incomplete; |
21 | |
22 | namespace utf = boost::locale::utf; |
23 | |
24 | bool test_to(boost::locale::util::base_converter& cvt, const char* s, const utf::code_point codepoint) |
25 | { |
26 | const size_t len = strlen(s: s); |
27 | const char* end = s + len; |
28 | return cvt.to_unicode(begin&: s, end) == codepoint; |
29 | } |
30 | |
31 | bool test_from(boost::locale::util::base_converter& cvt, const utf::code_point codepoint, const char* str) |
32 | { |
33 | char buf[32] = {0}; |
34 | const auto res = cvt.from_unicode(u: codepoint, begin: buf, end: buf + sizeof(buf)); |
35 | if(res == boost::locale::util::base_converter::illegal) |
36 | return str == nullptr; |
37 | else |
38 | return str != nullptr && strlen(s: str) == res && memcmp(s1: str, s2: buf, n: res) == 0; |
39 | } |
40 | |
41 | bool test_incomplete(boost::locale::util::base_converter& cvt, const utf::code_point codepoint, const size_t len) |
42 | { |
43 | char buf[32] = {0}; |
44 | const auto res = cvt.from_unicode(u: codepoint, begin: buf, end: buf + len); |
45 | return res == boost::locale::util::base_converter::incomplete; |
46 | } |
47 | |
// Assertion shorthands built on TEST(). Each one dereferences a variable named `cvt`,
// which must be in scope wherever the macro is used.
//
// TEST_TO(bytes, expected):  decoding `bytes` yields `expected` (code point or error sentinel).
// TEST_FROM(bytes, cp):      encoding `cp` yields `bytes`; pass nullptr to expect `illegal`.
// TEST_INC(cp, capacity):    encoding `cp` into `capacity` bytes reports `incomplete`.
#define TEST_TO(bytes, expected) TEST(test_to(*cvt, bytes, expected))
#define TEST_FROM(bytes, cp) TEST(test_from(*cvt, cp, bytes))
#define TEST_INC(cp, capacity) TEST(test_incomplete(*cvt, cp, capacity))
51 | |
52 | void test_shiftjis(std::unique_ptr<boost::locale::util::base_converter>& cvt) |
53 | { |
54 | std::cout << "- Correct" << std::endl; |
55 | TEST_TO("a" , 'a'); |
56 | TEST_TO("X" , 'X'); |
57 | TEST_TO("\xCB" , 0xFF8b); // half width katakana Hi ヒ |
58 | TEST_TO("\x83\x71" , 0x30d2); // Full width katakana Hi ヒ |
59 | TEST_TO("\x82\xd0" , 0x3072); // Full width hiragana Hi ひ |
60 | |
61 | TEST_FROM("a" , 'a'); |
62 | TEST_FROM("X" , 'X'); |
63 | TEST_FROM("\xCB" , 0xFF8b); // half width katakana Hi ヒ |
64 | TEST_FROM("\x83\x71" , 0x30d2); // Full width katakana Hi ヒ |
65 | TEST_FROM("\x82\xd0" , 0x3072); // Full width hiragana Hi ひ |
66 | |
67 | std::cout << "- Illegal/incomplete" << std::endl; |
68 | |
69 | TEST_TO("\xa0" , illegal); |
70 | TEST_TO("\x82" , incomplete); |
71 | TEST_TO("\x83\xf0" , illegal); |
72 | |
73 | TEST_INC(0x30d2, 1); // Full width katakana Hi ヒ |
74 | TEST_INC(0x3072, 1); // Full width hiragana Hi ひ |
75 | |
76 | TEST_FROM(nullptr, 0x5e9); // Hebrew ש not in ShiftJIS |
77 | } |
78 | |
79 | void test_main(int /*argc*/, char** /*argv*/) |
80 | { |
81 | using namespace boost::locale::util; |
82 | |
83 | std::cout << "Test UTF-8\n" ; |
84 | std::cout << "- From UTF-8" << std::endl; |
85 | |
86 | TEST(!create_simple_converter("UTF-8" )); |
87 | std::unique_ptr<base_converter> cvt = create_utf8_converter(); |
88 | |
89 | TEST_REQUIRE(cvt); |
90 | TEST(cvt->is_thread_safe()); |
91 | TEST_EQ(cvt->max_len(), 4); |
92 | |
93 | std::cout << "-- Correct" << std::endl; |
94 | |
95 | TEST_TO("\x7f" , 0x7f); |
96 | TEST_TO("\xC2\x80" , 0x80); |
97 | TEST_TO("\xdf\xBF" , 0x7FF); |
98 | TEST_TO("\xe0\xa0\x80" , 0x800); |
99 | TEST_TO("\xef\xbf\xbf" , 0xFFFF); |
100 | TEST_TO("\xf0\x90\x80\x80" , 0x10000); |
101 | TEST_TO("\xf4\x8f\xbf\xbf" , 0x10FFFF); |
102 | |
103 | std::cout << "-- Too big" << std::endl; |
104 | TEST_TO("\xf4\x9f\x80\x80" , illegal); // 11 0000 |
105 | TEST_TO("\xfb\xbf\xbf\xbf" , illegal); // 3FF FFFF |
106 | TEST_TO("\xf8\x90\x80\x80\x80" , illegal); // 400 0000 |
107 | TEST_TO("\xfd\xbf\xbf\xbf\xbf\xbf" , illegal); // 7fff ffff |
108 | |
109 | std::cout << "-- Invalid trail" << std::endl; |
110 | TEST_TO("\xC2\x7F" , illegal); |
111 | TEST_TO("\xdf\x7F" , illegal); |
112 | TEST_TO("\xe0\x7F\x80" , illegal); |
113 | TEST_TO("\xef\xbf\x7F" , illegal); |
114 | TEST_TO("\xe0\x7F\x80" , illegal); |
115 | TEST_TO("\xef\xbf\x7F" , illegal); |
116 | TEST_TO("\xf0\x7F\x80\x80" , illegal); |
117 | TEST_TO("\xf4\x7f\xbf\xbf" , illegal); |
118 | TEST_TO("\xf0\x90\x7F\x80" , illegal); |
119 | TEST_TO("\xf4\x8f\x7F\xbf" , illegal); |
120 | TEST_TO("\xf0\x90\x80\x7F" , illegal); |
121 | TEST_TO("\xf4\x8f\xbf\x7F" , illegal); |
122 | |
123 | std::cout << "-- Invalid length" << std::endl; |
124 | |
125 | // Test that this actually works |
126 | TEST_TO(make2(0x80), 0x80); |
127 | TEST_TO(make2(0x7ff), 0x7ff); |
128 | |
129 | TEST_TO(make3(0x800), 0x800); |
130 | TEST_TO(make3(0xffff), 0xffff); |
131 | |
132 | TEST_TO(make4(0x10000), 0x10000); |
133 | TEST_TO(make4(0x10ffff), 0x10ffff); |
134 | |
135 | TEST_TO(make4(0x110000), illegal); |
136 | TEST_TO(make4(0x1fffff), illegal); |
137 | |
138 | TEST_TO(make2(0), illegal); |
139 | TEST_TO(make3(0), illegal); |
140 | TEST_TO(make4(0), illegal); |
141 | TEST_TO(make2(0x7f), illegal); |
142 | TEST_TO(make3(0x7f), illegal); |
143 | TEST_TO(make4(0x7f), illegal); |
144 | |
145 | TEST_TO(make3(0x80), illegal); |
146 | TEST_TO(make4(0x80), illegal); |
147 | TEST_TO(make3(0x7ff), illegal); |
148 | TEST_TO(make4(0x7ff), illegal); |
149 | |
150 | TEST_TO(make4(0x8000), illegal); |
151 | TEST_TO(make4(0xffff), illegal); |
152 | |
153 | std::cout << "-- Invalid surrogate" << std::endl; |
154 | |
155 | TEST_TO(make3(0xD800), illegal); |
156 | TEST_TO(make3(0xDBFF), illegal); |
157 | TEST_TO(make3(0xDC00), illegal); |
158 | TEST_TO(make3(0xDFFF), illegal); |
159 | |
160 | TEST_TO(make4(0xD800), illegal); |
161 | TEST_TO(make4(0xDBFF), illegal); |
162 | TEST_TO(make4(0xDC00), illegal); |
163 | TEST_TO(make4(0xDFFF), illegal); |
164 | |
165 | std::cout << "-- Incomplete" << std::endl; |
166 | |
167 | TEST_TO("\x80" , illegal); |
168 | TEST_TO("\xC2" , incomplete); |
169 | |
170 | TEST_TO("\xdf" , incomplete); |
171 | |
172 | TEST_TO("\xe0" , incomplete); |
173 | TEST_TO("\xe0\xa0" , incomplete); |
174 | |
175 | TEST_TO("\xef\xbf" , incomplete); |
176 | TEST_TO("\xef" , incomplete); |
177 | |
178 | TEST_TO("\xf0\x90\x80" , incomplete); |
179 | TEST_TO("\xf0\x90" , incomplete); |
180 | TEST_TO("\xf0" , incomplete); |
181 | |
182 | TEST_TO("\xf4\x8f\xbf" , incomplete); |
183 | TEST_TO("\xf4\x8f" , incomplete); |
184 | TEST_TO("\xf4" , incomplete); |
185 | |
186 | std::cout << "- To UTF-8\n" ; |
187 | |
188 | std::cout << "-- Test correct" << std::endl; |
189 | |
190 | TEST_FROM("\x7f" , 0x7f); |
191 | TEST_FROM("\xC2\x80" , 0x80); |
192 | TEST_FROM("\xdf\xBF" , 0x7FF); |
193 | TEST_INC(0x7FF, 1); |
194 | TEST_FROM("\xe0\xa0\x80" , 0x800); |
195 | TEST_INC(0x800, 2); |
196 | TEST_INC(0x800, 1); |
197 | TEST_FROM("\xef\xbf\xbf" , 0xFFFF); |
198 | TEST_INC(0x10000, 3); |
199 | TEST_INC(0x10000, 2); |
200 | TEST_INC(0x10000, 1); |
201 | TEST_FROM("\xf0\x90\x80\x80" , 0x10000); |
202 | TEST_FROM("\xf4\x8f\xbf\xbf" , 0x10FFFF); |
203 | |
204 | std::cout << "-- Test no surrogate " << std::endl; |
205 | |
206 | TEST_FROM(nullptr, 0xD800); |
207 | TEST_FROM(nullptr, 0xDBFF); |
208 | TEST_FROM(nullptr, 0xDC00); |
209 | TEST_FROM(nullptr, 0xDFFF); |
210 | |
211 | std::cout << "-- Test invalid " << std::endl; |
212 | |
213 | TEST_FROM(nullptr, 0x110000); |
214 | TEST_FROM(nullptr, 0x1FFFFF); |
215 | |
216 | std::cout << "Test windows-1255" << std::endl; |
217 | |
218 | cvt = create_simple_converter(encoding: "windows-1255" ); |
219 | |
220 | TEST_REQUIRE(cvt); |
221 | TEST(cvt->is_thread_safe()); |
222 | TEST_EQ(cvt->max_len(), 1); |
223 | |
224 | std::cout << "- From 1255" << std::endl; |
225 | |
226 | TEST_TO("\xa4" , 0x20aa); |
227 | TEST_TO("\xe0" , 0x05d0); |
228 | TEST_TO("\xc4" , 0x5b4); |
229 | TEST_TO("\xfb" , illegal); |
230 | TEST_TO("\xdd" , illegal); |
231 | TEST_TO("\xff" , illegal); |
232 | TEST_TO("\xfe" , 0x200f); |
233 | |
234 | std::cout << "- To 1255" << std::endl; |
235 | |
236 | TEST_FROM("\xa4" , 0x20aa); |
237 | TEST_FROM("\xe0" , 0x05d0); |
238 | TEST_FROM("\xc4" , 0x5b4); |
239 | TEST_FROM("\xfe" , 0x200f); |
240 | |
241 | TEST_FROM(nullptr, 0xe4); |
242 | TEST_FROM(nullptr, 0xd0); |
243 | |
244 | #ifdef BOOST_LOCALE_WITH_ICU |
245 | std::cout << "Testing Shift-JIS using ICU/uconv" << std::endl; |
246 | |
247 | cvt = boost::locale::impl_icu::create_uconv_converter(encoding: "Shift-JIS" ); |
248 | TEST_REQUIRE(cvt); |
249 | test_shiftjis(cvt); |
250 | #endif |
251 | |
252 | std::cout << "Testing Shift-JIS using POSIX/iconv" << std::endl; |
253 | |
254 | TEST(!create_simple_converter("Shift_JIS" )); |
255 | cvt = boost::locale::create_iconv_converter(encoding: "Shift-JIS" ); |
256 | #ifndef BOOST_LOCALE_WITH_ICONV |
257 | TEST(!cvt); |
258 | #endif |
259 | if(cvt) |
260 | test_shiftjis(cvt); |
261 | #ifdef BOOST_LOCALE_WITH_ICONV |
262 | else |
263 | std::cout << "- Shift-JIS is not supported!" << std::endl; // LCOV_EXCL_LINE |
264 | #endif |
265 | } |
266 | |
267 | // boostinspect:noascii |
268 | |