//
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt

#include <boost/locale/util.hpp>
#ifdef BOOST_LOCALE_WITH_ICU
#    include "../src/boost/locale/icu/codecvt.hpp"
#endif
#include "../src/boost/locale/shared/iconv_codecvt.hpp"

#include <cstddef>
#include <cstring>
#include <iostream>
#include <memory>

#include "boostLocale/test/tools.hpp"
#include "boostLocale/test/unit_test.hpp"

19constexpr auto illegal = boost::locale::util::base_converter::illegal;
20constexpr auto incomplete = boost::locale::util::base_converter::incomplete;
21
22namespace utf = boost::locale::utf;
23
24bool test_to(boost::locale::util::base_converter& cvt, const char* s, const utf::code_point codepoint)
25{
26 const size_t len = strlen(s: s);
27 const char* end = s + len;
28 return cvt.to_unicode(begin&: s, end) == codepoint;
29}
30
31bool test_from(boost::locale::util::base_converter& cvt, const utf::code_point codepoint, const char* str)
32{
33 char buf[32] = {0};
34 const auto res = cvt.from_unicode(u: codepoint, begin: buf, end: buf + sizeof(buf));
35 if(res == boost::locale::util::base_converter::illegal)
36 return str == nullptr;
37 else
38 return str != nullptr && strlen(s: str) == res && memcmp(s1: str, s2: buf, n: res) == 0;
39}
40
41bool test_incomplete(boost::locale::util::base_converter& cvt, const utf::code_point codepoint, const size_t len)
42{
43 char buf[32] = {0};
44 const auto res = cvt.from_unicode(u: codepoint, begin: buf, end: buf + len);
45 return res == boost::locale::util::base_converter::incomplete;
46}
47
// Wrappers that run test_to / test_from / test_incomplete through TEST()
// (TEST itself is defined outside this file - presumably by the unit test
// header). Each one dereferences a pointer-like object named `cvt`, which
// must be in scope wherever the macro is used.
#define TEST_TO(str, codepoint) TEST(test_to(*cvt, str, codepoint))
// Note the argument order: expected bytes first, then the code point.
#define TEST_FROM(str, codepoint) TEST(test_from(*cvt, codepoint, str))
#define TEST_INC(codepoint, len) TEST(test_incomplete(*cvt, codepoint, len))

52void test_shiftjis(std::unique_ptr<boost::locale::util::base_converter>& cvt)
53{
54 std::cout << "- Correct" << std::endl;
55 TEST_TO("a", 'a');
56 TEST_TO("X", 'X');
57 TEST_TO("\xCB", 0xFF8b); // half width katakana Hi ヒ
58 TEST_TO("\x83\x71", 0x30d2); // Full width katakana Hi ヒ
59 TEST_TO("\x82\xd0", 0x3072); // Full width hiragana Hi ひ
60
61 TEST_FROM("a", 'a');
62 TEST_FROM("X", 'X');
63 TEST_FROM("\xCB", 0xFF8b); // half width katakana Hi ヒ
64 TEST_FROM("\x83\x71", 0x30d2); // Full width katakana Hi ヒ
65 TEST_FROM("\x82\xd0", 0x3072); // Full width hiragana Hi ひ
66
67 std::cout << "- Illegal/incomplete" << std::endl;
68
69 TEST_TO("\xa0", illegal);
70 TEST_TO("\x82", incomplete);
71 TEST_TO("\x83\xf0", illegal);
72
73 TEST_INC(0x30d2, 1); // Full width katakana Hi ヒ
74 TEST_INC(0x3072, 1); // Full width hiragana Hi ひ
75
76 TEST_FROM(nullptr, 0x5e9); // Hebrew ש not in ShiftJIS
77}
78
79void test_main(int /*argc*/, char** /*argv*/)
80{
81 using namespace boost::locale::util;
82
83 std::cout << "Test UTF-8\n";
84 std::cout << "- From UTF-8" << std::endl;
85
86 TEST(!create_simple_converter("UTF-8"));
87 std::unique_ptr<base_converter> cvt = create_utf8_converter();
88
89 TEST_REQUIRE(cvt);
90 TEST(cvt->is_thread_safe());
91 TEST_EQ(cvt->max_len(), 4);
92
93 std::cout << "-- Correct" << std::endl;
94
95 TEST_TO("\x7f", 0x7f);
96 TEST_TO("\xC2\x80", 0x80);
97 TEST_TO("\xdf\xBF", 0x7FF);
98 TEST_TO("\xe0\xa0\x80", 0x800);
99 TEST_TO("\xef\xbf\xbf", 0xFFFF);
100 TEST_TO("\xf0\x90\x80\x80", 0x10000);
101 TEST_TO("\xf4\x8f\xbf\xbf", 0x10FFFF);
102
103 std::cout << "-- Too big" << std::endl;
104 TEST_TO("\xf4\x9f\x80\x80", illegal); // 11 0000
105 TEST_TO("\xfb\xbf\xbf\xbf", illegal); // 3FF FFFF
106 TEST_TO("\xf8\x90\x80\x80\x80", illegal); // 400 0000
107 TEST_TO("\xfd\xbf\xbf\xbf\xbf\xbf", illegal); // 7fff ffff
108
109 std::cout << "-- Invalid trail" << std::endl;
110 TEST_TO("\xC2\x7F", illegal);
111 TEST_TO("\xdf\x7F", illegal);
112 TEST_TO("\xe0\x7F\x80", illegal);
113 TEST_TO("\xef\xbf\x7F", illegal);
114 TEST_TO("\xe0\x7F\x80", illegal);
115 TEST_TO("\xef\xbf\x7F", illegal);
116 TEST_TO("\xf0\x7F\x80\x80", illegal);
117 TEST_TO("\xf4\x7f\xbf\xbf", illegal);
118 TEST_TO("\xf0\x90\x7F\x80", illegal);
119 TEST_TO("\xf4\x8f\x7F\xbf", illegal);
120 TEST_TO("\xf0\x90\x80\x7F", illegal);
121 TEST_TO("\xf4\x8f\xbf\x7F", illegal);
122
123 std::cout << "-- Invalid length" << std::endl;
124
125 // Test that this actually works
126 TEST_TO(make2(0x80), 0x80);
127 TEST_TO(make2(0x7ff), 0x7ff);
128
129 TEST_TO(make3(0x800), 0x800);
130 TEST_TO(make3(0xffff), 0xffff);
131
132 TEST_TO(make4(0x10000), 0x10000);
133 TEST_TO(make4(0x10ffff), 0x10ffff);
134
135 TEST_TO(make4(0x110000), illegal);
136 TEST_TO(make4(0x1fffff), illegal);
137
138 TEST_TO(make2(0), illegal);
139 TEST_TO(make3(0), illegal);
140 TEST_TO(make4(0), illegal);
141 TEST_TO(make2(0x7f), illegal);
142 TEST_TO(make3(0x7f), illegal);
143 TEST_TO(make4(0x7f), illegal);
144
145 TEST_TO(make3(0x80), illegal);
146 TEST_TO(make4(0x80), illegal);
147 TEST_TO(make3(0x7ff), illegal);
148 TEST_TO(make4(0x7ff), illegal);
149
150 TEST_TO(make4(0x8000), illegal);
151 TEST_TO(make4(0xffff), illegal);
152
153 std::cout << "-- Invalid surrogate" << std::endl;
154
155 TEST_TO(make3(0xD800), illegal);
156 TEST_TO(make3(0xDBFF), illegal);
157 TEST_TO(make3(0xDC00), illegal);
158 TEST_TO(make3(0xDFFF), illegal);
159
160 TEST_TO(make4(0xD800), illegal);
161 TEST_TO(make4(0xDBFF), illegal);
162 TEST_TO(make4(0xDC00), illegal);
163 TEST_TO(make4(0xDFFF), illegal);
164
165 std::cout << "-- Incomplete" << std::endl;
166
167 TEST_TO("\x80", illegal);
168 TEST_TO("\xC2", incomplete);
169
170 TEST_TO("\xdf", incomplete);
171
172 TEST_TO("\xe0", incomplete);
173 TEST_TO("\xe0\xa0", incomplete);
174
175 TEST_TO("\xef\xbf", incomplete);
176 TEST_TO("\xef", incomplete);
177
178 TEST_TO("\xf0\x90\x80", incomplete);
179 TEST_TO("\xf0\x90", incomplete);
180 TEST_TO("\xf0", incomplete);
181
182 TEST_TO("\xf4\x8f\xbf", incomplete);
183 TEST_TO("\xf4\x8f", incomplete);
184 TEST_TO("\xf4", incomplete);
185
186 std::cout << "- To UTF-8\n";
187
188 std::cout << "-- Test correct" << std::endl;
189
190 TEST_FROM("\x7f", 0x7f);
191 TEST_FROM("\xC2\x80", 0x80);
192 TEST_FROM("\xdf\xBF", 0x7FF);
193 TEST_INC(0x7FF, 1);
194 TEST_FROM("\xe0\xa0\x80", 0x800);
195 TEST_INC(0x800, 2);
196 TEST_INC(0x800, 1);
197 TEST_FROM("\xef\xbf\xbf", 0xFFFF);
198 TEST_INC(0x10000, 3);
199 TEST_INC(0x10000, 2);
200 TEST_INC(0x10000, 1);
201 TEST_FROM("\xf0\x90\x80\x80", 0x10000);
202 TEST_FROM("\xf4\x8f\xbf\xbf", 0x10FFFF);
203
204 std::cout << "-- Test no surrogate " << std::endl;
205
206 TEST_FROM(nullptr, 0xD800);
207 TEST_FROM(nullptr, 0xDBFF);
208 TEST_FROM(nullptr, 0xDC00);
209 TEST_FROM(nullptr, 0xDFFF);
210
211 std::cout << "-- Test invalid " << std::endl;
212
213 TEST_FROM(nullptr, 0x110000);
214 TEST_FROM(nullptr, 0x1FFFFF);
215
216 std::cout << "Test windows-1255" << std::endl;
217
218 cvt = create_simple_converter(encoding: "windows-1255");
219
220 TEST_REQUIRE(cvt);
221 TEST(cvt->is_thread_safe());
222 TEST_EQ(cvt->max_len(), 1);
223
224 std::cout << "- From 1255" << std::endl;
225
226 TEST_TO("\xa4", 0x20aa);
227 TEST_TO("\xe0", 0x05d0);
228 TEST_TO("\xc4", 0x5b4);
229 TEST_TO("\xfb", illegal);
230 TEST_TO("\xdd", illegal);
231 TEST_TO("\xff", illegal);
232 TEST_TO("\xfe", 0x200f);
233
234 std::cout << "- To 1255" << std::endl;
235
236 TEST_FROM("\xa4", 0x20aa);
237 TEST_FROM("\xe0", 0x05d0);
238 TEST_FROM("\xc4", 0x5b4);
239 TEST_FROM("\xfe", 0x200f);
240
241 TEST_FROM(nullptr, 0xe4);
242 TEST_FROM(nullptr, 0xd0);
243
244#ifdef BOOST_LOCALE_WITH_ICU
245 std::cout << "Testing Shift-JIS using ICU/uconv" << std::endl;
246
247 cvt = boost::locale::impl_icu::create_uconv_converter(encoding: "Shift-JIS");
248 TEST_REQUIRE(cvt);
249 test_shiftjis(cvt);
250#endif
251
252 std::cout << "Testing Shift-JIS using POSIX/iconv" << std::endl;
253
254 TEST(!create_simple_converter("Shift_JIS"));
255 cvt = boost::locale::create_iconv_converter(encoding: "Shift-JIS");
256#ifndef BOOST_LOCALE_WITH_ICONV
257 TEST(!cvt);
258#endif
259 if(cvt)
260 test_shiftjis(cvt);
261#ifdef BOOST_LOCALE_WITH_ICONV
262 else
263 std::cout << "- Shift-JIS is not supported!" << std::endl; // LCOV_EXCL_LINE
264#endif
265}
266
// boostinspect:noascii

// Source: boost/libs/locale/test/test_codepage_converter.cpp