| 1 | // |
| 2 | // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) |
| 3 | // |
| 4 | // Distributed under the Boost Software License, Version 1.0. |
| 5 | // https://www.boost.org/LICENSE_1_0.txt |
| 6 | |
| 7 | #include <boost/locale/util.hpp> |
| 8 | #ifdef BOOST_LOCALE_WITH_ICU |
| 9 | # include "../src/boost/locale/icu/codecvt.hpp" |
| 10 | #endif |
| 11 | #include "../src/boost/locale/shared/iconv_codecvt.hpp" |
| 12 | |
| 13 | #include <cstring> |
| 14 | #include <iostream> |
| 15 | |
| 16 | #include "boostLocale/test/tools.hpp" |
| 17 | #include "boostLocale/test/unit_test.hpp" |
| 18 | |
| 19 | constexpr auto illegal = boost::locale::util::base_converter::illegal; |
| 20 | constexpr auto incomplete = boost::locale::util::base_converter::incomplete; |
| 21 | |
| 22 | namespace utf = boost::locale::utf; |
| 23 | |
| 24 | bool test_to(boost::locale::util::base_converter& cvt, const char* s, const utf::code_point codepoint) |
| 25 | { |
| 26 | const size_t len = strlen(s: s); |
| 27 | const char* end = s + len; |
| 28 | return cvt.to_unicode(begin&: s, end) == codepoint; |
| 29 | } |
| 30 | |
| 31 | bool test_from(boost::locale::util::base_converter& cvt, const utf::code_point codepoint, const char* str) |
| 32 | { |
| 33 | char buf[32] = {0}; |
| 34 | const auto res = cvt.from_unicode(u: codepoint, begin: buf, end: buf + sizeof(buf)); |
| 35 | if(res == boost::locale::util::base_converter::illegal) |
| 36 | return str == nullptr; |
| 37 | else |
| 38 | return str != nullptr && strlen(s: str) == res && memcmp(s1: str, s2: buf, n: res) == 0; |
| 39 | } |
| 40 | |
| 41 | bool test_incomplete(boost::locale::util::base_converter& cvt, const utf::code_point codepoint, const size_t len) |
| 42 | { |
| 43 | char buf[32] = {0}; |
| 44 | const auto res = cvt.from_unicode(u: codepoint, begin: buf, end: buf + len); |
| 45 | return res == boost::locale::util::base_converter::incomplete; |
| 46 | } |
| 47 | |
// Convenience wrappers that feed the helper functions into TEST(...).
// They dereference a variable named `cvt`, which must be in scope where they are used.
// Note the argument order: TEST_TO and TEST_FROM both take the encoded string first,
// TEST_INC takes the code point followed by the available buffer length.
#define TEST_TO(str, codepoint) TEST(test_to(*cvt, str, codepoint))
#define TEST_FROM(str, codepoint) TEST(test_from(*cvt, codepoint, str))
#define TEST_INC(codepoint, len) TEST(test_incomplete(*cvt, codepoint, len))
| 51 | |
| 52 | void test_shiftjis(std::unique_ptr<boost::locale::util::base_converter>& cvt) |
| 53 | { |
| 54 | std::cout << "- Correct" << std::endl; |
| 55 | TEST_TO("a" , 'a'); |
| 56 | TEST_TO("X" , 'X'); |
| 57 | TEST_TO("\xCB" , 0xFF8b); // half width katakana Hi ヒ |
| 58 | TEST_TO("\x83\x71" , 0x30d2); // Full width katakana Hi ヒ |
| 59 | TEST_TO("\x82\xd0" , 0x3072); // Full width hiragana Hi ひ |
| 60 | |
| 61 | TEST_FROM("a" , 'a'); |
| 62 | TEST_FROM("X" , 'X'); |
| 63 | TEST_FROM("\xCB" , 0xFF8b); // half width katakana Hi ヒ |
| 64 | TEST_FROM("\x83\x71" , 0x30d2); // Full width katakana Hi ヒ |
| 65 | TEST_FROM("\x82\xd0" , 0x3072); // Full width hiragana Hi ひ |
| 66 | |
| 67 | std::cout << "- Illegal/incomplete" << std::endl; |
| 68 | |
| 69 | TEST_TO("\xa0" , illegal); |
| 70 | TEST_TO("\x82" , incomplete); |
| 71 | TEST_TO("\x83\xf0" , illegal); |
| 72 | |
| 73 | TEST_INC(0x30d2, 1); // Full width katakana Hi ヒ |
| 74 | TEST_INC(0x3072, 1); // Full width hiragana Hi ひ |
| 75 | |
| 76 | TEST_FROM(nullptr, 0x5e9); // Hebrew ש not in ShiftJIS |
| 77 | } |
| 78 | |
| 79 | void test_main(int /*argc*/, char** /*argv*/) |
| 80 | { |
| 81 | using namespace boost::locale::util; |
| 82 | |
| 83 | std::cout << "Test UTF-8\n" ; |
| 84 | std::cout << "- From UTF-8" << std::endl; |
| 85 | |
| 86 | TEST(!create_simple_converter("UTF-8" )); |
| 87 | std::unique_ptr<base_converter> cvt = create_utf8_converter(); |
| 88 | |
| 89 | TEST_REQUIRE(cvt); |
| 90 | TEST(cvt->is_thread_safe()); |
| 91 | TEST_EQ(cvt->max_len(), 4); |
| 92 | |
| 93 | std::cout << "-- Correct" << std::endl; |
| 94 | |
| 95 | TEST_TO("\x7f" , 0x7f); |
| 96 | TEST_TO("\xC2\x80" , 0x80); |
| 97 | TEST_TO("\xdf\xBF" , 0x7FF); |
| 98 | TEST_TO("\xe0\xa0\x80" , 0x800); |
| 99 | TEST_TO("\xef\xbf\xbf" , 0xFFFF); |
| 100 | TEST_TO("\xf0\x90\x80\x80" , 0x10000); |
| 101 | TEST_TO("\xf4\x8f\xbf\xbf" , 0x10FFFF); |
| 102 | |
| 103 | std::cout << "-- Too big" << std::endl; |
| 104 | TEST_TO("\xf4\x9f\x80\x80" , illegal); // 11 0000 |
| 105 | TEST_TO("\xfb\xbf\xbf\xbf" , illegal); // 3FF FFFF |
| 106 | TEST_TO("\xf8\x90\x80\x80\x80" , illegal); // 400 0000 |
| 107 | TEST_TO("\xfd\xbf\xbf\xbf\xbf\xbf" , illegal); // 7fff ffff |
| 108 | |
| 109 | std::cout << "-- Invalid trail" << std::endl; |
| 110 | TEST_TO("\xC2\x7F" , illegal); |
| 111 | TEST_TO("\xdf\x7F" , illegal); |
| 112 | TEST_TO("\xe0\x7F\x80" , illegal); |
| 113 | TEST_TO("\xef\xbf\x7F" , illegal); |
| 114 | TEST_TO("\xe0\x7F\x80" , illegal); |
| 115 | TEST_TO("\xef\xbf\x7F" , illegal); |
| 116 | TEST_TO("\xf0\x7F\x80\x80" , illegal); |
| 117 | TEST_TO("\xf4\x7f\xbf\xbf" , illegal); |
| 118 | TEST_TO("\xf0\x90\x7F\x80" , illegal); |
| 119 | TEST_TO("\xf4\x8f\x7F\xbf" , illegal); |
| 120 | TEST_TO("\xf0\x90\x80\x7F" , illegal); |
| 121 | TEST_TO("\xf4\x8f\xbf\x7F" , illegal); |
| 122 | |
| 123 | std::cout << "-- Invalid length" << std::endl; |
| 124 | |
| 125 | // Test that this actually works |
| 126 | TEST_TO(make2(0x80), 0x80); |
| 127 | TEST_TO(make2(0x7ff), 0x7ff); |
| 128 | |
| 129 | TEST_TO(make3(0x800), 0x800); |
| 130 | TEST_TO(make3(0xffff), 0xffff); |
| 131 | |
| 132 | TEST_TO(make4(0x10000), 0x10000); |
| 133 | TEST_TO(make4(0x10ffff), 0x10ffff); |
| 134 | |
| 135 | TEST_TO(make4(0x110000), illegal); |
| 136 | TEST_TO(make4(0x1fffff), illegal); |
| 137 | |
| 138 | TEST_TO(make2(0), illegal); |
| 139 | TEST_TO(make3(0), illegal); |
| 140 | TEST_TO(make4(0), illegal); |
| 141 | TEST_TO(make2(0x7f), illegal); |
| 142 | TEST_TO(make3(0x7f), illegal); |
| 143 | TEST_TO(make4(0x7f), illegal); |
| 144 | |
| 145 | TEST_TO(make3(0x80), illegal); |
| 146 | TEST_TO(make4(0x80), illegal); |
| 147 | TEST_TO(make3(0x7ff), illegal); |
| 148 | TEST_TO(make4(0x7ff), illegal); |
| 149 | |
| 150 | TEST_TO(make4(0x8000), illegal); |
| 151 | TEST_TO(make4(0xffff), illegal); |
| 152 | |
| 153 | std::cout << "-- Invalid surrogate" << std::endl; |
| 154 | |
| 155 | TEST_TO(make3(0xD800), illegal); |
| 156 | TEST_TO(make3(0xDBFF), illegal); |
| 157 | TEST_TO(make3(0xDC00), illegal); |
| 158 | TEST_TO(make3(0xDFFF), illegal); |
| 159 | |
| 160 | TEST_TO(make4(0xD800), illegal); |
| 161 | TEST_TO(make4(0xDBFF), illegal); |
| 162 | TEST_TO(make4(0xDC00), illegal); |
| 163 | TEST_TO(make4(0xDFFF), illegal); |
| 164 | |
| 165 | std::cout << "-- Incomplete" << std::endl; |
| 166 | |
| 167 | TEST_TO("\x80" , illegal); |
| 168 | TEST_TO("\xC2" , incomplete); |
| 169 | |
| 170 | TEST_TO("\xdf" , incomplete); |
| 171 | |
| 172 | TEST_TO("\xe0" , incomplete); |
| 173 | TEST_TO("\xe0\xa0" , incomplete); |
| 174 | |
| 175 | TEST_TO("\xef\xbf" , incomplete); |
| 176 | TEST_TO("\xef" , incomplete); |
| 177 | |
| 178 | TEST_TO("\xf0\x90\x80" , incomplete); |
| 179 | TEST_TO("\xf0\x90" , incomplete); |
| 180 | TEST_TO("\xf0" , incomplete); |
| 181 | |
| 182 | TEST_TO("\xf4\x8f\xbf" , incomplete); |
| 183 | TEST_TO("\xf4\x8f" , incomplete); |
| 184 | TEST_TO("\xf4" , incomplete); |
| 185 | |
| 186 | std::cout << "- To UTF-8\n" ; |
| 187 | |
| 188 | std::cout << "-- Test correct" << std::endl; |
| 189 | |
| 190 | TEST_FROM("\x7f" , 0x7f); |
| 191 | TEST_FROM("\xC2\x80" , 0x80); |
| 192 | TEST_FROM("\xdf\xBF" , 0x7FF); |
| 193 | TEST_INC(0x7FF, 1); |
| 194 | TEST_FROM("\xe0\xa0\x80" , 0x800); |
| 195 | TEST_INC(0x800, 2); |
| 196 | TEST_INC(0x800, 1); |
| 197 | TEST_FROM("\xef\xbf\xbf" , 0xFFFF); |
| 198 | TEST_INC(0x10000, 3); |
| 199 | TEST_INC(0x10000, 2); |
| 200 | TEST_INC(0x10000, 1); |
| 201 | TEST_FROM("\xf0\x90\x80\x80" , 0x10000); |
| 202 | TEST_FROM("\xf4\x8f\xbf\xbf" , 0x10FFFF); |
| 203 | |
| 204 | std::cout << "-- Test no surrogate " << std::endl; |
| 205 | |
| 206 | TEST_FROM(nullptr, 0xD800); |
| 207 | TEST_FROM(nullptr, 0xDBFF); |
| 208 | TEST_FROM(nullptr, 0xDC00); |
| 209 | TEST_FROM(nullptr, 0xDFFF); |
| 210 | |
| 211 | std::cout << "-- Test invalid " << std::endl; |
| 212 | |
| 213 | TEST_FROM(nullptr, 0x110000); |
| 214 | TEST_FROM(nullptr, 0x1FFFFF); |
| 215 | |
| 216 | std::cout << "Test windows-1255" << std::endl; |
| 217 | |
| 218 | cvt = create_simple_converter(encoding: "windows-1255" ); |
| 219 | |
| 220 | TEST_REQUIRE(cvt); |
| 221 | TEST(cvt->is_thread_safe()); |
| 222 | TEST_EQ(cvt->max_len(), 1); |
| 223 | |
| 224 | std::cout << "- From 1255" << std::endl; |
| 225 | |
| 226 | TEST_TO("\xa4" , 0x20aa); |
| 227 | TEST_TO("\xe0" , 0x05d0); |
| 228 | TEST_TO("\xc4" , 0x5b4); |
| 229 | TEST_TO("\xfb" , illegal); |
| 230 | TEST_TO("\xdd" , illegal); |
| 231 | TEST_TO("\xff" , illegal); |
| 232 | TEST_TO("\xfe" , 0x200f); |
| 233 | |
| 234 | std::cout << "- To 1255" << std::endl; |
| 235 | |
| 236 | TEST_FROM("\xa4" , 0x20aa); |
| 237 | TEST_FROM("\xe0" , 0x05d0); |
| 238 | TEST_FROM("\xc4" , 0x5b4); |
| 239 | TEST_FROM("\xfe" , 0x200f); |
| 240 | |
| 241 | TEST_FROM(nullptr, 0xe4); |
| 242 | TEST_FROM(nullptr, 0xd0); |
| 243 | |
| 244 | #ifdef BOOST_LOCALE_WITH_ICU |
| 245 | std::cout << "Testing Shift-JIS using ICU/uconv" << std::endl; |
| 246 | |
| 247 | cvt = boost::locale::impl_icu::create_uconv_converter(encoding: "Shift-JIS" ); |
| 248 | TEST_REQUIRE(cvt); |
| 249 | test_shiftjis(cvt); |
| 250 | #endif |
| 251 | |
| 252 | std::cout << "Testing Shift-JIS using POSIX/iconv" << std::endl; |
| 253 | |
| 254 | TEST(!create_simple_converter("Shift_JIS" )); |
| 255 | cvt = boost::locale::create_iconv_converter(encoding: "Shift-JIS" ); |
| 256 | #ifndef BOOST_LOCALE_WITH_ICONV |
| 257 | TEST(!cvt); |
| 258 | #endif |
| 259 | if(cvt) |
| 260 | test_shiftjis(cvt); |
| 261 | #ifdef BOOST_LOCALE_WITH_ICONV |
| 262 | else |
| 263 | std::cout << "- Shift-JIS is not supported!" << std::endl; // LCOV_EXCL_LINE |
| 264 | #endif |
| 265 | } |
| 266 | |
| 267 | // boostinspect:noascii |
| 268 | |