1 | // |
2 | // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) |
3 | // |
4 | // Distributed under the Boost Software License, Version 1.0. |
5 | // https://www.boost.org/LICENSE_1_0.txt |
6 | |
7 | #ifndef BOOST_LOCALE_TEST_TOOLS_HPP |
8 | #define BOOST_LOCALE_TEST_TOOLS_HPP |
9 | |
10 | #include <boost/locale/encoding.hpp> |
11 | #include "boostLocale/test/posix_tools.hpp" |
12 | #include "boostLocale/test/unit_test.hpp" |
13 | #include <cstdio> |
14 | #include <ctime> |
15 | #include <fstream> |
16 | #include <sstream> |
17 | #include <string> |
18 | #ifndef BOOST_LOCALE_NO_WINAPI_BACKEND |
19 | # include "../src/boost/locale/win32/lcid.hpp" |
20 | #else |
21 | # include <boost/core/ignore_unused.hpp> |
22 | #endif |
23 | #if BOOST_LOCALE_USE_WIN32_API |
24 | # ifndef NOMINMAX |
25 | # define NOMINMAX |
26 | # endif |
27 | # include <windows.h> |
28 | bool hasWinCodepage(unsigned codepage) |
29 | { |
30 | return IsValidCodePage(codepage) != 0; |
31 | } |
32 | #else |
/// Fallback used when the Win32 API is not in use: no code page is reported as available.
///
/// Marked `inline` because the definition lives in a header and would otherwise
/// be emitted once per including translation unit.
inline bool hasWinCodepage(unsigned /*codepage*/)
{
    return false;
}
37 | #endif |
38 | |
39 | #if defined(BOOST_MSVC) && BOOST_MSVC < 1700 |
40 | # pragma warning(disable : 4428) // universal-character-name encountered in source |
41 | #endif |
42 | |
/// Scope guard that deletes a file (via `std::remove`) when it is destroyed.
///
/// The result of `std::remove` is ignored, so a missing file is not an error.
/// Copying is disabled: a copy would delete the file from its own destructor
/// while the original guard is still alive.
class remove_file_on_exit {
    std::string filename_;

public:
    /// @param filename Path of the file to delete on destruction
    explicit remove_file_on_exit(const std::string& filename) : filename_(filename) {}
    ~remove_file_on_exit() { std::remove(filename_.c_str()); }

    remove_file_on_exit(const remove_file_on_exit&) = delete;
    remove_file_on_exit& operator=(const remove_file_on_exit&) = delete;
};
50 | |
/// Decode a single UTF-8 encoded code point from `s`.
///
/// @param s   UTF-8 encoded input
/// @param pos Index of the lead byte to decode; expected to be less than `s.size()`.
///            On return it has been advanced past every consumed byte.
/// @return The decoded code point
/// @throws std::logic_error if the lead byte cannot start a sequence, if the
///         sequence is cut short by the end of `s`, or if a trailing byte is
///         not of the form `10xxxxxx`
inline unsigned utf8_next(const std::string& s, unsigned& pos)
{
    unsigned c = static_cast<unsigned char>(s[pos++]);
    if(c <= 127)
        return c; // single byte (ASCII)

    unsigned l; // number of continuation bytes that follow the lead byte
    if(c <= 193)
        throw std::logic_error("Invalid UTF8"); // LCOV_EXCL_LINE
    else if(c <= 223)
        l = 1;
    else if(c <= 239)
        l = 2;
    else if(c <= 244)
        l = 3;
    else
        throw std::logic_error("Invalid UTF8"); // LCOV_EXCL_LINE

    // Keep only the payload bits of the lead byte: 5, 4 or 3 bits for 1, 2 or 3 trailing bytes
    c &= (1u << (6 - l)) - 1;

    for(unsigned remaining = l; remaining != 0; --remaining) {
        // Never read past the end of a truncated sequence
        if(pos >= s.size())
            throw std::logic_error("Invalid UTF8"); // LCOV_EXCL_LINE
        const unsigned trail = static_cast<unsigned char>(s[pos]);
        if((trail & 0xC0) != 0x80)
            throw std::logic_error("Invalid UTF8"); // LCOV_EXCL_LINE
        ++pos;
        c = (c << 6) | (trail & 0x3F);
    }
    return c;
}
77 | |
78 | /// Convert an UTF encoded string to an UTF-8 encoded string |
79 | template<typename C> |
80 | std::string to_utf8(const std::basic_string<C>& utf_string) |
81 | { |
82 | return boost::locale::conv::utf_to_utf<char>(utf_string); |
83 | } |
/// Overload for narrow strings: the input is returned unchanged.
///
/// Marked `inline` because this non-template function is defined in a header
/// and would otherwise be emitted once per including translation unit.
inline std::string to_utf8(const std::string& utf_string)
{
    return utf_string;
}
88 | |
89 | /// Convert/decode an UTF-8 encoded string to the given char type |
90 | /// For `char` this will be Latin1, otherwise UTF-16/UTF-32 |
91 | template<typename Char> |
92 | std::basic_string<Char> to(const std::string& utf8) |
93 | { |
94 | std::basic_string<Char> out; |
95 | for(unsigned i = 0; i < utf8.size();) { |
96 | const unsigned prev = i; |
97 | unsigned point = utf8_next(s: utf8, pos&: i); |
98 | BOOST_LOCALE_START_CONST_CONDITION |
99 | if(sizeof(Char) == 1 && point > 255) { |
100 | std::ostringstream ss; |
101 | ss << "Can't convert codepoint U" << std::hex << point << "(" |
102 | << std::string(utf8.begin() + prev, utf8.begin() + i) << ") to Latin1" ; |
103 | throw std::logic_error(ss.str()); |
104 | } else if(sizeof(Char) == 2 && point > 0xFFFF) { // Deal with surrogates |
105 | point -= 0x10000; |
106 | out += static_cast<Char>(0xD800 | (point >> 10)); |
107 | out += static_cast<Char>(0xDC00 | (point & 0x3FF)); |
108 | continue; |
109 | } |
110 | BOOST_LOCALE_END_CONST_CONDITION |
111 | out += static_cast<Char>(point); |
112 | } |
113 | return out; |
114 | } |
115 | |
116 | #ifndef BOOST_LOCALE_NO_CXX20_STRING8 |
117 | template<> |
118 | std::basic_string<char8_t> to(const std::string& utf8) |
119 | { |
120 | return std::basic_string<char8_t>(utf8.begin(), utf8.end()); |
121 | } |
122 | #endif |
123 | |
/// Widen an ASCII character array to the given character type by copying each element.
/// The last array element (the terminating NUL of a string literal) is not copied.
template<typename Char, size_t N>
inline std::basic_string<Char> ascii_to(const char (&str)[N])
{
    const char* const first = str;
    const char* const last = first + (N - 1);
    return std::basic_string<Char>(first, last);
}
130 | |
131 | /// Convert an UTF-8 encoded string to another UTF encoding |
132 | /// or to a narrow string encoded using the given locale |
133 | template<typename Char> |
134 | std::basic_string<Char> to_correct_string(const std::string& utf8_str, std::locale /*l*/) |
135 | { |
136 | return to<Char>(utf8_str); |
137 | } |
138 | |
139 | /// Specialization to convert an UTF-8 encoded string to a locale specific encoded string |
140 | template<> |
141 | inline std::string to_correct_string(const std::string& utf8_str, std::locale l) |
142 | { |
143 | return boost::locale::conv::from_utf(text: utf8_str, loc: l); |
144 | } |
145 | |
/// Check whether a `std::locale` can be constructed from the given name.
///
/// @param name Locale name passed to the `std::locale` constructor
/// @return `true` if construction succeeds, `false` if it throws anything
///
/// Marked `inline` because the definition lives in a header and would otherwise
/// be emitted once per including translation unit.
inline bool has_std_locale(const char* name)
{
    try {
        std::locale tmp(name);
        return true;
    } catch(...) {
        // Any failure to construct the locale is treated as "not available"
        return false;
    }
}
155 | |
156 | bool has_win_locale(const std::string& locale_name) |
157 | { |
158 | #ifdef BOOST_LOCALE_NO_WINAPI_BACKEND |
159 | boost::ignore_unused(locale_name); // LCOV_EXCL_LINE |
160 | return false; // LCOV_EXCL_LINE |
161 | #else |
162 | return boost::locale::impl_win::locale_to_lcid(locale_name) != 0; |
163 | #endif |
164 | } |
165 | |
/// Reset a string stream: replace its content with an empty string, clear its
/// state flags and hand back the same stream so the call can be chained.
template<typename Stream>
Stream& empty_stream(Stream& s)
{
    using string_type = std::basic_string<typename Stream::char_type>;
    const string_type nothing;
    s.str(nothing);
    s.clear();
    return s;
}
174 | |
175 | inline bool test_std_supports_SJIS_codecvt(const std::string& locale_name) |
176 | { |
177 | const std::string file_path = boost::locale::test::exe_name + "-test-siftjis.txt" ; |
178 | remove_file_on_exit _(file_path); |
179 | { |
180 | // Japan in Shift JIS/cp932 |
181 | const char* japan_932 = "\x93\xfa\x96\x7b" ; |
182 | std::ofstream f(file_path, std::ios::binary); |
183 | f << japan_932; |
184 | } |
185 | bool res = true; |
186 | try { |
187 | std::wfstream test; |
188 | test.imbue(loc: std::locale(locale_name)); |
189 | test.open(s: file_path); |
190 | // Japan in Unicode |
191 | const std::wstring cmp = L"\u65e5\u672c" ; |
192 | std::wstring ref; |
193 | res = (test >> ref) && (ref == cmp); |
194 | } catch(const std::exception&) { |
195 | res = false; |
196 | } |
197 | return res; |
198 | } |
199 | |
200 | std::string get_std_name(const std::string& name, std::string* real_name = nullptr) |
201 | { |
202 | if(has_std_locale(name: name.c_str())) { |
203 | if(real_name) |
204 | *real_name = name; |
205 | return name; |
206 | } |
207 | |
208 | #if BOOST_LOCALE_USE_WIN32_API |
209 | const bool utf8 = name.find("UTF-8" ) != std::string::npos; |
210 | |
211 | if(name == "en_US.UTF-8" || name == "en_US.ISO8859-1" ) { |
212 | if(has_std_locale("English_United States.1252" )) { |
213 | if(real_name) |
214 | *real_name = "English_United States.1252" ; |
215 | return utf8 ? name : "en_US.windows-1252" ; |
216 | } |
217 | return "" ; |
218 | } else if(name == "he_IL.UTF-8" || name == "he_IL.ISO8859-8" ) { |
219 | if(has_std_locale("Hebrew_Israel.1255" )) { |
220 | if(real_name) |
221 | *real_name = "Hebrew_Israel.1255" ; |
222 | return utf8 ? name : "he_IL.windows-1255" ; |
223 | } |
224 | } else if(name == "ru_RU.UTF-8" ) { |
225 | if(has_std_locale("Russian_Russia.1251" )) { |
226 | if(real_name) |
227 | *real_name = "Russian_Russia.1251" ; |
228 | return name; |
229 | } |
230 | } else if(name == "tr_TR.UTF-8" ) { |
231 | if(has_std_locale("Turkish_Turkey.1254" )) { |
232 | if(real_name) |
233 | *real_name = "Turkish_Turkey.1254" ; |
234 | return name; |
235 | } |
236 | } |
237 | if(name == "ja_JP.SJIS" ) { |
238 | if(has_std_locale("Japanese_Japan.932" )) { |
239 | if(real_name) |
240 | *real_name = "Japanese_Japan.932" ; |
241 | return name; |
242 | } |
243 | return "" ; |
244 | } |
245 | #endif |
246 | return "" ; |
247 | } |
248 | |
/// Build a 2-byte UTF-8 style sequence (`110xxxxx 10xxxxxx`) from `v`.
///
/// No range check is done: bits of `v >> 6` that do not fit are cut off by the
/// cast to `unsigned char`.
///
/// @return Pointer to a NUL-terminated function-local static buffer that is
///         overwritten by the next call
///
/// Marked `inline` because the definition lives in a header and would otherwise
/// be emitted once per including translation unit.
inline char* make2(unsigned v)
{
    static unsigned char buf[3] = {0}; // 2 payload bytes + terminator
    buf[0] = static_cast<unsigned char>(0xC0 | (v >> 6));
    buf[1] = static_cast<unsigned char>(0x80 | (v & 0x3F));
    return reinterpret_cast<char*>(buf);
}
256 | |
/// Build a 3-byte UTF-8 style sequence (`1110xxxx 10xxxxxx 10xxxxxx`) from `v`.
///
/// No range check is done: bits of `v >> 12` that do not fit are cut off by the
/// cast to `unsigned char`.
///
/// @return Pointer to a NUL-terminated function-local static buffer that is
///         overwritten by the next call
///
/// Marked `inline` because the definition lives in a header and would otherwise
/// be emitted once per including translation unit.
inline char* make3(unsigned v)
{
    static unsigned char buf[4] = {0}; // 3 payload bytes + terminator
    buf[0] = static_cast<unsigned char>(0xE0 | (v >> 12));
    buf[1] = static_cast<unsigned char>(0x80 | ((v >> 6) & 0x3F));
    buf[2] = static_cast<unsigned char>(0x80 | (v & 0x3F));
    return reinterpret_cast<char*>(buf);
}
265 | |
/// Build a 4-byte UTF-8 style sequence (`11110xxx 10xxxxxx 10xxxxxx 10xxxxxx`) from `v`.
///
/// No range check is done: bits of `v >> 18` that do not fit are cut off by the
/// cast to `unsigned char`.
///
/// @return Pointer to a NUL-terminated function-local static buffer that is
///         overwritten by the next call
///
/// Marked `inline` because the definition lives in a header and would otherwise
/// be emitted once per including translation unit.
inline char* make4(unsigned v)
{
    static unsigned char buf[5] = {0}; // 4 payload bytes + terminator
    buf[0] = static_cast<unsigned char>(0xF0 | (v >> 18));
    buf[1] = static_cast<unsigned char>(0x80 | ((v >> 12) & 0x3F));
    buf[2] = static_cast<unsigned char>(0x80 | ((v >> 6) & 0x3F));
    buf[3] = static_cast<unsigned char>(0x80 | (v & 0x3F));
    return reinterpret_cast<char*>(buf);
}
275 | |
276 | #ifdef _MSC_VER |
277 | # pragma warning(push) |
278 | # pragma warning(disable : 4996) //"This function or variable may be unsafe" |
279 | #endif |
280 | #if defined(__clang__) |
281 | # pragma clang diagnostic push |
282 | # pragma clang diagnostic ignored "-Wdeprecated-declarations" |
283 | #endif |
/// Forwards to `std::gmtime`. Kept as a separate function so the call sits
/// inside the surrounding pragmas that silence warning 4996 on MSVC/clang-cl.
inline std::tm* gmtime_wrap(const std::time_t* time)
{
    std::tm* const utc_fields = std::gmtime(time);
    return utc_fields;
}
/// Forwards to `std::localtime`. Kept as a separate function so the call sits
/// inside the surrounding pragmas that silence warning 4996 on MSVC/clang-cl.
inline std::tm* localtime_wrap(const std::time_t* time)
{
    std::tm* const local_fields = std::localtime(time);
    return local_fields;
}
294 | #if defined(__clang__) |
295 | # pragma clang diagnostic pop |
296 | #endif |
297 | #ifdef _MSC_VER |
298 | # pragma warning(pop) |
299 | #endif |
300 | |
301 | #endif |
302 | |