1//
2// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3//
4// Distributed under the Boost Software License, Version 1.0.
5// https://www.boost.org/LICENSE_1_0.txt
6
7#ifndef BOOST_LOCALE_TEST_TOOLS_HPP
8#define BOOST_LOCALE_TEST_TOOLS_HPP
9
10#include <boost/locale/encoding.hpp>
11#include "boostLocale/test/posix_tools.hpp"
12#include "boostLocale/test/unit_test.hpp"
13#include <cstdio>
14#include <ctime>
15#include <fstream>
16#include <sstream>
17#include <string>
18#ifndef BOOST_LOCALE_NO_WINAPI_BACKEND
19# include "../src/boost/locale/win32/lcid.hpp"
20#else
21# include <boost/core/ignore_unused.hpp>
22#endif
23#if BOOST_LOCALE_USE_WIN32_API
24# ifndef NOMINMAX
25# define NOMINMAX
26# endif
27# include <windows.h>
28bool hasWinCodepage(unsigned codepage)
29{
30 return IsValidCodePage(codepage) != 0;
31}
32#else
/// Fallback used when the Win32 API is not in use:
/// no Windows code page is ever reported as available.
///
/// Declared `inline` because this header defines the function and may be
/// included from more than one translation unit.
inline bool hasWinCodepage(unsigned /*codepage*/)
{
    return false;
}
37#endif
38
39#if defined(BOOST_MSVC) && BOOST_MSVC < 1700
40# pragma warning(disable : 4428) // universal-character-name encountered in source
41#endif
42
/// Scope guard deleting the named file when the guard is destroyed.
///
/// The file is removed with `std::remove`; its result is ignored, so a file
/// which does not exist (anymore) is not treated as an error.
class remove_file_on_exit {
    std::string filename_;

public:
    /// Remember \a filename for later removal, the file itself is not touched here
    explicit remove_file_on_exit(const std::string& filename) : filename_(filename) {}
    /// Delete the file, ignoring any failure
    ~remove_file_on_exit() { std::remove(filename_.c_str()); }
};
50
/// Decode the UTF-8 sequence starting at `s[pos]` and advance \a pos past it.
///
/// @param s   UTF-8 encoded string
/// @param pos Index of the lead byte; on return the index just after the decoded sequence
/// @return The decoded code point
/// @throws std::logic_error if the lead byte cannot start a sequence
///         (0x80-0xC1 or above 0xF4) or if the string ends before all
///         trailing bytes of the sequence were read
///
/// Trailing bytes are only masked, not checked for the `10xxxxxx` pattern, and
/// overlong or surrogate encodings are not rejected.
inline unsigned utf8_next(const std::string& s, unsigned& pos)
{
    unsigned c = static_cast<unsigned char>(s[pos++]);
    if(c <= 0x7F)
        return c; // Plain ASCII, single byte

    // Number of trailing bytes announced by the lead byte
    unsigned trail_count;
    if(c <= 0xC1)
        throw std::logic_error("Invalid UTF8"); // LCOV_EXCL_LINE
    else if(c <= 0xDF)
        trail_count = 1;
    else if(c <= 0xEF)
        trail_count = 2;
    else if(c <= 0xF4)
        trail_count = 3;
    else
        throw std::logic_error("Invalid UTF8"); // LCOV_EXCL_LINE

    // Keep only the payload bits of the lead byte (5, 4 or 3 bits)
    c &= (1u << (6 - trail_count)) - 1;

    for(; trail_count > 0; --trail_count) {
        // A truncated sequence must not read beyond the end of the string
        if(pos >= s.size())
            throw std::logic_error("Invalid UTF8"); // LCOV_EXCL_LINE
        c = (c << 6) | (static_cast<unsigned char>(s[pos++]) & 0x3F);
    }
    return c;
}
77
78/// Convert an UTF encoded string to an UTF-8 encoded string
79template<typename C>
80std::string to_utf8(const std::basic_string<C>& utf_string)
81{
82 return boost::locale::conv::utf_to_utf<char>(utf_string);
83}
/// Overload for narrow strings: the input is returned as an unmodified copy.
///
/// Declared `inline` because this header defines the function and may be
/// included from more than one translation unit.
inline std::string to_utf8(const std::string& utf_string)
{
    return utf_string;
}
88
89/// Convert/decode an UTF-8 encoded string to the given char type
90/// For `char` this will be Latin1, otherwise UTF-16/UTF-32
91template<typename Char>
92std::basic_string<Char> to(const std::string& utf8)
93{
94 std::basic_string<Char> out;
95 for(unsigned i = 0; i < utf8.size();) {
96 const unsigned prev = i;
97 unsigned point = utf8_next(s: utf8, pos&: i);
98 BOOST_LOCALE_START_CONST_CONDITION
99 if(sizeof(Char) == 1 && point > 255) {
100 std::ostringstream ss;
101 ss << "Can't convert codepoint U" << std::hex << point << "("
102 << std::string(utf8.begin() + prev, utf8.begin() + i) << ") to Latin1";
103 throw std::logic_error(ss.str());
104 } else if(sizeof(Char) == 2 && point > 0xFFFF) { // Deal with surrogates
105 point -= 0x10000;
106 out += static_cast<Char>(0xD800 | (point >> 10));
107 out += static_cast<Char>(0xDC00 | (point & 0x3FF));
108 continue;
109 }
110 BOOST_LOCALE_END_CONST_CONDITION
111 out += static_cast<Char>(point);
112 }
113 return out;
114}
115
116#ifndef BOOST_LOCALE_NO_CXX20_STRING8
117template<>
118std::basic_string<char8_t> to(const std::string& utf8)
119{
120 return std::basic_string<char8_t>(utf8.begin(), utf8.end());
121}
122#endif
123
/// Widen an ASCII string literal to a string of the given char type.
/// Each char is copied to one element of the result, the terminating NUL is not included.
template<typename Char, size_t size>
inline std::basic_string<Char> ascii_to(const char (&str)[size])
{
    // The array holds `size - 1` characters followed by the terminating NUL
    const char* const first = str;
    const char* const last = first + (size - 1);
    return std::basic_string<Char>(first, last);
}
130
131/// Convert an UTF-8 encoded string to another UTF encoding
132/// or to a narrow string encoded using the given locale
133template<typename Char>
134std::basic_string<Char> to_correct_string(const std::string& utf8_str, std::locale /*l*/)
135{
136 return to<Char>(utf8_str);
137}
138
139/// Specialization to convert an UTF-8 encoded string to a locale specific encoded string
140template<>
141inline std::string to_correct_string(const std::string& utf8_str, std::locale l)
142{
143 return boost::locale::conv::from_utf(text: utf8_str, loc: l);
144}
145
/// Check whether a `std::locale` with the given name can be constructed.
///
/// @param name Locale name as passed to the `std::locale` constructor
/// @return true if construction succeeds, false if it throws anything
///
/// Declared `inline` because this header defines the function and may be
/// included from more than one translation unit.
inline bool has_std_locale(const char* name)
{
    try {
        std::locale tmp(name);
        return true;
    } catch(...) {
        // Any failure just means "not available", this is a pure probe
        return false;
    }
}
155
156bool has_win_locale(const std::string& locale_name)
157{
158#ifdef BOOST_LOCALE_NO_WINAPI_BACKEND
159 boost::ignore_unused(locale_name); // LCOV_EXCL_LINE
160 return false; // LCOV_EXCL_LINE
161#else
162 return boost::locale::impl_win::locale_to_lcid(locale_name) != 0;
163#endif
164}
165
/// Reset a string stream: replace its content by an empty string,
/// clear its state flags and return the same stream for chaining
template<class T>
T& empty_stream(T& s)
{
    using string_type = std::basic_string<typename T::char_type>;
    const string_type no_content;
    s.str(no_content);
    s.clear();
    return s;
}
174
175inline bool test_std_supports_SJIS_codecvt(const std::string& locale_name)
176{
177 const std::string file_path = boost::locale::test::exe_name + "-test-siftjis.txt";
178 remove_file_on_exit _(file_path);
179 {
180 // Japan in Shift JIS/cp932
181 const char* japan_932 = "\x93\xfa\x96\x7b";
182 std::ofstream f(file_path, std::ios::binary);
183 f << japan_932;
184 }
185 bool res = true;
186 try {
187 std::wfstream test;
188 test.imbue(loc: std::locale(locale_name));
189 test.open(s: file_path);
190 // Japan in Unicode
191 const std::wstring cmp = L"\u65e5\u672c";
192 std::wstring ref;
193 res = (test >> ref) && (ref == cmp);
194 } catch(const std::exception&) {
195 res = false;
196 }
197 return res;
198}
199
200std::string get_std_name(const std::string& name, std::string* real_name = nullptr)
201{
202 if(has_std_locale(name: name.c_str())) {
203 if(real_name)
204 *real_name = name;
205 return name;
206 }
207
208#if BOOST_LOCALE_USE_WIN32_API
209 const bool utf8 = name.find("UTF-8") != std::string::npos;
210
211 if(name == "en_US.UTF-8" || name == "en_US.ISO8859-1") {
212 if(has_std_locale("English_United States.1252")) {
213 if(real_name)
214 *real_name = "English_United States.1252";
215 return utf8 ? name : "en_US.windows-1252";
216 }
217 return "";
218 } else if(name == "he_IL.UTF-8" || name == "he_IL.ISO8859-8") {
219 if(has_std_locale("Hebrew_Israel.1255")) {
220 if(real_name)
221 *real_name = "Hebrew_Israel.1255";
222 return utf8 ? name : "he_IL.windows-1255";
223 }
224 } else if(name == "ru_RU.UTF-8") {
225 if(has_std_locale("Russian_Russia.1251")) {
226 if(real_name)
227 *real_name = "Russian_Russia.1251";
228 return name;
229 }
230 } else if(name == "tr_TR.UTF-8") {
231 if(has_std_locale("Turkish_Turkey.1254")) {
232 if(real_name)
233 *real_name = "Turkish_Turkey.1254";
234 return name;
235 }
236 }
237 if(name == "ja_JP.SJIS") {
238 if(has_std_locale("Japanese_Japan.932")) {
239 if(real_name)
240 *real_name = "Japanese_Japan.932";
241 return name;
242 }
243 return "";
244 }
245#endif
246 return "";
247}
248
/// Build a 2-byte UTF-8 style sequence (`110xxxxx 10xxxxxx`) from \a v.
///
/// No range check is done on \a v, so sequences that are not valid UTF-8
/// can be created on purpose.
///
/// @return Pointer to a NUL-terminated, function-local static buffer which is
///         overwritten by the next call to `make2`
///
/// Declared `inline` because this header defines the function and may be
/// included from more than one translation unit.
inline char* make2(unsigned v)
{
    static unsigned char buf[3] = {0};
    buf[0] = static_cast<unsigned char>(0xC0 | (v >> 6));
    buf[1] = static_cast<unsigned char>(0x80 | (v & 0x3F));
    return reinterpret_cast<char*>(buf);
}
256
/// Build a 3-byte UTF-8 style sequence (`1110xxxx 10xxxxxx 10xxxxxx`) from \a v.
///
/// No range check is done on \a v, so sequences that are not valid UTF-8
/// can be created on purpose.
///
/// @return Pointer to a NUL-terminated, function-local static buffer which is
///         overwritten by the next call to `make3`
///
/// Declared `inline` because this header defines the function and may be
/// included from more than one translation unit.
inline char* make3(unsigned v)
{
    static unsigned char buf[4] = {0};
    buf[0] = static_cast<unsigned char>(0xE0 | (v >> 12));
    buf[1] = static_cast<unsigned char>(0x80 | ((v >> 6) & 0x3F));
    buf[2] = static_cast<unsigned char>(0x80 | (v & 0x3F));
    return reinterpret_cast<char*>(buf);
}
265
/// Build a 4-byte UTF-8 style sequence (`11110xxx 10xxxxxx 10xxxxxx 10xxxxxx`) from \a v.
///
/// No range check is done on \a v, so sequences that are not valid UTF-8
/// can be created on purpose.
///
/// @return Pointer to a NUL-terminated, function-local static buffer which is
///         overwritten by the next call to `make4`
///
/// Declared `inline` because this header defines the function and may be
/// included from more than one translation unit.
inline char* make4(unsigned v)
{
    static unsigned char buf[5] = {0};
    buf[0] = static_cast<unsigned char>(0xF0 | (v >> 18));
    buf[1] = static_cast<unsigned char>(0x80 | ((v >> 12) & 0x3F));
    buf[2] = static_cast<unsigned char>(0x80 | ((v >> 6) & 0x3F));
    buf[3] = static_cast<unsigned char>(0x80 | (v & 0x3F));
    return reinterpret_cast<char*>(buf);
}
275
276#ifdef _MSC_VER
277# pragma warning(push)
278# pragma warning(disable : 4996) //"This function or variable may be unsafe"
279#endif
280#if defined(__clang__)
281# pragma clang diagnostic push
282# pragma clang diagnostic ignored "-Wdeprecated-declarations"
283#endif
/// Wrapper for std::gmtime avoiding warning 4996 on MSVC/clang-cl.
/// Forwards \a time unchanged and returns whatever `std::gmtime` returns.
inline std::tm* gmtime_wrap(const std::time_t* time)
{
    return std::gmtime(time);
}
/// Wrapper for std::localtime avoiding warning 4996 on MSVC/clang-cl.
/// Forwards \a time unchanged and returns whatever `std::localtime` returns.
inline std::tm* localtime_wrap(const std::time_t* time)
{
    return std::localtime(time);
}
294#if defined(__clang__)
295# pragma clang diagnostic pop
296#endif
297#ifdef _MSC_VER
298# pragma warning(pop)
299#endif
300
301#endif
302

// Source code of boost/libs/locale/test/boostLocale/test/tools.hpp