1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtCore module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | |
40 | #include "qicucodec_p.h" |
41 | |
42 | #include "qtextcodec_p.h" |
43 | #include "qutfcodec_p.h" |
44 | #include "qlatincodec_p.h" |
45 | #include "qsimplecodec_p.h" |
46 | #include "private/qcoreglobaldata_p.h" |
47 | #include "qdebug.h" |
48 | |
49 | #include "unicode/ucnv.h" |
50 | |
51 | #if QT_CONFIG(codecs) |
52 | #include "qtsciicodec_p.h" |
53 | #include "qisciicodec_p.h" |
54 | #endif |
55 | |
56 | QT_BEGIN_NAMESPACE |
57 | |
58 | typedef QList<QTextCodec*>::ConstIterator TextCodecListConstIt; |
59 | typedef QList<QByteArray>::ConstIterator ByteArrayListConstIt; |
60 | |
61 | static void qIcuCodecStateFree(QTextCodec::ConverterState *state) |
62 | { |
63 | ucnv_close(converter: static_cast<UConverter *>(state->d)); |
64 | } |
65 | |
66 | bool qTextCodecNameMatch(const char *n, const char *h) |
67 | { |
68 | return ucnv_compareNames(name1: n, name2: h) == 0; |
69 | } |
70 | |
71 | /* The list below is generated from http://www.iana.org/assignments/character-sets/ |
72 | using the snippet of code below: |
73 | |
74 | #include <QtCore> |
75 | #include <unicode/ucnv.h> |
76 | |
77 | int main(int argc, char **argv) |
78 | { |
79 | QCoreApplication app(argc, argv); |
80 | |
81 | QFile file("character-sets.txt"); |
82 | file.open(QFile::ReadOnly); |
83 | QByteArray name; |
84 | int mib = -1; |
85 | QByteArray nameList; |
86 | int pos = 0; |
87 | while (!file.atEnd()) { |
88 | QByteArray s = file.readLine().trimmed(); |
89 | if (s.isEmpty()) { |
90 | if (mib != -1) { |
91 | UErrorCode error = U_ZERO_ERROR; |
92 | const char *standard_name = ucnv_getStandardName(name, "MIME", &error); |
93 | if (U_FAILURE(error) || !standard_name) { |
94 | error = U_ZERO_ERROR; |
95 | standard_name = ucnv_getStandardName(name, "IANA", &error); |
96 | } |
97 | UConverter *conv = ucnv_open(standard_name, &error); |
98 | if (!U_FAILURE(error) && conv && standard_name) { |
99 | ucnv_close(conv); |
100 | printf(" { %d, %d },\n", mib, pos); |
101 | nameList += "\""; |
102 | nameList += standard_name; |
103 | nameList += "\\0\"\n"; |
104 | pos += strlen(standard_name) + 1; |
105 | } |
106 | } |
107 | name = QByteArray(); |
108 | mib = -1; |
109 | } |
110 | if (s.startsWith("Name: ")) { |
111 | name = s.mid(5).trimmed(); |
112 | if (name.indexOf(' ') > 0) |
113 | name = name.left(name.indexOf(' ')); |
114 | } |
115 | if (s.startsWith("MIBenum:")) |
116 | mib = s.mid(8).trimmed().toInt(); |
117 | if (s.startsWith("Alias:") && s.contains("MIME")) { |
118 | name = s.mid(6).trimmed(); |
119 | name = name.left(name.indexOf(' ')).trimmed(); |
120 | } |
121 | } |
122 | qDebug() << nameList; |
123 | } |
124 | */ |
125 | |
// One row of the MIB lookup table: an IANA MIBenum value together with the
// byte offset of the matching codec name inside mibToNameTable.
struct MibToName {
    short mib;      // IANA MIBenum
    short index;    // offset of the NUL-terminated name in mibToNameTable
};

// MIBenum -> name offset. The trailing comment on each row is the name found
// at that offset in mibToNameTable.
static const MibToName mibToName[] = {
    { 3, 0 },       // US-ASCII
    { 4, 9 },       // ISO-8859-1
    { 5, 20 },      // ISO-8859-2
    { 6, 31 },      // ISO-8859-3
    { 7, 42 },      // ISO-8859-4
    { 8, 53 },      // ISO-8859-5
    { 9, 64 },      // ISO-8859-6
    { 10, 75 },     // ISO-8859-7
    { 11, 86 },     // ISO-8859-8
    { 12, 97 },     // ISO-8859-9
    { 13, 108 },    // ISO-8859-10
    { 16, 120 },    // ISO-2022-JP-1
    { 17, 134 },    // Shift_JIS
    { 18, 144 },    // EUC-JP
    { 30, 151 },    // US-ASCII
    { 36, 160 },    // EUC-KR
    { 37, 167 },    // ISO-2022-KR
    { 38, 179 },    // EUC-KR
    { 39, 186 },    // ISO-2022-JP
    { 40, 198 },    // ISO-2022-JP-2
    { 57, 212 },    // GB_2312-80
    { 81, 223 },    // ISO-8859-6
    { 82, 234 },    // ISO-8859-6
    { 84, 245 },    // ISO-8859-8
    { 85, 256 },    // ISO-8859-8
    { 104, 267 },   // ISO-2022-CN
    { 105, 279 },   // ISO-2022-CN-EXT
    { 106, 295 },   // UTF-8
    { 109, 301 },   // ISO-8859-13
    { 110, 313 },   // ISO-8859-14
    { 111, 325 },   // ISO-8859-15
    { 113, 337 },   // GBK
    { 114, 341 },   // GB18030
    { 1000, 349 },  // UTF-16
    { 1001, 356 },  // UTF-32
    { 1011, 363 },  // SCSU
    { 1012, 368 },  // UTF-7
    { 1013, 374 },  // UTF-16BE
    { 1014, 383 },  // UTF-16LE
    { 1015, 392 },  // UTF-16
    { 1016, 399 },  // CESU-8
    { 1017, 406 },  // UTF-32
    { 1018, 413 },  // UTF-32BE
    { 1019, 422 },  // UTF-32LE
    { 1020, 431 },  // BOCU-1
    { 2004, 438 },  // hp-roman8
    { 2005, 448 },  // Adobe-Standard-Encoding
    { 2009, 472 },  // IBM850
    { 2013, 479 },  // IBM862
    { 2016, 486 },  // IBM-Thai
    { 2024, 495 },  // Shift_JIS
    { 2025, 505 },  // GB2312
    { 2026, 512 },  // Big5
    { 2027, 517 },  // macintosh
    { 2028, 527 },  // IBM037
    { 2030, 534 },  // IBM273
    { 2033, 541 },  // IBM277
    { 2034, 548 },  // IBM278
    { 2035, 555 },  // IBM280
    { 2037, 562 },  // IBM284
    { 2038, 569 },  // IBM285
    { 2039, 576 },  // IBM290
    { 2040, 583 },  // IBM297
    { 2041, 590 },  // IBM420
    { 2043, 597 },  // IBM424
    { 2011, 604 },  // IBM437
    { 2044, 611 },  // IBM500
    { 2045, 618 },  // cp851
    { 2010, 624 },  // IBM852
    { 2046, 631 },  // IBM855
    { 2047, 638 },  // IBM857
    { 2048, 645 },  // IBM860
    { 2049, 652 },  // IBM861
    { 2050, 659 },  // IBM863
    { 2051, 666 },  // IBM864
    { 2052, 673 },  // IBM865
    { 2053, 680 },  // IBM868
    { 2054, 687 },  // IBM869
    { 2055, 694 },  // IBM870
    { 2056, 701 },  // IBM871
    { 2062, 708 },  // IBM918
    { 2063, 715 },  // IBM1026
    { 2084, 723 },  // KOI8-R
    { 2085, 730 },  // HZ-GB-2312
    { 2086, 741 },  // IBM866
    { 2087, 748 },  // IBM775
    { 2088, 755 },  // KOI8-U
    { 2089, 762 },  // IBM00858
    { 2091, 771 },  // IBM01140
    { 2092, 780 },  // IBM01141
    { 2093, 789 },  // IBM01142
    { 2094, 798 },  // IBM01143
    { 2095, 807 },  // IBM01144
    { 2096, 816 },  // IBM01145
    { 2097, 825 },  // IBM01146
    { 2098, 834 },  // IBM01147
    { 2099, 843 },  // IBM01148
    { 2100, 852 },  // IBM01149
    { 2101, 861 },  // Big5-HKSCS
    { 2102, 872 },  // IBM1047
    { 2250, 880 },  // windows-1250
    { 2251, 893 },  // windows-1251
    { 2252, 906 },  // windows-1252
    { 2253, 919 },  // windows-1253
    { 2254, 932 },  // windows-1254
    { 2255, 945 },  // windows-1255
    { 2256, 958 },  // windows-1256
    { 2257, 971 },  // windows-1257
    { 2258, 984 },  // windows-1258
    { 2259, 997 },  // TIS-620
};
// Number of rows in mibToName.
int mibToNameSize = sizeof(mibToName) / sizeof(mibToName[0]);
244 | |
// All codec names referenced by mibToName, stored back to back. Every name
// carries its own explicit "\0", and mibToName[i].index is the byte offset of
// the name's first character within this array.
static const char mibToNameTable[] =
    // ASCII and the ISO 8859 family
    "US-ASCII\0"
    "ISO-8859-1\0"
    "ISO-8859-2\0"
    "ISO-8859-3\0"
    "ISO-8859-4\0"
    "ISO-8859-5\0"
    "ISO-8859-6\0"
    "ISO-8859-7\0"
    "ISO-8859-8\0"
    "ISO-8859-9\0"
    "ISO-8859-10\0"
    // East Asian encodings
    "ISO-2022-JP-1\0"
    "Shift_JIS\0"
    "EUC-JP\0"
    "US-ASCII\0"
    "EUC-KR\0"
    "ISO-2022-KR\0"
    "EUC-KR\0"
    "ISO-2022-JP\0"
    "ISO-2022-JP-2\0"
    "GB_2312-80\0"
    "ISO-8859-6\0"
    "ISO-8859-6\0"
    "ISO-8859-8\0"
    "ISO-8859-8\0"
    "ISO-2022-CN\0"
    "ISO-2022-CN-EXT\0"
    "UTF-8\0"
    "ISO-8859-13\0"
    "ISO-8859-14\0"
    "ISO-8859-15\0"
    "GBK\0"
    "GB18030\0"
    // Unicode encoding forms
    "UTF-16\0"
    "UTF-32\0"
    "SCSU\0"
    "UTF-7\0"
    "UTF-16BE\0"
    "UTF-16LE\0"
    "UTF-16\0"
    "CESU-8\0"
    "UTF-32\0"
    "UTF-32BE\0"
    "UTF-32LE\0"
    "BOCU-1\0"
    // Vendor code pages
    "hp-roman8\0"
    "Adobe-Standard-Encoding\0"
    "IBM850\0"
    "IBM862\0"
    "IBM-Thai\0"
    "Shift_JIS\0"
    "GB2312\0"
    "Big5\0"
    "macintosh\0"
    "IBM037\0"
    "IBM273\0"
    "IBM277\0"
    "IBM278\0"
    "IBM280\0"
    "IBM284\0"
    "IBM285\0"
    "IBM290\0"
    "IBM297\0"
    "IBM420\0"
    "IBM424\0"
    "IBM437\0"
    "IBM500\0"
    "cp851\0"
    "IBM852\0"
    "IBM855\0"
    "IBM857\0"
    "IBM860\0"
    "IBM861\0"
    "IBM863\0"
    "IBM864\0"
    "IBM865\0"
    "IBM868\0"
    "IBM869\0"
    "IBM870\0"
    "IBM871\0"
    "IBM918\0"
    "IBM1026\0"
    "KOI8-R\0"
    "HZ-GB-2312\0"
    "IBM866\0"
    "IBM775\0"
    "KOI8-U\0"
    "IBM00858\0"
    "IBM01140\0"
    "IBM01141\0"
    "IBM01142\0"
    "IBM01143\0"
    "IBM01144\0"
    "IBM01145\0"
    "IBM01146\0"
    "IBM01147\0"
    "IBM01148\0"
    "IBM01149\0"
    "Big5-HKSCS\0"
    "IBM1047\0"
    // Windows code pages
    "windows-1250\0"
    "windows-1251\0"
    "windows-1252\0"
    "windows-1253\0"
    "windows-1254\0"
    "windows-1255\0"
    "windows-1256\0"
    "windows-1257\0"
    "windows-1258\0"
    "TIS-620\0";
356 | |
357 | static QTextCodec *loadQtCodec(const char *name) |
358 | { |
359 | if (!strcmp(s1: name, s2: "UTF-8" )) |
360 | return new QUtf8Codec; |
361 | if (!strcmp(s1: name, s2: "UTF-16" )) |
362 | return new QUtf16Codec; |
363 | if (!strcmp(s1: name, s2: "ISO-8859-1" )) |
364 | return new QLatin1Codec; |
365 | if (!strcmp(s1: name, s2: "UTF-16BE" )) |
366 | return new QUtf16BECodec; |
367 | if (!strcmp(s1: name, s2: "UTF-16LE" )) |
368 | return new QUtf16LECodec; |
369 | if (!strcmp(s1: name, s2: "UTF-32" )) |
370 | return new QUtf32Codec; |
371 | if (!strcmp(s1: name, s2: "UTF-32BE" )) |
372 | return new QUtf32BECodec; |
373 | if (!strcmp(s1: name, s2: "UTF-32LE" )) |
374 | return new QUtf32LECodec; |
375 | if (!strcmp(s1: name, s2: "ISO-8859-16" ) || !strcmp(s1: name, s2: "latin10" ) || !strcmp(s1: name, s2: "iso-ir-226" )) |
376 | return new QSimpleTextCodec(13 /* == 8859-16*/); |
377 | #if QT_CONFIG(codecs) |
378 | if (!strcmp(s1: name, s2: "TSCII" )) |
379 | return new QTsciiCodec; |
380 | if (!qstrnicmp(name, "iscii" , len: 5)) |
381 | return QIsciiCodec::create(name); |
382 | #endif |
383 | |
384 | return nullptr; |
385 | } |
386 | |
387 | /// \threadsafe |
388 | QList<QByteArray> QIcuCodec::availableCodecs() |
389 | { |
390 | QList<QByteArray> codecs; |
391 | int n = ucnv_countAvailable(); |
392 | for (int i = 0; i < n; ++i) { |
393 | const char *name = ucnv_getAvailableName(n: i); |
394 | |
395 | UErrorCode error = U_ZERO_ERROR; |
396 | const char *standardName = ucnv_getStandardName(name, standard: "MIME" , pErrorCode: &error); |
397 | if (U_FAILURE(code: error) || !standardName) { |
398 | error = U_ZERO_ERROR; |
399 | standardName = ucnv_getStandardName(name, standard: "IANA" , pErrorCode: &error); |
400 | } |
401 | if (U_FAILURE(code: error)) |
402 | continue; |
403 | |
404 | error = U_ZERO_ERROR; |
405 | int ac = ucnv_countAliases(alias: standardName, pErrorCode: &error); |
406 | if (U_FAILURE(code: error)) |
407 | continue; |
408 | for (int j = 0; j < ac; ++j) { |
409 | error = U_ZERO_ERROR; |
410 | const char *alias = ucnv_getAlias(alias: standardName, n: j, pErrorCode: &error); |
411 | if (!U_SUCCESS(code: error)) |
412 | continue; |
413 | codecs += alias; |
414 | } |
415 | } |
416 | |
417 | // handled by Qt and not in ICU: |
418 | codecs += "TSCII" ; |
419 | |
420 | return codecs; |
421 | } |
422 | |
423 | /// \threadsafe |
424 | QList<int> QIcuCodec::availableMibs() |
425 | { |
426 | QList<int> mibs; |
427 | mibs.reserve(alloc: mibToNameSize + 1); |
428 | for (int i = 0; i < mibToNameSize; ++i) |
429 | mibs += mibToName[i].mib; |
430 | |
431 | // handled by Qt and not in ICU: |
432 | mibs += 2107; // TSCII |
433 | |
434 | return mibs; |
435 | } |
436 | |
437 | QTextCodec *QIcuCodec::defaultCodecUnlocked() |
438 | { |
439 | QCoreGlobalData *globalData = QCoreGlobalData::instance(); |
440 | if (!globalData) |
441 | return nullptr; |
442 | QTextCodec *c = globalData->codecForLocale.loadAcquire(); |
443 | if (c) |
444 | return c; |
445 | |
446 | #if defined(QT_LOCALE_IS_UTF8) |
447 | const char *name = "UTF-8" ; |
448 | #else |
449 | const char *name = ucnv_getDefaultName(); |
450 | #endif |
451 | c = codecForNameUnlocked(name); |
452 | globalData->codecForLocale.storeRelease(newValue: c); |
453 | return c; |
454 | } |
455 | |
456 | |
457 | QTextCodec *QIcuCodec::codecForNameUnlocked(const char *name) |
458 | { |
459 | // backwards compatibility with Qt 4.x |
460 | if (!qstrcmp(str1: name, str2: "CP949" )) |
461 | name = "windows-949" ; |
462 | else if (!qstrcmp(str1: name, str2: "Apple Roman" )) |
463 | name = "macintosh" ; |
464 | // these are broken data in ICU 4.4, and can't be resolved even though they are aliases to tis-620 |
465 | if (!qstrcmp(str1: name, str2: "windows-874-2000" ) |
466 | || !qstrcmp(str1: name, str2: "windows-874" ) |
467 | || !qstrcmp(str1: name, str2: "MS874" ) |
468 | || !qstrcmp(str1: name, str2: "x-windows-874" ) |
469 | || !qstrcmp(str1: name, str2: "ISO 8859-11" )) |
470 | name = "TIS-620" ; |
471 | |
472 | UErrorCode error = U_ZERO_ERROR; |
473 | // MIME gives better default names |
474 | const char *standardName = ucnv_getStandardName(name, standard: "MIME" , pErrorCode: &error); |
475 | if (U_FAILURE(code: error) || !standardName) { |
476 | error = U_ZERO_ERROR; |
477 | standardName = ucnv_getStandardName(name, standard: "IANA" , pErrorCode: &error); |
478 | } |
479 | bool qt_only = false; |
480 | if (U_FAILURE(code: error) || !standardName) { |
481 | standardName = name; |
482 | qt_only = true; |
483 | } else { |
484 | // correct some issues where the ICU data set contains duplicated entries. |
485 | // Where this happens it's because one data set is a subset of another. We |
486 | // always use the larger data set. |
487 | |
488 | if (qstrcmp(str1: standardName, str2: "GB2312" ) == 0 || qstrcmp(str1: standardName, str2: "GB_2312-80" ) == 0) |
489 | standardName = "GBK" ; |
490 | else if (qstrcmp(str1: standardName, str2: "KSC_5601" ) == 0 || qstrcmp(str1: standardName, str2: "EUC-KR" ) == 0 || qstrcmp(str1: standardName, str2: "cp1363" ) == 0) |
491 | standardName = "windows-949" ; |
492 | } |
493 | |
494 | QCoreGlobalData *globalData = QCoreGlobalData::instance(); |
495 | QTextCodecCache *cache = &globalData->codecCache; |
496 | |
497 | QTextCodec *codec; |
498 | if (cache) { |
499 | codec = cache->value(akey: standardName); |
500 | if (codec) |
501 | return codec; |
502 | } |
503 | |
504 | for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it) { |
505 | QTextCodec *cursor = *it; |
506 | if (qTextCodecNameMatch(n: cursor->name(), h: standardName)) { |
507 | if (cache) |
508 | cache->insert(akey: standardName, avalue: cursor); |
509 | return cursor; |
510 | } |
511 | QList<QByteArray> aliases = cursor->aliases(); |
512 | for (ByteArrayListConstIt ait = aliases.constBegin(), acend = aliases.constEnd(); ait != acend; ++ait) { |
513 | if (qTextCodecNameMatch(n: *ait, h: standardName)) { |
514 | if (cache) |
515 | cache->insert(akey: standardName, avalue: cursor); |
516 | return cursor; |
517 | } |
518 | } |
519 | } |
520 | |
521 | QTextCodec *c = loadQtCodec(name: standardName); |
522 | if (c) |
523 | return c; |
524 | |
525 | if (qt_only) |
526 | return nullptr; |
527 | |
528 | // check whether there is really a converter for the name available. |
529 | UConverter *conv = ucnv_open(converterName: standardName, err: &error); |
530 | if (!conv) { |
531 | qDebug(msg: "codecForName: ucnv_open failed %s %s" , standardName, u_errorName(code: error)); |
532 | return nullptr; |
533 | } |
534 | //qDebug() << "QIcuCodec: Standard name for " << name << "is" << standardName; |
535 | ucnv_close(converter: conv); |
536 | |
537 | |
538 | c = new QIcuCodec(standardName); |
539 | if (cache) |
540 | cache->insert(akey: standardName, avalue: c); |
541 | return c; |
542 | } |
543 | |
544 | |
545 | QTextCodec *QIcuCodec::codecForMibUnlocked(int mib) |
546 | { |
547 | for (int i = 0; i < mibToNameSize; ++i) { |
548 | if (mibToName[i].mib == mib) |
549 | return codecForNameUnlocked(name: mibToNameTable + mibToName[i].index); |
550 | } |
551 | |
552 | if (mib == 2107) |
553 | return codecForNameUnlocked(name: "TSCII" ); |
554 | |
555 | return nullptr; |
556 | } |
557 | |
558 | |
559 | QIcuCodec::QIcuCodec(const char *name) |
560 | : m_name(name) |
561 | { |
562 | } |
563 | |
564 | QIcuCodec::~QIcuCodec() |
565 | { |
566 | } |
567 | |
568 | /*! |
569 | \internal |
570 | |
571 | Custom callback for the ICU from Unicode conversion. It's invoked when the |
572 | conversion from Unicode detects illegal or unrecognized character. |
573 | |
574 | Assumes that context contains a pointer to QTextCodec::ConverterState |
575 | structure. Updates its invalid characters count and calls a default |
576 | callback, that replaces the invalid characters properly. |
577 | */ |
578 | static void customFromUnicodeSubstitutionCallback(const void *context, |
579 | UConverterFromUnicodeArgs *fromUArgs, |
580 | const UChar *codeUnits, |
581 | int32_t length, |
582 | UChar32 codePoint, |
583 | UConverterCallbackReason reason, |
584 | UErrorCode *err) |
585 | { |
586 | auto *state = reinterpret_cast<QTextCodec::ConverterState *>(const_cast<void *>(context)); |
587 | if (state) |
588 | state->invalidChars++; |
589 | // Call the default callback that replaces all illegal or unrecognized |
590 | // sequences with the substitute string |
591 | UCNV_FROM_U_CALLBACK_SUBSTITUTE(context: nullptr, fromUArgs, codeUnits, length, codePoint, reason, err); |
592 | } |
593 | |
594 | UConverter *QIcuCodec::getConverter(QTextCodec::ConverterState *state) const |
595 | { |
596 | UConverter *conv = nullptr; |
597 | if (state) { |
598 | if (!state->d) { |
599 | // first time |
600 | state->flags |= QTextCodec::FreeFunction; |
601 | QTextCodecUnalignedPointer::encode(dst: state->state_data, fn: qIcuCodecStateFree); |
602 | UErrorCode error = U_ZERO_ERROR; |
603 | state->d = ucnv_open(converterName: m_name, err: &error); |
604 | ucnv_setSubstChars(converter: static_cast<UConverter *>(state->d), |
605 | subChars: state->flags & QTextCodec::ConvertInvalidToNull ? "\0" : "?" , len: 1, err: &error); |
606 | if (!U_FAILURE(code: error)) { |
607 | error = U_ZERO_ERROR; |
608 | ucnv_setFromUCallBack(converter: static_cast<UConverter *>(state->d), |
609 | newAction: customFromUnicodeSubstitutionCallback, newContext: state, oldAction: nullptr, |
610 | oldContext: nullptr, err: &error); |
611 | if (U_FAILURE(code: error)) { |
612 | qDebug(msg: "getConverter(state) failed to install custom callback. " |
613 | "canEncode() may report incorrect results." ); |
614 | } |
615 | } else { |
616 | qDebug(msg: "getConverter(state) ucnv_open failed %s %s" , m_name, u_errorName(code: error)); |
617 | } |
618 | } |
619 | conv = static_cast<UConverter *>(state->d); |
620 | } |
621 | if (!conv) { |
622 | // stateless conversion |
623 | UErrorCode error = U_ZERO_ERROR; |
624 | conv = ucnv_open(converterName: m_name, err: &error); |
625 | ucnv_setSubstChars(converter: conv, subChars: "?" , len: 1, err: &error); |
626 | if (U_FAILURE(code: error)) |
627 | qDebug(msg: "getConverter(no state) ucnv_open failed %s %s" , m_name, u_errorName(code: error)); |
628 | } |
629 | return conv; |
630 | } |
631 | |
632 | QString QIcuCodec::convertToUnicode(const char *chars, int length, QTextCodec::ConverterState *state) const |
633 | { |
634 | UConverter *conv = getConverter(state); |
635 | |
636 | QString string(length + 2, Qt::Uninitialized); |
637 | |
638 | const char *end = chars + length; |
639 | int convertedChars = 0; |
640 | while (1) { |
641 | UChar *uc = (UChar *)string.data(); |
642 | UChar *ucEnd = uc + string.length(); |
643 | uc += convertedChars; |
644 | UErrorCode error = U_ZERO_ERROR; |
645 | ucnv_toUnicode(converter: conv, |
646 | target: &uc, targetLimit: ucEnd, |
647 | source: &chars, sourceLimit: end, |
648 | offsets: nullptr, flush: false, err: &error); |
649 | if (!U_SUCCESS(code: error) && error != U_BUFFER_OVERFLOW_ERROR) { |
650 | qDebug(msg: "convertToUnicode failed: %s" , u_errorName(code: error)); |
651 | break; |
652 | } |
653 | |
654 | convertedChars = uc - (UChar *)string.data(); |
655 | if (chars >= end) |
656 | break; |
657 | string.resize(size: string.length()*2); |
658 | } |
659 | string.resize(size: convertedChars); |
660 | |
661 | if (!state) |
662 | ucnv_close(converter: conv); |
663 | return string; |
664 | } |
665 | |
666 | |
667 | QByteArray QIcuCodec::convertFromUnicode(const QChar *unicode, int length, QTextCodec::ConverterState *state) const |
668 | { |
669 | UConverter *conv = getConverter(state); |
670 | |
671 | int requiredLength = UCNV_GET_MAX_BYTES_FOR_STRING(length, ucnv_getMaxCharSize(conv)); |
672 | QByteArray string(requiredLength, Qt::Uninitialized); |
673 | |
674 | const UChar *uc = (const UChar *)unicode; |
675 | const UChar *end = uc + length; |
676 | int convertedChars = 0; |
677 | while (1) { |
678 | char *ch = (char *)string.data(); |
679 | char *chEnd = ch + string.length(); |
680 | ch += convertedChars; |
681 | UErrorCode error = U_ZERO_ERROR; |
682 | ucnv_fromUnicode(converter: conv, |
683 | target: &ch, targetLimit: chEnd, |
684 | source: &uc, sourceLimit: end, |
685 | offsets: nullptr, flush: false, err: &error); |
686 | if (!U_SUCCESS(code: error)) |
687 | qDebug(msg: "convertFromUnicode failed: %s" , u_errorName(code: error)); |
688 | convertedChars = ch - string.data(); |
689 | if (uc >= end) |
690 | break; |
691 | string.resize(size: string.length()*2); |
692 | } |
693 | string.resize(size: convertedChars); |
694 | |
695 | if (!state) |
696 | ucnv_close(converter: conv); |
697 | |
698 | return string; |
699 | } |
700 | |
701 | |
702 | QByteArray QIcuCodec::name() const |
703 | { |
704 | return m_name; |
705 | } |
706 | |
707 | |
708 | QList<QByteArray> QIcuCodec::aliases() const |
709 | { |
710 | UErrorCode error = U_ZERO_ERROR; |
711 | |
712 | int n = ucnv_countAliases(alias: m_name, pErrorCode: &error); |
713 | |
714 | QList<QByteArray> aliases; |
715 | for (int i = 0; i < n; ++i) { |
716 | const char *a = ucnv_getAlias(alias: m_name, n: i, pErrorCode: &error); |
717 | // skip the canonical name |
718 | if (!a || !qstrcmp(str1: a, str2: m_name)) |
719 | continue; |
720 | aliases += a; |
721 | } |
722 | |
723 | return aliases; |
724 | } |
725 | |
726 | |
727 | int QIcuCodec::mibEnum() const |
728 | { |
729 | for (int i = 0; i < mibToNameSize; ++i) { |
730 | if (qTextCodecNameMatch(n: m_name, h: (mibToNameTable + mibToName[i].index))) |
731 | return mibToName[i].mib; |
732 | } |
733 | |
734 | return 0; |
735 | } |
736 | |
737 | QT_END_NAMESPACE |
738 | |