1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "qicucodec_p.h" |
5 | |
6 | #include "qtextcodec_p.h" |
7 | #include "qutfcodec_p.h" |
8 | #include "qlatincodec_p.h" |
9 | #include "qsimplecodec_p.h" |
10 | #include "qdebug.h" |
11 | |
12 | #include "unicode/ucnv.h" |
13 | |
14 | #if QT_CONFIG(codecs) |
15 | #include "qtsciicodec_p.h" |
16 | #include "qisciicodec_p.h" |
17 | #endif |
18 | |
19 | QT_BEGIN_NAMESPACE |
20 | |
21 | typedef QList<QTextCodec*>::ConstIterator TextCodecListConstIt; |
22 | typedef QList<QByteArray>::ConstIterator ByteArrayListConstIt; |
23 | |
24 | static void qIcuCodecStateFree(QTextCodec::ConverterState *state) noexcept |
25 | { |
26 | ucnv_close(converter: static_cast<UConverter *>(state->d[0])); |
27 | } |
28 | |
29 | bool qTextCodecNameMatch(const char *n, const char *h) |
30 | { |
31 | return ucnv_compareNames(name1: n, name2: h) == 0; |
32 | } |
33 | |
34 | /* The list below is generated from http://www.iana.org/assignments/character-sets/ |
35 | using the snippet of code below: |
36 | |
37 | #include <QtCore> |
38 | #include <unicode/ucnv.h> |
39 | |
40 | int main(int argc, char **argv) |
41 | { |
42 | QCoreApplication app(argc, argv); |
43 | |
44 | QFile file("character-sets.txt"); |
45 | file.open(QFile::ReadOnly); |
46 | QByteArray name; |
47 | int mib = -1; |
48 | QByteArray nameList; |
49 | int pos = 0; |
50 | while (!file.atEnd()) { |
51 | QByteArray s = file.readLine().trimmed(); |
52 | if (s.isEmpty()) { |
53 | if (mib != -1) { |
54 | UErrorCode error = U_ZERO_ERROR; |
55 | const char *standard_name = ucnv_getStandardName(name, "MIME", &error); |
56 | if (U_FAILURE(error) || !standard_name) { |
57 | error = U_ZERO_ERROR; |
58 | standard_name = ucnv_getStandardName(name, "IANA", &error); |
59 | } |
60 | UConverter *conv = ucnv_open(standard_name, &error); |
61 | if (!U_FAILURE(error) && conv && standard_name) { |
62 | ucnv_close(conv); |
63 | printf(" { %d, %d },\n", mib, pos); |
64 | nameList += "\""; |
65 | nameList += standard_name; |
66 | nameList += "\\0\"\n"; |
67 | pos += strlen(standard_name) + 1; |
68 | } |
69 | } |
70 | name = QByteArray(); |
71 | mib = -1; |
72 | } |
73 | if (s.startsWith("Name: ")) { |
74 | name = s.mid(5).trimmed(); |
75 | if (name.indexOf(' ') > 0) |
76 | name = name.left(name.indexOf(' ')); |
77 | } |
78 | if (s.startsWith("MIBenum:")) |
79 | mib = s.mid(8).trimmed().toInt(); |
80 | if (s.startsWith("Alias:") && s.contains("MIME")) { |
81 | name = s.mid(6).trimmed(); |
82 | name = name.left(name.indexOf(' ')).trimmed(); |
83 | } |
84 | } |
85 | qDebug() << nameList; |
86 | } |
87 | */ |
88 | |
// One row of the MIB lookup table: a MIBenum value and the byte offset of the
// matching NUL-terminated codec name inside mibToNameTable.
struct MibToName {
    short mib;
    short index;
};

// Generated data (see the snippet in the comment above). Rows are searched
// linearly, so they do not need to be sorted by MIB.
static const MibToName mibToName[] = {
    {    3,   0 },
    {    4,   9 },
    {    5,  20 },
    {    6,  31 },
    {    7,  42 },
    {    8,  53 },
    {    9,  64 },
    {   10,  75 },
    {   11,  86 },
    {   12,  97 },
    {   13, 108 },
    {   16, 120 },
    {   17, 134 },
    {   18, 144 },
    {   30, 151 },
    {   36, 160 },
    {   37, 167 },
    {   38, 179 },
    {   39, 186 },
    {   40, 198 },
    {   57, 212 },
    {   81, 223 },
    {   82, 234 },
    {   84, 245 },
    {   85, 256 },
    {  104, 267 },
    {  105, 279 },
    {  106, 295 },
    {  109, 301 },
    {  110, 313 },
    {  111, 325 },
    {  113, 337 },
    {  114, 341 },
    { 1000, 349 },
    { 1001, 356 },
    { 1011, 363 },
    { 1012, 368 },
    { 1013, 374 },
    { 1014, 383 },
    { 1015, 392 },
    { 1016, 399 },
    { 1017, 406 },
    { 1018, 413 },
    { 1019, 422 },
    { 1020, 431 },
    { 2004, 438 },
    { 2005, 448 },
    { 2009, 472 },
    { 2013, 479 },
    { 2016, 486 },
    { 2024, 495 },
    { 2025, 505 },
    { 2026, 512 },
    { 2027, 517 },
    { 2028, 527 },
    { 2030, 534 },
    { 2033, 541 },
    { 2034, 548 },
    { 2035, 555 },
    { 2037, 562 },
    { 2038, 569 },
    { 2039, 576 },
    { 2040, 583 },
    { 2041, 590 },
    { 2043, 597 },
    { 2011, 604 },
    { 2044, 611 },
    { 2045, 618 },
    { 2010, 624 },
    { 2046, 631 },
    { 2047, 638 },
    { 2048, 645 },
    { 2049, 652 },
    { 2050, 659 },
    { 2051, 666 },
    { 2052, 673 },
    { 2053, 680 },
    { 2054, 687 },
    { 2055, 694 },
    { 2056, 701 },
    { 2062, 708 },
    { 2063, 715 },
    { 2084, 723 },
    { 2085, 730 },
    { 2086, 741 },
    { 2087, 748 },
    { 2088, 755 },
    { 2089, 762 },
    { 2091, 771 },
    { 2092, 780 },
    { 2093, 789 },
    { 2094, 798 },
    { 2095, 807 },
    { 2096, 816 },
    { 2097, 825 },
    { 2098, 834 },
    { 2099, 843 },
    { 2100, 852 },
    { 2101, 861 },
    { 2102, 872 },
    { 2250, 880 },
    { 2251, 893 },
    { 2252, 906 },
    { 2253, 919 },
    { 2254, 932 },
    { 2255, 945 },
    { 2256, 958 },
    { 2257, 971 },
    { 2258, 984 },
    { 2259, 997 },
};
// Number of rows in mibToName.
// NOTE(review): this is a mutable global with external linkage; presumably it
// could be a file-local constant - confirm no other translation unit uses it.
int mibToNameSize = sizeof(mibToName) / sizeof(mibToName[0]);
207 | |
// Codec names packed back to back, each terminated by an explicit NUL.
// mibToName[i].index is the byte offset of the i-th name in this blob, so the
// order and spelling of the entries must stay in sync with that table.
static const char mibToNameTable[] =
    "US-ASCII\0"
    "ISO-8859-1\0"
    "ISO-8859-2\0"
    "ISO-8859-3\0"
    "ISO-8859-4\0"
    "ISO-8859-5\0"
    "ISO-8859-6\0"
    "ISO-8859-7\0"
    "ISO-8859-8\0"
    "ISO-8859-9\0"
    "ISO-8859-10\0"
    "ISO-2022-JP-1\0"
    "Shift_JIS\0"
    "EUC-JP\0"
    "US-ASCII\0"
    "EUC-KR\0"
    "ISO-2022-KR\0"
    "EUC-KR\0"
    "ISO-2022-JP\0"
    "ISO-2022-JP-2\0"
    "GB_2312-80\0"
    "ISO-8859-6\0"
    "ISO-8859-6\0"
    "ISO-8859-8\0"
    "ISO-8859-8\0"
    "ISO-2022-CN\0"
    "ISO-2022-CN-EXT\0"
    "UTF-8\0"
    "ISO-8859-13\0"
    "ISO-8859-14\0"
    "ISO-8859-15\0"
    "GBK\0"
    "GB18030\0"
    "UTF-16\0"
    "UTF-32\0"
    "SCSU\0"
    "UTF-7\0"
    "UTF-16BE\0"
    "UTF-16LE\0"
    "UTF-16\0"
    "CESU-8\0"
    "UTF-32\0"
    "UTF-32BE\0"
    "UTF-32LE\0"
    "BOCU-1\0"
    "hp-roman8\0"
    "Adobe-Standard-Encoding\0"
    "IBM850\0"
    "IBM862\0"
    "IBM-Thai\0"
    "Shift_JIS\0"
    "GB2312\0"
    "Big5\0"
    "macintosh\0"
    "IBM037\0"
    "IBM273\0"
    "IBM277\0"
    "IBM278\0"
    "IBM280\0"
    "IBM284\0"
    "IBM285\0"
    "IBM290\0"
    "IBM297\0"
    "IBM420\0"
    "IBM424\0"
    "IBM437\0"
    "IBM500\0"
    "cp851\0"
    "IBM852\0"
    "IBM855\0"
    "IBM857\0"
    "IBM860\0"
    "IBM861\0"
    "IBM863\0"
    "IBM864\0"
    "IBM865\0"
    "IBM868\0"
    "IBM869\0"
    "IBM870\0"
    "IBM871\0"
    "IBM918\0"
    "IBM1026\0"
    "KOI8-R\0"
    "HZ-GB-2312\0"
    "IBM866\0"
    "IBM775\0"
    "KOI8-U\0"
    "IBM00858\0"
    "IBM01140\0"
    "IBM01141\0"
    "IBM01142\0"
    "IBM01143\0"
    "IBM01144\0"
    "IBM01145\0"
    "IBM01146\0"
    "IBM01147\0"
    "IBM01148\0"
    "IBM01149\0"
    "Big5-HKSCS\0"
    "IBM1047\0"
    "windows-1250\0"
    "windows-1251\0"
    "windows-1252\0"
    "windows-1253\0"
    "windows-1254\0"
    "windows-1255\0"
    "windows-1256\0"
    "windows-1257\0"
    "windows-1258\0"
    "TIS-620\0";
319 | |
320 | static QTextCodec *loadQtCodec(const char *name) |
321 | { |
322 | if (!strcmp(s1: name, s2: "UTF-8" )) |
323 | return new QUtf8Codec; |
324 | if (!strcmp(s1: name, s2: "UTF-16" )) |
325 | return new QUtf16Codec; |
326 | if (!strcmp(s1: name, s2: "ISO-8859-1" )) |
327 | return new QLatin1Codec; |
328 | if (!strcmp(s1: name, s2: "UTF-16BE" )) |
329 | return new QUtf16BECodec; |
330 | if (!strcmp(s1: name, s2: "UTF-16LE" )) |
331 | return new QUtf16LECodec; |
332 | if (!strcmp(s1: name, s2: "UTF-32" )) |
333 | return new QUtf32Codec; |
334 | if (!strcmp(s1: name, s2: "UTF-32BE" )) |
335 | return new QUtf32BECodec; |
336 | if (!strcmp(s1: name, s2: "UTF-32LE" )) |
337 | return new QUtf32LECodec; |
338 | if (!strcmp(s1: name, s2: "ISO-8859-16" ) || !strcmp(s1: name, s2: "latin10" ) || !strcmp(s1: name, s2: "iso-ir-226" )) |
339 | return new QSimpleTextCodec(13 /* == 8859-16*/); |
340 | #if QT_CONFIG(codecs) |
341 | if (!strcmp(s1: name, s2: "TSCII" )) |
342 | return new QTsciiCodec; |
343 | if (!qstrnicmp(name, "iscii" , len: 5)) |
344 | return QIsciiCodec::create(name); |
345 | #endif |
346 | |
347 | return nullptr; |
348 | } |
349 | |
350 | /// \threadsafe |
351 | QList<QByteArray> QIcuCodec::availableCodecs() |
352 | { |
353 | QList<QByteArray> codecs; |
354 | int n = ucnv_countAvailable(); |
355 | for (int i = 0; i < n; ++i) { |
356 | const char *name = ucnv_getAvailableName(n: i); |
357 | |
358 | UErrorCode error = U_ZERO_ERROR; |
359 | const char *standardName = ucnv_getStandardName(name, standard: "MIME" , pErrorCode: &error); |
360 | if (U_FAILURE(code: error) || !standardName) { |
361 | error = U_ZERO_ERROR; |
362 | standardName = ucnv_getStandardName(name, standard: "IANA" , pErrorCode: &error); |
363 | } |
364 | if (U_FAILURE(code: error)) |
365 | continue; |
366 | |
367 | error = U_ZERO_ERROR; |
368 | int ac = ucnv_countAliases(alias: standardName, pErrorCode: &error); |
369 | if (U_FAILURE(code: error)) |
370 | continue; |
371 | for (int j = 0; j < ac; ++j) { |
372 | error = U_ZERO_ERROR; |
373 | const char *alias = ucnv_getAlias(alias: standardName, n: j, pErrorCode: &error); |
374 | if (!U_SUCCESS(code: error)) |
375 | continue; |
376 | codecs += alias; |
377 | } |
378 | } |
379 | |
380 | // handled by Qt and not in ICU: |
381 | codecs += "TSCII" ; |
382 | |
383 | return codecs; |
384 | } |
385 | |
386 | /// \threadsafe |
387 | QList<int> QIcuCodec::availableMibs() |
388 | { |
389 | QList<int> mibs; |
390 | mibs.reserve(asize: mibToNameSize + 1); |
391 | for (int i = 0; i < mibToNameSize; ++i) |
392 | mibs += mibToName[i].mib; |
393 | |
394 | // handled by Qt and not in ICU: |
395 | mibs += 2107; // TSCII |
396 | |
397 | return mibs; |
398 | } |
399 | |
400 | QTextCodec *QIcuCodec::defaultCodecUnlocked() |
401 | { |
402 | QTextCodecData *globalData = QTextCodecData::instance(); |
403 | if (!globalData) |
404 | return nullptr; |
405 | QTextCodec *c = globalData->codecForLocale.loadAcquire(); |
406 | if (c) |
407 | return c; |
408 | |
409 | #if defined(QT_LOCALE_IS_UTF8) |
410 | const char *name = "UTF-8" ; |
411 | #else |
412 | const char *name = ucnv_getDefaultName(); |
413 | #endif |
414 | c = codecForNameUnlocked(name); |
415 | globalData->codecForLocale.storeRelease(newValue: c); |
416 | return c; |
417 | } |
418 | |
419 | |
420 | QTextCodec *QIcuCodec::codecForNameUnlocked(const char *name) |
421 | { |
422 | // backwards compatibility with Qt 4.x |
423 | if (!qstrcmp(str1: name, str2: "CP949" )) |
424 | name = "windows-949" ; |
425 | else if (!qstrcmp(str1: name, str2: "Apple Roman" )) |
426 | name = "macintosh" ; |
427 | // these are broken data in ICU 4.4, and can't be resolved even though they are aliases to tis-620 |
428 | if (!qstrcmp(str1: name, str2: "windows-874-2000" ) |
429 | || !qstrcmp(str1: name, str2: "windows-874" ) |
430 | || !qstrcmp(str1: name, str2: "MS874" ) |
431 | || !qstrcmp(str1: name, str2: "x-windows-874" ) |
432 | || !qstrcmp(str1: name, str2: "ISO 8859-11" )) |
433 | name = "TIS-620" ; |
434 | |
435 | UErrorCode error = U_ZERO_ERROR; |
436 | // MIME gives better default names |
437 | const char *standardName = ucnv_getStandardName(name, standard: "MIME" , pErrorCode: &error); |
438 | if (U_FAILURE(code: error) || !standardName) { |
439 | error = U_ZERO_ERROR; |
440 | standardName = ucnv_getStandardName(name, standard: "IANA" , pErrorCode: &error); |
441 | } |
442 | bool qt_only = false; |
443 | if (U_FAILURE(code: error) || !standardName) { |
444 | standardName = name; |
445 | qt_only = true; |
446 | } else { |
447 | // correct some issues where the ICU data set contains duplicated entries. |
448 | // Where this happens it's because one data set is a subset of another. We |
449 | // always use the larger data set. |
450 | |
451 | if (qstrcmp(str1: standardName, str2: "GB2312" ) == 0 || qstrcmp(str1: standardName, str2: "GB_2312-80" ) == 0) |
452 | standardName = "GBK" ; |
453 | else if (qstrcmp(str1: standardName, str2: "KSC_5601" ) == 0 || qstrcmp(str1: standardName, str2: "EUC-KR" ) == 0 || qstrcmp(str1: standardName, str2: "cp1363" ) == 0) |
454 | standardName = "windows-949" ; |
455 | } |
456 | |
457 | QTextCodecData *globalData = QTextCodecData::instance(); |
458 | QTextCodecCache *cache = &globalData->codecCache; |
459 | |
460 | QTextCodec *codec; |
461 | if (cache) { |
462 | codec = cache->value(key: standardName); |
463 | if (codec) |
464 | return codec; |
465 | } |
466 | |
467 | for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it) { |
468 | QTextCodec *cursor = *it; |
469 | if (qTextCodecNameMatch(n: cursor->name(), h: standardName)) { |
470 | if (cache) |
471 | cache->insert(key: standardName, value: cursor); |
472 | return cursor; |
473 | } |
474 | QList<QByteArray> aliases = cursor->aliases(); |
475 | for (ByteArrayListConstIt ait = aliases.constBegin(), acend = aliases.constEnd(); ait != acend; ++ait) { |
476 | if (qTextCodecNameMatch(n: *ait, h: standardName)) { |
477 | if (cache) |
478 | cache->insert(key: standardName, value: cursor); |
479 | return cursor; |
480 | } |
481 | } |
482 | } |
483 | |
484 | QTextCodec *c = loadQtCodec(name: standardName); |
485 | if (c) |
486 | return c; |
487 | |
488 | if (qt_only) |
489 | return nullptr; |
490 | |
491 | // check whether there is really a converter for the name available. |
492 | UConverter *conv = ucnv_open(converterName: standardName, err: &error); |
493 | if (!conv) { |
494 | qDebug(msg: "codecForName: ucnv_open failed %s %s" , standardName, u_errorName(code: error)); |
495 | return nullptr; |
496 | } |
497 | //qDebug() << "QIcuCodec: Standard name for " << name << "is" << standardName; |
498 | ucnv_close(converter: conv); |
499 | |
500 | |
501 | c = new QIcuCodec(standardName); |
502 | if (cache) |
503 | cache->insert(key: standardName, value: c); |
504 | return c; |
505 | } |
506 | |
507 | |
508 | QTextCodec *QIcuCodec::codecForMibUnlocked(int mib) |
509 | { |
510 | for (int i = 0; i < mibToNameSize; ++i) { |
511 | if (mibToName[i].mib == mib) |
512 | return codecForNameUnlocked(name: mibToNameTable + mibToName[i].index); |
513 | } |
514 | |
515 | if (mib == 2107) |
516 | return codecForNameUnlocked(name: "TSCII" ); |
517 | |
518 | return nullptr; |
519 | } |
520 | |
521 | |
522 | QIcuCodec::QIcuCodec(const char *name) |
523 | : m_name(name) |
524 | { |
525 | } |
526 | |
527 | QIcuCodec::~QIcuCodec() |
528 | { |
529 | } |
530 | |
531 | /*! |
532 | \internal |
533 | |
534 | Custom callback for the ICU from Unicode conversion. It's invoked when the |
535 | conversion from Unicode detects illegal or unrecognized character. |
536 | |
537 | Assumes that context contains a pointer to QTextCodec::ConverterState |
538 | structure. Updates its invalid characters count and calls a default |
539 | callback, that replaces the invalid characters properly. |
540 | */ |
541 | static void customFromUnicodeSubstitutionCallback(const void *context, |
542 | UConverterFromUnicodeArgs *fromUArgs, |
543 | const UChar *codeUnits, |
544 | int32_t length, |
545 | UChar32 codePoint, |
546 | UConverterCallbackReason reason, |
547 | UErrorCode *err) |
548 | { |
549 | auto *state = reinterpret_cast<QTextCodec::ConverterState *>(const_cast<void *>(context)); |
550 | if (state) |
551 | state->invalidChars++; |
552 | // Call the default callback that replaces all illegal or unrecognized |
553 | // sequences with the substitute string |
554 | UCNV_FROM_U_CALLBACK_SUBSTITUTE(context: nullptr, fromUArgs, codeUnits, length, codePoint, reason, err); |
555 | } |
556 | |
557 | UConverter *QIcuCodec::getConverter(QTextCodec::ConverterState *state) const |
558 | { |
559 | UConverter *conv = nullptr; |
560 | if (state) { |
561 | if (!state->d[0]) { |
562 | // first time |
563 | state->clearFn = qIcuCodecStateFree; |
564 | UErrorCode error = U_ZERO_ERROR; |
565 | state->d[0] = ucnv_open(converterName: m_name, err: &error); |
566 | ucnv_setSubstChars(converter: static_cast<UConverter *>(state->d[0]), |
567 | subChars: state->flags & QTextCodec::ConvertInvalidToNull ? "\0" : "?" , len: 1, err: &error); |
568 | if (U_FAILURE(code: error)) { |
569 | qDebug(msg: "getConverter(state) ucnv_open failed %s %s" , m_name, u_errorName(code: error)); |
570 | } else { |
571 | error = U_ZERO_ERROR; |
572 | ucnv_setFromUCallBack(converter: static_cast<UConverter *>(state->d[0]), |
573 | newAction: customFromUnicodeSubstitutionCallback, newContext: state, oldAction: nullptr, |
574 | oldContext: nullptr, err: &error); |
575 | if (U_FAILURE(code: error)) { |
576 | qDebug(msg: "getConverter(state) failed to install custom callback. " |
577 | "canEncode() may report incorrect results." ); |
578 | } |
579 | } |
580 | } |
581 | conv = static_cast<UConverter *>(state->d[0]); |
582 | } |
583 | if (!conv) { |
584 | // stateless conversion |
585 | UErrorCode error = U_ZERO_ERROR; |
586 | conv = ucnv_open(converterName: m_name, err: &error); |
587 | ucnv_setSubstChars(converter: conv, subChars: "?" , len: 1, err: &error); |
588 | if (U_FAILURE(code: error)) |
589 | qDebug(msg: "getConverter(no state) ucnv_open failed %s %s" , m_name, u_errorName(code: error)); |
590 | } |
591 | return conv; |
592 | } |
593 | |
594 | QString QIcuCodec::convertToUnicode(const char *chars, int length, QTextCodec::ConverterState *state) const |
595 | { |
596 | UConverter *conv = getConverter(state); |
597 | |
598 | QString string(length + 2, QT_PREPEND_NAMESPACE(Qt::Uninitialized)); |
599 | |
600 | const char *end = chars + length; |
601 | int convertedChars = 0; |
602 | while (1) { |
603 | UChar *uc = (UChar *)string.data(); |
604 | UChar *ucEnd = uc + string.size(); |
605 | uc += convertedChars; |
606 | UErrorCode error = U_ZERO_ERROR; |
607 | ucnv_toUnicode(converter: conv, |
608 | target: &uc, targetLimit: ucEnd, |
609 | source: &chars, sourceLimit: end, |
610 | offsets: nullptr, flush: false, err: &error); |
611 | if (!U_SUCCESS(code: error) && error != U_BUFFER_OVERFLOW_ERROR) { |
612 | qDebug(msg: "convertToUnicode failed: %s" , u_errorName(code: error)); |
613 | break; |
614 | } |
615 | // flag the state if we have incomplete input |
616 | if (error == U_TRUNCATED_CHAR_FOUND) |
617 | state->remainingChars = 1; |
618 | |
619 | convertedChars = uc - (UChar *)string.data(); |
620 | if (chars >= end) |
621 | break; |
622 | string.resize(size: string.size()*2); |
623 | } |
624 | string.resize(size: convertedChars); |
625 | |
626 | if (!state) |
627 | ucnv_close(converter: conv); |
628 | return string; |
629 | } |
630 | |
631 | |
632 | QByteArray QIcuCodec::convertFromUnicode(const QChar *unicode, int length, QTextCodec::ConverterState *state) const |
633 | { |
634 | UConverter *conv = getConverter(state); |
635 | |
636 | int requiredLength = UCNV_GET_MAX_BYTES_FOR_STRING(length, ucnv_getMaxCharSize(conv)); |
637 | QByteArray string(requiredLength, QT_PREPEND_NAMESPACE(Qt::Uninitialized)); |
638 | |
639 | const UChar *uc = (const UChar *)unicode; |
640 | const UChar *end = uc + length; |
641 | int convertedChars = 0; |
642 | while (1) { |
643 | char *ch = (char *)string.data(); |
644 | char *chEnd = ch + string.size(); |
645 | ch += convertedChars; |
646 | UErrorCode error = U_ZERO_ERROR; |
647 | ucnv_fromUnicode(converter: conv, |
648 | target: &ch, targetLimit: chEnd, |
649 | source: &uc, sourceLimit: end, |
650 | offsets: nullptr, flush: false, err: &error); |
651 | if (!U_SUCCESS(code: error)) |
652 | qDebug(msg: "convertFromUnicode failed: %s" , u_errorName(code: error)); |
653 | // flag the state if we have incomplete input |
654 | if (error == U_TRUNCATED_CHAR_FOUND) |
655 | state->remainingChars = 1; |
656 | |
657 | convertedChars = ch - string.data(); |
658 | if (uc >= end) |
659 | break; |
660 | string.resize(size: string.size()*2); |
661 | } |
662 | string.resize(size: convertedChars); |
663 | |
664 | if (!state) |
665 | ucnv_close(converter: conv); |
666 | |
667 | return string; |
668 | } |
669 | |
670 | |
671 | QByteArray QIcuCodec::name() const |
672 | { |
673 | return m_name; |
674 | } |
675 | |
676 | |
677 | QList<QByteArray> QIcuCodec::aliases() const |
678 | { |
679 | UErrorCode error = U_ZERO_ERROR; |
680 | |
681 | int n = ucnv_countAliases(alias: m_name, pErrorCode: &error); |
682 | |
683 | QList<QByteArray> aliases; |
684 | for (int i = 0; i < n; ++i) { |
685 | const char *a = ucnv_getAlias(alias: m_name, n: i, pErrorCode: &error); |
686 | // skip the canonical name |
687 | if (!a || !qstrcmp(str1: a, str2: m_name)) |
688 | continue; |
689 | aliases += a; |
690 | } |
691 | |
692 | return aliases; |
693 | } |
694 | |
695 | |
696 | int QIcuCodec::mibEnum() const |
697 | { |
698 | for (int i = 0; i < mibToNameSize; ++i) { |
699 | if (qTextCodecNameMatch(n: m_name, h: (mibToNameTable + mibToName[i].index))) |
700 | return mibToName[i].mib; |
701 | } |
702 | |
703 | return 0; |
704 | } |
705 | |
706 | QT_END_NAMESPACE |
707 | |