| 1 | // Copyright (C) 2016 The Qt Company Ltd. | 
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only | 
| 3 |  | 
| 4 | #include "qicucodec_p.h" | 
| 5 |  | 
| 6 | #include "qtextcodec_p.h" | 
| 7 | #include "qutfcodec_p.h" | 
| 8 | #include "qlatincodec_p.h" | 
| 9 | #include "qsimplecodec_p.h" | 
| 10 | #include "qdebug.h" | 
| 11 |  | 
| 12 | #include "unicode/ucnv.h" | 
| 13 |  | 
| 14 | #if QT_CONFIG(codecs) | 
| 15 | #include "qtsciicodec_p.h" | 
| 16 | #include "qisciicodec_p.h" | 
| 17 | #endif | 
| 18 |  | 
| 19 | QT_BEGIN_NAMESPACE | 
| 20 |  | 
| 21 | typedef QList<QTextCodec*>::ConstIterator TextCodecListConstIt; | 
| 22 | typedef QList<QByteArray>::ConstIterator ByteArrayListConstIt; | 
| 23 |  | 
| 24 | static void qIcuCodecStateFree(QTextCodec::ConverterState *state) noexcept | 
| 25 | { | 
| 26 |     ucnv_close(converter: static_cast<UConverter *>(state->d[0])); | 
| 27 | } | 
| 28 |  | 
| 29 | bool qTextCodecNameMatch(const char *n, const char *h) | 
| 30 | { | 
| 31 |     return ucnv_compareNames(name1: n, name2: h) == 0; | 
| 32 | } | 
| 33 |  | 
| 34 | /* The list below is generated from http://www.iana.org/assignments/character-sets/ | 
| 35 |    using the snippet of code below: | 
| 36 |  | 
| 37 | #include <QtCore> | 
| 38 | #include <unicode/ucnv.h> | 
| 39 |  | 
| 40 | int main(int argc, char **argv) | 
| 41 | { | 
| 42 |     QCoreApplication app(argc, argv); | 
| 43 |  | 
| 44 |     QFile file("character-sets.txt"); | 
| 45 |     file.open(QFile::ReadOnly); | 
| 46 |     QByteArray name; | 
| 47 |     int mib = -1; | 
| 48 |     QByteArray nameList; | 
| 49 |     int pos = 0; | 
| 50 |     while (!file.atEnd()) { | 
| 51 |         QByteArray s = file.readLine().trimmed(); | 
| 52 |         if (s.isEmpty()) { | 
| 53 |             if (mib != -1) { | 
| 54 |                 UErrorCode error = U_ZERO_ERROR; | 
| 55 |                 const char *standard_name = ucnv_getStandardName(name, "MIME", &error); | 
| 56 |                 if (U_FAILURE(error) || !standard_name) { | 
| 57 |                     error = U_ZERO_ERROR; | 
| 58 |                     standard_name = ucnv_getStandardName(name, "IANA", &error); | 
| 59 |                 } | 
| 60 |                 UConverter *conv = ucnv_open(standard_name, &error); | 
| 61 |                 if (!U_FAILURE(error) && conv && standard_name) { | 
| 62 |                     ucnv_close(conv); | 
| 63 |                     printf("    { %d, %d },\n", mib, pos); | 
| 64 |                     nameList += "\""; | 
| 65 |                     nameList += standard_name; | 
| 66 |                     nameList += "\\0\"\n"; | 
| 67 |                     pos += strlen(standard_name) + 1; | 
| 68 |                 } | 
| 69 |             } | 
| 70 |             name = QByteArray(); | 
| 71 |             mib = -1; | 
| 72 |         } | 
| 73 |         if (s.startsWith("Name: ")) { | 
| 74 |             name = s.mid(5).trimmed(); | 
| 75 |             if (name.indexOf(' ') > 0) | 
| 76 |                 name = name.left(name.indexOf(' ')); | 
| 77 |         } | 
| 78 |         if (s.startsWith("MIBenum:")) | 
| 79 |             mib = s.mid(8).trimmed().toInt(); | 
| 80 |         if (s.startsWith("Alias:") && s.contains("MIME")) { | 
| 81 |             name = s.mid(6).trimmed(); | 
| 82 |             name = name.left(name.indexOf(' ')).trimmed(); | 
| 83 |         } | 
| 84 |     } | 
| 85 |     qDebug() << nameList; | 
| 86 | } | 
| 87 | */ | 
| 88 |  | 
// One row of the MIB lookup table: `mib` is the MIB enum value and `index`
// is the byte offset of the matching NUL-terminated name in mibToNameTable.
struct MibToName {
    short mib;
    short index;
};

// Generated data (see the generator snippet above); rows are { mib, index }.
static const MibToName mibToName[] = {
    { 3, 0 }, { 4, 9 }, { 5, 20 }, { 6, 31 },
    { 7, 42 }, { 8, 53 }, { 9, 64 }, { 10, 75 },
    { 11, 86 }, { 12, 97 }, { 13, 108 }, { 16, 120 },
    { 17, 134 }, { 18, 144 }, { 30, 151 }, { 36, 160 },
    { 37, 167 }, { 38, 179 }, { 39, 186 }, { 40, 198 },
    { 57, 212 }, { 81, 223 }, { 82, 234 }, { 84, 245 },
    { 85, 256 }, { 104, 267 }, { 105, 279 }, { 106, 295 },
    { 109, 301 }, { 110, 313 }, { 111, 325 }, { 113, 337 },
    { 114, 341 }, { 1000, 349 }, { 1001, 356 }, { 1011, 363 },
    { 1012, 368 }, { 1013, 374 }, { 1014, 383 }, { 1015, 392 },
    { 1016, 399 }, { 1017, 406 }, { 1018, 413 }, { 1019, 422 },
    { 1020, 431 }, { 2004, 438 }, { 2005, 448 }, { 2009, 472 },
    { 2013, 479 }, { 2016, 486 }, { 2024, 495 }, { 2025, 505 },
    { 2026, 512 }, { 2027, 517 }, { 2028, 527 }, { 2030, 534 },
    { 2033, 541 }, { 2034, 548 }, { 2035, 555 }, { 2037, 562 },
    { 2038, 569 }, { 2039, 576 }, { 2040, 583 }, { 2041, 590 },
    { 2043, 597 }, { 2011, 604 }, { 2044, 611 }, { 2045, 618 },
    { 2010, 624 }, { 2046, 631 }, { 2047, 638 }, { 2048, 645 },
    { 2049, 652 }, { 2050, 659 }, { 2051, 666 }, { 2052, 673 },
    { 2053, 680 }, { 2054, 687 }, { 2055, 694 }, { 2056, 701 },
    { 2062, 708 }, { 2063, 715 }, { 2084, 723 }, { 2085, 730 },
    { 2086, 741 }, { 2087, 748 }, { 2088, 755 }, { 2089, 762 },
    { 2091, 771 }, { 2092, 780 }, { 2093, 789 }, { 2094, 798 },
    { 2095, 807 }, { 2096, 816 }, { 2097, 825 }, { 2098, 834 },
    { 2099, 843 }, { 2100, 852 }, { 2101, 861 }, { 2102, 872 },
    { 2250, 880 }, { 2251, 893 }, { 2252, 906 }, { 2253, 919 },
    { 2254, 932 }, { 2255, 945 }, { 2256, 958 }, { 2257, 971 },
    { 2258, 984 }, { 2259, 997 },
};
// Number of rows in mibToName.
int mibToNameSize = sizeof(mibToName) / sizeof(mibToName[0]);
| 207 |  | 
// Packed, NUL-separated codec names. Entries are addressed by byte offset
// (MibToName::index), so neither the order nor the spelling may change.
static const char mibToNameTable[] =
    "US-ASCII\0" "ISO-8859-1\0" "ISO-8859-2\0" "ISO-8859-3\0" "ISO-8859-4\0"
    "ISO-8859-5\0" "ISO-8859-6\0" "ISO-8859-7\0" "ISO-8859-8\0" "ISO-8859-9\0"
    "ISO-8859-10\0" "ISO-2022-JP-1\0" "Shift_JIS\0" "EUC-JP\0" "US-ASCII\0"
    "EUC-KR\0" "ISO-2022-KR\0" "EUC-KR\0" "ISO-2022-JP\0" "ISO-2022-JP-2\0"
    "GB_2312-80\0" "ISO-8859-6\0" "ISO-8859-6\0" "ISO-8859-8\0" "ISO-8859-8\0"
    "ISO-2022-CN\0" "ISO-2022-CN-EXT\0" "UTF-8\0" "ISO-8859-13\0" "ISO-8859-14\0"
    "ISO-8859-15\0" "GBK\0" "GB18030\0" "UTF-16\0" "UTF-32\0"
    "SCSU\0" "UTF-7\0" "UTF-16BE\0" "UTF-16LE\0" "UTF-16\0"
    "CESU-8\0" "UTF-32\0" "UTF-32BE\0" "UTF-32LE\0" "BOCU-1\0"
    "hp-roman8\0" "Adobe-Standard-Encoding\0" "IBM850\0" "IBM862\0" "IBM-Thai\0"
    "Shift_JIS\0" "GB2312\0" "Big5\0" "macintosh\0" "IBM037\0"
    "IBM273\0" "IBM277\0" "IBM278\0" "IBM280\0" "IBM284\0"
    "IBM285\0" "IBM290\0" "IBM297\0" "IBM420\0" "IBM424\0"
    "IBM437\0" "IBM500\0" "cp851\0" "IBM852\0" "IBM855\0"
    "IBM857\0" "IBM860\0" "IBM861\0" "IBM863\0" "IBM864\0"
    "IBM865\0" "IBM868\0" "IBM869\0" "IBM870\0" "IBM871\0"
    "IBM918\0" "IBM1026\0" "KOI8-R\0" "HZ-GB-2312\0" "IBM866\0"
    "IBM775\0" "KOI8-U\0" "IBM00858\0" "IBM01140\0" "IBM01141\0"
    "IBM01142\0" "IBM01143\0" "IBM01144\0" "IBM01145\0" "IBM01146\0"
    "IBM01147\0" "IBM01148\0" "IBM01149\0" "Big5-HKSCS\0" "IBM1047\0"
    "windows-1250\0" "windows-1251\0" "windows-1252\0" "windows-1253\0" "windows-1254\0"
    "windows-1255\0" "windows-1256\0" "windows-1257\0" "windows-1258\0" "TIS-620\0";
| 319 |  | 
| 320 | static QTextCodec *loadQtCodec(const char *name) | 
| 321 | { | 
| 322 |     if (!strcmp(s1: name, s2: "UTF-8" )) | 
| 323 |         return new QUtf8Codec; | 
| 324 |     if (!strcmp(s1: name, s2: "UTF-16" )) | 
| 325 |         return new QUtf16Codec; | 
| 326 |     if (!strcmp(s1: name, s2: "ISO-8859-1" )) | 
| 327 |         return new QLatin1Codec; | 
| 328 |     if (!strcmp(s1: name, s2: "UTF-16BE" )) | 
| 329 |         return new QUtf16BECodec; | 
| 330 |     if (!strcmp(s1: name, s2: "UTF-16LE" )) | 
| 331 |         return new QUtf16LECodec; | 
| 332 |     if (!strcmp(s1: name, s2: "UTF-32" )) | 
| 333 |         return new QUtf32Codec; | 
| 334 |     if (!strcmp(s1: name, s2: "UTF-32BE" )) | 
| 335 |         return new QUtf32BECodec; | 
| 336 |     if (!strcmp(s1: name, s2: "UTF-32LE" )) | 
| 337 |         return new QUtf32LECodec; | 
| 338 |     if (!strcmp(s1: name, s2: "ISO-8859-16" ) || !strcmp(s1: name, s2: "latin10" ) || !strcmp(s1: name, s2: "iso-ir-226" )) | 
| 339 |         return new QSimpleTextCodec(13 /* == 8859-16*/); | 
| 340 | #if QT_CONFIG(codecs) | 
| 341 |     if (!strcmp(s1: name, s2: "TSCII" )) | 
| 342 |         return new QTsciiCodec; | 
| 343 |     if (!qstrnicmp(name, "iscii" , len: 5)) | 
| 344 |         return QIsciiCodec::create(name); | 
| 345 | #endif | 
| 346 |  | 
| 347 |     return nullptr; | 
| 348 | } | 
| 349 |  | 
| 350 | /// \threadsafe | 
| 351 | QList<QByteArray> QIcuCodec::availableCodecs() | 
| 352 | { | 
| 353 |     QList<QByteArray> codecs; | 
| 354 |     int n = ucnv_countAvailable(); | 
| 355 |     for (int i = 0; i < n; ++i) { | 
| 356 |         const char *name = ucnv_getAvailableName(n: i); | 
| 357 |  | 
| 358 |         UErrorCode error = U_ZERO_ERROR; | 
| 359 |         const char *standardName = ucnv_getStandardName(name, standard: "MIME" , pErrorCode: &error); | 
| 360 |         if (U_FAILURE(code: error) || !standardName) { | 
| 361 |             error = U_ZERO_ERROR; | 
| 362 |             standardName = ucnv_getStandardName(name, standard: "IANA" , pErrorCode: &error); | 
| 363 |         } | 
| 364 |         if (U_FAILURE(code: error)) | 
| 365 |             continue; | 
| 366 |  | 
| 367 |         error = U_ZERO_ERROR; | 
| 368 |         int ac = ucnv_countAliases(alias: standardName, pErrorCode: &error); | 
| 369 |         if (U_FAILURE(code: error)) | 
| 370 |             continue; | 
| 371 |         for (int j = 0; j < ac; ++j) { | 
| 372 |             error = U_ZERO_ERROR; | 
| 373 |             const char *alias = ucnv_getAlias(alias: standardName, n: j, pErrorCode: &error); | 
| 374 |             if (!U_SUCCESS(code: error)) | 
| 375 |                 continue; | 
| 376 |             codecs += alias; | 
| 377 |         } | 
| 378 |     } | 
| 379 |  | 
| 380 |     // handled by Qt and not in ICU: | 
| 381 |     codecs += "TSCII" ; | 
| 382 |  | 
| 383 |     return codecs; | 
| 384 | } | 
| 385 |  | 
| 386 | /// \threadsafe | 
| 387 | QList<int> QIcuCodec::availableMibs() | 
| 388 | { | 
| 389 |     QList<int> mibs; | 
| 390 |     mibs.reserve(asize: mibToNameSize + 1); | 
| 391 |     for (int i = 0; i < mibToNameSize; ++i) | 
| 392 |         mibs += mibToName[i].mib; | 
| 393 |  | 
| 394 |     // handled by Qt and not in ICU: | 
| 395 |     mibs += 2107; // TSCII | 
| 396 |  | 
| 397 |     return mibs; | 
| 398 | } | 
| 399 |  | 
| 400 | QTextCodec *QIcuCodec::defaultCodecUnlocked() | 
| 401 | { | 
| 402 |     QTextCodecData *globalData = QTextCodecData::instance(); | 
| 403 |     if (!globalData) | 
| 404 |         return nullptr; | 
| 405 |     QTextCodec *c = globalData->codecForLocale.loadAcquire(); | 
| 406 |     if (c) | 
| 407 |         return c; | 
| 408 |  | 
| 409 | #if defined(QT_LOCALE_IS_UTF8) | 
| 410 |     const char *name = "UTF-8" ; | 
| 411 | #else | 
| 412 |     const char *name = ucnv_getDefaultName(); | 
| 413 | #endif | 
| 414 |     c = codecForNameUnlocked(name); | 
| 415 |     globalData->codecForLocale.storeRelease(newValue: c); | 
| 416 |     return c; | 
| 417 | } | 
| 418 |  | 
| 419 |  | 
| 420 | QTextCodec *QIcuCodec::codecForNameUnlocked(const char *name) | 
| 421 | { | 
| 422 |     // backwards compatibility with Qt 4.x | 
| 423 |     if (!qstrcmp(str1: name, str2: "CP949" )) | 
| 424 |         name = "windows-949" ; | 
| 425 |     else if (!qstrcmp(str1: name, str2: "Apple Roman" )) | 
| 426 |         name = "macintosh" ; | 
| 427 |     // these are broken data in ICU 4.4, and can't be resolved even though they are aliases to tis-620 | 
| 428 |     if (!qstrcmp(str1: name, str2: "windows-874-2000" ) | 
| 429 |         || !qstrcmp(str1: name, str2: "windows-874" ) | 
| 430 |         || !qstrcmp(str1: name, str2: "MS874" ) | 
| 431 |         || !qstrcmp(str1: name, str2: "x-windows-874" ) | 
| 432 |         || !qstrcmp(str1: name, str2: "ISO 8859-11" )) | 
| 433 |         name = "TIS-620" ; | 
| 434 |  | 
| 435 |     UErrorCode error = U_ZERO_ERROR; | 
| 436 |     // MIME gives better default names | 
| 437 |     const char *standardName = ucnv_getStandardName(name, standard: "MIME" , pErrorCode: &error); | 
| 438 |     if (U_FAILURE(code: error) || !standardName) { | 
| 439 |         error = U_ZERO_ERROR; | 
| 440 |         standardName = ucnv_getStandardName(name, standard: "IANA" , pErrorCode: &error); | 
| 441 |     } | 
| 442 |     bool qt_only = false; | 
| 443 |     if (U_FAILURE(code: error) || !standardName) { | 
| 444 |         standardName = name; | 
| 445 |         qt_only = true; | 
| 446 |     } else { | 
| 447 |         // correct some issues where the ICU data set contains duplicated entries. | 
| 448 |         // Where this happens it's because one data set is a subset of another. We | 
| 449 |         // always use the larger data set. | 
| 450 |  | 
| 451 |         if (qstrcmp(str1: standardName, str2: "GB2312" ) == 0 || qstrcmp(str1: standardName, str2: "GB_2312-80" ) == 0) | 
| 452 |             standardName = "GBK" ; | 
| 453 |         else if (qstrcmp(str1: standardName, str2: "KSC_5601" ) == 0 || qstrcmp(str1: standardName, str2: "EUC-KR" ) == 0 || qstrcmp(str1: standardName, str2: "cp1363" ) == 0) | 
| 454 |             standardName = "windows-949" ; | 
| 455 |     } | 
| 456 |  | 
| 457 |     QTextCodecData *globalData = QTextCodecData::instance(); | 
| 458 |     QTextCodecCache *cache = &globalData->codecCache; | 
| 459 |  | 
| 460 |     QTextCodec *codec; | 
| 461 |     if (cache) { | 
| 462 |         codec = cache->value(key: standardName); | 
| 463 |         if (codec) | 
| 464 |             return codec; | 
| 465 |     } | 
| 466 |  | 
| 467 |     for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it) { | 
| 468 |         QTextCodec *cursor = *it; | 
| 469 |         if (qTextCodecNameMatch(n: cursor->name(), h: standardName)) { | 
| 470 |             if (cache) | 
| 471 |                 cache->insert(key: standardName, value: cursor); | 
| 472 |             return cursor; | 
| 473 |         } | 
| 474 |         QList<QByteArray> aliases = cursor->aliases(); | 
| 475 |         for (ByteArrayListConstIt ait = aliases.constBegin(), acend = aliases.constEnd(); ait != acend; ++ait) { | 
| 476 |             if (qTextCodecNameMatch(n: *ait, h: standardName)) { | 
| 477 |                 if (cache) | 
| 478 |                     cache->insert(key: standardName, value: cursor); | 
| 479 |                 return cursor; | 
| 480 |             } | 
| 481 |         } | 
| 482 |     } | 
| 483 |  | 
| 484 |     QTextCodec *c = loadQtCodec(name: standardName); | 
| 485 |     if (c) | 
| 486 |         return c; | 
| 487 |  | 
| 488 |     if (qt_only) | 
| 489 |         return nullptr; | 
| 490 |  | 
| 491 |     // check whether there is really a converter for the name available. | 
| 492 |     UConverter *conv = ucnv_open(converterName: standardName, err: &error); | 
| 493 |     if (!conv) { | 
| 494 |         qDebug(msg: "codecForName: ucnv_open failed %s %s" , standardName, u_errorName(code: error)); | 
| 495 |         return nullptr; | 
| 496 |     } | 
| 497 |     //qDebug() << "QIcuCodec: Standard name for " << name << "is" << standardName; | 
| 498 |     ucnv_close(converter: conv); | 
| 499 |  | 
| 500 |  | 
| 501 |     c = new QIcuCodec(standardName); | 
| 502 |     if (cache) | 
| 503 |         cache->insert(key: standardName, value: c); | 
| 504 |     return c; | 
| 505 | } | 
| 506 |  | 
| 507 |  | 
| 508 | QTextCodec *QIcuCodec::codecForMibUnlocked(int mib) | 
| 509 | { | 
| 510 |     for (int i = 0; i < mibToNameSize; ++i) { | 
| 511 |         if (mibToName[i].mib == mib) | 
| 512 |             return codecForNameUnlocked(name: mibToNameTable + mibToName[i].index); | 
| 513 |     } | 
| 514 |  | 
| 515 |     if (mib == 2107) | 
| 516 |         return codecForNameUnlocked(name: "TSCII" ); | 
| 517 |  | 
| 518 |     return nullptr; | 
| 519 | } | 
| 520 |  | 
| 521 |  | 
| 522 | QIcuCodec::QIcuCodec(const char *name) | 
| 523 |     : m_name(name) | 
| 524 | { | 
| 525 | } | 
| 526 |  | 
| 527 | QIcuCodec::~QIcuCodec() | 
| 528 | { | 
| 529 | } | 
| 530 |  | 
| 531 | /*! | 
| 532 |     \internal | 
| 533 |  | 
| 534 |     Custom callback for the ICU from Unicode conversion. It's invoked when the | 
| 535 |     conversion from Unicode detects illegal or unrecognized character. | 
| 536 |  | 
| 537 |     Assumes that context contains a pointer to QTextCodec::ConverterState | 
| 538 |     structure. Updates its invalid characters count and calls a default | 
| 539 |     callback, that replaces the invalid characters properly. | 
| 540 | */ | 
| 541 | static void customFromUnicodeSubstitutionCallback(const void *context, | 
| 542 |                                                   UConverterFromUnicodeArgs *fromUArgs, | 
| 543 |                                                   const UChar *codeUnits, | 
| 544 |                                                   int32_t length, | 
| 545 |                                                   UChar32 codePoint, | 
| 546 |                                                   UConverterCallbackReason reason, | 
| 547 |                                                   UErrorCode *err) | 
| 548 | { | 
| 549 |     auto *state = reinterpret_cast<QTextCodec::ConverterState *>(const_cast<void *>(context)); | 
| 550 |     if (state) | 
| 551 |         state->invalidChars++; | 
| 552 |     // Call the default callback that replaces all illegal or unrecognized | 
| 553 |     // sequences with the substitute string | 
| 554 |     UCNV_FROM_U_CALLBACK_SUBSTITUTE(context: nullptr, fromUArgs, codeUnits, length, codePoint, reason, err); | 
| 555 | } | 
| 556 |  | 
| 557 | UConverter *QIcuCodec::getConverter(QTextCodec::ConverterState *state) const | 
| 558 | { | 
| 559 |     UConverter *conv = nullptr; | 
| 560 |     if (state) { | 
| 561 |         if (!state->d[0]) { | 
| 562 |             // first time | 
| 563 |             state->clearFn = qIcuCodecStateFree; | 
| 564 |             UErrorCode error = U_ZERO_ERROR; | 
| 565 |             state->d[0] = ucnv_open(converterName: m_name, err: &error); | 
| 566 |             ucnv_setSubstChars(converter: static_cast<UConverter *>(state->d[0]), | 
| 567 |                                subChars: state->flags & QTextCodec::ConvertInvalidToNull ? "\0"  : "?" , len: 1, err: &error); | 
| 568 |             if (U_FAILURE(code: error)) { | 
| 569 |                 qDebug(msg: "getConverter(state) ucnv_open failed %s %s" , m_name, u_errorName(code: error)); | 
| 570 |             } else { | 
| 571 |                 error = U_ZERO_ERROR; | 
| 572 |                 ucnv_setFromUCallBack(converter: static_cast<UConverter *>(state->d[0]), | 
| 573 |                                       newAction: customFromUnicodeSubstitutionCallback, newContext: state, oldAction: nullptr, | 
| 574 |                                       oldContext: nullptr, err: &error); | 
| 575 |                 if (U_FAILURE(code: error)) { | 
| 576 |                     qDebug(msg: "getConverter(state) failed to install custom callback. "  | 
| 577 |                            "canEncode() may report incorrect results." ); | 
| 578 |                 } | 
| 579 |             } | 
| 580 |         } | 
| 581 |         conv = static_cast<UConverter *>(state->d[0]); | 
| 582 |     } | 
| 583 |     if (!conv) { | 
| 584 |         // stateless conversion | 
| 585 |         UErrorCode error = U_ZERO_ERROR; | 
| 586 |         conv = ucnv_open(converterName: m_name, err: &error); | 
| 587 |         ucnv_setSubstChars(converter: conv, subChars: "?" , len: 1, err: &error); | 
| 588 |         if (U_FAILURE(code: error)) | 
| 589 |             qDebug(msg: "getConverter(no state) ucnv_open failed %s %s" , m_name, u_errorName(code: error)); | 
| 590 |     } | 
| 591 |     return conv; | 
| 592 | } | 
| 593 |  | 
| 594 | QString QIcuCodec::convertToUnicode(const char *chars, int length, QTextCodec::ConverterState *state) const | 
| 595 | { | 
| 596 |     UConverter *conv = getConverter(state); | 
| 597 |  | 
| 598 |     QString string(length + 2, QT_PREPEND_NAMESPACE(Qt::Uninitialized)); | 
| 599 |  | 
| 600 |     const char *end = chars + length; | 
| 601 |     int convertedChars = 0; | 
| 602 |     while (1) { | 
| 603 |         UChar *uc = (UChar *)string.data(); | 
| 604 |         UChar *ucEnd = uc + string.size(); | 
| 605 |         uc += convertedChars; | 
| 606 |         UErrorCode error = U_ZERO_ERROR; | 
| 607 |         ucnv_toUnicode(converter: conv, | 
| 608 |                        target: &uc, targetLimit: ucEnd, | 
| 609 |                        source: &chars, sourceLimit: end, | 
| 610 |                        offsets: nullptr, flush: false, err: &error); | 
| 611 |         if (!U_SUCCESS(code: error) && error != U_BUFFER_OVERFLOW_ERROR) { | 
| 612 |             qDebug(msg: "convertToUnicode failed: %s" , u_errorName(code: error)); | 
| 613 |             break; | 
| 614 |         } | 
| 615 |         // flag the state if we have incomplete input | 
| 616 |         if (error == U_TRUNCATED_CHAR_FOUND) | 
| 617 |             state->remainingChars = 1; | 
| 618 |  | 
| 619 |         convertedChars = uc - (UChar *)string.data(); | 
| 620 |         if (chars >= end) | 
| 621 |             break; | 
| 622 |         string.resize(size: string.size()*2); | 
| 623 |     } | 
| 624 |     string.resize(size: convertedChars); | 
| 625 |  | 
| 626 |     if (!state) | 
| 627 |         ucnv_close(converter: conv); | 
| 628 |     return string; | 
| 629 | } | 
| 630 |  | 
| 631 |  | 
| 632 | QByteArray QIcuCodec::convertFromUnicode(const QChar *unicode, int length, QTextCodec::ConverterState *state) const | 
| 633 | { | 
| 634 |     UConverter *conv = getConverter(state); | 
| 635 |  | 
| 636 |     int requiredLength = UCNV_GET_MAX_BYTES_FOR_STRING(length, ucnv_getMaxCharSize(conv)); | 
| 637 |     QByteArray string(requiredLength, QT_PREPEND_NAMESPACE(Qt::Uninitialized)); | 
| 638 |  | 
| 639 |     const UChar *uc = (const UChar *)unicode; | 
| 640 |     const UChar *end = uc + length; | 
| 641 |     int convertedChars = 0; | 
| 642 |     while (1) { | 
| 643 |         char *ch = (char *)string.data(); | 
| 644 |         char *chEnd = ch + string.size(); | 
| 645 |         ch += convertedChars; | 
| 646 |         UErrorCode error = U_ZERO_ERROR; | 
| 647 |         ucnv_fromUnicode(converter: conv, | 
| 648 |                          target: &ch, targetLimit: chEnd, | 
| 649 |                          source: &uc, sourceLimit: end, | 
| 650 |                          offsets: nullptr, flush: false, err: &error); | 
| 651 |         if (!U_SUCCESS(code: error)) | 
| 652 |             qDebug(msg: "convertFromUnicode failed: %s" , u_errorName(code: error)); | 
| 653 |         // flag the state if we have incomplete input | 
| 654 |         if (error == U_TRUNCATED_CHAR_FOUND) | 
| 655 |             state->remainingChars = 1; | 
| 656 |  | 
| 657 |         convertedChars = ch - string.data(); | 
| 658 |         if (uc >= end) | 
| 659 |             break; | 
| 660 |         string.resize(size: string.size()*2); | 
| 661 |     } | 
| 662 |     string.resize(size: convertedChars); | 
| 663 |  | 
| 664 |     if (!state) | 
| 665 |         ucnv_close(converter: conv); | 
| 666 |  | 
| 667 |     return string; | 
| 668 | } | 
| 669 |  | 
| 670 |  | 
| 671 | QByteArray QIcuCodec::name() const | 
| 672 | { | 
| 673 |     return m_name; | 
| 674 | } | 
| 675 |  | 
| 676 |  | 
| 677 | QList<QByteArray> QIcuCodec::aliases() const | 
| 678 | { | 
| 679 |     UErrorCode error = U_ZERO_ERROR; | 
| 680 |  | 
| 681 |     int n = ucnv_countAliases(alias: m_name, pErrorCode: &error); | 
| 682 |  | 
| 683 |     QList<QByteArray> aliases; | 
| 684 |     for (int i = 0; i < n; ++i) { | 
| 685 |         const char *a = ucnv_getAlias(alias: m_name, n: i, pErrorCode: &error); | 
| 686 |         // skip the canonical name | 
| 687 |         if (!a || !qstrcmp(str1: a, str2: m_name)) | 
| 688 |             continue; | 
| 689 |         aliases += a; | 
| 690 |     } | 
| 691 |  | 
| 692 |     return aliases; | 
| 693 | } | 
| 694 |  | 
| 695 |  | 
| 696 | int QIcuCodec::mibEnum() const | 
| 697 | { | 
| 698 |     for (int i = 0; i < mibToNameSize; ++i) { | 
| 699 |         if (qTextCodecNameMatch(n: m_name, h: (mibToNameTable + mibToName[i].index))) | 
| 700 |             return mibToName[i].mib; | 
| 701 |     } | 
| 702 |  | 
| 703 |     return 0; | 
| 704 | } | 
| 705 |  | 
| 706 | QT_END_NAMESPACE | 
| 707 |  |