| 1 | // Copyright (C) 2016 The Qt Company Ltd. |
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
| 3 | |
| 4 | #include "qicucodec_p.h" |
| 5 | |
| 6 | #include "qtextcodec_p.h" |
| 7 | #include "qutfcodec_p.h" |
| 8 | #include "qlatincodec_p.h" |
| 9 | #include "qsimplecodec_p.h" |
| 10 | #include "qdebug.h" |
| 11 | |
| 12 | #include "unicode/ucnv.h" |
| 13 | |
| 14 | #if QT_CONFIG(codecs) |
| 15 | #include "qtsciicodec_p.h" |
| 16 | #include "qisciicodec_p.h" |
| 17 | #endif |
| 18 | |
| 19 | QT_BEGIN_NAMESPACE |
| 20 | |
| 21 | typedef QList<QTextCodec*>::ConstIterator TextCodecListConstIt; |
| 22 | typedef QList<QByteArray>::ConstIterator ByteArrayListConstIt; |
| 23 | |
| 24 | static void qIcuCodecStateFree(QTextCodec::ConverterState *state) noexcept |
| 25 | { |
| 26 | ucnv_close(converter: static_cast<UConverter *>(state->d[0])); |
| 27 | } |
| 28 | |
| 29 | bool qTextCodecNameMatch(const char *n, const char *h) |
| 30 | { |
| 31 | return ucnv_compareNames(name1: n, name2: h) == 0; |
| 32 | } |
| 33 | |
| 34 | /* The list below is generated from http://www.iana.org/assignments/character-sets/ |
| 35 | using the snippet of code below: |
| 36 | |
| 37 | #include <QtCore> |
| 38 | #include <unicode/ucnv.h> |
| 39 | |
| 40 | int main(int argc, char **argv) |
| 41 | { |
| 42 | QCoreApplication app(argc, argv); |
| 43 | |
| 44 | QFile file("character-sets.txt"); |
| 45 | file.open(QFile::ReadOnly); |
| 46 | QByteArray name; |
| 47 | int mib = -1; |
| 48 | QByteArray nameList; |
| 49 | int pos = 0; |
| 50 | while (!file.atEnd()) { |
| 51 | QByteArray s = file.readLine().trimmed(); |
| 52 | if (s.isEmpty()) { |
| 53 | if (mib != -1) { |
| 54 | UErrorCode error = U_ZERO_ERROR; |
| 55 | const char *standard_name = ucnv_getStandardName(name, "MIME", &error); |
| 56 | if (U_FAILURE(error) || !standard_name) { |
| 57 | error = U_ZERO_ERROR; |
| 58 | standard_name = ucnv_getStandardName(name, "IANA", &error); |
| 59 | } |
| 60 | UConverter *conv = ucnv_open(standard_name, &error); |
| 61 | if (!U_FAILURE(error) && conv && standard_name) { |
| 62 | ucnv_close(conv); |
| 63 | printf(" { %d, %d },\n", mib, pos); |
| 64 | nameList += "\""; |
| 65 | nameList += standard_name; |
| 66 | nameList += "\\0\"\n"; |
| 67 | pos += strlen(standard_name) + 1; |
| 68 | } |
| 69 | } |
| 70 | name = QByteArray(); |
| 71 | mib = -1; |
| 72 | } |
| 73 | if (s.startsWith("Name: ")) { |
| 74 | name = s.mid(5).trimmed(); |
| 75 | if (name.indexOf(' ') > 0) |
| 76 | name = name.left(name.indexOf(' ')); |
| 77 | } |
| 78 | if (s.startsWith("MIBenum:")) |
| 79 | mib = s.mid(8).trimmed().toInt(); |
| 80 | if (s.startsWith("Alias:") && s.contains("MIME")) { |
| 81 | name = s.mid(6).trimmed(); |
| 82 | name = name.left(name.indexOf(' ')).trimmed(); |
| 83 | } |
| 84 | } |
| 85 | qDebug() << nameList; |
| 86 | } |
| 87 | */ |
| 88 | |
// One row of the MIB lookup table: a MIBenum value and the byte offset of the
// matching codec name inside mibToNameTable.
struct MibToName {
    short mib;   // MIBenum value
    short index; // offset of the NUL-terminated name in mibToNameTable
};

// Generated table (see the snippet in the comment above); rows are kept in
// generator order, which is not strictly sorted by MIB.
static const MibToName mibToName[] = {
    { 3, 0 },
    { 4, 9 },
    { 5, 20 },
    { 6, 31 },
    { 7, 42 },
    { 8, 53 },
    { 9, 64 },
    { 10, 75 },
    { 11, 86 },
    { 12, 97 },
    { 13, 108 },
    { 16, 120 },
    { 17, 134 },
    { 18, 144 },
    { 30, 151 },
    { 36, 160 },
    { 37, 167 },
    { 38, 179 },
    { 39, 186 },
    { 40, 198 },
    { 57, 212 },
    { 81, 223 },
    { 82, 234 },
    { 84, 245 },
    { 85, 256 },
    { 104, 267 },
    { 105, 279 },
    { 106, 295 },
    { 109, 301 },
    { 110, 313 },
    { 111, 325 },
    { 113, 337 },
    { 114, 341 },
    { 1000, 349 },
    { 1001, 356 },
    { 1011, 363 },
    { 1012, 368 },
    { 1013, 374 },
    { 1014, 383 },
    { 1015, 392 },
    { 1016, 399 },
    { 1017, 406 },
    { 1018, 413 },
    { 1019, 422 },
    { 1020, 431 },
    { 2004, 438 },
    { 2005, 448 },
    { 2009, 472 },
    { 2013, 479 },
    { 2016, 486 },
    { 2024, 495 },
    { 2025, 505 },
    { 2026, 512 },
    { 2027, 517 },
    { 2028, 527 },
    { 2030, 534 },
    { 2033, 541 },
    { 2034, 548 },
    { 2035, 555 },
    { 2037, 562 },
    { 2038, 569 },
    { 2039, 576 },
    { 2040, 583 },
    { 2041, 590 },
    { 2043, 597 },
    { 2011, 604 },
    { 2044, 611 },
    { 2045, 618 },
    { 2010, 624 },
    { 2046, 631 },
    { 2047, 638 },
    { 2048, 645 },
    { 2049, 652 },
    { 2050, 659 },
    { 2051, 666 },
    { 2052, 673 },
    { 2053, 680 },
    { 2054, 687 },
    { 2055, 694 },
    { 2056, 701 },
    { 2062, 708 },
    { 2063, 715 },
    { 2084, 723 },
    { 2085, 730 },
    { 2086, 741 },
    { 2087, 748 },
    { 2088, 755 },
    { 2089, 762 },
    { 2091, 771 },
    { 2092, 780 },
    { 2093, 789 },
    { 2094, 798 },
    { 2095, 807 },
    { 2096, 816 },
    { 2097, 825 },
    { 2098, 834 },
    { 2099, 843 },
    { 2100, 852 },
    { 2101, 861 },
    { 2102, 872 },
    { 2250, 880 },
    { 2251, 893 },
    { 2252, 906 },
    { 2253, 919 },
    { 2254, 932 },
    { 2255, 945 },
    { 2256, 958 },
    { 2257, 971 },
    { 2258, 984 },
    { 2259, 997 },
};
// Number of rows in mibToName.
int mibToNameSize = sizeof(mibToName) / sizeof(mibToName[0]);
| 207 | |
// Codec names packed back to back into one character array. Every name ends
// with an explicit "\0"; MibToName::index is the byte offset of a name in here.
// Each entry must stay its own string literal so the offsets remain valid.
static const char mibToNameTable[] =
    // offsets 0..337
    "US-ASCII\0"
    "ISO-8859-1\0"
    "ISO-8859-2\0"
    "ISO-8859-3\0"
    "ISO-8859-4\0"
    "ISO-8859-5\0"
    "ISO-8859-6\0"
    "ISO-8859-7\0"
    "ISO-8859-8\0"
    "ISO-8859-9\0"
    "ISO-8859-10\0"
    "ISO-2022-JP-1\0"
    "Shift_JIS\0"
    "EUC-JP\0"
    "US-ASCII\0"
    "EUC-KR\0"
    "ISO-2022-KR\0"
    "EUC-KR\0"
    "ISO-2022-JP\0"
    "ISO-2022-JP-2\0"
    "GB_2312-80\0"
    "ISO-8859-6\0"
    "ISO-8859-6\0"
    "ISO-8859-8\0"
    "ISO-8859-8\0"
    "ISO-2022-CN\0"
    "ISO-2022-CN-EXT\0"
    "UTF-8\0"
    "ISO-8859-13\0"
    "ISO-8859-14\0"
    "ISO-8859-15\0"
    "GBK\0"
    // offsets 341..438
    "GB18030\0"
    "UTF-16\0"
    "UTF-32\0"
    "SCSU\0"
    "UTF-7\0"
    "UTF-16BE\0"
    "UTF-16LE\0"
    "UTF-16\0"
    "CESU-8\0"
    "UTF-32\0"
    "UTF-32BE\0"
    "UTF-32LE\0"
    "BOCU-1\0"
    // offsets 448..872
    "hp-roman8\0"
    "Adobe-Standard-Encoding\0"
    "IBM850\0"
    "IBM862\0"
    "IBM-Thai\0"
    "Shift_JIS\0"
    "GB2312\0"
    "Big5\0"
    "macintosh\0"
    "IBM037\0"
    "IBM273\0"
    "IBM277\0"
    "IBM278\0"
    "IBM280\0"
    "IBM284\0"
    "IBM285\0"
    "IBM290\0"
    "IBM297\0"
    "IBM420\0"
    "IBM424\0"
    "IBM437\0"
    "IBM500\0"
    "cp851\0"
    "IBM852\0"
    "IBM855\0"
    "IBM857\0"
    "IBM860\0"
    "IBM861\0"
    "IBM863\0"
    "IBM864\0"
    "IBM865\0"
    "IBM868\0"
    "IBM869\0"
    "IBM870\0"
    "IBM871\0"
    "IBM918\0"
    "IBM1026\0"
    "KOI8-R\0"
    "HZ-GB-2312\0"
    "IBM866\0"
    "IBM775\0"
    "KOI8-U\0"
    "IBM00858\0"
    "IBM01140\0"
    "IBM01141\0"
    "IBM01142\0"
    "IBM01143\0"
    "IBM01144\0"
    "IBM01145\0"
    "IBM01146\0"
    "IBM01147\0"
    "IBM01148\0"
    "IBM01149\0"
    "Big5-HKSCS\0"
    "IBM1047\0"
    // offsets 880..997
    "windows-1250\0"
    "windows-1251\0"
    "windows-1252\0"
    "windows-1253\0"
    "windows-1254\0"
    "windows-1255\0"
    "windows-1256\0"
    "windows-1257\0"
    "windows-1258\0"
    "TIS-620\0";
| 319 | |
| 320 | static QTextCodec *loadQtCodec(const char *name) |
| 321 | { |
| 322 | if (!strcmp(s1: name, s2: "UTF-8" )) |
| 323 | return new QUtf8Codec; |
| 324 | if (!strcmp(s1: name, s2: "UTF-16" )) |
| 325 | return new QUtf16Codec; |
| 326 | if (!strcmp(s1: name, s2: "ISO-8859-1" )) |
| 327 | return new QLatin1Codec; |
| 328 | if (!strcmp(s1: name, s2: "UTF-16BE" )) |
| 329 | return new QUtf16BECodec; |
| 330 | if (!strcmp(s1: name, s2: "UTF-16LE" )) |
| 331 | return new QUtf16LECodec; |
| 332 | if (!strcmp(s1: name, s2: "UTF-32" )) |
| 333 | return new QUtf32Codec; |
| 334 | if (!strcmp(s1: name, s2: "UTF-32BE" )) |
| 335 | return new QUtf32BECodec; |
| 336 | if (!strcmp(s1: name, s2: "UTF-32LE" )) |
| 337 | return new QUtf32LECodec; |
| 338 | if (!strcmp(s1: name, s2: "ISO-8859-16" ) || !strcmp(s1: name, s2: "latin10" ) || !strcmp(s1: name, s2: "iso-ir-226" )) |
| 339 | return new QSimpleTextCodec(13 /* == 8859-16*/); |
| 340 | #if QT_CONFIG(codecs) |
| 341 | if (!strcmp(s1: name, s2: "TSCII" )) |
| 342 | return new QTsciiCodec; |
| 343 | if (!qstrnicmp(name, "iscii" , len: 5)) |
| 344 | return QIsciiCodec::create(name); |
| 345 | #endif |
| 346 | |
| 347 | return nullptr; |
| 348 | } |
| 349 | |
| 350 | /// \threadsafe |
| 351 | QList<QByteArray> QIcuCodec::availableCodecs() |
| 352 | { |
| 353 | QList<QByteArray> codecs; |
| 354 | int n = ucnv_countAvailable(); |
| 355 | for (int i = 0; i < n; ++i) { |
| 356 | const char *name = ucnv_getAvailableName(n: i); |
| 357 | |
| 358 | UErrorCode error = U_ZERO_ERROR; |
| 359 | const char *standardName = ucnv_getStandardName(name, standard: "MIME" , pErrorCode: &error); |
| 360 | if (U_FAILURE(code: error) || !standardName) { |
| 361 | error = U_ZERO_ERROR; |
| 362 | standardName = ucnv_getStandardName(name, standard: "IANA" , pErrorCode: &error); |
| 363 | } |
| 364 | if (U_FAILURE(code: error)) |
| 365 | continue; |
| 366 | |
| 367 | error = U_ZERO_ERROR; |
| 368 | int ac = ucnv_countAliases(alias: standardName, pErrorCode: &error); |
| 369 | if (U_FAILURE(code: error)) |
| 370 | continue; |
| 371 | for (int j = 0; j < ac; ++j) { |
| 372 | error = U_ZERO_ERROR; |
| 373 | const char *alias = ucnv_getAlias(alias: standardName, n: j, pErrorCode: &error); |
| 374 | if (!U_SUCCESS(code: error)) |
| 375 | continue; |
| 376 | codecs += alias; |
| 377 | } |
| 378 | } |
| 379 | |
| 380 | // handled by Qt and not in ICU: |
| 381 | codecs += "TSCII" ; |
| 382 | |
| 383 | return codecs; |
| 384 | } |
| 385 | |
| 386 | /// \threadsafe |
| 387 | QList<int> QIcuCodec::availableMibs() |
| 388 | { |
| 389 | QList<int> mibs; |
| 390 | mibs.reserve(asize: mibToNameSize + 1); |
| 391 | for (int i = 0; i < mibToNameSize; ++i) |
| 392 | mibs += mibToName[i].mib; |
| 393 | |
| 394 | // handled by Qt and not in ICU: |
| 395 | mibs += 2107; // TSCII |
| 396 | |
| 397 | return mibs; |
| 398 | } |
| 399 | |
| 400 | QTextCodec *QIcuCodec::defaultCodecUnlocked() |
| 401 | { |
| 402 | QTextCodecData *globalData = QTextCodecData::instance(); |
| 403 | if (!globalData) |
| 404 | return nullptr; |
| 405 | QTextCodec *c = globalData->codecForLocale.loadAcquire(); |
| 406 | if (c) |
| 407 | return c; |
| 408 | |
| 409 | #if defined(QT_LOCALE_IS_UTF8) |
| 410 | const char *name = "UTF-8" ; |
| 411 | #else |
| 412 | const char *name = ucnv_getDefaultName(); |
| 413 | #endif |
| 414 | c = codecForNameUnlocked(name); |
| 415 | globalData->codecForLocale.storeRelease(newValue: c); |
| 416 | return c; |
| 417 | } |
| 418 | |
| 419 | |
| 420 | QTextCodec *QIcuCodec::codecForNameUnlocked(const char *name) |
| 421 | { |
| 422 | // backwards compatibility with Qt 4.x |
| 423 | if (!qstrcmp(str1: name, str2: "CP949" )) |
| 424 | name = "windows-949" ; |
| 425 | else if (!qstrcmp(str1: name, str2: "Apple Roman" )) |
| 426 | name = "macintosh" ; |
| 427 | // these are broken data in ICU 4.4, and can't be resolved even though they are aliases to tis-620 |
| 428 | if (!qstrcmp(str1: name, str2: "windows-874-2000" ) |
| 429 | || !qstrcmp(str1: name, str2: "windows-874" ) |
| 430 | || !qstrcmp(str1: name, str2: "MS874" ) |
| 431 | || !qstrcmp(str1: name, str2: "x-windows-874" ) |
| 432 | || !qstrcmp(str1: name, str2: "ISO 8859-11" )) |
| 433 | name = "TIS-620" ; |
| 434 | |
| 435 | UErrorCode error = U_ZERO_ERROR; |
| 436 | // MIME gives better default names |
| 437 | const char *standardName = ucnv_getStandardName(name, standard: "MIME" , pErrorCode: &error); |
| 438 | if (U_FAILURE(code: error) || !standardName) { |
| 439 | error = U_ZERO_ERROR; |
| 440 | standardName = ucnv_getStandardName(name, standard: "IANA" , pErrorCode: &error); |
| 441 | } |
| 442 | bool qt_only = false; |
| 443 | if (U_FAILURE(code: error) || !standardName) { |
| 444 | standardName = name; |
| 445 | qt_only = true; |
| 446 | } else { |
| 447 | // correct some issues where the ICU data set contains duplicated entries. |
| 448 | // Where this happens it's because one data set is a subset of another. We |
| 449 | // always use the larger data set. |
| 450 | |
| 451 | if (qstrcmp(str1: standardName, str2: "GB2312" ) == 0 || qstrcmp(str1: standardName, str2: "GB_2312-80" ) == 0) |
| 452 | standardName = "GBK" ; |
| 453 | else if (qstrcmp(str1: standardName, str2: "KSC_5601" ) == 0 || qstrcmp(str1: standardName, str2: "EUC-KR" ) == 0 || qstrcmp(str1: standardName, str2: "cp1363" ) == 0) |
| 454 | standardName = "windows-949" ; |
| 455 | } |
| 456 | |
| 457 | QTextCodecData *globalData = QTextCodecData::instance(); |
| 458 | QTextCodecCache *cache = &globalData->codecCache; |
| 459 | |
| 460 | QTextCodec *codec; |
| 461 | if (cache) { |
| 462 | codec = cache->value(key: standardName); |
| 463 | if (codec) |
| 464 | return codec; |
| 465 | } |
| 466 | |
| 467 | for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it) { |
| 468 | QTextCodec *cursor = *it; |
| 469 | if (qTextCodecNameMatch(n: cursor->name(), h: standardName)) { |
| 470 | if (cache) |
| 471 | cache->insert(key: standardName, value: cursor); |
| 472 | return cursor; |
| 473 | } |
| 474 | QList<QByteArray> aliases = cursor->aliases(); |
| 475 | for (ByteArrayListConstIt ait = aliases.constBegin(), acend = aliases.constEnd(); ait != acend; ++ait) { |
| 476 | if (qTextCodecNameMatch(n: *ait, h: standardName)) { |
| 477 | if (cache) |
| 478 | cache->insert(key: standardName, value: cursor); |
| 479 | return cursor; |
| 480 | } |
| 481 | } |
| 482 | } |
| 483 | |
| 484 | QTextCodec *c = loadQtCodec(name: standardName); |
| 485 | if (c) |
| 486 | return c; |
| 487 | |
| 488 | if (qt_only) |
| 489 | return nullptr; |
| 490 | |
| 491 | // check whether there is really a converter for the name available. |
| 492 | UConverter *conv = ucnv_open(converterName: standardName, err: &error); |
| 493 | if (!conv) { |
| 494 | qDebug(msg: "codecForName: ucnv_open failed %s %s" , standardName, u_errorName(code: error)); |
| 495 | return nullptr; |
| 496 | } |
| 497 | //qDebug() << "QIcuCodec: Standard name for " << name << "is" << standardName; |
| 498 | ucnv_close(converter: conv); |
| 499 | |
| 500 | |
| 501 | c = new QIcuCodec(standardName); |
| 502 | if (cache) |
| 503 | cache->insert(key: standardName, value: c); |
| 504 | return c; |
| 505 | } |
| 506 | |
| 507 | |
| 508 | QTextCodec *QIcuCodec::codecForMibUnlocked(int mib) |
| 509 | { |
| 510 | for (int i = 0; i < mibToNameSize; ++i) { |
| 511 | if (mibToName[i].mib == mib) |
| 512 | return codecForNameUnlocked(name: mibToNameTable + mibToName[i].index); |
| 513 | } |
| 514 | |
| 515 | if (mib == 2107) |
| 516 | return codecForNameUnlocked(name: "TSCII" ); |
| 517 | |
| 518 | return nullptr; |
| 519 | } |
| 520 | |
| 521 | |
| 522 | QIcuCodec::QIcuCodec(const char *name) |
| 523 | : m_name(name) |
| 524 | { |
| 525 | } |
| 526 | |
| 527 | QIcuCodec::~QIcuCodec() |
| 528 | { |
| 529 | } |
| 530 | |
| 531 | /*! |
| 532 | \internal |
| 533 | |
| 534 | Custom callback for the ICU from Unicode conversion. It's invoked when the |
| 535 | conversion from Unicode detects illegal or unrecognized character. |
| 536 | |
| 537 | Assumes that context contains a pointer to QTextCodec::ConverterState |
| 538 | structure. Updates its invalid characters count and calls a default |
| 539 | callback, that replaces the invalid characters properly. |
| 540 | */ |
| 541 | static void customFromUnicodeSubstitutionCallback(const void *context, |
| 542 | UConverterFromUnicodeArgs *fromUArgs, |
| 543 | const UChar *codeUnits, |
| 544 | int32_t length, |
| 545 | UChar32 codePoint, |
| 546 | UConverterCallbackReason reason, |
| 547 | UErrorCode *err) |
| 548 | { |
| 549 | auto *state = reinterpret_cast<QTextCodec::ConverterState *>(const_cast<void *>(context)); |
| 550 | if (state) |
| 551 | state->invalidChars++; |
| 552 | // Call the default callback that replaces all illegal or unrecognized |
| 553 | // sequences with the substitute string |
| 554 | UCNV_FROM_U_CALLBACK_SUBSTITUTE(context: nullptr, fromUArgs, codeUnits, length, codePoint, reason, err); |
| 555 | } |
| 556 | |
| 557 | UConverter *QIcuCodec::getConverter(QTextCodec::ConverterState *state) const |
| 558 | { |
| 559 | UConverter *conv = nullptr; |
| 560 | if (state) { |
| 561 | if (!state->d[0]) { |
| 562 | // first time |
| 563 | state->clearFn = qIcuCodecStateFree; |
| 564 | UErrorCode error = U_ZERO_ERROR; |
| 565 | state->d[0] = ucnv_open(converterName: m_name, err: &error); |
| 566 | ucnv_setSubstChars(converter: static_cast<UConverter *>(state->d[0]), |
| 567 | subChars: state->flags & QTextCodec::ConvertInvalidToNull ? "\0" : "?" , len: 1, err: &error); |
| 568 | if (U_FAILURE(code: error)) { |
| 569 | qDebug(msg: "getConverter(state) ucnv_open failed %s %s" , m_name, u_errorName(code: error)); |
| 570 | } else { |
| 571 | error = U_ZERO_ERROR; |
| 572 | ucnv_setFromUCallBack(converter: static_cast<UConverter *>(state->d[0]), |
| 573 | newAction: customFromUnicodeSubstitutionCallback, newContext: state, oldAction: nullptr, |
| 574 | oldContext: nullptr, err: &error); |
| 575 | if (U_FAILURE(code: error)) { |
| 576 | qDebug(msg: "getConverter(state) failed to install custom callback. " |
| 577 | "canEncode() may report incorrect results." ); |
| 578 | } |
| 579 | } |
| 580 | } |
| 581 | conv = static_cast<UConverter *>(state->d[0]); |
| 582 | } |
| 583 | if (!conv) { |
| 584 | // stateless conversion |
| 585 | UErrorCode error = U_ZERO_ERROR; |
| 586 | conv = ucnv_open(converterName: m_name, err: &error); |
| 587 | ucnv_setSubstChars(converter: conv, subChars: "?" , len: 1, err: &error); |
| 588 | if (U_FAILURE(code: error)) |
| 589 | qDebug(msg: "getConverter(no state) ucnv_open failed %s %s" , m_name, u_errorName(code: error)); |
| 590 | } |
| 591 | return conv; |
| 592 | } |
| 593 | |
| 594 | QString QIcuCodec::convertToUnicode(const char *chars, int length, QTextCodec::ConverterState *state) const |
| 595 | { |
| 596 | UConverter *conv = getConverter(state); |
| 597 | |
| 598 | QString string(length + 2, QT_PREPEND_NAMESPACE(Qt::Uninitialized)); |
| 599 | |
| 600 | const char *end = chars + length; |
| 601 | int convertedChars = 0; |
| 602 | while (1) { |
| 603 | UChar *uc = (UChar *)string.data(); |
| 604 | UChar *ucEnd = uc + string.size(); |
| 605 | uc += convertedChars; |
| 606 | UErrorCode error = U_ZERO_ERROR; |
| 607 | ucnv_toUnicode(converter: conv, |
| 608 | target: &uc, targetLimit: ucEnd, |
| 609 | source: &chars, sourceLimit: end, |
| 610 | offsets: nullptr, flush: false, err: &error); |
| 611 | if (!U_SUCCESS(code: error) && error != U_BUFFER_OVERFLOW_ERROR) { |
| 612 | qDebug(msg: "convertToUnicode failed: %s" , u_errorName(code: error)); |
| 613 | break; |
| 614 | } |
| 615 | // flag the state if we have incomplete input |
| 616 | if (error == U_TRUNCATED_CHAR_FOUND) |
| 617 | state->remainingChars = 1; |
| 618 | |
| 619 | convertedChars = uc - (UChar *)string.data(); |
| 620 | if (chars >= end) |
| 621 | break; |
| 622 | string.resize(size: string.size()*2); |
| 623 | } |
| 624 | string.resize(size: convertedChars); |
| 625 | |
| 626 | if (!state) |
| 627 | ucnv_close(converter: conv); |
| 628 | return string; |
| 629 | } |
| 630 | |
| 631 | |
| 632 | QByteArray QIcuCodec::convertFromUnicode(const QChar *unicode, int length, QTextCodec::ConverterState *state) const |
| 633 | { |
| 634 | UConverter *conv = getConverter(state); |
| 635 | |
| 636 | int requiredLength = UCNV_GET_MAX_BYTES_FOR_STRING(length, ucnv_getMaxCharSize(conv)); |
| 637 | QByteArray string(requiredLength, QT_PREPEND_NAMESPACE(Qt::Uninitialized)); |
| 638 | |
| 639 | const UChar *uc = (const UChar *)unicode; |
| 640 | const UChar *end = uc + length; |
| 641 | int convertedChars = 0; |
| 642 | while (1) { |
| 643 | char *ch = (char *)string.data(); |
| 644 | char *chEnd = ch + string.size(); |
| 645 | ch += convertedChars; |
| 646 | UErrorCode error = U_ZERO_ERROR; |
| 647 | ucnv_fromUnicode(converter: conv, |
| 648 | target: &ch, targetLimit: chEnd, |
| 649 | source: &uc, sourceLimit: end, |
| 650 | offsets: nullptr, flush: false, err: &error); |
| 651 | if (!U_SUCCESS(code: error)) |
| 652 | qDebug(msg: "convertFromUnicode failed: %s" , u_errorName(code: error)); |
| 653 | // flag the state if we have incomplete input |
| 654 | if (error == U_TRUNCATED_CHAR_FOUND) |
| 655 | state->remainingChars = 1; |
| 656 | |
| 657 | convertedChars = ch - string.data(); |
| 658 | if (uc >= end) |
| 659 | break; |
| 660 | string.resize(size: string.size()*2); |
| 661 | } |
| 662 | string.resize(size: convertedChars); |
| 663 | |
| 664 | if (!state) |
| 665 | ucnv_close(converter: conv); |
| 666 | |
| 667 | return string; |
| 668 | } |
| 669 | |
| 670 | |
| 671 | QByteArray QIcuCodec::name() const |
| 672 | { |
| 673 | return m_name; |
| 674 | } |
| 675 | |
| 676 | |
| 677 | QList<QByteArray> QIcuCodec::aliases() const |
| 678 | { |
| 679 | UErrorCode error = U_ZERO_ERROR; |
| 680 | |
| 681 | int n = ucnv_countAliases(alias: m_name, pErrorCode: &error); |
| 682 | |
| 683 | QList<QByteArray> aliases; |
| 684 | for (int i = 0; i < n; ++i) { |
| 685 | const char *a = ucnv_getAlias(alias: m_name, n: i, pErrorCode: &error); |
| 686 | // skip the canonical name |
| 687 | if (!a || !qstrcmp(str1: a, str2: m_name)) |
| 688 | continue; |
| 689 | aliases += a; |
| 690 | } |
| 691 | |
| 692 | return aliases; |
| 693 | } |
| 694 | |
| 695 | |
| 696 | int QIcuCodec::mibEnum() const |
| 697 | { |
| 698 | for (int i = 0; i < mibToNameSize; ++i) { |
| 699 | if (qTextCodecNameMatch(n: m_name, h: (mibToNameTable + mibToName[i].index))) |
| 700 | return mibToName[i].mib; |
| 701 | } |
| 702 | |
| 703 | return 0; |
| 704 | } |
| 705 | |
| 706 | QT_END_NAMESPACE |
| 707 | |