| 1 | /**************************************************************************** |
| 2 | ** |
| 3 | ** Copyright (C) 2016 The Qt Company Ltd. |
| 4 | ** Contact: https://www.qt.io/licensing/ |
| 5 | ** |
| 6 | ** This file is part of the QtCore module of the Qt Toolkit. |
| 7 | ** |
| 8 | ** $QT_BEGIN_LICENSE:LGPL$ |
| 9 | ** Commercial License Usage |
| 10 | ** Licensees holding valid commercial Qt licenses may use this file in |
| 11 | ** accordance with the commercial license agreement provided with the |
| 12 | ** Software or, alternatively, in accordance with the terms contained in |
| 13 | ** a written agreement between you and The Qt Company. For licensing terms |
| 14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
| 15 | ** information use the contact form at https://www.qt.io/contact-us. |
| 16 | ** |
| 17 | ** GNU Lesser General Public License Usage |
| 18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
| 19 | ** General Public License version 3 as published by the Free Software |
| 20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
| 21 | ** packaging of this file. Please review the following information to |
| 22 | ** ensure the GNU Lesser General Public License version 3 requirements |
| 23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
| 24 | ** |
| 25 | ** GNU General Public License Usage |
| 26 | ** Alternatively, this file may be used under the terms of the GNU |
| 27 | ** General Public License version 2.0 or (at your option) the GNU General |
| 28 | ** Public license version 3 or any later version approved by the KDE Free |
| 29 | ** Qt Foundation. The licenses are as published by the Free Software |
| 30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
| 31 | ** included in the packaging of this file. Please review the following |
| 32 | ** information to ensure the GNU General Public License requirements will |
| 33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
| 34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
| 35 | ** |
| 36 | ** $QT_END_LICENSE$ |
| 37 | ** |
| 38 | ****************************************************************************/ |
| 39 | |
| 40 | #include "qicucodec_p.h" |
| 41 | |
| 42 | #include "qtextcodec_p.h" |
| 43 | #include "qutfcodec_p.h" |
| 44 | #include "qlatincodec_p.h" |
| 45 | #include "qsimplecodec_p.h" |
| 46 | #include "private/qcoreglobaldata_p.h" |
| 47 | #include "qdebug.h" |
| 48 | |
| 49 | #include "unicode/ucnv.h" |
| 50 | |
| 51 | #if QT_CONFIG(codecs) |
| 52 | #include "qtsciicodec_p.h" |
| 53 | #include "qisciicodec_p.h" |
| 54 | #endif |
| 55 | |
| 56 | QT_BEGIN_NAMESPACE |
| 57 | |
| 58 | typedef QList<QTextCodec*>::ConstIterator TextCodecListConstIt; |
| 59 | typedef QList<QByteArray>::ConstIterator ByteArrayListConstIt; |
| 60 | |
| 61 | static void qIcuCodecStateFree(QTextCodec::ConverterState *state) |
| 62 | { |
| 63 | ucnv_close(converter: static_cast<UConverter *>(state->d)); |
| 64 | } |
| 65 | |
| 66 | bool qTextCodecNameMatch(const char *n, const char *h) |
| 67 | { |
| 68 | return ucnv_compareNames(name1: n, name2: h) == 0; |
| 69 | } |
| 70 | |
| 71 | /* The list below is generated from http://www.iana.org/assignments/character-sets/ |
| 72 | using the snippet of code below: |
| 73 | |
| 74 | #include <QtCore> |
| 75 | #include <unicode/ucnv.h> |
| 76 | |
| 77 | int main(int argc, char **argv) |
| 78 | { |
| 79 | QCoreApplication app(argc, argv); |
| 80 | |
| 81 | QFile file("character-sets.txt"); |
| 82 | file.open(QFile::ReadOnly); |
| 83 | QByteArray name; |
| 84 | int mib = -1; |
| 85 | QByteArray nameList; |
| 86 | int pos = 0; |
| 87 | while (!file.atEnd()) { |
| 88 | QByteArray s = file.readLine().trimmed(); |
| 89 | if (s.isEmpty()) { |
| 90 | if (mib != -1) { |
| 91 | UErrorCode error = U_ZERO_ERROR; |
| 92 | const char *standard_name = ucnv_getStandardName(name, "MIME", &error); |
| 93 | if (U_FAILURE(error) || !standard_name) { |
| 94 | error = U_ZERO_ERROR; |
| 95 | standard_name = ucnv_getStandardName(name, "IANA", &error); |
| 96 | } |
| 97 | UConverter *conv = ucnv_open(standard_name, &error); |
| 98 | if (!U_FAILURE(error) && conv && standard_name) { |
| 99 | ucnv_close(conv); |
| 100 | printf(" { %d, %d },\n", mib, pos); |
| 101 | nameList += "\""; |
| 102 | nameList += standard_name; |
| 103 | nameList += "\\0\"\n"; |
| 104 | pos += strlen(standard_name) + 1; |
| 105 | } |
| 106 | } |
| 107 | name = QByteArray(); |
| 108 | mib = -1; |
| 109 | } |
| 110 | if (s.startsWith("Name: ")) { |
| 111 | name = s.mid(5).trimmed(); |
| 112 | if (name.indexOf(' ') > 0) |
| 113 | name = name.left(name.indexOf(' ')); |
| 114 | } |
| 115 | if (s.startsWith("MIBenum:")) |
| 116 | mib = s.mid(8).trimmed().toInt(); |
| 117 | if (s.startsWith("Alias:") && s.contains("MIME")) { |
| 118 | name = s.mid(6).trimmed(); |
| 119 | name = name.left(name.indexOf(' ')).trimmed(); |
| 120 | } |
| 121 | } |
| 122 | qDebug() << nameList; |
| 123 | } |
| 124 | */ |
| 125 | |
// One row of the MIB lookup table: an IANA MIBenum value and the byte offset
// of the matching NUL-terminated name inside mibToNameTable.
struct MibToName {
    short mib;
    short index;
};

// { mib, offset into mibToNameTable } pairs, produced by the generator snippet
// shown in the comment above.
static const MibToName mibToName[] = {
    { 3, 0 }, { 4, 9 }, { 5, 20 }, { 6, 31 },
    { 7, 42 }, { 8, 53 }, { 9, 64 }, { 10, 75 },
    { 11, 86 }, { 12, 97 }, { 13, 108 }, { 16, 120 },
    { 17, 134 }, { 18, 144 }, { 30, 151 }, { 36, 160 },
    { 37, 167 }, { 38, 179 }, { 39, 186 }, { 40, 198 },
    { 57, 212 }, { 81, 223 }, { 82, 234 }, { 84, 245 },
    { 85, 256 }, { 104, 267 }, { 105, 279 }, { 106, 295 },
    { 109, 301 }, { 110, 313 }, { 111, 325 }, { 113, 337 },
    { 114, 341 }, { 1000, 349 }, { 1001, 356 }, { 1011, 363 },
    { 1012, 368 }, { 1013, 374 }, { 1014, 383 }, { 1015, 392 },
    { 1016, 399 }, { 1017, 406 }, { 1018, 413 }, { 1019, 422 },
    { 1020, 431 }, { 2004, 438 }, { 2005, 448 }, { 2009, 472 },
    { 2013, 479 }, { 2016, 486 }, { 2024, 495 }, { 2025, 505 },
    { 2026, 512 }, { 2027, 517 }, { 2028, 527 }, { 2030, 534 },
    { 2033, 541 }, { 2034, 548 }, { 2035, 555 }, { 2037, 562 },
    { 2038, 569 }, { 2039, 576 }, { 2040, 583 }, { 2041, 590 },
    { 2043, 597 }, { 2011, 604 }, { 2044, 611 }, { 2045, 618 },
    { 2010, 624 }, { 2046, 631 }, { 2047, 638 }, { 2048, 645 },
    { 2049, 652 }, { 2050, 659 }, { 2051, 666 }, { 2052, 673 },
    { 2053, 680 }, { 2054, 687 }, { 2055, 694 }, { 2056, 701 },
    { 2062, 708 }, { 2063, 715 }, { 2084, 723 }, { 2085, 730 },
    { 2086, 741 }, { 2087, 748 }, { 2088, 755 }, { 2089, 762 },
    { 2091, 771 }, { 2092, 780 }, { 2093, 789 }, { 2094, 798 },
    { 2095, 807 }, { 2096, 816 }, { 2097, 825 }, { 2098, 834 },
    { 2099, 843 }, { 2100, 852 }, { 2101, 861 }, { 2102, 872 },
    { 2250, 880 }, { 2251, 893 }, { 2252, 906 }, { 2253, 919 },
    { 2254, 932 }, { 2255, 945 }, { 2256, 958 }, { 2257, 971 },
    { 2258, 984 }, { 2259, 997 },
};
// Number of rows in mibToName.
int mibToNameSize = sizeof(mibToName) / sizeof(mibToName[0]);
| 244 | |
// All codec names referenced by mibToName, stored back to back. Every name
// carries its own explicit "\0" so that a MibToName::index offset points at a
// self-contained C string.
static const char mibToNameTable[] =
    "US-ASCII\0"
    "ISO-8859-1\0" "ISO-8859-2\0" "ISO-8859-3\0" "ISO-8859-4\0" "ISO-8859-5\0"
    "ISO-8859-6\0" "ISO-8859-7\0" "ISO-8859-8\0" "ISO-8859-9\0" "ISO-8859-10\0"
    "ISO-2022-JP-1\0" "Shift_JIS\0" "EUC-JP\0" "US-ASCII\0"
    "EUC-KR\0" "ISO-2022-KR\0" "EUC-KR\0"
    "ISO-2022-JP\0" "ISO-2022-JP-2\0" "GB_2312-80\0"
    "ISO-8859-6\0" "ISO-8859-6\0" "ISO-8859-8\0" "ISO-8859-8\0"
    "ISO-2022-CN\0" "ISO-2022-CN-EXT\0" "UTF-8\0"
    "ISO-8859-13\0" "ISO-8859-14\0" "ISO-8859-15\0"
    "GBK\0" "GB18030\0"
    "UTF-16\0" "UTF-32\0" "SCSU\0" "UTF-7\0"
    "UTF-16BE\0" "UTF-16LE\0" "UTF-16\0" "CESU-8\0"
    "UTF-32\0" "UTF-32BE\0" "UTF-32LE\0" "BOCU-1\0"
    "hp-roman8\0" "Adobe-Standard-Encoding\0"
    "IBM850\0" "IBM862\0" "IBM-Thai\0"
    "Shift_JIS\0" "GB2312\0" "Big5\0" "macintosh\0"
    "IBM037\0" "IBM273\0" "IBM277\0" "IBM278\0" "IBM280\0" "IBM284\0"
    "IBM285\0" "IBM290\0" "IBM297\0" "IBM420\0" "IBM424\0" "IBM437\0"
    "IBM500\0" "cp851\0" "IBM852\0" "IBM855\0" "IBM857\0" "IBM860\0"
    "IBM861\0" "IBM863\0" "IBM864\0" "IBM865\0" "IBM868\0" "IBM869\0"
    "IBM870\0" "IBM871\0" "IBM918\0" "IBM1026\0"
    "KOI8-R\0" "HZ-GB-2312\0" "IBM866\0" "IBM775\0" "KOI8-U\0"
    "IBM00858\0" "IBM01140\0" "IBM01141\0" "IBM01142\0" "IBM01143\0" "IBM01144\0"
    "IBM01145\0" "IBM01146\0" "IBM01147\0" "IBM01148\0" "IBM01149\0"
    "Big5-HKSCS\0" "IBM1047\0"
    "windows-1250\0" "windows-1251\0" "windows-1252\0" "windows-1253\0" "windows-1254\0"
    "windows-1255\0" "windows-1256\0" "windows-1257\0" "windows-1258\0"
    "TIS-620\0";
| 356 | |
| 357 | static QTextCodec *loadQtCodec(const char *name) |
| 358 | { |
| 359 | if (!strcmp(s1: name, s2: "UTF-8" )) |
| 360 | return new QUtf8Codec; |
| 361 | if (!strcmp(s1: name, s2: "UTF-16" )) |
| 362 | return new QUtf16Codec; |
| 363 | if (!strcmp(s1: name, s2: "ISO-8859-1" )) |
| 364 | return new QLatin1Codec; |
| 365 | if (!strcmp(s1: name, s2: "UTF-16BE" )) |
| 366 | return new QUtf16BECodec; |
| 367 | if (!strcmp(s1: name, s2: "UTF-16LE" )) |
| 368 | return new QUtf16LECodec; |
| 369 | if (!strcmp(s1: name, s2: "UTF-32" )) |
| 370 | return new QUtf32Codec; |
| 371 | if (!strcmp(s1: name, s2: "UTF-32BE" )) |
| 372 | return new QUtf32BECodec; |
| 373 | if (!strcmp(s1: name, s2: "UTF-32LE" )) |
| 374 | return new QUtf32LECodec; |
| 375 | if (!strcmp(s1: name, s2: "ISO-8859-16" ) || !strcmp(s1: name, s2: "latin10" ) || !strcmp(s1: name, s2: "iso-ir-226" )) |
| 376 | return new QSimpleTextCodec(13 /* == 8859-16*/); |
| 377 | #if QT_CONFIG(codecs) |
| 378 | if (!strcmp(s1: name, s2: "TSCII" )) |
| 379 | return new QTsciiCodec; |
| 380 | if (!qstrnicmp(name, "iscii" , len: 5)) |
| 381 | return QIsciiCodec::create(name); |
| 382 | #endif |
| 383 | |
| 384 | return nullptr; |
| 385 | } |
| 386 | |
| 387 | /// \threadsafe |
| 388 | QList<QByteArray> QIcuCodec::availableCodecs() |
| 389 | { |
| 390 | QList<QByteArray> codecs; |
| 391 | int n = ucnv_countAvailable(); |
| 392 | for (int i = 0; i < n; ++i) { |
| 393 | const char *name = ucnv_getAvailableName(n: i); |
| 394 | |
| 395 | UErrorCode error = U_ZERO_ERROR; |
| 396 | const char *standardName = ucnv_getStandardName(name, standard: "MIME" , pErrorCode: &error); |
| 397 | if (U_FAILURE(code: error) || !standardName) { |
| 398 | error = U_ZERO_ERROR; |
| 399 | standardName = ucnv_getStandardName(name, standard: "IANA" , pErrorCode: &error); |
| 400 | } |
| 401 | if (U_FAILURE(code: error)) |
| 402 | continue; |
| 403 | |
| 404 | error = U_ZERO_ERROR; |
| 405 | int ac = ucnv_countAliases(alias: standardName, pErrorCode: &error); |
| 406 | if (U_FAILURE(code: error)) |
| 407 | continue; |
| 408 | for (int j = 0; j < ac; ++j) { |
| 409 | error = U_ZERO_ERROR; |
| 410 | const char *alias = ucnv_getAlias(alias: standardName, n: j, pErrorCode: &error); |
| 411 | if (!U_SUCCESS(code: error)) |
| 412 | continue; |
| 413 | codecs += alias; |
| 414 | } |
| 415 | } |
| 416 | |
| 417 | // handled by Qt and not in ICU: |
| 418 | codecs += "TSCII" ; |
| 419 | |
| 420 | return codecs; |
| 421 | } |
| 422 | |
| 423 | /// \threadsafe |
| 424 | QList<int> QIcuCodec::availableMibs() |
| 425 | { |
| 426 | QList<int> mibs; |
| 427 | mibs.reserve(alloc: mibToNameSize + 1); |
| 428 | for (int i = 0; i < mibToNameSize; ++i) |
| 429 | mibs += mibToName[i].mib; |
| 430 | |
| 431 | // handled by Qt and not in ICU: |
| 432 | mibs += 2107; // TSCII |
| 433 | |
| 434 | return mibs; |
| 435 | } |
| 436 | |
| 437 | QTextCodec *QIcuCodec::defaultCodecUnlocked() |
| 438 | { |
| 439 | QCoreGlobalData *globalData = QCoreGlobalData::instance(); |
| 440 | if (!globalData) |
| 441 | return nullptr; |
| 442 | QTextCodec *c = globalData->codecForLocale.loadAcquire(); |
| 443 | if (c) |
| 444 | return c; |
| 445 | |
| 446 | #if defined(QT_LOCALE_IS_UTF8) |
| 447 | const char *name = "UTF-8" ; |
| 448 | #else |
| 449 | const char *name = ucnv_getDefaultName(); |
| 450 | #endif |
| 451 | c = codecForNameUnlocked(name); |
| 452 | globalData->codecForLocale.storeRelease(newValue: c); |
| 453 | return c; |
| 454 | } |
| 455 | |
| 456 | |
| 457 | QTextCodec *QIcuCodec::codecForNameUnlocked(const char *name) |
| 458 | { |
| 459 | // backwards compatibility with Qt 4.x |
| 460 | if (!qstrcmp(str1: name, str2: "CP949" )) |
| 461 | name = "windows-949" ; |
| 462 | else if (!qstrcmp(str1: name, str2: "Apple Roman" )) |
| 463 | name = "macintosh" ; |
| 464 | // these are broken data in ICU 4.4, and can't be resolved even though they are aliases to tis-620 |
| 465 | if (!qstrcmp(str1: name, str2: "windows-874-2000" ) |
| 466 | || !qstrcmp(str1: name, str2: "windows-874" ) |
| 467 | || !qstrcmp(str1: name, str2: "MS874" ) |
| 468 | || !qstrcmp(str1: name, str2: "x-windows-874" ) |
| 469 | || !qstrcmp(str1: name, str2: "ISO 8859-11" )) |
| 470 | name = "TIS-620" ; |
| 471 | |
| 472 | UErrorCode error = U_ZERO_ERROR; |
| 473 | // MIME gives better default names |
| 474 | const char *standardName = ucnv_getStandardName(name, standard: "MIME" , pErrorCode: &error); |
| 475 | if (U_FAILURE(code: error) || !standardName) { |
| 476 | error = U_ZERO_ERROR; |
| 477 | standardName = ucnv_getStandardName(name, standard: "IANA" , pErrorCode: &error); |
| 478 | } |
| 479 | bool qt_only = false; |
| 480 | if (U_FAILURE(code: error) || !standardName) { |
| 481 | standardName = name; |
| 482 | qt_only = true; |
| 483 | } else { |
| 484 | // correct some issues where the ICU data set contains duplicated entries. |
| 485 | // Where this happens it's because one data set is a subset of another. We |
| 486 | // always use the larger data set. |
| 487 | |
| 488 | if (qstrcmp(str1: standardName, str2: "GB2312" ) == 0 || qstrcmp(str1: standardName, str2: "GB_2312-80" ) == 0) |
| 489 | standardName = "GBK" ; |
| 490 | else if (qstrcmp(str1: standardName, str2: "KSC_5601" ) == 0 || qstrcmp(str1: standardName, str2: "EUC-KR" ) == 0 || qstrcmp(str1: standardName, str2: "cp1363" ) == 0) |
| 491 | standardName = "windows-949" ; |
| 492 | } |
| 493 | |
| 494 | QCoreGlobalData *globalData = QCoreGlobalData::instance(); |
| 495 | QTextCodecCache *cache = &globalData->codecCache; |
| 496 | |
| 497 | QTextCodec *codec; |
| 498 | if (cache) { |
| 499 | codec = cache->value(akey: standardName); |
| 500 | if (codec) |
| 501 | return codec; |
| 502 | } |
| 503 | |
| 504 | for (TextCodecListConstIt it = globalData->allCodecs.constBegin(), cend = globalData->allCodecs.constEnd(); it != cend; ++it) { |
| 505 | QTextCodec *cursor = *it; |
| 506 | if (qTextCodecNameMatch(n: cursor->name(), h: standardName)) { |
| 507 | if (cache) |
| 508 | cache->insert(akey: standardName, avalue: cursor); |
| 509 | return cursor; |
| 510 | } |
| 511 | QList<QByteArray> aliases = cursor->aliases(); |
| 512 | for (ByteArrayListConstIt ait = aliases.constBegin(), acend = aliases.constEnd(); ait != acend; ++ait) { |
| 513 | if (qTextCodecNameMatch(n: *ait, h: standardName)) { |
| 514 | if (cache) |
| 515 | cache->insert(akey: standardName, avalue: cursor); |
| 516 | return cursor; |
| 517 | } |
| 518 | } |
| 519 | } |
| 520 | |
| 521 | QTextCodec *c = loadQtCodec(name: standardName); |
| 522 | if (c) |
| 523 | return c; |
| 524 | |
| 525 | if (qt_only) |
| 526 | return nullptr; |
| 527 | |
| 528 | // check whether there is really a converter for the name available. |
| 529 | UConverter *conv = ucnv_open(converterName: standardName, err: &error); |
| 530 | if (!conv) { |
| 531 | qDebug(msg: "codecForName: ucnv_open failed %s %s" , standardName, u_errorName(code: error)); |
| 532 | return nullptr; |
| 533 | } |
| 534 | //qDebug() << "QIcuCodec: Standard name for " << name << "is" << standardName; |
| 535 | ucnv_close(converter: conv); |
| 536 | |
| 537 | |
| 538 | c = new QIcuCodec(standardName); |
| 539 | if (cache) |
| 540 | cache->insert(akey: standardName, avalue: c); |
| 541 | return c; |
| 542 | } |
| 543 | |
| 544 | |
| 545 | QTextCodec *QIcuCodec::codecForMibUnlocked(int mib) |
| 546 | { |
| 547 | for (int i = 0; i < mibToNameSize; ++i) { |
| 548 | if (mibToName[i].mib == mib) |
| 549 | return codecForNameUnlocked(name: mibToNameTable + mibToName[i].index); |
| 550 | } |
| 551 | |
| 552 | if (mib == 2107) |
| 553 | return codecForNameUnlocked(name: "TSCII" ); |
| 554 | |
| 555 | return nullptr; |
| 556 | } |
| 557 | |
| 558 | |
| 559 | QIcuCodec::QIcuCodec(const char *name) |
| 560 | : m_name(name) |
| 561 | { |
| 562 | } |
| 563 | |
| 564 | QIcuCodec::~QIcuCodec() |
| 565 | { |
| 566 | } |
| 567 | |
| 568 | /*! |
| 569 | \internal |
| 570 | |
| 571 | Custom callback for the ICU from Unicode conversion. It's invoked when the |
| 572 | conversion from Unicode detects illegal or unrecognized character. |
| 573 | |
| 574 | Assumes that context contains a pointer to QTextCodec::ConverterState |
| 575 | structure. Updates its invalid characters count and calls a default |
| 576 | callback, that replaces the invalid characters properly. |
| 577 | */ |
| 578 | static void customFromUnicodeSubstitutionCallback(const void *context, |
| 579 | UConverterFromUnicodeArgs *fromUArgs, |
| 580 | const UChar *codeUnits, |
| 581 | int32_t length, |
| 582 | UChar32 codePoint, |
| 583 | UConverterCallbackReason reason, |
| 584 | UErrorCode *err) |
| 585 | { |
| 586 | auto *state = reinterpret_cast<QTextCodec::ConverterState *>(const_cast<void *>(context)); |
| 587 | if (state) |
| 588 | state->invalidChars++; |
| 589 | // Call the default callback that replaces all illegal or unrecognized |
| 590 | // sequences with the substitute string |
| 591 | UCNV_FROM_U_CALLBACK_SUBSTITUTE(context: nullptr, fromUArgs, codeUnits, length, codePoint, reason, err); |
| 592 | } |
| 593 | |
| 594 | UConverter *QIcuCodec::getConverter(QTextCodec::ConverterState *state) const |
| 595 | { |
| 596 | UConverter *conv = nullptr; |
| 597 | if (state) { |
| 598 | if (!state->d) { |
| 599 | // first time |
| 600 | state->flags |= QTextCodec::FreeFunction; |
| 601 | QTextCodecUnalignedPointer::encode(dst: state->state_data, fn: qIcuCodecStateFree); |
| 602 | UErrorCode error = U_ZERO_ERROR; |
| 603 | state->d = ucnv_open(converterName: m_name, err: &error); |
| 604 | ucnv_setSubstChars(converter: static_cast<UConverter *>(state->d), |
| 605 | subChars: state->flags & QTextCodec::ConvertInvalidToNull ? "\0" : "?" , len: 1, err: &error); |
| 606 | if (!U_FAILURE(code: error)) { |
| 607 | error = U_ZERO_ERROR; |
| 608 | ucnv_setFromUCallBack(converter: static_cast<UConverter *>(state->d), |
| 609 | newAction: customFromUnicodeSubstitutionCallback, newContext: state, oldAction: nullptr, |
| 610 | oldContext: nullptr, err: &error); |
| 611 | if (U_FAILURE(code: error)) { |
| 612 | qDebug(msg: "getConverter(state) failed to install custom callback. " |
| 613 | "canEncode() may report incorrect results." ); |
| 614 | } |
| 615 | } else { |
| 616 | qDebug(msg: "getConverter(state) ucnv_open failed %s %s" , m_name, u_errorName(code: error)); |
| 617 | } |
| 618 | } |
| 619 | conv = static_cast<UConverter *>(state->d); |
| 620 | } |
| 621 | if (!conv) { |
| 622 | // stateless conversion |
| 623 | UErrorCode error = U_ZERO_ERROR; |
| 624 | conv = ucnv_open(converterName: m_name, err: &error); |
| 625 | ucnv_setSubstChars(converter: conv, subChars: "?" , len: 1, err: &error); |
| 626 | if (U_FAILURE(code: error)) |
| 627 | qDebug(msg: "getConverter(no state) ucnv_open failed %s %s" , m_name, u_errorName(code: error)); |
| 628 | } |
| 629 | return conv; |
| 630 | } |
| 631 | |
| 632 | QString QIcuCodec::convertToUnicode(const char *chars, int length, QTextCodec::ConverterState *state) const |
| 633 | { |
| 634 | UConverter *conv = getConverter(state); |
| 635 | |
| 636 | QString string(length + 2, Qt::Uninitialized); |
| 637 | |
| 638 | const char *end = chars + length; |
| 639 | int convertedChars = 0; |
| 640 | while (1) { |
| 641 | UChar *uc = (UChar *)string.data(); |
| 642 | UChar *ucEnd = uc + string.length(); |
| 643 | uc += convertedChars; |
| 644 | UErrorCode error = U_ZERO_ERROR; |
| 645 | ucnv_toUnicode(converter: conv, |
| 646 | target: &uc, targetLimit: ucEnd, |
| 647 | source: &chars, sourceLimit: end, |
| 648 | offsets: nullptr, flush: false, err: &error); |
| 649 | if (!U_SUCCESS(code: error) && error != U_BUFFER_OVERFLOW_ERROR) { |
| 650 | qDebug(msg: "convertToUnicode failed: %s" , u_errorName(code: error)); |
| 651 | break; |
| 652 | } |
| 653 | |
| 654 | convertedChars = uc - (UChar *)string.data(); |
| 655 | if (chars >= end) |
| 656 | break; |
| 657 | string.resize(size: string.length()*2); |
| 658 | } |
| 659 | string.resize(size: convertedChars); |
| 660 | |
| 661 | if (!state) |
| 662 | ucnv_close(converter: conv); |
| 663 | return string; |
| 664 | } |
| 665 | |
| 666 | |
| 667 | QByteArray QIcuCodec::convertFromUnicode(const QChar *unicode, int length, QTextCodec::ConverterState *state) const |
| 668 | { |
| 669 | UConverter *conv = getConverter(state); |
| 670 | |
| 671 | int requiredLength = UCNV_GET_MAX_BYTES_FOR_STRING(length, ucnv_getMaxCharSize(conv)); |
| 672 | QByteArray string(requiredLength, Qt::Uninitialized); |
| 673 | |
| 674 | const UChar *uc = (const UChar *)unicode; |
| 675 | const UChar *end = uc + length; |
| 676 | int convertedChars = 0; |
| 677 | while (1) { |
| 678 | char *ch = (char *)string.data(); |
| 679 | char *chEnd = ch + string.length(); |
| 680 | ch += convertedChars; |
| 681 | UErrorCode error = U_ZERO_ERROR; |
| 682 | ucnv_fromUnicode(converter: conv, |
| 683 | target: &ch, targetLimit: chEnd, |
| 684 | source: &uc, sourceLimit: end, |
| 685 | offsets: nullptr, flush: false, err: &error); |
| 686 | if (!U_SUCCESS(code: error)) |
| 687 | qDebug(msg: "convertFromUnicode failed: %s" , u_errorName(code: error)); |
| 688 | convertedChars = ch - string.data(); |
| 689 | if (uc >= end) |
| 690 | break; |
| 691 | string.resize(size: string.length()*2); |
| 692 | } |
| 693 | string.resize(size: convertedChars); |
| 694 | |
| 695 | if (!state) |
| 696 | ucnv_close(converter: conv); |
| 697 | |
| 698 | return string; |
| 699 | } |
| 700 | |
| 701 | |
| 702 | QByteArray QIcuCodec::name() const |
| 703 | { |
| 704 | return m_name; |
| 705 | } |
| 706 | |
| 707 | |
| 708 | QList<QByteArray> QIcuCodec::aliases() const |
| 709 | { |
| 710 | UErrorCode error = U_ZERO_ERROR; |
| 711 | |
| 712 | int n = ucnv_countAliases(alias: m_name, pErrorCode: &error); |
| 713 | |
| 714 | QList<QByteArray> aliases; |
| 715 | for (int i = 0; i < n; ++i) { |
| 716 | const char *a = ucnv_getAlias(alias: m_name, n: i, pErrorCode: &error); |
| 717 | // skip the canonical name |
| 718 | if (!a || !qstrcmp(str1: a, str2: m_name)) |
| 719 | continue; |
| 720 | aliases += a; |
| 721 | } |
| 722 | |
| 723 | return aliases; |
| 724 | } |
| 725 | |
| 726 | |
| 727 | int QIcuCodec::mibEnum() const |
| 728 | { |
| 729 | for (int i = 0; i < mibToNameSize; ++i) { |
| 730 | if (qTextCodecNameMatch(n: m_name, h: (mibToNameTable + mibToName[i].index))) |
| 731 | return mibToName[i].mib; |
| 732 | } |
| 733 | |
| 734 | return 0; |
| 735 | } |
| 736 | |
| 737 | QT_END_NAMESPACE |
| 738 | |