1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Copyright (C) 2016 Intel Corporation.
5** Contact: https://www.qt.io/licensing/
6**
7** This file is part of the test suite of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:GPL-EXCEPT$
10** Commercial License Usage
11** Licensees holding valid commercial Qt licenses may use this file in
12** accordance with the commercial license agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and The Qt Company. For licensing terms
15** and conditions see https://www.qt.io/terms-conditions. For further
16** information use the contact form at https://www.qt.io/contact-us.
17**
18** GNU General Public License Usage
19** Alternatively, this file may be used under the terms of the GNU
20** General Public License version 3 as published by the Free Software
21** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
22** included in the packaging of this file. Please review the following
23** information to ensure the GNU General Public License requirements will
24** be met: https://www.gnu.org/licenses/gpl-3.0.html.
25**
26** $QT_END_LICENSE$
27**
28****************************************************************************/
29
30
31#include <QtTest/QtTest>
32
33#include <qtextcodec.h>
34#include <qfile.h>
35#include <time.h>
36#if QT_CONFIG(process)
37# include <qprocess.h>
38#endif
39#include <QThreadPool>
40
41#include <private/qglobal_p.h> // for the icu feature test
42#if QT_CONFIG(icu)
43# include <unicode/uvernum.h>
44#endif
45
46class tst_QTextCodec : public QObject
47{
48 Q_OBJECT
49
50private slots:
51 void threadSafety();
52
53 void toUnicode_data();
54 void toUnicode();
55 void codecForName_data();
56 void codecForName();
57 void fromUnicode_data();
58 void fromUnicode();
59 void toUnicode_codecForHtml();
60 void toUnicode_incremental();
61 void codecForLocale();
62
63 void asciiToIscii() const;
64 void nonFlaggedCodepointFFFF() const;
65 void flagF7808080() const;
66 void nonFlaggedEFBFBF() const;
67 void decode0D() const;
68 void aliasForUTF16() const;
69 void mibForTSCII() const;
70 void codecForTSCII() const;
71 void iso8859_16() const;
72
73 void utf8Codec_data();
74 void utf8Codec();
75
76 void utf8bom_data();
77 void utf8bom();
78
79 void utf8stateful_data();
80 void utf8stateful();
81
82 void utfHeaders_data();
83 void utfHeaders();
84
85 void codecForHtml_data();
86 void codecForHtml();
87
88 void codecForUtfText_data();
89 void codecForUtfText();
90
91#if defined(Q_OS_UNIX)
92 void toLocal8Bit();
93#endif
94
95 void invalidNames();
96 void checkAliases_data();
97 void checkAliases();
98
99 void moreToFromUnicode_data();
100 void moreToFromUnicode();
101
102 void shiftJis();
103 void userCodec();
104
105 void canEncode();
106 void canEncode_data();
107};
108
109void tst_QTextCodec::toUnicode_data()
110{
111 QTest::addColumn<QString>(name: "fileName");
112 QTest::addColumn<QString>(name: "codecName");
113
114 QTest::newRow( dataTag: "korean-eucKR" ) << QFINDTESTDATA("korean.txt") << "eucKR";
115 QTest::newRow( dataTag: "UTF-8" ) << QFINDTESTDATA("utf8.txt") << "UTF-8";
116}
117
118void tst_QTextCodec::toUnicode()
119{
120 QFETCH( QString, fileName );
121 QFETCH( QString, codecName );
122
123 QFile file( fileName );
124
125 if ( file.open( flags: QIODevice::ReadOnly ) ) {
126 QByteArray ba = file.readAll();
127 QVERIFY(!ba.isEmpty());
128 QTextCodec *c = QTextCodec::codecForName( name: codecName.toLatin1() );
129 QVERIFY(c != 0);
130 QString uniString = c->toUnicode( ba );
131 if (codecName == QLatin1String("UTF-8")) {
132 QCOMPARE(uniString, QString::fromUtf8(ba));
133 QCOMPARE(ba, uniString.toUtf8());
134 }
135 QVERIFY(!uniString.isEmpty());
136 QCOMPARE( ba, c->fromUnicode( uniString ) );
137 QCOMPARE(ba, c->fromUnicode(QStringView(uniString)) );
138
139 char ch = '\0';
140 QVERIFY(c->toUnicode(&ch, 1).length() == 1);
141 QVERIFY(c->toUnicode(&ch, 1).at(0).unicode() == 0);
142 } else {
143 QFAIL(qPrintable("File could not be opened: " + file.errorString()));
144 }
145}
146
147void tst_QTextCodec::codecForName_data()
148{
149 QTest::addColumn<QString>(name: "hint");
150 QTest::addColumn<QString>(name: "actualCodecName");
151
152 QTest::newRow(dataTag: "data1") << "iso88591" << "ISO-8859-1";
153 QTest::newRow(dataTag: "data2") << "iso88592" << "ISO-8859-2";
154 QTest::newRow(dataTag: "data3") << " IsO(8)8/5*9-2 " << "ISO-8859-2";
155 QTest::newRow(dataTag: "data4") << " IsO(8)8/5*2-9 " << "";
156 QTest::newRow(dataTag: "data5") << "latin2" << "ISO-8859-2";
157}
158
159void tst_QTextCodec::codecForName()
160{
161 QFETCH(QString, hint);
162 QFETCH(QString, actualCodecName);
163
164 QTextCodec *codec = QTextCodec::codecForName(name: hint.toLatin1());
165 if (actualCodecName.isEmpty()) {
166 QVERIFY(!codec);
167 } else {
168 QVERIFY(codec != 0);
169 QCOMPARE(QString(codec->name()), actualCodecName);
170 }
171}
172
173void tst_QTextCodec::fromUnicode_data()
174{
175 QTest::addColumn<QString>(name: "codecName");
176 QTest::addColumn<bool>(name: "eightBit");
177
178 QTest::newRow(dataTag: "ISO-8859-1") << "ISO-8859-1" << true;
179 QTest::newRow(dataTag: "ISO-8859-2") << "ISO-8859-2" << true;
180 QTest::newRow(dataTag: "ISO-8859-3") << "ISO-8859-3" << true;
181 QTest::newRow(dataTag: "ISO-8859-4") << "ISO-8859-4" << true;
182 QTest::newRow(dataTag: "ISO-8859-5") << "ISO-8859-5" << true;
183 QTest::newRow(dataTag: "ISO-8859-6") << "ISO-8859-6" << true;
184 QTest::newRow(dataTag: "ISO-8859-7") << "ISO-8859-7" << true;
185 QTest::newRow(dataTag: "ISO-8859-8") << "ISO-8859-8" << true;
186 QTest::newRow(dataTag: "ISO-8859-9") << "ISO-8859-9" << true;
187 QTest::newRow(dataTag: "ISO-8859-10") << "ISO-8859-10" << true;
188 QTest::newRow(dataTag: "ISO-8859-13") << "ISO-8859-13" << true;
189 QTest::newRow(dataTag: "ISO-8859-14") << "ISO-8859-14" << true;
190 QTest::newRow(dataTag: "ISO-8859-15") << "ISO-8859-15" << true;
191// QTest::newRow("ISO-8859-16") << "ISO-8859-16" << true;
192
193 QTest::newRow(dataTag: "IBM850") << "IBM850" << true;
194 QTest::newRow(dataTag: "IBM874") << "IBM874" << true;
195 QTest::newRow(dataTag: "IBM866") << "IBM866" << true;
196
197 QTest::newRow(dataTag: "windows-1250") << "windows-1250" << true;
198 QTest::newRow(dataTag: "windows-1251") << "windows-1251" << true;
199 QTest::newRow(dataTag: "windows-1252") << "windows-1252" << true;
200 QTest::newRow(dataTag: "windows-1253") << "windows-1253" << true;
201 QTest::newRow(dataTag: "windows-1254") << "windows-1254" << true;
202 QTest::newRow(dataTag: "windows-1255") << "windows-1255" << true;
203 QTest::newRow(dataTag: "windows-1256") << "windows-1256" << true;
204 QTest::newRow(dataTag: "windows-1257") << "windows-1257" << true;
205 QTest::newRow(dataTag: "windows-1258") << "windows-1258" << true;
206
207 QTest::newRow(dataTag: "Apple Roman") << "Apple Roman" << true;
208 //QTest::newRow("WINSAMI2") << "WINSAMI2" << true;
209 QTest::newRow(dataTag: "TIS-620") << "TIS-620" << true;
210 QTest::newRow(dataTag: "SJIS") << "SJIS" << false;
211
212 // all codecs from documentation
213 QTest::newRow(dataTag: "Big5") << "Big5" << false;
214 QTest::newRow(dataTag: "Big5-HKSCS") << "Big5-HKSCS" << false;
215 QTest::newRow(dataTag: "CP949") << "CP949" << false;
216 QTest::newRow(dataTag: "windows-949") << "windows-949" << false;
217 QTest::newRow(dataTag: "EUC-JP") << "EUC-JP" << false;
218 QTest::newRow(dataTag: "EUC-KR") << "EUC-KR" << false;
219 QTest::newRow(dataTag: "GB18030") << "GB18030" << false;
220 QTest::newRow(dataTag: "HP-ROMAN8") << "HP-ROMAN8" << false;
221 QTest::newRow(dataTag: "IBM 850") << "IBM 850" << false;
222 QTest::newRow(dataTag: "IBM 866") << "IBM 866" << false;
223 QTest::newRow(dataTag: "IBM 874") << "IBM 874" << false;
224 QTest::newRow(dataTag: "ISO 2022-JP") << "ISO 2022-JP" << false;
225 //ISO 8859-1 to 10 and ISO 8859-13 to 16 tested previously
226 // Iscii-Bng, Dev, Gjr, Knd, Mlm, Ori, Pnj, Tlg, and Tml tested in Iscii test
227 QTest::newRow(dataTag: "KOI8-R") << "KOI8-R" << false;
228 QTest::newRow(dataTag: "KOI8-U") << "KOI8-U" << false;
229 QTest::newRow(dataTag: "Macintosh") << "Macintosh" << true;
230 QTest::newRow(dataTag: "Shift-JIS") << "Shift-JIS" << false;
231 QTest::newRow(dataTag: "TIS-620") << "TIS-620" << false;
232 QTest::newRow(dataTag: "TSCII") << "TSCII" << false;
233 QTest::newRow(dataTag: "UTF-8") << "UTF-8" << false;
234 QTest::newRow(dataTag: "UTF-16") << "UTF-16" << false;
235 QTest::newRow(dataTag: "UTF-16BE") << "UTF-16BE" << false;
236 QTest::newRow(dataTag: "UTF-16LE") << "UTF-16LE" << false;
237 QTest::newRow(dataTag: "UTF-32") << "UTF-32" << false;
238 QTest::newRow(dataTag: "UTF-32BE") << "UTF-32BE" << false;
239 QTest::newRow(dataTag: "UTF-32LE") << "UTF-32LE" << false;
240 //Windows-1250 to 1258 tested previously
241}
242
243void tst_QTextCodec::fromUnicode()
244{
245 QFETCH(QString, codecName);
246 QFETCH(bool, eightBit);
247
248 QTextCodec *codec = QTextCodec::codecForName(name: codecName.toLatin1());
249 QVERIFY(codec != 0);
250
251 // Check if the reverse lookup is what we expect
252 if (eightBit) {
253 char chars[128];
254 for (int i = 0; i < 128; ++i)
255 chars[i] = i + 128;
256 QString s = codec->toUnicode(in: chars, length: 128);
257 QByteArray c = codec->fromUnicode(uc: s);
258 QCOMPARE(c.size(), 128);
259
260 int numberOfQuestionMarks = 0;
261 for (int i = 0; i < 128; ++i) {
262 if (c.at(i) == '?')
263 ++numberOfQuestionMarks;
264 else
265 QCOMPARE(c.at(i), char(i + 128));
266 }
267 QVERIFY(numberOfQuestionMarks != 128);
268 }
269
270 /*
271 If the encoding is a superset of ASCII, test that the byte
272 array is correct (no off by one, no trailing '\0').
273 */
274 QByteArray result = codec->fromUnicode(uc: u"abc");
275 if (result.startsWith(c: 'a')) {
276 QCOMPARE(result.size(), 3);
277 QCOMPARE(result, QByteArray("abc"));
278 } else {
279 QVERIFY(true);
280 }
281}
282
283void tst_QTextCodec::toUnicode_codecForHtml()
284{
285 QFile file(QFINDTESTDATA("QT4-crashtest.txt"));
286 QVERIFY(file.open(QFile::ReadOnly));
287
288 QByteArray data = file.readAll();
289 QTextCodec *codec = QTextCodec::codecForHtml(ba: data);
290 codec->toUnicode(data); // this line crashes
291}
292
293
294void tst_QTextCodec::toUnicode_incremental()
295{
296 QByteArray ba;
297 ba += char(0xf0);
298 ba += char(0x90);
299 ba += char(0x80);
300 ba += char(0x80);
301 ba += char(0xf4);
302 ba += char(0x8f);
303 ba += char(0xbf);
304 ba += char(0xbd);
305
306 QString expected = QString::fromUtf8(str: ba);
307
308 QString incremental;
309 QTextDecoder *utf8Decoder = QTextCodec::codecForMib(mib: 106)->makeDecoder();
310
311 QString actual;
312 for (int i = 0; i < ba.size(); ++i)
313 utf8Decoder->toUnicode(target: &actual, chars: ba.constData() + i, len: 1);
314
315 QCOMPARE(actual, expected);
316
317
318 delete utf8Decoder;
319}
320
321void tst_QTextCodec::codecForLocale()
322{
323 QTextCodec *codec = QTextCodec::codecForLocale();
324 QVERIFY(codec != 0);
325
326 // The rest of this test is for Unix only
327#if defined(Q_OS_UNIX)
328 // get a time string that is locale-encoded
329 QByteArray originalLocaleEncodedTimeString;
330 originalLocaleEncodedTimeString.resize(size: 1024);
331 time_t t;
332 time(timer: &t);
333 int r = strftime(s: originalLocaleEncodedTimeString.data(),
334 maxsize: originalLocaleEncodedTimeString.size(),
335 format: "%A%a%B%b%Z",
336 tp: localtime(timer: &t));
337 QVERIFY(r != 0);
338 originalLocaleEncodedTimeString.resize(size: r);
339
340 QString unicodeTimeString = codec->toUnicode(originalLocaleEncodedTimeString);
341 QByteArray localeEncodedTimeString = codec->fromUnicode(uc: unicodeTimeString);
342 QCOMPARE(localeEncodedTimeString, originalLocaleEncodedTimeString);
343
344 // find a codec that is not the codecForLocale()
345 QTextCodec *codec2 = 0;
346 const auto availableMibs = QTextCodec::availableMibs();
347 for (int mib : availableMibs ) {
348 if (mib != codec->mibEnum()) {
349 codec2 = QTextCodec::codecForMib(mib);
350 if (codec2)
351 break;
352 }
353 }
354
355 // Only run the rest of the test if we could find a codec that is not
356 // already the codecForLocale().
357 if (codec2) {
358 // set it, codecForLocale() should return it now
359 QTextCodec::setCodecForLocale(codec2);
360 QCOMPARE(QTextCodec::codecForLocale(), codec2);
361
362 // reset back to the default
363 QTextCodec::setCodecForLocale(0);
364 QCOMPARE(QTextCodec::codecForLocale(), codec);
365 }
366#endif
367}
368
369void tst_QTextCodec::asciiToIscii() const
370{
371 /* Add all low, 7-bit ASCII characters. */
372 QString ascii;
373 const int len = 0xA0 - 1;
374 ascii.resize(size: len);
375
376 for(int i = 0; i < len; ++i)
377 ascii[i] = QChar(i + 1);
378
379 static const char *const isciiCodecs[] =
380 {
381 "Iscii-Mlm",
382 "Iscii-Knd",
383 "Iscii-Tlg",
384 "Iscii-Tml",
385 "Iscii-Ori",
386 "Iscii-Gjr",
387 "Iscii-Pnj",
388 "Iscii-Bng",
389 "Iscii-Dev"
390 };
391 const int isciiCodecsLen = sizeof(isciiCodecs) / sizeof(const char *);
392
393 for(int i = 0; i < isciiCodecsLen; ++i) {
394 /* For each codec. */
395
396 const QTextCodec *const textCodec = QTextCodec::codecForName(name: isciiCodecs[i]);
397 if (!textCodec)
398 QSKIP("No ISCII codecs available.");
399
400 for(int i2 = 0; i2 < len; ++i2) {
401 /* For each character in ascii. */
402 const QChar c(ascii[i2]);
403 QVERIFY2(textCodec->canEncode(c), qPrintable(QString::fromLatin1("Failed to encode %1 with encoding %2")
404 .arg(QString::number(c.unicode()), QString::fromLatin1(textCodec->name().constData()))));
405 }
406
407 QVERIFY2(textCodec->canEncode(ascii), qPrintable(QString::fromLatin1("Failed for full string with encoding %1")
408 .arg(QString::fromLatin1(textCodec->name().constData()))));
409 QVERIFY(textCodec->canEncode(QStringView(ascii)));
410 }
411}
412
413void tst_QTextCodec::nonFlaggedCodepointFFFF() const
414{
415 //Check that the code point 0xFFFF (=non-character code 0xEFBFBF) is not flagged
416 const QChar ch(0xFFFF);
417
418 QTextCodec *const codec = QTextCodec::codecForMib(mib: 106); // UTF-8
419 QVERIFY(codec);
420
421 const QByteArray asDecoded = codec->fromUnicode(uc: QStringView(&ch, 1));
422 QCOMPARE(asDecoded, QByteArray("\357\277\277"));
423
424 QByteArray ffff("\357\277\277");
425 QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
426 QVERIFY(codec->toUnicode(ffff.constData(), ffff.length(), &state) == QByteArray::fromHex("EFBFBF"));
427}
428
429void tst_QTextCodec::flagF7808080() const
430{
431 /* This test case stems from test not-wf-sa-170, tests/qxmlstream/XML-Test-Suite/xmlconf/xmltest/not-wf/sa/166.xml,
432 * whose description reads:
433 *
434 * "Four byte UTF-8 encodings can encode UCS-4 characters
435 * which are beyond the range of legal XML characters
436 * (and can't be expressed in Unicode surrogate pairs).
437 * This document holds such a character."
438 *
439 * In binary, this is:
440 * 11110111100000001000000010000000
441 * * * * *
442 * 11110www10xxxxxx10yyyyyy10zzzzzz
443 *
444 * With multibyte logic removed it is the codepoint 0x1C0000.
445 */
446 QByteArray input;
447 input.resize(size: 4);
448 input[0] = char(0xF7);
449 input[1] = char(0x80);
450 input[2] = char(0x80);
451 input[3] = char(0x80);
452
453 QTextCodec *const codec = QTextCodec::codecForMib(mib: 106); // UTF-8
454 QVERIFY(codec);
455
456 //QVERIFY(!codec->canEncode(QChar(0x1C0000)));
457
458 QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
459 QCOMPARE(codec->toUnicode(input.constData(), input.length(), &state), QString(input.size(), QChar(0)));
460}
461
462void tst_QTextCodec::nonFlaggedEFBFBF() const
463{
464 /* Check that the codec does NOT flag EFBFBF.
465 * This is a regression test; see QTBUG-33229
466 */
467 QByteArray validInput;
468 validInput.resize(size: 3);
469 validInput[0] = char(0xEF);
470 validInput[1] = char(0xBF);
471 validInput[2] = char(0xBF);
472
473 const QTextCodec *const codec = QTextCodec::codecForMib(mib: 106); // UTF-8
474 QVERIFY(codec);
475
476 {
477 //QVERIFY(!codec->canEncode(QChar(0xFFFF)));
478 QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
479 QVERIFY(codec->toUnicode(validInput.constData(), validInput.length(), &state) == QByteArray::fromHex("EFBFBF"));
480
481 QByteArray start("<?pi ");
482 start.append(a: validInput);
483 start.append(s: "?>");
484 }
485
486 // Check that 0xEFBFBF is correctly decoded when preceded by an arbitrary character
487 {
488 QByteArray start("B");
489 start.append(a: validInput);
490
491 QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
492 QVERIFY(codec->toUnicode(start.constData(), start.length(), &state) == QByteArray("B").append(QByteArray::fromHex("EFBFBF")));
493 }
494}
495
496void tst_QTextCodec::decode0D() const
497{
498 QByteArray input;
499 input.resize(size: 3);
500 input[0] = 'A';
501 input[1] = '\r';
502 input[2] = 'B';
503
504 QCOMPARE(QString::fromUtf8(input.constData()).toUtf8(), input);
505}
506
507void tst_QTextCodec::aliasForUTF16() const
508{
509 QVERIFY(QTextCodec::codecForName("UTF-16")->aliases().isEmpty());
510}
511
512void tst_QTextCodec::mibForTSCII() const
513{
514 QTextCodec *codec = QTextCodec::codecForName(name: "TSCII");
515 QVERIFY(codec);
516 QCOMPARE(codec->mibEnum(), 2107);
517}
518
519void tst_QTextCodec::codecForTSCII() const
520{
521 QTextCodec *codec = QTextCodec::codecForMib(mib: 2107);
522 QVERIFY(codec);
523 QCOMPARE(codec->mibEnum(), 2107);
524}
525
526void tst_QTextCodec::iso8859_16() const
527{
528 QTextCodec *codec = QTextCodec::codecForName(name: "ISO8859-16");
529 QVERIFY(codec);
530 QCOMPARE(codec->name(), QByteArray("ISO-8859-16"));
531}
532
533static QString fromInvalidUtf8Sequence(const QByteArray &ba)
534{
535 return QString().fill(c: QChar::ReplacementCharacter, size: ba.size());
536}
537
538// copied from tst_QString::fromUtf8_data()
539void tst_QTextCodec::utf8Codec_data()
540{
541 QTest::addColumn<QByteArray>(name: "utf8");
542 QTest::addColumn<QString>(name: "res");
543 QTest::addColumn<int>(name: "len");
544 QString str;
545
546 QTest::newRow(dataTag: "str0") << QByteArray("abcdefgh") << QString("abcdefgh") << -1;
547 QTest::newRow(dataTag: "str0-len") << QByteArray("abcdefgh") << QString("abc") << 3;
548 QTest::newRow(dataTag: "str1") << QByteArray("\303\266\303\244\303\274\303\226\303\204\303\234\303\270\303\246\303\245\303\230\303\206\303\205")
549 << QString::fromLatin1(str: "\366\344\374\326\304\334\370\346\345\330\306\305") << -1;
550 QTest::newRow(dataTag: "str1-len") << QByteArray("\303\266\303\244\303\274\303\226\303\204\303\234\303\270\303\246\303\245\303\230\303\206\303\205")
551 << QString::fromLatin1(str: "\366\344\374\326\304") << 10;
552
553 str += QChar(0x05e9);
554 str += QChar(0x05d3);
555 str += QChar(0x05d2);
556 QTest::newRow(dataTag: "str2") << QByteArray("\327\251\327\223\327\222") << str << -1;
557
558 str = QChar(0x05e9);
559 QTest::newRow(dataTag: "str2-len") << QByteArray("\327\251\327\223\327\222") << str << 2;
560
561 str = QChar(0x20ac);
562 str += " some text";
563 QTest::newRow(dataTag: "str3") << QByteArray("\342\202\254 some text") << str << -1;
564
565 str = QChar(0x20ac);
566 str += " some ";
567 QTest::newRow(dataTag: "str3-len") << QByteArray("\342\202\254 some text") << str << 9;
568
569 str = "hello";
570 str += QChar::ReplacementCharacter;
571 str += QChar(0x68);
572 str += QChar::ReplacementCharacter;
573 str += QChar::ReplacementCharacter;
574 str += QChar::ReplacementCharacter;
575 str += QChar::ReplacementCharacter;
576 str += QChar(0x61);
577 str += QChar::ReplacementCharacter;
578 QTest::newRow(dataTag: "invalid utf8") << QByteArray("hello\344h\344\344\366\344a\304") << str << -1;
579 QTest::newRow(dataTag: "invalid utf8-len") << QByteArray("hello\344h\344\344\366\344a\304") << QString("hello") << 5;
580
581 str = "Prohl";
582 str += QChar::ReplacementCharacter;
583 str += QChar::ReplacementCharacter;
584 str += QLatin1Char('e');
585 str += QChar::ReplacementCharacter;
586 str += " plugin";
587 str += QChar::ReplacementCharacter;
588 str += " Netscape";
589
590 QTest::newRow(dataTag: "task28417") << QByteArray("Prohl\355\276e\350 plugin\371 Netscape") << str << -1;
591 QTest::newRow(dataTag: "task28417-len") << QByteArray("Prohl\355\276e\350 plugin\371 Netscape") << QString("") << 0;
592
593 QTest::newRow(dataTag: "null-1") << QByteArray() << QString() << -1;
594 QTest::newRow(dataTag: "null0") << QByteArray() << QString() << 0;
595 // QTest::newRow("null5") << QByteArray() << QString() << 5;
596 QTest::newRow(dataTag: "empty-1") << QByteArray("\0abcd", 5) << QString() << -1;
597 QTest::newRow(dataTag: "empty0") << QByteArray() << QString() << 0;
598 QTest::newRow(dataTag: "empty5") << QByteArray("\0abcd", 5) << QString::fromLatin1(str: "\0abcd", size: 5) << 5;
599 QTest::newRow(dataTag: "other-1") << QByteArray("ab\0cd", 5) << QString::fromLatin1(str: "ab") << -1;
600 QTest::newRow(dataTag: "other5") << QByteArray("ab\0cd", 5) << QString::fromLatin1(str: "ab\0cd", size: 5) << 5;
601
602 str = "Old Italic: ";
603 str += QChar(0xd800);
604 str += QChar(0xdf00);
605 str += QChar(0xd800);
606 str += QChar(0xdf01);
607 str += QChar(0xd800);
608 str += QChar(0xdf02);
609 str += QChar(0xd800);
610 str += QChar(0xdf03);
611 str += QChar(0xd800);
612 str += QChar(0xdf04);
613 QTest::newRow(dataTag: "surrogate") << QByteArray("Old Italic: \360\220\214\200\360\220\214\201\360\220\214\202\360\220\214\203\360\220\214\204") << str << -1;
614
615 QTest::newRow(dataTag: "surrogate-len") << QByteArray("Old Italic: \360\220\214\200\360\220\214\201\360\220\214\202\360\220\214\203\360\220\214\204") << str.left(n: 16) << 20;
616
617 // from http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html
618
619 // 2.1.1 U+00000000
620 QByteArray utf8;
621 utf8 += char(0x00);
622 str = QChar(QChar::Null);
623 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.1") << utf8 << str << 1;
624
625 // 2.1.2 U+00000080
626 utf8.clear();
627 utf8 += char(0xc2);
628 utf8 += char(0x80);
629 str = QChar(0x80);
630 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.2") << utf8 << str << -1;
631
632 // 2.1.3 U+00000800
633 utf8.clear();
634 utf8 += char(0xe0);
635 utf8 += char(0xa0);
636 utf8 += char(0x80);
637 str = QChar(0x800);
638 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.3") << utf8 << str << -1;
639
640 // 2.1.4 U+00010000
641 utf8.clear();
642 utf8 += char(0xf0);
643 utf8 += char(0x90);
644 utf8 += char(0x80);
645 utf8 += char(0x80);
646 str.clear();
647 str += QChar(0xd800);
648 str += QChar(0xdc00);
649 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.4") << utf8 << str << -1;
650
651 // 2.1.5 U+00200000 (not a valid Unicode character)
652 utf8.clear();
653 utf8 += char(0xf8);
654 utf8 += char(0x88);
655 utf8 += char(0x80);
656 utf8 += char(0x80);
657 utf8 += char(0x80);
658 str = fromInvalidUtf8Sequence(ba: utf8);
659 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.5") << utf8 << str << -1;
660
661 // 2.1.6 U+04000000 (not a valid Unicode character)
662 utf8.clear();
663 utf8 += char(0xfc);
664 utf8 += char(0x84);
665 utf8 += char(0x80);
666 utf8 += char(0x80);
667 utf8 += char(0x80);
668 utf8 += char(0x80);
669 str = fromInvalidUtf8Sequence(ba: utf8);
670 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.6") << utf8 << str << -1;
671
672 // 2.2.1 U+0000007F
673 utf8.clear();
674 utf8 += char(0x7f);
675 str = QChar(0x7f);
676 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.1") << utf8 << str << -1;
677
678 // 2.2.2 U+000007FF
679 utf8.clear();
680 utf8 += char(0xdf);
681 utf8 += char(0xbf);
682 str = QChar(0x7ff);
683 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.2") << utf8 << str << -1;
684
685 // 2.2.3 U+000FFFF - non-character code
686 utf8.clear();
687 utf8 += char(0xef);
688 utf8 += char(0xbf);
689 utf8 += char(0xbf);
690 str = QString::fromUtf8(str: utf8);
691 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.3") << utf8 << str << -1;
692
693 // 2.2.4 U+001FFFFF
694 utf8.clear();
695 utf8 += char(0xf7);
696 utf8 += char(0xbf);
697 utf8 += char(0xbf);
698 utf8 += char(0xbf);
699 str = fromInvalidUtf8Sequence(ba: utf8);
700 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.4") << utf8 << str << -1;
701
702 // 2.2.5 U+03FFFFFF (not a valid Unicode character)
703 utf8.clear();
704 utf8 += char(0xfb);
705 utf8 += char(0xbf);
706 utf8 += char(0xbf);
707 utf8 += char(0xbf);
708 utf8 += char(0xbf);
709 str = fromInvalidUtf8Sequence(ba: utf8);
710 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.5") << utf8 << str << -1;
711
712 // 2.2.6 U+7FFFFFFF
713 utf8.clear();
714 utf8 += char(0xfd);
715 utf8 += char(0xbf);
716 utf8 += char(0xbf);
717 utf8 += char(0xbf);
718 utf8 += char(0xbf);
719 utf8 += char(0xbf);
720 str = fromInvalidUtf8Sequence(ba: utf8);
721 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.6") << utf8 << str << -1;
722
723 // 2.3.1 U+0000D7FF
724 utf8.clear();
725 utf8 += char(0xed);
726 utf8 += char(0x9f);
727 utf8 += char(0xbf);
728 str = QChar(0xd7ff);
729 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.1") << utf8 << str << -1;
730
731 // 2.3.2 U+0000E000
732 utf8.clear();
733 utf8 += char(0xee);
734 utf8 += char(0x80);
735 utf8 += char(0x80);
736 str = QChar(0xe000);
737 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.2") << utf8 << str << -1;
738
739 // 2.3.3 U+0000FFFD
740 utf8.clear();
741 utf8 += char(0xef);
742 utf8 += char(0xbf);
743 utf8 += char(0xbd);
744 str = QChar(QChar::ReplacementCharacter);
745 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.3") << utf8 << str << -1;
746
747 // 2.3.4 U+0010FFFD
748 utf8.clear();
749 utf8 += char(0xf4);
750 utf8 += char(0x8f);
751 utf8 += char(0xbf);
752 utf8 += char(0xbd);
753 str.clear();
754 str += QChar(0xdbff);
755 str += QChar(0xdffd);
756 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.4") << utf8 << str << -1;
757
758 // 2.3.5 U+00110000
759 utf8.clear();
760 utf8 += char(0xf4);
761 utf8 += char(0x90);
762 utf8 += char(0x80);
763 utf8 += char(0x80);
764 str = fromInvalidUtf8Sequence(ba: utf8);
765 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.5") << utf8 << str << -1;
766
767 // 3.1.1
768 utf8.clear();
769 utf8 += char(0x80);
770 str = fromInvalidUtf8Sequence(ba: utf8);
771 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.1") << utf8 << str << -1;
772
773 // 3.1.2
774 utf8.clear();
775 utf8 += char(0xbf);
776 str = fromInvalidUtf8Sequence(ba: utf8);
777 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.2") << utf8 << str << -1;
778
779 // 3.1.3
780 utf8.clear();
781 utf8 += char(0x80);
782 utf8 += char(0xbf);
783 str = fromInvalidUtf8Sequence(ba: utf8);
784 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.3") << utf8 << str << -1;
785
786 // 3.1.4
787 utf8.clear();
788 utf8 += char(0x80);
789 utf8 += char(0xbf);
790 utf8 += char(0x80);
791 str = fromInvalidUtf8Sequence(ba: utf8);
792 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.4") << utf8 << str << -1;
793
794 // 3.1.5
795 utf8.clear();
796 utf8 += char(0x80);
797 utf8 += char(0xbf);
798 utf8 += char(0x80);
799 utf8 += char(0xbf);
800 str = fromInvalidUtf8Sequence(ba: utf8);
801 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.5") << utf8 << str << -1;
802
803 // 3.1.6
804 utf8.clear();
805 utf8 += char(0x80);
806 utf8 += char(0xbf);
807 utf8 += char(0x80);
808 utf8 += char(0xbf);
809 utf8 += char(0x80);
810 str = fromInvalidUtf8Sequence(ba: utf8);
811 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.6") << utf8 << str << -1;
812
813 // 3.1.7
814 utf8.clear();
815 utf8 += char(0x80);
816 utf8 += char(0xbf);
817 utf8 += char(0x80);
818 utf8 += char(0xbf);
819 utf8 += char(0x80);
820 utf8 += char(0xbf);
821 str = fromInvalidUtf8Sequence(ba: utf8);
822 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.7") << utf8 << str << -1;
823
824 // 3.1.8
825 utf8.clear();
826 utf8 += char(0x80);
827 utf8 += char(0xbf);
828 utf8 += char(0x80);
829 utf8 += char(0xbf);
830 utf8 += char(0x80);
831 utf8 += char(0xbf);
832 utf8 += char(0x80);
833 str = fromInvalidUtf8Sequence(ba: utf8);
834 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.8") << utf8 << str << -1;
835
836 // 3.1.9
837 utf8.clear();
838 for (uint i = 0x80; i<= 0xbf; ++i)
839 utf8 += i;
840 str = fromInvalidUtf8Sequence(ba: utf8);
841 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.9") << utf8 << str << -1;
842
843 // 3.2.1
844 utf8.clear();
845 str.clear();
846 for (uint i = 0xc8; i <= 0xdf; ++i) {
847 utf8 += i;
848 utf8 += char(0x20);
849
850 str += QChar::ReplacementCharacter;
851 str += QChar(0x0020);
852 }
853 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.2.1") << utf8 << str << -1;
854
855 // 3.2.2
856 utf8.clear();
857 str.clear();
858 for (uint i = 0xe0; i <= 0xef; ++i) {
859 utf8 += i;
860 utf8 += char(0x20);
861
862 str += QChar::ReplacementCharacter;
863 str += QChar(0x0020);
864 }
865 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.2.2") << utf8 << str << -1;
866
867 // 3.2.3
868 utf8.clear();
869 str.clear();
870 for (uint i = 0xf0; i <= 0xf7; ++i) {
871 utf8 += i;
872 utf8 += 0x20;
873
874 str += QChar::ReplacementCharacter;
875 str += QChar(0x0020);
876 }
877 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.2.3") << utf8 << str << -1;
878
879 // 3.2.4
880 utf8.clear();
881 str.clear();
882 for (uint i = 0xf8; i <= 0xfb; ++i) {
883 utf8 += i;
884 utf8 += 0x20;
885
886 str += QChar::ReplacementCharacter;
887 str += QChar(0x0020);
888 }
889 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.2.4") << utf8 << str << -1;
890
891 // 3.2.5
892 utf8.clear();
893 str.clear();
894 for (uint i = 0xfc; i <= 0xfd; ++i) {
895 utf8 += i;
896 utf8 += 0x20;
897
898 str += QChar::ReplacementCharacter;
899 str += QChar(0x0020);
900 }
901 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.2.5") << utf8 << str << -1;
902
903 // 3.3.1
904 utf8.clear();
905 utf8 += char(0xc0);
906 str = fromInvalidUtf8Sequence(ba: utf8);
907 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.1") << utf8 << str << -1;
908 utf8 += char(0x30);
909 str += 0x30;
910 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.1-1") << utf8 << str << -1;
911
912 // 3.3.2
913 utf8.clear();
914 utf8 += char(0xe0);
915 utf8 += char(0x80);
916 str = fromInvalidUtf8Sequence(ba: utf8);
917 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.2") << utf8 << str << -1;
918 utf8 += char(0x30);
919 str += 0x30;
920 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.2-1") << utf8 << str << -1;
921
922 utf8.clear();
923 utf8 += char(0xe0);
924 str = fromInvalidUtf8Sequence(ba: utf8);
925 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.2-2") << utf8 << str << -1;
926 utf8 += 0x30;
927 str += 0x30;
928 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.2-3") << utf8 << str << -1;
929
930 // 3.3.3
931 utf8.clear();
932 utf8 += char(0xf0);
933 utf8 += char(0x80);
934 utf8 += char(0x80);
935 str = fromInvalidUtf8Sequence(ba: utf8);
936 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3") << utf8 << str << -1;
937 utf8 += char(0x30);
938 str += 0x30;
939 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3-1") << utf8 << str << -1;
940
941 utf8.clear();
942 utf8 += char(0xf0);
943 str = fromInvalidUtf8Sequence(ba: utf8);
944 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3-2") << utf8 << str << -1;
945 utf8 += char(0x30);
946 str += 0x30;
947 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3-3") << utf8 << str << -1;
948
949 utf8.clear();
950 utf8 += char(0xf0);
951 utf8 += char(0x80);
952 str = fromInvalidUtf8Sequence(ba: utf8);
953 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3-4") << utf8 << str << -1;
954 utf8 += char(0x30);
955 str += 0x30;
956 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3-5") << utf8 << str << -1;
957
958 // 3.3.4
959 utf8.clear();
960 utf8 += char(0xf8);
961 utf8 += char(0x80);
962 utf8 += char(0x80);
963 utf8 += char(0x80);
964 str = fromInvalidUtf8Sequence(ba: utf8);
965 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4") << utf8 << str << -1;
966 utf8 += char(0x30);
967 str += 0x30;
968 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-1") << utf8 << str << -1;
969
970 utf8.clear();
971 utf8 += char(0xf8);
972 utf8 += char(0x80);
973 utf8 += char(0x80);
974 str = fromInvalidUtf8Sequence(ba: utf8);
975 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-2") << utf8 << str << -1;
976 utf8 += char(0x30);
977 str += 0x30;
978 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-3") << utf8 << str << -1;
979
980 utf8.clear();
981 utf8 += char(0xf8);
982 utf8 += char(0x80);
983 str = fromInvalidUtf8Sequence(ba: utf8);
984 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-4") << utf8 << str << -1;
985 utf8 += char(0x30);
986 str += 0x30;
987 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-5") << utf8 << str << -1;
988
989 utf8.clear();
990 utf8 += char(0xf8);
991 str = fromInvalidUtf8Sequence(ba: utf8);
992 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-6") << utf8 << str << -1;
993 utf8 += char(0x30);
994 str += 0x30;
995 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-7") << utf8 << str << -1;
996
997 // 3.3.5
998 utf8.clear();
999 utf8 += char(0xfc);
1000 utf8 += char(0x80);
1001 utf8 += char(0x80);
1002 utf8 += char(0x80);
1003 utf8 += char(0x80);
1004 str = fromInvalidUtf8Sequence(ba: utf8);
1005 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5") << utf8 << str << -1;
1006 utf8 += char(0x30);
1007 str += 0x30;
1008 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-1") << utf8 << str << -1;
1009
1010 utf8.clear();
1011 utf8 += char(0xfc);
1012 utf8 += char(0x80);
1013 utf8 += char(0x80);
1014 utf8 += char(0x80);
1015 str = fromInvalidUtf8Sequence(ba: utf8);
1016 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-2") << utf8 << str << -1;
1017 utf8 += char(0x30);
1018 str += 0x30;
1019 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-3") << utf8 << str << -1;
1020
1021 utf8.clear();
1022 utf8 += char(0xfc);
1023 utf8 += char(0x80);
1024 utf8 += char(0x80);
1025 str = fromInvalidUtf8Sequence(ba: utf8);
1026 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-4") << utf8 << str << -1;
1027 utf8 += char(0x30);
1028 str += 0x30;
1029 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-5") << utf8 << str << -1;
1030
1031 utf8.clear();
1032 utf8 += char(0xfc);
1033 utf8 += char(0x80);
1034 str = fromInvalidUtf8Sequence(ba: utf8);
1035 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-6") << utf8 << str << -1;
1036 utf8 += char(0x30);
1037 str += 0x30;
1038 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-7") << utf8 << str << -1;
1039
1040 utf8.clear();
1041 utf8 += char(0xfc);
1042 str = fromInvalidUtf8Sequence(ba: utf8);
1043 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-8") << utf8 << str << -1;
1044 utf8 += char(0x30);
1045 str += 0x30;
1046 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-9") << utf8 << str << -1;
1047
1048 // 3.3.6
1049 utf8.clear();
1050 utf8 += char(0xdf);
1051 str = fromInvalidUtf8Sequence(ba: utf8);
1052 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.6") << utf8 << str << -1;
1053 utf8 += char(0x30);
1054 str += 0x30;
1055 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.6-1") << utf8 << str << -1;
1056
1057 // 3.3.7
1058 utf8.clear();
1059 utf8 += char(0xef);
1060 utf8 += char(0xbf);
1061 str = fromInvalidUtf8Sequence(ba: utf8);
1062 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.7") << utf8 << str << -1;
1063 utf8 += char(0x30);
1064 str += 0x30;
1065 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.7-1") << utf8 << str << -1;
1066
1067 utf8.clear();
1068 utf8 += char(0xef);
1069 str = fromInvalidUtf8Sequence(ba: utf8);
1070 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.7-2") << utf8 << str << -1;
1071 utf8 += char(0x30);
1072 str += 0x30;
1073 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.7-3") << utf8 << str << -1;
1074
1075 // 3.3.8
1076 utf8.clear();
1077 utf8 += char(0xf7);
1078 utf8 += char(0xbf);
1079 utf8 += char(0xbf);
1080 str = fromInvalidUtf8Sequence(ba: utf8);
1081 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8") << utf8 << str << -1;
1082 utf8 += char(0x30);
1083 str += 0x30;
1084 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8-1") << utf8 << str << -1;
1085
1086 utf8.clear();
1087 utf8 += char(0xf7);
1088 utf8 += char(0xbf);
1089 str = fromInvalidUtf8Sequence(ba: utf8);
1090 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8-2") << utf8 << str << -1;
1091 utf8 += char(0x30);
1092 str += 0x30;
1093 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8-3") << utf8 << str << -1;
1094
1095 utf8.clear();
1096 utf8 += char(0xf7);
1097 str = fromInvalidUtf8Sequence(ba: utf8);
1098 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8-4") << utf8 << str << -1;
1099 utf8 += char(0x30);
1100 str += 0x30;
1101 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8-5") << utf8 << str << -1;
1102
1103 // 3.3.9
1104 utf8.clear();
1105 utf8 += char(0xfb);
1106 utf8 += char(0xbf);
1107 utf8 += char(0xbf);
1108 utf8 += char(0xbf);
1109 str = fromInvalidUtf8Sequence(ba: utf8);
1110 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9") << utf8 << str << -1;
1111 utf8 += char(0x30);
1112 str += 0x30;
1113 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-1") << utf8 << str << -1;
1114
1115 utf8.clear();
1116 utf8 += char(0xfb);
1117 utf8 += char(0xbf);
1118 utf8 += char(0xbf);
1119 str = fromInvalidUtf8Sequence(ba: utf8);
1120 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-2") << utf8 << str << -1;
1121 utf8 += char(0x30);
1122 str += 0x30;
1123 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-3") << utf8 << str << -1;
1124
1125 utf8.clear();
1126 utf8 += char(0xfb);
1127 utf8 += char(0xbf);
1128 str = fromInvalidUtf8Sequence(ba: utf8);
1129 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-4") << utf8 << str << -1;
1130 utf8 += char(0x30);
1131 str += 0x30;
1132 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-5") << utf8 << str << -1;
1133
1134 utf8.clear();
1135 utf8 += char(0xfb);
1136 str = fromInvalidUtf8Sequence(ba: utf8);
1137 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-6") << utf8 << str << -1;
1138 utf8 += char(0x30);
1139 str += 0x30;
1140 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-7") << utf8 << str << -1;
1141
1142 // 3.3.10
1143 utf8.clear();
1144 utf8 += char(0xfd);
1145 utf8 += char(0xbf);
1146 utf8 += char(0xbf);
1147 utf8 += char(0xbf);
1148 utf8 += char(0xbf);
1149 str = fromInvalidUtf8Sequence(ba: utf8);
1150 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10") << utf8 << str << -1;
1151 utf8 += char(0x30);
1152 str += 0x30;
1153 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-1") << utf8 << str << -1;
1154
1155 utf8.clear();
1156 utf8 += char(0xfd);
1157 utf8 += char(0xbf);
1158 utf8 += char(0xbf);
1159 utf8 += char(0xbf);
1160 str = fromInvalidUtf8Sequence(ba: utf8);
1161 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-2") << utf8 << str << -1;
1162 utf8 += char(0x30);
1163 str += 0x30;
1164 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-3") << utf8 << str << -1;
1165
1166 utf8.clear();
1167 utf8 += char(0xfd);
1168 utf8 += char(0xbf);
1169 utf8 += char(0xbf);
1170 str = fromInvalidUtf8Sequence(ba: utf8);
1171 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-4") << utf8 << str << -1;
1172 utf8 += char(0x30);
1173 str += 0x30;
1174 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-5") << utf8 << str << -1;
1175
1176 utf8.clear();
1177 utf8 += char(0xfd);
1178 utf8 += char(0xbf);
1179 str = fromInvalidUtf8Sequence(ba: utf8);
1180 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-6") << utf8 << str << -1;
1181 utf8 += char(0x30);
1182 str += 0x30;
1183 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-7") << utf8 << str << -1;
1184
1185 utf8.clear();
1186 utf8 += char(0xfd);
1187 str = fromInvalidUtf8Sequence(ba: utf8);
1188 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-8") << utf8 << str << -1;
1189 utf8 += char(0x30);
1190 str += 0x30;
1191 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-9") << utf8 << str << -1;
1192
1193 // 3.4
1194 utf8.clear();
1195 utf8 += char(0xc0);
1196 utf8 += char(0xe0);
1197 utf8 += char(0x80);
1198 utf8 += char(0xf0);
1199 utf8 += char(0x80);
1200 utf8 += char(0x80);
1201 utf8 += char(0xf8);
1202 utf8 += char(0x80);
1203 utf8 += char(0x80);
1204 utf8 += char(0x80);
1205 utf8 += char(0xfc);
1206 utf8 += char(0x80);
1207 utf8 += char(0x80);
1208 utf8 += char(0x80);
1209 utf8 += char(0x80);
1210 utf8 += char(0xdf);
1211 utf8 += char(0xef);
1212 utf8 += char(0xbf);
1213 utf8 += char(0xf7);
1214 utf8 += char(0xbf);
1215 utf8 += char(0xbf);
1216 utf8 += char(0xfb);
1217 utf8 += char(0xbf);
1218 utf8 += char(0xbf);
1219 utf8 += char(0xbf);
1220 utf8 += char(0xfd);
1221 utf8 += char(0xbf);
1222 utf8 += char(0xbf);
1223 utf8 += char(0xbf);
1224 utf8 += char(0xbf);
1225 str = fromInvalidUtf8Sequence(ba: utf8);
1226 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.4") << utf8 << str << -1;
1227
1228 // 3.5.1
1229 utf8.clear();
1230 utf8 += char(0xfe);
1231 str = fromInvalidUtf8Sequence(ba: utf8);
1232 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.5.1") << utf8 << str << -1;
1233
1234 // 3.5.2
1235 utf8.clear();
1236 utf8 += char(0xff);
1237 str = fromInvalidUtf8Sequence(ba: utf8);
1238 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.5.2") << utf8 << str << -1;
1239
1240 // 3.5.2
1241 utf8.clear();
1242 utf8 += char(0xfe);
1243 utf8 += char(0xfe);
1244 utf8 += char(0xff);
1245 str = fromInvalidUtf8Sequence(ba: utf8);
1246 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.5.2-1") << utf8 << str << -1;
1247
1248 // 4.1.1
1249 utf8.clear();
1250 utf8 += char(0xc0);
1251 utf8 += char(0xaf);
1252 str = fromInvalidUtf8Sequence(ba: utf8);
1253 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.1") << utf8 << str << -1;
1254
1255 // 4.1.2
1256 utf8.clear();
1257 utf8 += char(0xe0);
1258 utf8 += char(0x80);
1259 utf8 += char(0xaf);
1260 str = fromInvalidUtf8Sequence(ba: utf8);
1261 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.2") << utf8 << str << -1;
1262
1263 // 4.1.3
1264 utf8.clear();
1265 utf8 += char(0xf0);
1266 utf8 += char(0x80);
1267 utf8 += char(0x80);
1268 utf8 += char(0xaf);
1269 str = fromInvalidUtf8Sequence(ba: utf8);
1270 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.3") << utf8 << str << -1;
1271
1272 // 4.1.4
1273 utf8.clear();
1274 utf8 += char(0xf8);
1275 utf8 += char(0x80);
1276 utf8 += char(0x80);
1277 utf8 += char(0x80);
1278 utf8 += char(0xaf);
1279 str = fromInvalidUtf8Sequence(ba: utf8);
1280 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.4") << utf8 << str << -1;
1281
1282 // 4.1.5
1283 utf8.clear();
1284 utf8 += char(0xfc);
1285 utf8 += char(0x80);
1286 utf8 += char(0x80);
1287 utf8 += char(0x80);
1288 utf8 += char(0x80);
1289 utf8 += char(0xaf);
1290 str = fromInvalidUtf8Sequence(ba: utf8);
1291 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.5") << utf8 << str << -1;
1292
1293 // 4.2.1
1294 utf8.clear();
1295 utf8 += char(0xc1);
1296 utf8 += char(0xbf);
1297 str = fromInvalidUtf8Sequence(ba: utf8);
1298 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.1") << utf8 << str << -1;
1299
1300 // 4.2.2
1301 utf8.clear();
1302 utf8 += char(0xe0);
1303 utf8 += char(0x9f);
1304 utf8 += char(0xbf);
1305 str = fromInvalidUtf8Sequence(ba: utf8);
1306 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.2") << utf8 << str << -1;
1307
1308 // 4.2.3
1309 utf8.clear();
1310 utf8 += char(0xf0);
1311 utf8 += char(0x8f);
1312 utf8 += char(0xbf);
1313 utf8 += char(0xbf);
1314 str = fromInvalidUtf8Sequence(ba: utf8);
1315 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.3") << utf8 << str << -1;
1316
1317 // 4.2.4
1318 utf8.clear();
1319 utf8 += char(0xf8);
1320 utf8 += char(0x87);
1321 utf8 += char(0xbf);
1322 utf8 += char(0xbf);
1323 utf8 += char(0xbf);
1324 str = fromInvalidUtf8Sequence(ba: utf8);
1325 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.4") << utf8 << str << -1;
1326
1327 // 4.2.5
1328 utf8.clear();
1329 utf8 += char(0xfc);
1330 utf8 += char(0x83);
1331 utf8 += char(0xbf);
1332 utf8 += char(0xbf);
1333 utf8 += char(0xbf);
1334 utf8 += char(0xbf);
1335 str = fromInvalidUtf8Sequence(ba: utf8);
1336 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.5") << utf8 << str << -1;
1337
1338 // 4.3.1
1339 utf8.clear();
1340 utf8 += char(0xc0);
1341 utf8 += char(0x80);
1342 str = fromInvalidUtf8Sequence(ba: utf8);
1343 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.1") << utf8 << str << -1;
1344
1345 // 4.3.2
1346 utf8.clear();
1347 utf8 += char(0xe0);
1348 utf8 += char(0x80);
1349 utf8 += char(0x80);
1350 str = fromInvalidUtf8Sequence(ba: utf8);
1351 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.2") << utf8 << str << -1;
1352
1353 // 4.3.3
1354 utf8.clear();
1355 utf8 += char(0xf0);
1356 utf8 += char(0x80);
1357 utf8 += char(0x80);
1358 utf8 += char(0x80);
1359 str = fromInvalidUtf8Sequence(ba: utf8);
1360 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.3") << utf8 << str << -1;
1361
1362 // 4.3.4
1363 utf8.clear();
1364 utf8 += char(0xf8);
1365 utf8 += char(0x80);
1366 utf8 += char(0x80);
1367 utf8 += char(0x80);
1368 utf8 += char(0x80);
1369 str = fromInvalidUtf8Sequence(ba: utf8);
1370 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.4") << utf8 << str << -1;
1371
1372 // 4.3.5
1373 utf8.clear();
1374 utf8 += char(0xfc);
1375 utf8 += char(0x80);
1376 utf8 += char(0x80);
1377 utf8 += char(0x80);
1378 utf8 += char(0x80);
1379 utf8 += char(0x80);
1380 str = fromInvalidUtf8Sequence(ba: utf8);
1381 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.5") << utf8 << str << -1;
1382
1383 // 5.1.1
1384 utf8.clear();
1385 utf8 += char(0xed);
1386 utf8 += char(0xa0);
1387 utf8 += char(0x80);
1388 str = fromInvalidUtf8Sequence(ba: utf8);
1389 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.1") << utf8 << str << -1;
1390
1391 // 5.1.2
1392 utf8.clear();
1393 utf8 += char(0xed);
1394 utf8 += char(0xad);
1395 utf8 += char(0xbf);
1396 str = fromInvalidUtf8Sequence(ba: utf8);
1397 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.2") << utf8 << str << -1;
1398
1399 // 5.1.3
1400 utf8.clear();
1401 utf8 += char(0xed);
1402 utf8 += char(0xae);
1403 utf8 += char(0x80);
1404 str = fromInvalidUtf8Sequence(ba: utf8);
1405 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.3") << utf8 << str << -1;
1406
1407 // 5.1.4
1408 utf8.clear();
1409 utf8 += char(0xed);
1410 utf8 += char(0xaf);
1411 utf8 += char(0xbf);
1412 str = fromInvalidUtf8Sequence(ba: utf8);
1413 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.4") << utf8 << str << -1;
1414
1415 // 5.1.5
1416 utf8.clear();
1417 utf8 += char(0xed);
1418 utf8 += char(0xb0);
1419 utf8 += char(0x80);
1420 str = fromInvalidUtf8Sequence(ba: utf8);
1421 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.5") << utf8 << str << -1;
1422
1423 // 5.1.6
1424 utf8.clear();
1425 utf8 += char(0xed);
1426 utf8 += char(0xbe);
1427 utf8 += char(0x80);
1428 str = fromInvalidUtf8Sequence(ba: utf8);
1429 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.6") << utf8 << str << -1;
1430
1431 // 5.1.7
1432 utf8.clear();
1433 utf8 += char(0xed);
1434 utf8 += char(0xbf);
1435 utf8 += char(0xbf);
1436 str = fromInvalidUtf8Sequence(ba: utf8);
1437 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.7") << utf8 << str << -1;
1438
1439 // 5.2.1
1440 utf8.clear();
1441 utf8 += char(0xed);
1442 utf8 += char(0xa0);
1443 utf8 += char(0x80);
1444 utf8 += char(0xed);
1445 utf8 += char(0xb0);
1446 utf8 += char(0x80);
1447 str = fromInvalidUtf8Sequence(ba: utf8);
1448 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.1") << utf8 << str << -1;
1449
1450 // 5.2.2
1451 utf8.clear();
1452 utf8 += char(0xed);
1453 utf8 += char(0xa0);
1454 utf8 += char(0x80);
1455 utf8 += char(0xed);
1456 utf8 += char(0xbf);
1457 utf8 += char(0xbf);
1458 str = fromInvalidUtf8Sequence(ba: utf8);
1459 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.2") << utf8 << str << -1;
1460
1461 // 5.2.3
1462 utf8.clear();
1463 utf8 += char(0xed);
1464 utf8 += char(0xad);
1465 utf8 += char(0xbf);
1466 utf8 += char(0xed);
1467 utf8 += char(0xb0);
1468 utf8 += char(0x80);
1469 str = fromInvalidUtf8Sequence(ba: utf8);
1470 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.3") << utf8 << str << -1;
1471
1472 // 5.2.4
1473 utf8.clear();
1474 utf8 += char(0xed);
1475 utf8 += char(0xad);
1476 utf8 += char(0xbf);
1477 utf8 += char(0xed);
1478 utf8 += char(0xbf);
1479 utf8 += char(0xbf);
1480 str = fromInvalidUtf8Sequence(ba: utf8);
1481 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.4") << utf8 << str << -1;
1482
1483 // 5.2.5
1484 utf8.clear();
1485 utf8 += char(0xed);
1486 utf8 += char(0xae);
1487 utf8 += char(0x80);
1488 utf8 += char(0xed);
1489 utf8 += char(0xb0);
1490 utf8 += char(0x80);
1491 str = fromInvalidUtf8Sequence(ba: utf8);
1492 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.5") << utf8 << str << -1;
1493
1494 // 5.2.6
1495 utf8.clear();
1496 utf8 += char(0xed);
1497 utf8 += char(0xae);
1498 utf8 += char(0x80);
1499 utf8 += char(0xed);
1500 utf8 += char(0xbf);
1501 utf8 += char(0xbf);
1502 str = fromInvalidUtf8Sequence(ba: utf8);
1503 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.6") << utf8 << str << -1;
1504
1505 // 5.2.7
1506 utf8.clear();
1507 utf8 += char(0xed);
1508 utf8 += char(0xaf);
1509 utf8 += char(0xbf);
1510 utf8 += char(0xed);
1511 utf8 += char(0xb0);
1512 utf8 += char(0x80);
1513 str = fromInvalidUtf8Sequence(ba: utf8);
1514 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.7") << utf8 << str << -1;
1515
1516 // 5.2.8
1517 utf8.clear();
1518 utf8 += char(0xed);
1519 utf8 += char(0xaf);
1520 utf8 += char(0xbf);
1521 utf8 += char(0xed);
1522 utf8 += char(0xbf);
1523 utf8 += char(0xbf);
1524 str = fromInvalidUtf8Sequence(ba: utf8);
1525 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.8") << utf8 << str << -1;
1526
1527 // 5.3.1 - non-character code
1528 utf8.clear();
1529 utf8 += char(0xef);
1530 utf8 += char(0xbf);
1531 utf8 += char(0xbe);
1532 //str = QChar(QChar::ReplacementCharacter);
1533 str = QChar(0xfffe);
1534 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.3.1") << utf8 << str << -1;
1535
1536 // 5.3.2 - non-character code
1537 utf8.clear();
1538 utf8 += char(0xef);
1539 utf8 += char(0xbf);
1540 utf8 += char(0xbf);
1541 //str = QChar(QChar::ReplacementCharacter);
1542 str = QChar(0xffff);
1543 QTest::newRow(dataTag: "http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.3.2") << utf8 << str << -1;
1544}
1545
1546void tst_QTextCodec::utf8Codec()
1547{
1548 QTextCodec *codec = QTextCodec::codecForMib(mib: 106); // UTF-8
1549 QVERIFY(codec != 0);
1550
1551 QFETCH(QByteArray, utf8);
1552 QFETCH(QString, res);
1553 QFETCH(int, len);
1554
1555 QString str = codec->toUnicode(in: utf8.isNull() ? 0 : utf8.constData(),
1556 length: len < 0 ? qstrlen(str: utf8.constData()) : len);
1557 QCOMPARE(str, res);
1558
1559 str = QString::fromUtf8(str: utf8.isNull() ? 0 : utf8.constData(), size: len);
1560 QCOMPARE(str, res);
1561}
1562
1563void tst_QTextCodec::utf8bom_data()
1564{
1565 QTest::addColumn<QByteArray>(name: "data");
1566 QTest::addColumn<QString>(name: "result");
1567
1568 QTest::newRow(dataTag: "nobom")
1569 << QByteArray("\302\240", 2)
1570 << QString::fromLatin1(str: "\240");
1571
1572 {
1573 static const ushort data[] = { 0x201d };
1574 QTest::newRow(dataTag: "nobom 2")
1575 << QByteArray("\342\200\235", 3)
1576 << QString::fromUtf16(data, size: sizeof(data)/sizeof(short));
1577 }
1578
1579 {
1580 static const ushort data[] = { 0xf000 };
1581 QTest::newRow(dataTag: "bom1")
1582 << QByteArray("\357\200\200", 3)
1583 << QString::fromUtf16(data, size: sizeof(data)/sizeof(short));
1584 }
1585
1586 {
1587 static const ushort data[] = { 0xfec0 };
1588 QTest::newRow(dataTag: "bom2")
1589 << QByteArray("\357\273\200", 3)
1590 << QString::fromUtf16(data, size: sizeof(data)/sizeof(short));
1591 }
1592
1593 {
1594 QTest::newRow(dataTag: "normal-bom")
1595 << QByteArray("\357\273\277a", 4)
1596 << QString("a");
1597 }
1598
1599 { // test the non-SIMD code-path
1600 static const ushort data[] = { 0x61, 0xfeff, 0x62 };
1601 QTest::newRow(dataTag: "middle-bom (non SIMD)")
1602 << QByteArray("a\357\273\277b")
1603 << QString::fromUtf16(data, size: sizeof(data)/sizeof(short));
1604 }
1605
1606 { // test the SIMD code-path
1607 static const ushort data[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xfeff, 0x6d };
1608 QTest::newRow(dataTag: "middle-bom (SIMD)")
1609 << QByteArray("abcdefghijkl\357\273\277m")
1610 << QString::fromUtf16(data, size: sizeof(data)/sizeof(short));
1611 }
1612}
1613
1614void tst_QTextCodec::utf8bom()
1615{
1616 QFETCH(QByteArray, data);
1617 QFETCH(QString, result);
1618
1619 QTextCodec *const codec = QTextCodec::codecForMib(mib: 106); // UTF-8
1620 QVERIFY(codec);
1621
1622 QCOMPARE(codec->toUnicode(data.constData(), data.length(), 0), result);
1623
1624 QTextCodec::ConverterState state;
1625 QCOMPARE(codec->toUnicode(data.constData(), data.length(), &state), result);
1626}
1627
1628void tst_QTextCodec::utf8stateful_data()
1629{
1630 QTest::addColumn<QByteArray>(name: "buffer1");
1631 QTest::addColumn<QByteArray>(name: "buffer2");
1632 QTest::addColumn<QString>(name: "result"); // null QString indicates decoder error
1633
1634 // valid buffer continuations
1635 QTest::newRow(dataTag: "1of2+valid") << QByteArray("\xc2") << QByteArray("\xa0") << "\xc2\xa0";
1636 QTest::newRow(dataTag: "1of3+valid") << QByteArray("\xe0") << QByteArray("\xa0\x80") << "\xe0\xa0\x80";
1637 QTest::newRow(dataTag: "2of3+valid") << QByteArray("\xe0\xa0") << QByteArray("\x80") << "\xe0\xa0\x80";
1638 QTest::newRow(dataTag: "1of4+valid") << QByteArray("\360") << QByteArray("\220\210\203") << "\360\220\210\203";
1639 QTest::newRow(dataTag: "2of4+valid") << QByteArray("\360\220") << QByteArray("\210\203") << "\360\220\210\203";
1640 QTest::newRow(dataTag: "3of4+valid") << QByteArray("\360\220\210") << QByteArray("\203") << "\360\220\210\203";
1641 QTest::newRow(dataTag: "1ofBom+valid") << QByteArray("\xef") << QByteArray("\xbb\xbf") << "";
1642 QTest::newRow(dataTag: "2ofBom+valid") << QByteArray("\xef\xbb") << QByteArray("\xbf") << "";
1643
1644 // invalid continuation
1645 QTest::newRow(dataTag: "1of2+invalid") << QByteArray("\xc2") << QByteArray("a") << QString();
1646 QTest::newRow(dataTag: "1of3+invalid") << QByteArray("\xe0") << QByteArray("a") << QString();
1647 QTest::newRow(dataTag: "2of3+invalid") << QByteArray("\xe0\xa0") << QByteArray("a") << QString();
1648 QTest::newRow(dataTag: "1of4+invalid") << QByteArray("\360") << QByteArray("a") << QString();
1649 QTest::newRow(dataTag: "2of4+invalid") << QByteArray("\360\220") << QByteArray("a") << QString();
1650 QTest::newRow(dataTag: "3of4+invalid") << QByteArray("\360\220\210") << QByteArray("a") << QString();
1651
1652 // invalid: sequence too short (the empty second buffer causes a state reset)
1653 QTest::newRow(dataTag: "1of2+empty") << QByteArray("\xc2") << QByteArray() << QString();
1654 QTest::newRow(dataTag: "1of3+empty") << QByteArray("\xe0") << QByteArray() << QString();
1655 QTest::newRow(dataTag: "2of3+empty") << QByteArray("\xe0\xa0") << QByteArray() << QString();
1656 QTest::newRow(dataTag: "1of4+empty") << QByteArray("\360") << QByteArray() << QString();
1657 QTest::newRow(dataTag: "2of4+empty") << QByteArray("\360\220") << QByteArray() << QString();
1658 QTest::newRow(dataTag: "3of4+empty") << QByteArray("\360\220\210") << QByteArray() << QString();
1659
1660 // overlong sequence:
1661 QTest::newRow(dataTag: "overlong-1of2") << QByteArray("\xc1") << QByteArray("\x81") << QString();
1662 QTest::newRow(dataTag: "overlong-1of3") << QByteArray("\xe0") << QByteArray("\x81\x81") << QString();
1663 QTest::newRow(dataTag: "overlong-2of3") << QByteArray("\xe0\x81") << QByteArray("\x81") << QString();
1664 QTest::newRow(dataTag: "overlong-1of4") << QByteArray("\xf0") << QByteArray("\x80\x81\x81") << QString();
1665 QTest::newRow(dataTag: "overlong-2of4") << QByteArray("\xf0\x80") << QByteArray("\x81\x81") << QString();
1666 QTest::newRow(dataTag: "overlong-3of4") << QByteArray("\xf0\x80\x81") << QByteArray("\x81") << QString();
1667
1668 // out of range:
1669 // leading byte 0xF4 can produce codepoints above U+10FFFF, which aren't valid
1670 QTest::newRow(dataTag: "outofrange1-1of4") << QByteArray("\xf4") << QByteArray("\x90\x80\x80") << QString();
1671 QTest::newRow(dataTag: "outofrange1-2of4") << QByteArray("\xf4\x90") << QByteArray("\x80\x80") << QString();
1672 QTest::newRow(dataTag: "outofrange1-3of4") << QByteArray("\xf4\x90\x80") << QByteArray("\x80") << QString();
1673 QTest::newRow(dataTag: "outofrange2-1of4") << QByteArray("\xf5") << QByteArray("\x90\x80\x80") << QString();
1674 QTest::newRow(dataTag: "outofrange2-2of4") << QByteArray("\xf5\x90") << QByteArray("\x80\x80") << QString();
1675 QTest::newRow(dataTag: "outofrange2-3of4") << QByteArray("\xf5\x90\x80") << QByteArray("\x80") << QString();
1676 QTest::newRow(dataTag: "outofrange-1of5") << QByteArray("\xf8") << QByteArray("\x88\x80\x80\x80") << QString();
1677 QTest::newRow(dataTag: "outofrange-2of5") << QByteArray("\xf8\x88") << QByteArray("\x80\x80\x80") << QString();
1678 QTest::newRow(dataTag: "outofrange-3of5") << QByteArray("\xf8\x88\x80") << QByteArray("\x80\x80") << QString();
1679 QTest::newRow(dataTag: "outofrange-4of5") << QByteArray("\xf8\x88\x80\x80") << QByteArray("\x80") << QString();
1680 QTest::newRow(dataTag: "outofrange-1of6") << QByteArray("\xfc") << QByteArray("\x84\x80\x80\x80\x80") << QString();
1681 QTest::newRow(dataTag: "outofrange-2of6") << QByteArray("\xfc\x84") << QByteArray("\x80\x80\x80\x80") << QString();
1682 QTest::newRow(dataTag: "outofrange-3of6") << QByteArray("\xfc\x84\x80") << QByteArray("\x80\x80\x80") << QString();
1683 QTest::newRow(dataTag: "outofrange-4of6") << QByteArray("\xfc\x84\x80\x80") << QByteArray("\x80\x80") << QString();
1684 QTest::newRow(dataTag: "outofrange-5of6") << QByteArray("\xfc\x84\x80\x80\x80") << QByteArray("\x80") << QString();
1685}
1686
1687void tst_QTextCodec::utf8stateful()
1688{
1689 QFETCH(QByteArray, buffer1);
1690 QFETCH(QByteArray, buffer2);
1691 QFETCH(QString, result);
1692
1693 QTextCodec *utf8codec = QTextCodec::codecForName(name: "utf-8");
1694 QVERIFY(utf8codec);
1695
1696 QTextCodec::ConverterState state;
1697 memset(s: &state, c: 0, n: sizeof state);
1698
1699 QString decoded1 = utf8codec->toUnicode(in: buffer1, length: buffer1.size(), state: &state);
1700 if (result.isNull()) {
1701 // the decoder may have found an early error (invalidChars > 0):
1702 // if it has, remainingChars == 0;
1703 // if it hasn't, then it must have a state
1704 QVERIFY2((state.remainingChars == 0) != (state.invalidChars == 0),
1705 "remainingChars = " + QByteArray::number(state.remainingChars) +
1706 "; invalidChars = " + QByteArray::number(state.invalidChars));
1707 } else {
1708 QVERIFY(state.remainingChars > 0);
1709 QCOMPARE(state.invalidChars, 0);
1710 }
1711
1712 QString decoded2 = utf8codec->toUnicode(in: buffer2, length: buffer2.size(), state: &state);
1713 QCOMPARE(state.remainingChars, 0);
1714 if (result.isNull()) {
1715 QVERIFY(state.invalidChars > 0);
1716 } else {
1717 QCOMPARE(decoded1 + decoded2, result);
1718 }
1719}
1720
1721void tst_QTextCodec::utfHeaders_data()
1722{
1723 QTest::addColumn<QByteArray>(name: "codecName");
1724 QTest::addColumn<int>(name: "flags");
1725 QTest::addColumn<QByteArray>(name: "encoded");
1726 QTest::addColumn<QString>(name: "unicode");
1727 QTest::addColumn<bool>(name: "toUnicode");
1728
1729 QTest::newRow(dataTag: "utf8 bom")
1730 << QByteArray("UTF-8")
1731 << 0
1732 << QByteArray("\xef\xbb\xbfhello")
1733 << QString::fromLatin1(str: "hello")
1734 << true;
1735 QTest::newRow(dataTag: "utf8 nobom")
1736 << QByteArray("UTF-8")
1737 << 0
1738 << QByteArray("hello")
1739 << QString::fromLatin1(str: "hello")
1740 << true;
1741 QTest::newRow(dataTag: "utf8 bom ignore header")
1742 << QByteArray("UTF-8")
1743 << (int)QTextCodec::IgnoreHeader
1744 << QByteArray("\xef\xbb\xbfhello")
1745 << (QString(QChar(0xfeff)) + QString::fromLatin1(str: "hello"))
1746 << true;
1747 QTest::newRow(dataTag: "utf8 nobom ignore header")
1748 << QByteArray("UTF-8")
1749 << (int)QTextCodec::IgnoreHeader
1750 << QByteArray("hello")
1751 << QString::fromLatin1(str: "hello")
1752 << true;
1753
1754 QTest::newRow(dataTag: "utf16 bom be")
1755 << QByteArray("UTF-16")
1756 << 0
1757 << QByteArray("\xfe\xff\0h\0e\0l", 8)
1758 << QString::fromLatin1(str: "hel")
1759 << true;
1760 QTest::newRow(dataTag: "utf16 bom le")
1761 << QByteArray("UTF-16")
1762 << 0
1763 << QByteArray("\xff\xfeh\0e\0l\0", 8)
1764 << QString::fromLatin1(str: "hel")
1765 << true;
1766 if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
1767 QTest::newRow(dataTag: "utf16 nobom")
1768 << QByteArray("UTF-16")
1769 << 0
1770 << QByteArray("\0h\0e\0l", 6)
1771 << QString::fromLatin1(str: "hel")
1772 << true;
1773 QTest::newRow(dataTag: "utf16 bom be ignore header")
1774 << QByteArray("UTF-16")
1775 << (int)QTextCodec::IgnoreHeader
1776 << QByteArray("\xfe\xff\0h\0e\0l", 8)
1777 << (QString(QChar(0xfeff)) + QString::fromLatin1(str: "hel"))
1778 << true;
1779 } else {
1780 QTest::newRow(dataTag: "utf16 nobom")
1781 << QByteArray("UTF-16")
1782 << 0
1783 << QByteArray("h\0e\0l\0", 6)
1784 << QString::fromLatin1(str: "hel")
1785 << true;
1786 QTest::newRow(dataTag: "utf16 bom le ignore header")
1787 << QByteArray("UTF-16")
1788 << (int)QTextCodec::IgnoreHeader
1789 << QByteArray("\xff\xfeh\0e\0l\0", 8)
1790 << (QString(QChar(0xfeff)) + QString::fromLatin1(str: "hel"))
1791 << true;
1792 }
1793
1794 QTest::newRow(dataTag: "utf16-be bom be")
1795 << QByteArray("UTF-16BE")
1796 << 0
1797 << QByteArray("\xfe\xff\0h\0e\0l", 8)
1798 << QString::fromLatin1(str: "hel")
1799 << true;
1800 QTest::newRow(dataTag: "utf16-be nobom")
1801 << QByteArray("UTF-16BE")
1802 << 0
1803 << QByteArray("\0h\0e\0l", 6)
1804 << QString::fromLatin1(str: "hel")
1805 << true;
1806 QTest::newRow(dataTag: "utf16-be bom be ignore header")
1807 << QByteArray("UTF-16BE")
1808 << (int)QTextCodec::IgnoreHeader
1809 << QByteArray("\xfe\xff\0h\0e\0l", 8)
1810 << (QString(QChar(0xfeff)) + QString::fromLatin1(str: "hel"))
1811 << true;
1812
1813 QTest::newRow(dataTag: "utf16-le bom le")
1814 << QByteArray("UTF-16LE")
1815 << 0
1816 << QByteArray("\xff\xfeh\0e\0l\0", 8)
1817 << QString::fromLatin1(str: "hel")
1818 << true;
1819 QTest::newRow(dataTag: "utf16-le nobom")
1820 << QByteArray("UTF-16LE")
1821 << 0
1822 << QByteArray("h\0e\0l\0", 6)
1823 << QString::fromLatin1(str: "hel")
1824 << true;
1825 QTest::newRow(dataTag: "utf16-le bom le ignore header")
1826 << QByteArray("UTF-16LE")
1827 << (int)QTextCodec::IgnoreHeader
1828 << QByteArray("\xff\xfeh\0e\0l\0", 8)
1829 << (QString(QChar(0xfeff)) + QString::fromLatin1(str: "hel"))
1830 << true;
1831
1832
1833 QTest::newRow(dataTag: "utf32 bom be")
1834 << QByteArray("UTF-32")
1835 << 0
1836 << QByteArray("\0\0\xfe\xff\0\0\0h\0\0\0e\0\0\0l", 16)
1837 << QString::fromLatin1(str: "hel")
1838 << true;
1839 QTest::newRow(dataTag: "utf32 bom le")
1840 << QByteArray("UTF-32")
1841 << 0
1842 << QByteArray("\xff\xfe\0\0h\0\0\0e\0\0\0l\0\0\0", 16)
1843 << QString::fromLatin1(str: "hel")
1844 << true;
1845 if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
1846 QTest::newRow(dataTag: "utf32 nobom")
1847 << QByteArray("UTF-32")
1848 << 0
1849 << QByteArray("\0\0\0h\0\0\0e\0\0\0l", 12)
1850 << QString::fromLatin1(str: "hel")
1851 << true;
1852 QTest::newRow(dataTag: "utf32 bom be ignore header")
1853 << QByteArray("UTF-32")
1854 << (int)QTextCodec::IgnoreHeader
1855 << QByteArray("\0\0\xfe\xff\0\0\0h\0\0\0e\0\0\0l", 16)
1856 << (QString(QChar(0xfeff)) + QString::fromLatin1(str: "hel"))
1857 << true;
1858 } else {
1859 QTest::newRow(dataTag: "utf32 nobom")
1860 << QByteArray("UTF-32")
1861 << 0
1862 << QByteArray("h\0\0\0e\0\0\0l\0\0\0", 12)
1863 << QString::fromLatin1(str: "hel")
1864 << true;
1865 QTest::newRow(dataTag: "utf32 bom le ignore header")
1866 << QByteArray("UTF-32")
1867 << (int)QTextCodec::IgnoreHeader
1868 << QByteArray("\xff\xfe\0\0h\0\0\0e\0\0\0l\0\0\0", 16)
1869 << (QString(QChar(0xfeff)) + QString::fromLatin1(str: "hel"))
1870 << true;
1871 }
1872
1873
1874 QTest::newRow(dataTag: "utf32-be bom be")
1875 << QByteArray("UTF-32BE")
1876 << 0
1877 << QByteArray("\0\0\xfe\xff\0\0\0h\0\0\0e\0\0\0l", 16)
1878 << QString::fromLatin1(str: "hel")
1879 << true;
1880 QTest::newRow(dataTag: "utf32-be nobom")
1881 << QByteArray("UTF-32BE")
1882 << 0
1883 << QByteArray("\0\0\0h\0\0\0e\0\0\0l", 12)
1884 << QString::fromLatin1(str: "hel")
1885 << true;
1886 QTest::newRow(dataTag: "utf32-be bom be ignore header")
1887 << QByteArray("UTF-32BE")
1888 << (int)QTextCodec::IgnoreHeader
1889 << QByteArray("\0\0\xfe\xff\0\0\0h\0\0\0e\0\0\0l", 16)
1890 << (QString(QChar(0xfeff)) + QString::fromLatin1(str: "hel"))
1891 << true;
1892
1893
1894 QTest::newRow(dataTag: "utf32-le bom le")
1895 << QByteArray("UTF-32LE")
1896 << 0
1897 << QByteArray("\xff\xfe\0\0h\0\0\0e\0\0\0l\0\0\0", 16)
1898 << QString::fromLatin1(str: "hel")
1899 << true;
1900 QTest::newRow(dataTag: "utf32-le nobom")
1901 << QByteArray("UTF-32LE")
1902 << 0
1903 << QByteArray("h\0\0\0e\0\0\0l\0\0\0", 12)
1904 << QString::fromLatin1(str: "hel")
1905 << true;
1906 QTest::newRow(dataTag: "utf32-le bom le ignore header")
1907 << QByteArray("UTF-32LE")
1908 << (int)QTextCodec::IgnoreHeader
1909 << QByteArray("\xff\xfe\0\0h\0\0\0e\0\0\0l\0\0\0", 16)
1910 << (QString(QChar(0xfeff)) + QString::fromLatin1(str: "hel"))
1911 << true;
1912}
1913
1914void tst_QTextCodec::utfHeaders()
1915{
1916 QFETCH(QByteArray, codecName);
1917 QTextCodec *codec = QTextCodec::codecForName(name: codecName);
1918 QVERIFY(codec != 0);
1919
1920 QFETCH(int, flags);
1921 QTextCodec::ConversionFlags cFlags = QTextCodec::ConversionFlags(flags);
1922 QTextCodec::ConverterState state(cFlags);
1923
1924 QFETCH(QByteArray, encoded);
1925 QFETCH(QString, unicode);
1926
1927 QFETCH(bool, toUnicode);
1928
1929 QLatin1String ignoreReverseTestOn = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? QLatin1String(" le") : QLatin1String(" be");
1930 QString rowName(QTest::currentDataTag());
1931
1932 if (toUnicode) {
1933 QString result = codec->toUnicode(in: encoded.constData(), length: encoded.length(), state: &state);
1934 QCOMPARE(result.length(), unicode.length());
1935 QCOMPARE(result, unicode);
1936
1937 if (!rowName.endsWith(s: "nobom") && !rowName.contains(s: ignoreReverseTestOn)) {
1938 QTextCodec::ConverterState state2(cFlags);
1939 QByteArray reencoded = codec->fromUnicode(in: unicode.unicode(), length: unicode.length(), state: &state2);
1940 QCOMPARE(reencoded, encoded);
1941 }
1942 } else {
1943 QByteArray result = codec->fromUnicode(in: unicode.unicode(), length: unicode.length(), state: &state);
1944 QCOMPARE(result, encoded);
1945 }
1946}
1947
1948void tst_QTextCodec::codecForHtml_data()
1949{
1950 QTest::addColumn<QByteArray>(name: "html");
1951 QTest::addColumn<int>(name: "defaultCodecMib");
1952 QTest::addColumn<int>(name: "expectedMibEnum");
1953
1954 int noDefault = -1;
1955 int fallback = 4; // latin 1
1956 QByteArray html = "<html><head></head><body>blah</body></html>";
1957 QTest::newRow(dataTag: "no charset, latin 1") << html << noDefault << fallback;
1958
1959 QTest::newRow(dataTag: "no charset, default UTF-8") << html << 106 << 106;
1960
1961 html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=ISO-8859-15\" /></head></html>";
1962 QTest::newRow(dataTag: "latin 15, default UTF-8") << html << 106 << 111;
1963
1964 html = "<html><head><meta content=\"text/html; charset=ISO-8859-15\" http-equiv=\"content-type\" /></head></html>";
1965 QTest::newRow(dataTag: "latin 15, default UTF-8 (#2)") << html << 106 << 111;
1966
1967 html = "<!DOCTYPE html><html><head><meta charset=\"utf-8\"><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><title>Test</title></head>";
1968 QTest::newRow(dataTag: "UTF-8, no default") << html << noDefault << 106;
1969
1970 html = "<!DOCTYPE html><html><head><meta charset=\"ISO_8859-1:1987\"><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><title>Test</title></head>";
1971 QTest::newRow(dataTag: "latin 1, no default") << html << noDefault << 4;
1972
1973 html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset=\"utf-8\"><title>Test</title></head>";
1974 QTest::newRow(dataTag: "UTF-8, no default (#2)") << html << noDefault << 106;
1975
1976 html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8/></head></html>";
1977 QTest::newRow(dataTag: "UTF-8, no quotes") << html << noDefault << 106;
1978
1979 html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset='UTF-8'/></head></html>";
1980 QTest::newRow(dataTag: "UTF-8, single quotes") << html << noDefault << 106;
1981
1982 html = "<!DOCTYPE html><html><head><meta charset=utf-8><title>Test</title></head>";
1983 QTest::newRow(dataTag: "UTF-8, > terminator") << html << noDefault << 106;
1984
1985 html = "<!DOCTYPE html><html><head><meta charset= utf-8 ><title>Test</title></head>";
1986 QTest::newRow(dataTag: "UTF-8, > terminator with spaces") << html << noDefault << 106;
1987
1988 html = "<!DOCTYPE html><html><head><meta charset= utf/8 ><title>Test</title></head>";
1989 QTest::newRow(dataTag: "UTF-8, > teminator with early backslash)") << html << noDefault << 106;
1990
1991 // Test invalid charsets.
1992 html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=invalid-foo\" /></head></html>";
1993 QTest::newRow(dataTag: "invalid charset, no default") << html << noDefault << fallback;
1994 QTest::newRow(dataTag: "invalid charset, default UTF-8") << html << 106 << 106;
1995
1996 html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset=\"";
1997 html.prepend(a: QByteArray().fill(c: ' ', size: 512 - html.size()));
1998 QTest::newRow(dataTag: "invalid charset (large header)") << html << noDefault << fallback;
1999
2000 html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset=\"utf-8";
2001 QTest::newRow(dataTag: "invalid charset (no closing double quote)") << html << noDefault << fallback;
2002
2003 html = "<!DOCTYPE html><html><head><meta http-equiv=\"X-UA-Compatible\" content=\"IE=9,chrome=1\"><meta charset='utf-8";
2004 QTest::newRow(dataTag: "invalid charset (no closing single quote)") << html << noDefault << fallback;
2005
2006 html = "<!DOCTYPE html><html><head><meta charset=utf-8 foo=bar><title>Test</title></head>";
2007 QTest::newRow(dataTag: "invalid (space terminator)") << html << noDefault << fallback;
2008
2009 html = "<!DOCTYPE html><html><head><meta charset=\" utf' 8 /><title>Test</title></head>";
2010 QTest::newRow(dataTag: "invalid charset, early terminator (')") << html << noDefault << fallback;
2011
2012 const char src[] = { char(0xff), char(0xfe), char(0x7a), char(0x03), 0, 0 };
2013 html = src;
2014 QTest::newRow(dataTag: "greek text UTF-16LE") << html << 106 << 1014;
2015
2016 html = "<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"><span style=\"color: rgb(0, 0, 0); font-family: "
2017 "'Galatia SIL'; font-size: 27px; font-style: normal; font-variant: normal; font-weight: normal; letter-spacing: normal; "
2018 "line-height: normal; orphans: auto; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; widows: "
2019 "auto; word-spacing: 0px; -webkit-text-size-adjust: auto; -webkit-text-stroke-width: 0px; display: inline !important; float: "
2020 "none;\">&#x37b</span>\000";
2021 QTest::newRow(dataTag: "greek text UTF-8") << html << 106 << 106;
2022
2023 html = "<!DOCTYPE html><html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=unicode\">"
2024 "<head/><body><p>bla</p></body></html>"; // QTBUG-41998, ICU will return UTF-16.
2025 QTest::newRow(dataTag: "legacy unicode UTF-8") << html << 106 << 106;
2026}
2027
2028void tst_QTextCodec::codecForHtml()
2029{
2030 QFETCH(QByteArray, html);
2031 QFETCH(int, defaultCodecMib);
2032 QFETCH(int, expectedMibEnum);
2033
2034 if (defaultCodecMib != -1)
2035 QCOMPARE(QTextCodec::codecForHtml(html, QTextCodec::codecForMib(defaultCodecMib))->mibEnum(), expectedMibEnum);
2036 else // Test one parameter version when there is no default codec.
2037 QCOMPARE(QTextCodec::codecForHtml(html)->mibEnum(), expectedMibEnum);
2038}
2039
2040void tst_QTextCodec::codecForUtfText_data()
2041{
2042 QTest::addColumn<QByteArray>(name: "encoded");
2043 QTest::addColumn<bool>(name: "detected");
2044 QTest::addColumn<int>(name: "mib");
2045
2046
2047 QTest::newRow(dataTag: "utf8 bom")
2048 << QByteArray("\xef\xbb\xbfhello")
2049 << true
2050 << 106;
2051 QTest::newRow(dataTag: "utf8 nobom")
2052 << QByteArray("hello")
2053 << false
2054 << 0;
2055
2056 QTest::newRow(dataTag: "utf16 bom be")
2057 << QByteArray("\xfe\xff\0h\0e\0l", 8)
2058 << true
2059 << 1013;
2060 QTest::newRow(dataTag: "utf16 bom le")
2061 << QByteArray("\xff\xfeh\0e\0l\0", 8)
2062 << true
2063 << 1014;
2064 QTest::newRow(dataTag: "utf16 nobom")
2065 << QByteArray("\0h\0e\0l", 6)
2066 << false
2067 << 0;
2068
2069 QTest::newRow(dataTag: "utf32 bom be")
2070 << QByteArray("\0\0\xfe\xff\0\0\0h\0\0\0e\0\0\0l", 16)
2071 << true
2072 << 1018;
2073 QTest::newRow(dataTag: "utf32 bom le")
2074 << QByteArray("\xff\xfe\0\0h\0\0\0e\0\0\0l\0\0\0", 16)
2075 << true
2076 << 1019;
2077 QTest::newRow(dataTag: "utf32 nobom")
2078 << QByteArray("\0\0\0h\0\0\0e\0\0\0l", 12)
2079 << false
2080 << 0;
2081}
2082
2083void tst_QTextCodec::codecForUtfText()
2084{
2085 QFETCH(QByteArray, encoded);
2086 QFETCH(bool, detected);
2087 QFETCH(int, mib);
2088
2089 QTextCodec *codec = QTextCodec::codecForUtfText(ba: encoded, defaultCodec: 0);
2090 if (detected)
2091 QCOMPARE(codec->mibEnum(), mib);
2092 else
2093 QVERIFY(!codec);
2094}
2095
2096#if defined(Q_OS_UNIX)
2097void tst_QTextCodec::toLocal8Bit()
2098{
2099#if !QT_CONFIG(process)
2100 QSKIP("No qprocess support", SkipAll);
2101#else
2102 // Add the executable's directory to path so that we can find the test helper next to it
2103 // in a cross-platform way. We must do this because the CWD is not pointing to this directory
2104 // in debug-and-release builds.
2105 QByteArray path = qgetenv(varName: "PATH");
2106 qputenv(varName: "PATH",
2107 value: path + QDir::listSeparator().toLatin1()
2108 + QCoreApplication::applicationDirPath().toLocal8Bit());
2109 auto restore = qScopeGuard(f: [&] { qputenv(varName: "PATH", value: path); });
2110
2111 QProcess process;
2112 process.start(command: "echo_helper");
2113 QString string(QChar(0x410));
2114 process.write(data: (const char*)string.utf16(), len: string.length()*2);
2115
2116 process.closeWriteChannel();
2117 process.waitForFinished();
2118 QCOMPARE(process.exitStatus(), QProcess::NormalExit);
2119 QCOMPARE(process.exitCode(), 0);
2120#endif
2121}
2122#endif
2123
2124class LoadAndConvert: public QRunnable
2125{
2126public:
2127 LoadAndConvert(const QByteArray &source, QByteArray *destination)
2128 : codecName(source), target(destination)
2129 {}
2130 QByteArray codecName;
2131 QByteArray *target;
2132 void run()
2133 {
2134 QTextCodec *c = QTextCodec::codecForName(name: codecName);
2135 if (!c) {
2136 qWarning() << "WARNING" << codecName << "not found?";
2137 return;
2138 }
2139 QString str = QString::fromLatin1(str: codecName);
2140 QByteArray b = c->fromUnicode(uc: str);
2141 c->toUnicode(b);
2142 *target = codecName;
2143 }
2144};
2145
2146class LoadAndConvertMIB: public QRunnable
2147{
2148public:
2149 LoadAndConvertMIB(int mib, int *target)
2150 : mib(mib), target(target)
2151 {}
2152 int mib;
2153 int *target;
2154 void run()
2155 {
2156 QTextCodec *c = QTextCodec::codecForMib(mib);
2157 if (!c) {
2158 qWarning() << "WARNING" << mib << "not found?";
2159 return;
2160 }
2161 QString str = QString::number(mib);
2162 QByteArray b = c->fromUnicode(uc: str);
2163 c->toUnicode(b);
2164 *target = mib;
2165 }
2166};
2167
2168
2169void tst_QTextCodec::threadSafety()
2170{
2171 QList<QByteArray> codecList = QTextCodec::availableCodecs();
2172 const QVector<int> mibList = QTextCodec::availableMibs().toVector();
2173 QThreadPool::globalInstance()->setMaxThreadCount(12);
2174
2175 QVector<QByteArray> res;
2176 res.resize(size: codecList.size());
2177 for (int i = 0; i < codecList.size(); ++i) {
2178 QThreadPool::globalInstance()->start(runnable: new LoadAndConvert(codecList.at(i), &res[i]));
2179 }
2180
2181 QVector<int> res2;
2182 res2.resize(size: mibList.size());
2183 for (int i = 0; i < mibList.size(); ++i) {
2184 QThreadPool::globalInstance()->start(runnable: new LoadAndConvertMIB(mibList.at(i), &res2[i]));
2185 }
2186
2187 // wait for all threads to finish working
2188 QThreadPool::globalInstance()->waitForDone();
2189
2190 QCOMPARE(res.toList(), codecList);
2191 QCOMPARE(res2, mibList);
2192}
2193
2194void tst_QTextCodec::invalidNames()
2195{
2196 QVERIFY(!QTextCodec::codecForName(""));
2197 QVERIFY(!QTextCodec::codecForName(QByteArray()));
2198 QVERIFY(!QTextCodec::codecForName("-"));
2199 QVERIFY(!QTextCodec::codecForName("\1a\2b\3a\4d\5c\6s\7a\xffr\xec_\x9c_"));
2200 QVERIFY(!QTextCodec::codecForName("\n"));
2201 QVERIFY(!QTextCodec::codecForName("don't exist"));
2202 QByteArray huge = "azertyuiop^$qsdfghjklm<wxcvbn,;:=1234567890�_";
2203 huge = huge + huge + huge + huge + huge + huge + huge + huge;
2204 huge = huge + huge + huge + huge + huge + huge + huge + huge;
2205 huge = huge + huge + huge + huge + huge + huge + huge + huge;
2206 huge = huge + huge + huge + huge + huge + huge + huge + huge;
2207 QVERIFY(!QTextCodec::codecForName(huge));
2208}
2209
2210void tst_QTextCodec::checkAliases_data()
2211{
2212 QTest::addColumn<QByteArray>(name: "codecName");
2213 const QList<QByteArray> codecList = QTextCodec::availableCodecs();
2214 for (const QByteArray &a : codecList)
2215 QTest::newRow( dataTag: a.constData() ) << a;
2216}
2217
2218void tst_QTextCodec::checkAliases()
2219{
2220 QFETCH( QByteArray, codecName );
2221 QTextCodec *c = QTextCodec::codecForName(name: codecName);
2222 QVERIFY(c);
2223 QCOMPARE(QTextCodec::codecForName(codecName), c);
2224 QCOMPARE(QTextCodec::codecForName(c->name()), c);
2225
2226 const auto aliases = c->aliases();
2227 for (const QByteArray &a : aliases) {
2228 QCOMPARE(QTextCodec::codecForName(a), c);
2229 }
2230}
2231
2232
2233void tst_QTextCodec::moreToFromUnicode_data() {
2234 QTest::addColumn<QByteArray>(name: "codecName");
2235 QTest::addColumn<QByteArray>(name: "testData");
2236
2237 QTest::newRow(dataTag: "russian") << QByteArray("ISO-8859-5")
2238 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF\x00");
2239
2240 QTest::newRow(dataTag: "arabic") << QByteArray("ISO-8859-6")
2241 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA4\xAC\xAD\xBB\xBF\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2");
2242
2243 QTest::newRow(dataTag: "greek") << QByteArray("ISO-8859-7")
2244 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA6\xA7\xA8\xA9\xAB\xAC\xAD\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE");
2245
2246 QTest::newRow(dataTag: "turkish") << QByteArray("ISO-8859-9")
2247 << QByteArray("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2248
2249 QTest::newRow(dataTag: "latin1") << QByteArray("ISO-8859-1")
2250 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2251
2252 QByteArray sms7bit_ba;
2253 for (int i=1; i <= 0x7f; ++i) {
2254 if (i!='\x1b') {
2255 sms7bit_ba.append(c: i);
2256 }
2257 }
2258
2259 QTest::newRow(dataTag: "latin2") << QByteArray("ISO-8859-2")
2260 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2261
2262 QTest::newRow(dataTag: "latin3") << QByteArray("ISO-8859-3")
2263 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBF\xC0\xC1\xC2\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2264
2265 QTest::newRow(dataTag: "latin4") << QByteArray("ISO-8859-4")
2266 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2267
2268 QTest::newRow(dataTag: "russian 2") << QByteArray("ISO-8859-5")
2269 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2270
2271 QTest::newRow(dataTag: "arabic 2") << QByteArray("ISO-8859-6")
2272 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA4\xAC\xAD\xBB\xBF\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2");
2273
2274 QTest::newRow(dataTag: "greek 2") << QByteArray("ISO-8859-7")
2275 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA6\xA7\xA8\xA9\xAB\xAC\xAD\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE");
2276
2277 QTest::newRow(dataTag: "latin5") << QByteArray("ISO-8859-9")
2278 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2279
2280 QTest::newRow(dataTag: "latin6") << QByteArray("ISO-8859-10")
2281 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2282
2283#if 0
2284 QByteArray iso8859_11_ba;
2285 for (int x=0x20; x<=0x7f; ++x) {
2286 iso8859_11_ba.append(x);
2287 }
2288 for (int x=0xa0; x<0xff; ++x) {
2289 if ((x>=0xdb && x<0xdf) || x>0xfb){
2290 continue;
2291 }
2292 iso8859_11_ba.append(x);
2293 }
2294 QTest::newRow("latin-thai") << QByteArray("ISO-8859-11") << iso8859_11_ba;
2295#endif
2296
2297 QTest::newRow(dataTag: "latin7") << QByteArray("ISO-8859-13")
2298 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2299
2300 QTest::newRow(dataTag: "celtic") << QByteArray("ISO-8859-14")
2301 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2302
2303 QTest::newRow(dataTag: "latin9") << QByteArray("ISO-8859-15")
2304 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2305
2306// QTest::newRow("latin10") << QByteArray("ISO-8859-16")
2307// << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2308
2309 QTest::newRow(dataTag: "cp850") << QByteArray("CP850")
2310 << QByteArray("\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff");
2311
2312 QTest::newRow(dataTag: "cp874") << QByteArray("CP874")
2313 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x85\x91\x92\x93\x94\x95\x96\x97\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB");
2314
2315 QTest::newRow(dataTag: "cp1250") << QByteArray("CP1250")
2316 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x82\x84\x85\x86\x87\x89\x8A\x8B\x8C\x8D\x8E\x8F\x91\x92\x93\x94\x95\x96\x97\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2317
2318 QTest::newRow(dataTag: "cp1251") << QByteArray("CP1251")
2319 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2320
2321 QTest::newRow(dataTag: "cp1252") << QByteArray("CP1252")
2322 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8E\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2323
2324 QTest::newRow(dataTag: "cp1253") << QByteArray("CP1253")
2325 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x82\x83\x84\x85\x86\x87\x89\x8B\x91\x92\x93\x94\x95\x96\x97\x99\x9B\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE");
2326
2327 QTest::newRow(dataTag: "cp1254") << QByteArray("CP1254")
2328 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2329
2330 QTest::newRow(dataTag: "cp1255") << QByteArray("CP1255")
2331 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x82\x83\x84\x85\x86\x87\x88\x89,x8B\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9B\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFD\xFE");
2332
2333 QTest::newRow(dataTag: "cp1256") << QByteArray("CP1256")
2334 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2335
2336 QTest::newRow(dataTag: "cp1257") << QByteArray("CP1257")
2337 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x82\x84\x85\x86\x87\x89\x8B\x8D\x8E\x8F\x91\x92\x93\x94\x95\x96\x97\x99\x9B\x9D\x9E\xA0\xA2\xA3\xA4\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2338
2339 QTest::newRow(dataTag: "cp1258") << QByteArray("CP1258")
2340 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x82\x83\x84\x85\x86\x87\x88\x89\x8B\x8C\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9B\x9C\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2341
2342 QByteArray koi8_r_ba;
2343 for (int x=0x20; x<=0xff; ++x) {
2344 if (x!=0x9A && x!=0xbf) {
2345 koi8_r_ba.append(c: x);
2346 }
2347 }
2348 QTest::newRow(dataTag: "KOI8-R") << QByteArray("KOI8-R") << koi8_r_ba;
2349
2350 QByteArray koi8_u_ba;
2351 for (int x=0x20; x<=0xff; ++x) {
2352 koi8_u_ba.append(c: x);
2353 }
2354 QTest::newRow(dataTag: "KOI8-U") << QByteArray("KOI8-U") << koi8_u_ba;
2355
2356
2357 QByteArray big5_ba;
2358 for (unsigned char u=0xa1; u<=0xf9; u++) {
2359 if (u==0xc8) {
2360 continue;
2361 }
2362 for (unsigned char v=0x40; v<=0x7e; v++) {
2363 big5_ba.append(c: u);
2364 big5_ba.append(c: v);
2365 }
2366 unsigned char v_up;
2367 switch (u) {
2368 case 0xa2: v_up=0xa1; break;
2369 case 0xa3: v_up=0xbf; break;
2370 case 0xc7: v_up=0xfc; break;
2371 case 0xf9: v_up=0xd5; break;
2372 default: v_up=0xfe;
2373 }
2374
2375 for (unsigned char v=0xa1; v<=v_up; v++) {
2376 if (u==0xa2 && (v==0xcc || v==0xce)) {
2377 continue;
2378 }
2379 big5_ba.append(c: u);
2380 big5_ba.append(c: v);
2381 }
2382 }
2383
2384 QTest::newRow(dataTag: "BIG5") << QByteArray("BIG5") << big5_ba;
2385
2386 QByteArray gb2312_ba;
2387 for (unsigned char u=0xa1; u<=0xf7; u++) {
2388 for (unsigned char v=0xa1; v<=0xfe; v++) {
2389 gb2312_ba.append(c: u);
2390 gb2312_ba.append(c: v);
2391 }
2392 }
2393
2394 QTest::newRow(dataTag: "GB2312") << QByteArray("GB2312") << gb2312_ba;
2395}
2396
2397void tst_QTextCodec::moreToFromUnicode()
2398{
2399 QFETCH( QByteArray, codecName );
2400 QFETCH( QByteArray, testData );
2401
2402 QTextCodec *c = QTextCodec::codecForName( name: codecName.data() );
2403 QVERIFY(c);
2404
2405 QString uStr = c->toUnicode(testData);
2406 QByteArray cStr = c->fromUnicode(uc: uStr);
2407 QCOMPARE(testData, cStr);
2408}
2409
2410void tst_QTextCodec::shiftJis()
2411{
2412 QByteArray backslashTilde("\\~");
2413 QTextCodec* codec = QTextCodec::codecForName(name: "shift_jis");
2414 QString string = codec->toUnicode(backslashTilde);
2415 QCOMPARE(string.length(), 2);
2416 QCOMPARE(string.at(0), QChar(QLatin1Char('\\')));
2417 QCOMPARE(string.at(1), QChar(QLatin1Char('~')));
2418
2419 QByteArray encoded = codec->fromUnicode(uc: string);
2420 QCOMPARE(encoded, backslashTilde);
2421}
2422
2423struct UserCodec : public QTextCodec
2424{
2425 // implement pure virtuals
2426 QByteArray name() const override
2427 { return "UserCodec"; }
2428 QList<QByteArray> aliases() const override
2429 { return QList<QByteArray>() << "usercodec" << "user-codec"; }
2430 int mibEnum() const override
2431 { return 5000; }
2432
2433 virtual QString convertToUnicode(const char *, int, ConverterState *) const override
2434 { return QString(); }
2435 virtual QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override
2436 { return QByteArray(); }
2437};
2438
2439void tst_QTextCodec::userCodec()
2440{
2441 // check that it isn't there
2442 static bool executedOnce = false;
2443 if (executedOnce)
2444 QSKIP("Test already executed once");
2445
2446 QVERIFY(!QTextCodec::availableCodecs().contains("UserCodec"));
2447 QVERIFY(!QTextCodec::codecForName("UserCodec"));
2448
2449 UserCodec *codec = new UserCodec;
2450 executedOnce = true;
2451
2452 QList<QByteArray> availableCodecs = QTextCodec::availableCodecs();
2453 QVERIFY(availableCodecs.contains("UserCodec"));
2454 QVERIFY(availableCodecs.contains("usercodec"));
2455 QVERIFY(availableCodecs.contains("user-codec"));
2456
2457 QTextCodec *pcodec = QTextCodec::codecForName(name: "UserCodec");
2458 QCOMPARE(pcodec, codec);
2459
2460 pcodec = QTextCodec::codecForName(name: "user-codec");
2461 QCOMPARE(pcodec, codec);
2462
2463 pcodec = QTextCodec::codecForName(name: "User-Codec");
2464 QCOMPARE(pcodec, codec);
2465
2466 pcodec = QTextCodec::codecForMib(mib: 5000);
2467 QCOMPARE(pcodec, codec);
2468
2469 delete codec;
2470
2471 pcodec = QTextCodec::codecForName(name: "UserCodec");
2472 QCOMPARE(pcodec, nullptr);
2473}
2474
2475void tst_QTextCodec::canEncode()
2476{
2477 QFETCH(QString, codecName);
2478 QFETCH(QString, inputString);
2479 QFETCH(QByteArray, expectedData);
2480 QFETCH(bool, canEncode);
2481
2482 QTextCodec *codec = QTextCodec::codecForName(name: codecName.toLatin1());
2483 QVERIFY(codec != nullptr);
2484
2485 QCOMPARE(codec->canEncode(inputString), canEncode);
2486 QByteArray encoded = codec->fromUnicode(uc: inputString);
2487 QCOMPARE(encoded, expectedData);
2488}
2489
2490void tst_QTextCodec::canEncode_data()
2491{
2492 QTest::addColumn<QString>(name: "codecName");
2493 QTest::addColumn<QString>(name: "inputString");
2494 QTest::addColumn<QByteArray>(name: "expectedData");
2495 QTest::addColumn<bool>(name: "canEncode");
2496
2497 QTest::newRow(dataTag: "English ISO-8859-1") << "ISO-8859-1" << "Hello World"
2498 << QByteArray("Hello World") << true;
2499 QTest::newRow(dataTag: "English big5") << "Big5" << "Hello World" << QByteArray("Hello World") << true;
2500
2501 QTest::newRow(dataTag: "Greek win1252")
2502 << "Windows-1252"
2503 << QString("\u03c0\u03bf\u03bb\u03cd\u03c4\u03c1\u03bf\u03c0\u03bf\u03bd")
2504 << QByteArray("??????????") << false;
2505 QTest::newRow(dataTag: "Greek win1253")
2506 << "Windows-1253"
2507 << QString("\u03c0\u03bf\u03bb\u03cd\u03c4\u03c1\u03bf\u03c0\u03bf\u03bd")
2508 << QByteArray("\xF0\xEF\xEB\xFD\xF4\xF1\xEF\xF0\xEF\xED") << true;
2509
2510 QTest::newRow(dataTag: "Russian win1252")
2511 << "Windows-1252" << QString("\u041f\u0440\u0438\u0432\u0435\u0442 \u043c\u0438\u0440")
2512 << QByteArray("?????? ???") << false;
2513 QTest::newRow(dataTag: "Russian win1251")
2514 << "Windows-1251" << QString("\u041f\u0440\u0438\u0432\u0435\u0442 \u043c\u0438\u0440")
2515 << QByteArray("\xCF\xF0\xE8\xE2\xE5\xF2 \xEC\xE8\xF0") << true;
2516
2517 QTest::newRow(dataTag: "English from ucs4")
2518 << "ISO-8859-1" << QString("\u0048\u0065\u006c\u006c\u006f\u0021")
2519 << QByteArray("Hello!") << true;
2520
2521 // ICU on Linux RHEL 7.6 seems to be old, and does not handle NULL
2522 // characters properly. It returns 0x01 instead of 0x00 for it, so
2523 // we just skip the test.
2524#if !QT_CONFIG(icu) || (U_ICU_VERSION_MAJOR_NUM > 56)
2525 QTest::newRow(dataTag: "With null") << "ISO-8859-1" << QString::fromUcs4(str: U"Hello\u0000World", size: 11)
2526 << QByteArray("Hello\x00World", 11) << true;
2527#endif
2528
2529 QTest::newRow(dataTag: "With special chars")
2530 << "ISO-8859-1" << QString("\u0001\u0002\u0003\u0008\u0009\u000a\u000b\u000d")
2531 << QByteArray("\x01\x02\x03\b\t\n\x0B\r") << true;
2532
2533 QTest::newRow(dataTag: "Pencil icon") << "ISO-8859-1" << QString("\u270f") << QByteArray("?") << false;
2534}
2535
2536struct DontCrashAtExit {
2537 ~DontCrashAtExit() {
2538 QTextCodec *c = QTextCodec::codecForName(name: "utf8");
2539 if (c)
2540 c->toUnicode(chars: "azerty");
2541
2542 }
2543} dontCrashAtExit;
2544
2545
2546QTEST_MAIN(tst_QTextCodec)
2547#include "tst_qtextcodec.moc"
2548

// Source: qtbase/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp