| 1 | /**************************************************************************** | 
| 2 | ** | 
| 3 | ** Copyright (C) 2018 The Qt Company Ltd. | 
| 4 | ** Copyright (C) 2018 Intel Corporation. | 
| 5 | ** Contact: https://www.qt.io/licensing/ | 
| 6 | ** | 
| 7 | ** This file is part of the test suite of the Qt Toolkit. | 
| 8 | ** | 
| 9 | ** $QT_BEGIN_LICENSE:GPL-EXCEPT$ | 
| 10 | ** Commercial License Usage | 
| 11 | ** Licensees holding valid commercial Qt licenses may use this file in | 
| 12 | ** accordance with the commercial license agreement provided with the | 
| 13 | ** Software or, alternatively, in accordance with the terms contained in | 
| 14 | ** a written agreement between you and The Qt Company. For licensing terms | 
| 15 | ** and conditions see https://www.qt.io/terms-conditions. For further | 
| 16 | ** information use the contact form at https://www.qt.io/contact-us. | 
| 17 | ** | 
| 18 | ** GNU General Public License Usage | 
| 19 | ** Alternatively, this file may be used under the terms of the GNU | 
| 20 | ** General Public License version 3 as published by the Free Software | 
| 21 | ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT | 
| 22 | ** included in the packaging of this file. Please review the following | 
| 23 | ** information to ensure the GNU General Public License requirements will | 
| 24 | ** be met: https://www.gnu.org/licenses/gpl-3.0.html. | 
| 25 | ** | 
| 26 | ** $QT_END_LICENSE$ | 
| 27 | ** | 
| 28 | ****************************************************************************/ | 
| 29 | #include <QtTest/QtTest> | 
| 30 |  | 
| 31 | void loadInvalidUtf8Rows() | 
| 32 | { | 
| 33 |     // Wrong continuations | 
| 34 |     QTest::newRow(dataTag: "bad-continuation-1char" ) << QByteArray("\x80" ); | 
| 35 |     QTest::newRow(dataTag: "bad-continuation-2chars-1" ) << QByteArray("\xC2\xC0" ); | 
| 36 |     QTest::newRow(dataTag: "bad-continuation-2chars-2" ) << QByteArray("\xC3\xDF" ); | 
| 37 |     QTest::newRow(dataTag: "bad-continuation-2chars-3" ) << QByteArray("\xC7\xF0" ); | 
| 38 |     QTest::newRow(dataTag: "bad-continuation-3chars-1" ) << QByteArray("\xE0\xA0\xC0" ); | 
| 39 |     QTest::newRow(dataTag: "bad-continuation-3chars-2" ) << QByteArray("\xE0\xC0\xA0" ); | 
| 40 |     QTest::newRow(dataTag: "bad-continuation-4chars-1" ) << QByteArray("\xF0\x90\x80\xC0" ); | 
| 41 |     QTest::newRow(dataTag: "bad-continuation-4chars-2" ) << QByteArray("\xF0\x90\xC0\x80" ); | 
| 42 |     QTest::newRow(dataTag: "bad-continuation-4chars-3" ) << QByteArray("\xF0\xC0\x80\x80" ); | 
| 43 |  | 
| 44 |     // Too short | 
| 45 |     QTest::newRow(dataTag: "too-short-2chars" ) << QByteArray("\xC2" ); | 
| 46 |     QTest::newRow(dataTag: "too-short-3chars-1" ) << QByteArray("\xE0" ); | 
| 47 |     QTest::newRow(dataTag: "too-short-3chars-2" ) << QByteArray("\xE0\xA0" ); | 
| 48 |     QTest::newRow(dataTag: "too-short-4chars-1" ) << QByteArray("\xF0" ); | 
| 49 |     QTest::newRow(dataTag: "too-short-4chars-2" ) << QByteArray("\xF0\x90" ); | 
| 50 |     QTest::newRow(dataTag: "too-short-4chars-3" ) << QByteArray("\xF0\x90\x80" ); | 
| 51 |  | 
| 52 |     // Surrogate pairs must now be present either | 
| 53 |     // U+D800:        1101   10 0000   00 0000 | 
| 54 |     // encoding: xxxz:1101 xz10:0000 xz00:0000 | 
| 55 |     QTest::newRow(dataTag: "hi-surrogate" ) << QByteArray("\xED\xA0\x80" ); | 
| 56 |     // U+DC00:        1101   11 0000   00 0000 | 
| 57 |     // encoding: xxxz:1101 xz11:0000 xz00:0000 | 
| 58 |     QTest::newRow(dataTag: "lo-surrogate" ) << QByteArray("\xED\xB0\x80" ); | 
| 59 |  | 
| 60 |     // not even in pair: | 
| 61 |     QTest::newRow(dataTag: "surrogate-pair" ) << QByteArray("\xED\xA0\x80\xED\xB0\x80" ); | 
| 62 |  | 
| 63 |     // Characters outside the Unicode range: | 
| 64 |     // 0x110000:   00 0100   01 0000   00 0000   00 0000 | 
| 65 |     // encoding: xxxx:z100 xz01:0000 xz00:0000 xz00:0000 | 
| 66 |     QTest::newRow(dataTag: "non-unicode-1" ) << QByteArray("\xF4\x90\x80\x80" ); | 
| 67 |     // 0x200000:             00 1000   00 0000   00 0000   00 0000 | 
| 68 |     // encoding: xxxx:xz00 xz00:1000 xz00:0000 xz00:0000 xz00:0000 | 
| 69 |     QTest::newRow(dataTag: "non-unicode-2" ) << QByteArray("\xF8\x88\x80\x80\x80" ); | 
| 70 |     // 0x04000000:              0100   00 0000   00 0000   00 0000   00 0000 | 
| 71 |     // encoding: xxxx:xxz0 xz00:0100 xz00:0000 xz00:0000 xz00:0001 xz00:0001 | 
| 72 |     QTest::newRow(dataTag: "non-unicode-3" ) << QByteArray("\xFC\x84\x80\x80\x80\x80" ); | 
| 73 |     // 0x7fffffff:       1   11 1111   11 1111   11 1111   11 1111   11 1111 | 
| 74 |     // encoding: xxxx:xxz0 xz00:0100 xz00:0000 xz00:0000 xz00:0001 xz00:0001 | 
| 75 |     QTest::newRow(dataTag: "non-unicode-4" ) << QByteArray("\xFD\xBF\xBF\xBF\xBF\xBF" ); | 
| 76 |  | 
| 77 |     // As seen above, 0xFE and 0xFF never appear: | 
| 78 |     QTest::newRow(dataTag: "fe" ) << QByteArray("\xFE" ); | 
| 79 |     QTest::newRow(dataTag: "fe-bis" ) << QByteArray("\xFE\xBF\xBF\xBF\xBF\xBF\xBF" ); | 
| 80 |     QTest::newRow(dataTag: "ff" ) << QByteArray("\xFF" ); | 
| 81 |     QTest::newRow(dataTag: "ff-bis" ) << QByteArray("\xFF\xBF\xBF\xBF\xBF\xBF\xBF\xBF" ); | 
| 82 |  | 
| 83 |     // some combinations in UTF-8 are invalid even though they have the proper bits set | 
| 84 |     // these are known as overlong sequences | 
| 85 |  | 
| 86 |     // "A": U+0041:                                               01   00 0001 | 
| 87 |     // overlong 2:                                         xxz0:0001 xz00:0001 | 
| 88 |     QTest::newRow(dataTag: "overlong-1-2" ) << QByteArray("\xC1\x81" ); | 
| 89 |     // overlong 3:                               xxxz:0000 xz00:0001 xz00:0001 | 
| 90 |     QTest::newRow(dataTag: "overlong-1-3" ) << QByteArray("\xE0\x81\x81" ); | 
| 91 |     // overlong 4:                     xxxx:z000 xz00:0000 xz00:0001 xz00:0001 | 
| 92 |     QTest::newRow(dataTag: "overlong-1-4" ) << QByteArray("\xF0\x80\x81\x81" ); | 
| 93 |     // overlong 5:           xxxx:xz00 xz00:0000 xz00:0000 xz00:0001 xz00:0001 | 
| 94 |     QTest::newRow(dataTag: "overlong-1-5" ) << QByteArray("\xF8\x80\x80\x81\x81" ); | 
| 95 |     // overlong 6: xxxx:xxz0 xz00:0000 xz00:0000 xz00:0000 xz00:0001 xz00:0001 | 
| 96 |     QTest::newRow(dataTag: "overlong-1-6" ) << QByteArray("\xFC\x80\x80\x80\x81\x81" ); | 
| 97 |  | 
| 98 |     // U+0080:                                                    10   00 0000 | 
| 99 |     // proper encoding:                                    xxz0:0010 xz00:0000 | 
| 100 |     // overlong 3:                               xxxz:0000 xz00:0010 xz00:0000 | 
| 101 |     QTest::newRow(dataTag: "overlong-2-3" ) << QByteArray("\xE0\x82\x80" ); | 
| 102 |     // overlong 4:                     xxxx:z000 xz00:0000 xz00:0010 xz00:0000 | 
| 103 |     QTest::newRow(dataTag: "overlong-2-4" ) << QByteArray("\xF0\x80\x82\x80" ); | 
| 104 |     // overlong 5:           xxxx:xz00 xz00:0000 xz00:0000 xz00:0010 xz00:0000 | 
| 105 |     QTest::newRow(dataTag: "overlong-2-5" ) << QByteArray("\xF8\x80\x80\x82\x80" ); | 
| 106 |     // overlong 6: xxxx:xxz0 xz00:0000 xz00:0000 xz00:0000 xz00:0010 xz00:0000 | 
| 107 |     QTest::newRow(dataTag: "overlong-2-6" ) << QByteArray("\xFC\x80\x80\x80\x82\x80" ); | 
| 108 |  | 
| 109 |     // U+0800:                                               10 0000   00 0000 | 
| 110 |     // proper encoding:                          xxxz:0000 xz10:0000 xz00:0000 | 
| 111 |     // overlong 4:                     xxxx:z000 xz00:0000 xz10:0000 xz00:0000 | 
| 112 |     QTest::newRow(dataTag: "overlong-3-4" ) << QByteArray("\xF0\x80\xA0\x80" ); | 
| 113 |     // overlong 5:           xxxx:xz00 xz00:0000 xz00:0000 xz10:0000 xz00:0000 | 
| 114 |     QTest::newRow(dataTag: "overlong-3-5" ) << QByteArray("\xF8\x80\x80\xA0\x80" ); | 
| 115 |     // overlong 6: xxxx:xxz0 xz00:0000 xz00:0000 xz00:0000 xz10:0000 xz00:0000 | 
| 116 |     QTest::newRow(dataTag: "overlong-3-6" ) << QByteArray("\xFC\x80\x80\x80\xA0\x80" ); | 
| 117 |  | 
| 118 |     // U+010000:                                   00 0100   00 0000   00 0000 | 
| 119 |     // proper encoding:                xxxx:z000 xz00:0100 xz00:0000 xz00:0000 | 
| 120 |     // overlong 5:           xxxx:xz00 xz00:0000 xz00:0100 xz00:0000 xz00:0000 | 
| 121 |     QTest::newRow(dataTag: "overlong-4-5" ) << QByteArray("\xF8\x80\x84\x80\x80" ); | 
| 122 |     // overlong 6: xxxx:xxz0 xz00:0000 xz00:0000 xz00:0100 xz00:0000 xz00:0000 | 
| 123 |     QTest::newRow(dataTag: "overlong-4-6" ) << QByteArray("\xFC\x80\x80\x84\x80\x80" ); | 
| 124 |  | 
| 125 | } | 
| 126 |  | 
| 127 | void loadNonCharactersRows() | 
| 128 | { | 
| 129 |     // Unicode has a couple of "non-characters" that one can use internally | 
| 130 |     // These characters are allowed for text-interchange (see http://www.unicode.org/versions/corrigendum9.html) | 
| 131 |     // | 
| 132 |     // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF, | 
| 133 |     // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and | 
| 134 |     // U+FDEF (inclusive) | 
| 135 |  | 
| 136 |     // U+FDD0 through U+FDEF | 
| 137 |     for (int i = 0; i < 16; ++i) { | 
| 138 |         char utf8[] = { char(0357), char(0267), char(0220 + i), 0 }; | 
| 139 |         QString utf16 = QChar(0xfdd0 + i); | 
| 140 |         QTest::newRow(qPrintable(QString::number(0xfdd0 + i, 16))) << QByteArray(utf8) << utf16; | 
| 141 |     } | 
| 142 |  | 
| 143 |     // the last two in Planes 1 through 16 | 
| 144 |     for (uint plane = 1; plane <= 16; ++plane) { | 
| 145 |         for (uint lower = 0xfffe; lower < 0x10000; ++lower) { | 
| 146 |             uint ucs4 = (plane << 16) | lower; | 
| 147 |             char utf8[] = { char(0xf0 | uchar(ucs4 >> 18)), | 
| 148 |                             char(0x80 | (uchar(ucs4 >> 12) & 0x3f)), | 
| 149 |                             char(0x80 | (uchar(ucs4 >> 6) & 0x3f)), | 
| 150 |                             char(0x80 | (uchar(ucs4) & 0x3f)), | 
| 151 |                             0 }; | 
| 152 |             ushort utf16[] = { QChar::highSurrogate(ucs4), QChar::lowSurrogate(ucs4), 0 }; | 
| 153 |  | 
| 154 |             QTest::newRow(qPrintable(QString::number(ucs4, 16))) << QByteArray(utf8) << QString::fromUtf16(utf16); | 
| 155 |         } | 
| 156 |     } | 
| 157 |  | 
| 158 |     QTest::newRow(dataTag: "fffe" ) << QByteArray("\xEF\xBF\xBE" ) << QString(QChar(0xfffe)); | 
| 159 |     QTest::newRow(dataTag: "ffff" ) << QByteArray("\xEF\xBF\xBF" ) << QString(QChar(0xffff)); | 
| 160 | } | 
| 161 |  |