| 1 | /**************************************************************************** |
| 2 | ** |
| 3 | ** Copyright (C) 2016 The Qt Company Ltd. |
| 4 | ** Copyright (C) 2016 Intel Corporation. |
| 5 | ** Contact: https://www.qt.io/licensing/ |
| 6 | ** |
| 7 | ** This file is part of the test suite of the Qt Toolkit. |
| 8 | ** |
| 9 | ** $QT_BEGIN_LICENSE:GPL-EXCEPT$ |
| 10 | ** Commercial License Usage |
| 11 | ** Licensees holding valid commercial Qt licenses may use this file in |
| 12 | ** accordance with the commercial license agreement provided with the |
| 13 | ** Software or, alternatively, in accordance with the terms contained in |
| 14 | ** a written agreement between you and The Qt Company. For licensing terms |
| 15 | ** and conditions see https://www.qt.io/terms-conditions. For further |
| 16 | ** information use the contact form at https://www.qt.io/contact-us. |
| 17 | ** |
| 18 | ** GNU General Public License Usage |
| 19 | ** Alternatively, this file may be used under the terms of the GNU |
| 20 | ** General Public License version 3 as published by the Free Software |
| 21 | ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT |
| 22 | ** included in the packaging of this file. Please review the following |
| 23 | ** information to ensure the GNU General Public License requirements will |
| 24 | ** be met: https://www.gnu.org/licenses/gpl-3.0.html. |
| 25 | ** |
| 26 | ** $QT_END_LICENSE$ |
| 27 | ** |
| 28 | ****************************************************************************/ |
| 29 | |
| 30 | #include <QtCore/QUrl> |
| 31 | #include <QtTest/QtTest> |
| 32 | |
| 33 | #include "private/qtldurl_p.h" |
| 34 | #include "private/qurl_p.h" |
| 35 | |
// For testsuites
//
// Constants used by the data tables in this file. The names presumably mirror
// the external IDNA/stringprep test suite the vectors were taken from -- TODO
// confirm the origin.

// Prefix glued (by string-literal concatenation) onto the expected Punycode
// output in idna_testsuite_data(); it therefore has to remain a macro.
#define IDNA_ACE_PREFIX "xn--"
// Value stored in the "toasciirc" / "tounicoderc" columns of idna_testsuite_data().
#define IDNA_SUCCESS 1
// Value stored in the "flags" column of nameprep_testsuite_data().
#define STRINGPREP_NO_UNASSIGNED 1
// Values stored in the "rc" column of nameprep_testsuite_data() for rows whose
// expected output is a null QString.
#define STRINGPREP_CONTAINS_UNASSIGNED 2
#define STRINGPREP_CONTAINS_PROHIBITED 3
#define STRINGPREP_BIDI_BOTH_L_AND_RAL 4
#define STRINGPREP_BIDI_LEADTRAIL_NOT_RAL 5
| 44 | |
// Fixed-capacity buffer of UTF-16 code units. It exists so that a test vector
// can be stored by value in a QTest data column (see the "unicode" column of
// idna_testsuite_data()); the number of valid entries is tracked separately by
// the caller.
struct ushortarray {
    // Creates a buffer whose code units are all zero.
    ushortarray() {}

    // Copies the N code units of \a array to the start of the buffer; the
    // remaining entries stay zero. Arrays that would not fit are rejected at
    // compile time instead of overflowing `points`.
    template <size_t N>
    ushortarray(unsigned short (&array)[N])
    {
        static_assert(N <= sizeof(points) / sizeof(points[0]),
                      "test vector does not fit into ushortarray::points");
        memcpy(points, array, N * sizeof(unsigned short));
    }

    // Zero-initialised so that copies of this object never read indeterminate values.
    unsigned short points[100] = {};
};
| 55 | |
// Make the custom types known to the meta-type system so they can be used as
// QTest data-column types. ushortarray is the type of the "unicode" column in
// idna_testsuite_data().
Q_DECLARE_METATYPE(ushortarray)
// NOTE(review): the two QUrl flag types are not referenced in the part of the
// file reviewed here -- presumably they are used as column types by the
// percent-encoding tests further down; confirm.
Q_DECLARE_METATYPE(QUrl::FormattingOptions)
Q_DECLARE_METATYPE(QUrl::ComponentFormattingOptions)
| 59 | |
// Test object for QUrl's internal helpers. Every private slot is a QtTest test
// function; a slot named foo_data() supplies the data table that foo() is run
// against, one invocation per row.
class tst_QUrlInternal : public QObject
{
    Q_OBJECT

private Q_SLOTS:
    // IDNA internals
    // The slots inside the guard call qt_punycodeEncoder(), qt_punycodeDecoder()
    // and qt_nameprep() directly, so they are only declared when
    // QT_BUILD_INTERNAL is defined.
#ifdef QT_BUILD_INTERNAL
    void idna_testsuite_data();
    void idna_testsuite();
    void nameprep_testsuite_data();
    void nameprep_testsuite();
    void nameprep_highcodes_data();
    void nameprep_highcodes();
#endif
    // ace_testsuite() goes through the public QUrl::toAce() / QUrl::fromAce()
    // and QUrl::setHost() API.
    void ace_testsuite_data();
    void ace_testsuite();
    void std3violations_data();
    void std3violations();
    void std3deviations_data();
    void std3deviations();

    // percent-encoding internals
    void correctEncodedMistakes_data();
    void correctEncodedMistakes();
    void encodingRecode_data();
    void encodingRecode();
    void encodingRecodeInvalidUtf8_data();
    void encodingRecodeInvalidUtf8();
    void recodeByteArray_data();
    void recodeByteArray();
};
| 91 | #include "tst_qurlinternal.moc" |
| 92 | |
#ifdef QT_BUILD_INTERNAL
// Appends one row to the idna_testsuite() data table.
//
// tag       - data tag of the row
// codeUnits - UTF-16 code units of the Unicode label
// ace       - expected encoder output, IDNA_ACE_PREFIX included
//
// The "numchars" column is taken from the array length so it cannot drift from
// the data, and every row fills all seven declared columns ("allowunassigned"
// and "usestd3asciirules" are 0, both result codes are IDNA_SUCCESS).
template <size_t N>
static void addIdnaTestRow(const char *tag, unsigned short (&codeUnits)[N], const char *ace)
{
    QTest::newRow(tag) << int(N) << ushortarray(codeUnits) << QByteArray(ace)
                       << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
}

// Data table for idna_testsuite(): Unicode labels and the Punycode text the
// encoder is expected to produce for them. Only "numchars", "unicode" and
// "punycode" are fetched by the test function; the remaining columns are kept
// for completeness of the vectors.
void tst_QUrlInternal::idna_testsuite_data()
{
    QTest::addColumn<int>("numchars");
    QTest::addColumn<ushortarray>("unicode");
    QTest::addColumn<QByteArray>("punycode");
    QTest::addColumn<int>("allowunassigned");
    QTest::addColumn<int>("usestd3asciirules");
    QTest::addColumn<int>("toasciirc");
    QTest::addColumn<int>("tounicoderc");

    unsigned short d1[] = { 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
                            0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
                            0x061F };
    addIdnaTestRow("Arabic (Egyptian)", d1, IDNA_ACE_PREFIX "egbpdaj6bu4bxfgehfvwxn");

    unsigned short d2[] = { 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D,
                            0x6587 };
    addIdnaTestRow("Chinese (simplified)", d2, IDNA_ACE_PREFIX "ihqwcrb4cv8a8dqg056pqjye");

    unsigned short d3[] = { 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D,
                            0x6587 };
    addIdnaTestRow("Chinese (traditional)", d3, IDNA_ACE_PREFIX "ihqwctvzc91f659drss3x8bo0yb");

    unsigned short d4[] = { 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
                            0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
                            0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079 };
    addIdnaTestRow("Czech", d4, IDNA_ACE_PREFIX "Proprostnemluvesky-uyb24dma41a");

    unsigned short d5[] = { 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
                            0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
                            0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA };
    addIdnaTestRow("Hebrew", d5, IDNA_ACE_PREFIX "4dbcagdahymbxekheh6e0a7fei0b");

    unsigned short d6[] = { 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
                            0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
                            0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
                            0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902 };
    addIdnaTestRow("Hindi (Devanagari)", d6,
                   IDNA_ACE_PREFIX "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd");

    unsigned short d7[] = { 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
                            0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
                            0x306E, 0x304B };
    addIdnaTestRow("Japanese (kanji and hiragana)", d7,
                   IDNA_ACE_PREFIX "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa");

    unsigned short d8[] = { 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
                            0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
                            0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
                            0x0441, 0x0441, 0x043A, 0x0438 };
    addIdnaTestRow("Russian (Cyrillic)", d8, IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l");

    unsigned short d9[] = { 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
                            0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
                            0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
                            0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
                            0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C };
    addIdnaTestRow("Spanish", d9,
                   IDNA_ACE_PREFIX "PorqunopuedensimplementehablarenEspaol-fmd56a");

    unsigned short d10[] = { 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
                             0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
                             0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
                             0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074 };
    addIdnaTestRow("Vietnamese", d10,
                   IDNA_ACE_PREFIX "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g");

    unsigned short d11[] = { 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F };
    addIdnaTestRow("Japanese", d11, IDNA_ACE_PREFIX "3B-ww4c5e180e575a65lsy2b");

    // this test does NOT include nameprepping, so the capitals will remain
    unsigned short d12[] = { 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
                             0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
                             0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053 };
    addIdnaTestRow("Japanese2", d12, IDNA_ACE_PREFIX "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n");

    unsigned short d13[] = { 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
                             0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
                             0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
                             0x6240 };
    addIdnaTestRow("Japanese3", d13, IDNA_ACE_PREFIX "Hello-Another-Way--fc4qua05auwb3674vfr0b");

    unsigned short d14[] = { 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032 };
    addIdnaTestRow("Japanese4", d14, IDNA_ACE_PREFIX "2-u9tlzr9756bt3uc0v");

    unsigned short d15[] = { 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
                             0x3059, 0x308B, 0x0035, 0x79D2, 0x524D };
    addIdnaTestRow("Japanese5", d15, IDNA_ACE_PREFIX "MajiKoi5-783gue6qz075azm5e");

    unsigned short d16[] = { 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0 };
    addIdnaTestRow("Japanese6", d16, IDNA_ACE_PREFIX "de-jg4avhby1noc0d");

    unsigned short d17[] = { 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067 };
    addIdnaTestRow("Japanese7", d17, IDNA_ACE_PREFIX "d9juau41awczczp");

    unsigned short d18[] = { 0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac };
    addIdnaTestRow("Greek", d18, IDNA_ACE_PREFIX "hxargifdar");

    unsigned short d19[] = { 0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
                             0x0127, 0x0061 };
    addIdnaTestRow("Maltese (Malti)", d19, IDNA_ACE_PREFIX "bonusaa-5bb1da");

    // Same code points as the "Russian (Cyrillic)" row above; the tag carries a
    // distinct suffix so that every row can be selected individually.
    unsigned short d20[] = { 0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
                             0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
                             0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
                             0x0441, 0x0441, 0x043a, 0x0438 };
    addIdnaTestRow("Russian (Cyrillic) 2", d20, IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l");
}
#endif
| 239 | |
#ifdef QT_BUILD_INTERNAL
// Runs one row of idna_testsuite_data(): encodes the Unicode label with
// qt_punycodeEncoder() and compares against the expected Punycode text, then
// decodes that result again and expects the original label back.
void tst_QUrlInternal::idna_testsuite()
{
    QFETCH(int, numchars);
    QFETCH(ushortarray, unicode);
    QFETCH(QByteArray, punycode);

    // The row stores raw UTF-16 code units; the encoder wants them as QChars.
    const QChar *label = reinterpret_cast<const QChar *>(unicode.points);

    QString result;
    qt_punycodeEncoder(label, numchars, &result);
    QCOMPARE(result.toLatin1(), punycode);

    // Round trip: decoding the encoder output must reproduce the input.
    QCOMPARE(qt_punycodeDecoder(result), QString::fromUtf16(unicode.points, numchars));
}
#endif
| 253 | |
#ifdef QT_BUILD_INTERNAL
// Data table for nameprep_testsuite(). Each row holds a UTF-8 encoded input
// ("in") and the string expected after qt_nameprep() ("out"); rows that are
// expected to be rejected have a null "out", the profile name "Nameprep" and a
// non-zero STRINGPREP_* value in "rc".
void tst_QUrlInternal::nameprep_testsuite_data()
{
    QTest::addColumn<QString>("in");
    QTest::addColumn<QString>("out");
    QTest::addColumn<QString>("profile");
    QTest::addColumn<int>("flags");
    QTest::addColumn<int>("rc");

    // Row for an input that is accepted: null profile, flags 0, rc 0.
    const auto accepts = [](const char *tag, const char *utf8Input, const QString &expected) {
        QTest::newRow(tag) << QString::fromUtf8(utf8Input) << expected
                           << QString() << 0 << 0;
    };
    // Row for an input that is rejected with the given rc: null output,
    // profile "Nameprep", flags 0.
    const auto rejects = [](const char *tag, const char *utf8Input, int rc) {
        QTest::newRow(tag) << QString::fromUtf8(utf8Input) << QString()
                           << QString("Nameprep") << 0 << rc;
    };

    // --- mapping, case folding and normalization ---
    accepts("Map to nothing",
            "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B"
            "bar" "\xE2\x80\x8B\xE2\x81\xA0" "baz\xEF\xB8\x80\xEF\xB8\x88"
            "\xEF\xB8\x8F\xEF\xBB\xBF",
            QString::fromUtf8("foobarbaz"));
    accepts("Case folding ASCII U+0043 U+0041 U+0046 U+0045",
            "CAFE", QString::fromUtf8("cafe"));
    accepts("Case folding 8bit U+00DF (german sharp s)",
            "\xC3\x9F", QString("ss"));
    accepts("Case folding U+0130 (turkish capital I with dot)",
            "\xC4\xB0", QString::fromUtf8("i\xcc\x87"));
    accepts("Case folding multibyte U+0143 U+037A",
            "\xC5\x83\xCD\xBA", QString::fromUtf8("\xC5\x84 \xCE\xB9"));
    accepts("Case folding U+2121 U+33C6 U+1D7BB",
            "\xE2\x84\xA1\xE3\x8F\x86\xF0\x9D\x9E\xBB",
            QString::fromUtf8("telc\xE2\x88\x95" "kg\xCF\x83"));
    accepts("Normalization of U+006a U+030c U+00A0 U+00AA",
            "\x6A\xCC\x8C\xC2\xA0\xC2\xAA", QString::fromUtf8("\xC7\xB0 a"));
    accepts("Case folding U+1FB7 and normalization",
            "\xE1\xBE\xB7", QString::fromUtf8("\xE1\xBE\xB6\xCE\xB9"));
    // The original testsuite has the input "\xC7\xF0" here, which is a typo.
    accepts("Self-reverting case folding U+01F0 and normalization",
            "\xC7\xB0", QString::fromUtf8("\xC7\xB0"));
    accepts("Self-reverting case folding U+0390 and normalization",
            "\xCE\x90", QString::fromUtf8("\xCE\x90"));
    accepts("Self-reverting case folding U+03B0 and normalization",
            "\xCE\xB0", QString::fromUtf8("\xCE\xB0"));
    accepts("Self-reverting case folding U+1E96 and normalization",
            "\xE1\xBA\x96", QString::fromUtf8("\xE1\xBA\x96"));
    accepts("Self-reverting case folding U+1F56 and normalization",
            "\xE1\xBD\x96", QString::fromUtf8("\xE1\xBD\x96"));

    // --- space characters ---
    accepts("ASCII space character U+0020",
            "\x20", QString::fromUtf8("\x20"));
    accepts("Non-ASCII 8bit space character U+00A0",
            "\xC2\xA0", QString::fromUtf8("\x20"));
    rejects("Non-ASCII multibyte space character U+1680",
            "x\xE1\x9A\x80x", STRINGPREP_CONTAINS_PROHIBITED);
    accepts("Non-ASCII multibyte space character U+2000",
            "\xE2\x80\x80", QString::fromUtf8("\x20"));
    accepts("Zero Width Space U+200b",
            "\xE2\x80\x8b", QString());
    accepts("Non-ASCII multibyte space character U+3000",
            "\xE3\x80\x80", QString::fromUtf8("\x20"));

    // --- control characters ---
    accepts("ASCII control characters U+0010 U+007F",
            "\x10\x7F", QString::fromUtf8("\x10\x7F"));
    rejects("Non-ASCII 8bit control character U+0080",
            "x\xC2\x80x", STRINGPREP_CONTAINS_PROHIBITED);
    rejects("Non-ASCII 8bit control character U+0085",
            "x\xC2\x85x", STRINGPREP_CONTAINS_PROHIBITED);
    rejects("Non-ASCII multibyte control character U+180E",
            "x\xE1\xA0\x8Ex", STRINGPREP_CONTAINS_PROHIBITED);
    accepts("Zero Width No-Break Space U+FEFF",
            "\xEF\xBB\xBF", QString());
    rejects("Non-ASCII control character U+1D175",
            "x\xF0\x9D\x85\xB5x", STRINGPREP_CONTAINS_PROHIBITED);

    // --- private use, non-characters, surrogates and other prohibited output ---
    rejects("Plane 0 private use character U+F123",
            "x\xEF\x84\xA3x", STRINGPREP_CONTAINS_PROHIBITED);
    rejects("Plane 15 private use character U+F1234",
            "x\xF3\xB1\x88\xB4x", STRINGPREP_CONTAINS_PROHIBITED);
    rejects("Plane 16 private use character U+10F234",
            "x\xF4\x8F\x88\xB4x", STRINGPREP_CONTAINS_PROHIBITED);
    rejects("Non-character code point U+8FFFE",
            "x\xF2\x8F\xBF\xBEx", STRINGPREP_CONTAINS_PROHIBITED);
    rejects("Non-character code point U+10FFFF",
            "x\xF4\x8F\xBF\xBFx", STRINGPREP_CONTAINS_PROHIBITED);
    rejects("Surrogate code U+DF42",
            "x\xED\xBD\x82x", STRINGPREP_CONTAINS_PROHIBITED);
    rejects("Non-plain text character U+FFFD",
            "x\xEF\xBF\xBDx", STRINGPREP_CONTAINS_PROHIBITED);
    rejects("Ideographic description character U+2FF5",
            "x\xE2\xBF\xB5x", STRINGPREP_CONTAINS_PROHIBITED);
    accepts("Display property character U+0341",
            "\xCD\x81", QString::fromUtf8("\xCC\x81"));
    rejects("Left-to-right mark U+200E",
            "x\xE2\x80\x8Ex", STRINGPREP_CONTAINS_PROHIBITED);
    rejects("Deprecated U+202A",
            "x\xE2\x80\xAA", STRINGPREP_CONTAINS_PROHIBITED);
    rejects("Language tagging character U+E0001",
            "x\xF3\xA0\x80\x81x", STRINGPREP_CONTAINS_PROHIBITED);
    rejects("Language tagging character U+E0042",
            "x\xF3\xA0\x81\x82x", STRINGPREP_CONTAINS_PROHIBITED);

    // --- bidirectional text ---
    rejects("Bidi: RandALCat character U+05BE and LCat characters",
            "foo\xD6\xBE" "bar", STRINGPREP_BIDI_BOTH_L_AND_RAL);
    rejects("Bidi: RandALCat character U+FD50 and LCat characters",
            "foo\xEF\xB5\x90" "bar", STRINGPREP_BIDI_BOTH_L_AND_RAL);
    accepts("Bidi: RandALCat character U+FB38 and LCat characters",
            "foo\xEF\xB9\xB6" "bar", QString::fromUtf8("foo \xd9\x8e" "bar"));
    rejects("Bidi: RandALCat without trailing RandALCat U+0627 U+0031",
            "\xD8\xA7\x31", STRINGPREP_BIDI_LEADTRAIL_NOT_RAL);
    accepts("Bidi: RandALCat character U+0627 U+0031 U+0628",
            "\xD8\xA7\x31\xD8\xA8", QString::fromUtf8("\xD8\xA7\x31\xD8\xA8"));

    // The only row with a non-zero "flags" value, hence spelled out in full.
    QTest::newRow("Unassigned code point U+E0002")
        << QString::fromUtf8("\xF3\xA0\x80\x82")
        << QString()
        << QString("Nameprep") << STRINGPREP_NO_UNASSIGNED << STRINGPREP_CONTAINS_UNASSIGNED;

    // Accepted input that nevertheless names the "Nameprep" profile.
    QTest::newRow("Larger test (shrinking)")
        << QString::fromUtf8("X\xC2\xAD\xC3\x9F\xC4\xB0\xE2\x84\xA1\x6a\xcc\x8c\xc2\xa0\xc2"
                             "\xaa\xce\xb0\xe2\x80\x80")
        << QString::fromUtf8("xssi\xcc\x87" "tel\xc7\xb0 a\xce\xb0 ")
        << QString("Nameprep") << 0 << 0;

    accepts("Larger test (expanding)",
            "X\xC3\x9F\xe3\x8c\x96\xC4\xB0\xE2\x84\xA1\xE2\x92\x9F\xE3\x8c\x80",
            QString::fromUtf8("xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\x88"
                              "\xe3\x83\xab" "i\xcc\x87" "tel\x28" "d\x29\xe3\x82\xa2\xe3\x83\x91"
                              "\xe3\x83\xbc\xe3\x83\x88"));
}
#endif
| 500 | |
#ifdef QT_BUILD_INTERNAL
// Runs one row of nameprep_testsuite_data(): applies qt_nameprep() to the
// input in place, starting at offset 0, and compares it with the expected text.
void tst_QUrlInternal::nameprep_testsuite()
{
    QFETCH(QString, profile);
    QFETCH(QString, out);
    QFETCH(QString, in);

    // "profile" is fetched but takes no part in the comparison below.
    Q_UNUSED(profile);

    qt_nameprep(&in, 0);
    QCOMPARE(in, out);
}
#endif
| 512 | |
#ifdef QT_BUILD_INTERNAL
// Data table for nameprep_highcodes(): inputs given as raw UTF-16 code units,
// including surrogate pairs, together with the code units expected after
// qt_nameprep(). Uses the same columns as nameprep_testsuite_data().
void tst_QUrlInternal::nameprep_highcodes_data()
{
    QTest::addColumn<QString>("in");
    QTest::addColumn<QString>("out");
    QTest::addColumn<QString>("profile");
    QTest::addColumn<int>("flags");
    QTest::addColumn<int>("rc");

    {
        // Surrogate pair D801 DC1D (U+1041D) is expected to become D801 DC45.
        const QChar input[] = { '-', 0xd801, 0xdc1d, 'a' };
        const QChar expected[] = { '-', 0xd801, 0xdc45, 'a' };
        const int inputLength = sizeof(input) / sizeof(input[0]);
        const int expectedLength = sizeof(expected) / sizeof(expected[0]);
        QTest::newRow("highcodes (U+1041D)")
            << QString(input, inputLength)
            << QString(expected, expectedLength)
            << QString() << 0 << 0;
    }
    {
        // Surrogate pair D835 DF6E (U+1D76E) is expected to collapse to U+03C9,
        // so the output is one code unit shorter than the input.
        const QChar input[] = { 0x011C, 0xd835, 0xdf6e, 0x0110 };
        const QChar expected[] = { 0x011D, 0x03C9, 0x0111 };
        const int inputLength = sizeof(input) / sizeof(input[0]);
        const int expectedLength = sizeof(expected) / sizeof(expected[0]);
        QTest::newRow("highcodes (U+1D76E)")
            << QString(input, inputLength)
            << QString(expected, expectedLength)
            << QString() << 0 << 0;
    }
    {
        // U+2060 is expected to disappear and 'D' to come out as 'd'.
        const QChar input[] = { 'D', 'o', '\'', 0x2060, 'h' };
        const QChar expected[] = { 'd', 'o', '\'', 'h' };
        const int inputLength = sizeof(input) / sizeof(input[0]);
        const int expectedLength = sizeof(expected) / sizeof(expected[0]);
        QTest::newRow("highcodes (D, o, ', U+2060, h)")
            << QString(input, inputLength)
            << QString(expected, expectedLength)
            << QString() << 0 << 0;
    }
}
#endif
| 548 | |
#ifdef QT_BUILD_INTERNAL
// Runs one row of nameprep_highcodes_data(): applies qt_nameprep() to the
// input in place, starting at offset 0, and compares it with the expected text.
void tst_QUrlInternal::nameprep_highcodes()
{
    QFETCH(QString, profile);
    QFETCH(QString, out);
    QFETCH(QString, in);

    // "profile" is fetched but takes no part in the comparison below.
    Q_UNUSED(profile);

    qt_nameprep(&in, 0);
    QCOMPARE(in, out);
}
#endif
| 560 | |
| 561 | void tst_QUrlInternal::ace_testsuite_data() |
| 562 | { |
| 563 | QTest::addColumn<QString>(name: "in" ); |
| 564 | QTest::addColumn<QString>(name: "toace" ); |
| 565 | QTest::addColumn<QString>(name: "fromace" ); |
| 566 | QTest::addColumn<QString>(name: "unicode" ); |
| 567 | |
| 568 | QTest::newRow(dataTag: "ascii-lower" ) << "fluke" << "fluke" << "fluke" << "fluke" ; |
| 569 | QTest::newRow(dataTag: "ascii-mixed" ) << "FLuke" << "fluke" << "fluke" << "fluke" ; |
| 570 | QTest::newRow(dataTag: "ascii-upper" ) << "FLUKE" << "fluke" << "fluke" << "fluke" ; |
| 571 | |
| 572 | QTest::newRow(dataTag: "asciifolded" ) << QString::fromLatin1(str: "stra\337e" ) << "strasse" << "." << "strasse" ; |
| 573 | QTest::newRow(dataTag: "asciifolded-dotcom" ) << QString::fromLatin1(str: "stra\337e.example.com" ) << "strasse.example.com" << "." << "strasse.example.com" ; |
| 574 | QTest::newRow(dataTag: "greek-mu" ) << QString::fromLatin1(str: "\265V" ) |
| 575 | <<"xn--v-lmb" |
| 576 | << "." |
| 577 | << QString::fromUtf8(str: "\316\274v" ); |
| 578 | |
| 579 | QTest::newRow(dataTag: "non-ascii-lower" ) << QString::fromLatin1(str: "alqualond\353" ) |
| 580 | << "xn--alqualond-34a" |
| 581 | << "." |
| 582 | << QString::fromLatin1(str: "alqualond\353" ); |
| 583 | QTest::newRow(dataTag: "non-ascii-mixed" ) << QString::fromLatin1(str: "Alqualond\353" ) |
| 584 | << "xn--alqualond-34a" |
| 585 | << "." |
| 586 | << QString::fromLatin1(str: "alqualond\353" ); |
| 587 | QTest::newRow(dataTag: "non-ascii-upper" ) << QString::fromLatin1(str: "ALQUALOND\313" ) |
| 588 | << "xn--alqualond-34a" |
| 589 | << "." |
| 590 | << QString::fromLatin1(str: "alqualond\353" ); |
| 591 | |
| 592 | QTest::newRow(dataTag: "idn-lower" ) << "xn--alqualond-34a" << "xn--alqualond-34a" |
| 593 | << QString::fromLatin1(str: "alqualond\353" ) |
| 594 | << QString::fromLatin1(str: "alqualond\353" ); |
| 595 | QTest::newRow(dataTag: "idn-mixed" ) << "Xn--alqualond-34a" << "xn--alqualond-34a" |
| 596 | << QString::fromLatin1(str: "alqualond\353" ) |
| 597 | << QString::fromLatin1(str: "alqualond\353" ); |
| 598 | QTest::newRow(dataTag: "idn-mixed2" ) << "XN--alqualond-34a" << "xn--alqualond-34a" |
| 599 | << QString::fromLatin1(str: "alqualond\353" ) |
| 600 | << QString::fromLatin1(str: "alqualond\353" ); |
| 601 | QTest::newRow(dataTag: "idn-mixed3" ) << "xn--ALQUALOND-34a" << "xn--alqualond-34a" |
| 602 | << QString::fromLatin1(str: "alqualond\353" ) |
| 603 | << QString::fromLatin1(str: "alqualond\353" ); |
| 604 | QTest::newRow(dataTag: "idn-mixed4" ) << "xn--alqualond-34A" << "xn--alqualond-34a" |
| 605 | << QString::fromLatin1(str: "alqualond\353" ) |
| 606 | << QString::fromLatin1(str: "alqualond\353" ); |
| 607 | QTest::newRow(dataTag: "idn-upper" ) << "XN--ALQUALOND-34A" << "xn--alqualond-34a" |
| 608 | << QString::fromLatin1(str: "alqualond\353" ) |
| 609 | << QString::fromLatin1(str: "alqualond\353" ); |
| 610 | |
| 611 | QTest::newRow(dataTag: "separator-3002" ) << QString::fromUtf8(str: "example\343\200\202com" ) |
| 612 | << "example.com" << "." << "example.com" ; |
| 613 | |
| 614 | QString egyptianIDN = |
| 615 | QString::fromUtf8(str: "\331\210\330\262\330\247\330\261\330\251\055\330\247\331\204\330" |
| 616 | "\243\330\252\330\265\330\247\331\204\330\247\330\252.\331\205" |
| 617 | "\330\265\330\261" ); |
| 618 | QTest::newRow(dataTag: "egyptian-tld-ace" ) |
| 619 | << "xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c" |
| 620 | << "xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c" |
| 621 | << "." |
| 622 | << egyptianIDN; |
| 623 | QTest::newRow(dataTag: "egyptian-tld-unicode" ) |
| 624 | << egyptianIDN |
| 625 | << "xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c" |
| 626 | << "." |
| 627 | << egyptianIDN; |
| 628 | QTest::newRow(dataTag: "egyptian-tld-mix1" ) |
| 629 | << QString::fromUtf8(str: "\331\210\330\262\330\247\330\261\330\251\055\330\247\331\204\330" |
| 630 | "\243\330\252\330\265\330\247\331\204\330\247\330\252.xn--wgbh1c" ) |
| 631 | << "xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c" |
| 632 | << "." |
| 633 | << egyptianIDN; |
| 634 | QTest::newRow(dataTag: "egyptian-tld-mix2" ) |
| 635 | << QString::fromUtf8(str: "xn----rmckbbajlc6dj7bxne2c.\331\205\330\265\330\261" ) |
| 636 | << "xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c" |
| 637 | << "." |
| 638 | << egyptianIDN; |
| 639 | |
| 640 | QString russianIDN = QString::fromUtf8(str: "\321\217\320\275\320\264\320\265\320\272\321\201.\321\200\321\204" ); |
| 641 | QTest::newRow(dataTag: "russian-tld-ace" ) |
| 642 | << "xn--d1acpjx3f.xn--p1ai" |
| 643 | << "xn--d1acpjx3f.xn--p1ai" |
| 644 | << "." |
| 645 | << russianIDN; |
| 646 | |
| 647 | QString taiwaneseIDN = QString::fromUtf8(str: "\345\217\260\345\214\227\346\214\211\346\221\251.\345\217\260\347\201\243" ); |
| 648 | QTest::newRow(dataTag: "taiwanese-tld-ace" ) |
| 649 | << "xn--djrptm67aikb.xn--kpry57d" |
| 650 | << "xn--djrptm67aikb.xn--kpry57d" |
| 651 | << "." |
| 652 | << taiwaneseIDN; |
| 653 | |
| 654 | // violations / invalids |
| 655 | QTest::newRow(dataTag: "invalid-punycode" ) << "xn--z" << "xn--z" << "xn--z" << "xn--z" ; |
| 656 | |
| 657 | // U+00A0 NO-BREAK SPACE encodes to Punycode "6a" |
| 658 | // but it is prohibited and should have caused encoding failure |
| 659 | QTest::newRow(dataTag: "invalid-nameprep-prohibited" ) << "xn--6a" << "xn--6a" << "xn--6a" << "xn--6a" ; |
| 660 | |
| 661 | // U+00AD SOFT HYPHEN between "a" and "b" encodes to Punycode "ab-5da" |
| 662 | // but it should have been removed in the nameprep stage |
| 663 | QTest::newRow(dataTag: "invalid-nameprep-maptonothing" ) << "xn-ab-5da" << "xn-ab-5da" << "xn-ab-5da" << "xn-ab-5da" ; |
| 664 | |
| 665 | // U+00C1 LATIN CAPITAL LETTER A WITH ACUTE encodes to Punycode "4ba" |
| 666 | // but it should have nameprepped to lowercase first |
| 667 | QTest::newRow(dataTag: "invalid-nameprep-uppercase" ) << "xn--4ba" << "xn--4ba" << "xn--4ba" << "xn--4ba" ; |
| 668 | |
| 669 | // U+00B5 MICRO SIGN encodes to Punycode "sba" |
| 670 | // but it should have nameprepped to NFKC U+03BC GREEK SMALL LETTER MU |
| 671 | QTest::newRow(dataTag: "invalid-nameprep-nonnfkc" ) << "xn--sba" << "xn--sba" << "xn--sba" << "xn--sba" ; |
| 672 | |
| 673 | // U+04CF CYRILLIC SMALL LETTER PALOCHKA encodes to "s5a" |
| 674 | // but it's not in RFC 3454's allowed character list (Unicode 3.2) |
| 675 | QTest::newRow(dataTag: "invalid-nameprep-unassigned" ) << "xn--s5a" << "xn--s5a" << "xn--s5a" << "xn--s5a" ; |
| 676 | // same character, see QTBUG-60364 |
| 677 | QTest::newRow(dataTag: "invalid-nameprep-unassigned2" ) << "xn--80ak6aa92e" << "xn--80ak6aa92e" << "xn--80ak6aa92e" << "xn--80ak6aa92e" ; |
| 678 | } |
| 679 | |
| 680 | void tst_QUrlInternal::ace_testsuite() |
| 681 | { |
| 682 | static const char canonsuffix[] = ".troll.no" ; |
| 683 | QFETCH(QString, in); |
| 684 | QFETCH(QString, toace); |
| 685 | QFETCH(QString, fromace); |
| 686 | QFETCH(QString, unicode); |
| 687 | |
| 688 | const char *suffix = canonsuffix; |
| 689 | if (toace.contains(c: '.')) |
| 690 | suffix = 0; |
| 691 | |
| 692 | QString domain = in + suffix; |
| 693 | QCOMPARE(QString::fromLatin1(QUrl::toAce(domain)), toace + suffix); |
| 694 | if (fromace != "." ) |
| 695 | QCOMPARE(QUrl::fromAce(domain.toLatin1()), fromace + suffix); |
| 696 | QCOMPARE(QUrl::fromAce(QUrl::toAce(domain)), unicode + suffix); |
| 697 | |
| 698 | QUrl u; |
| 699 | u.setHost(host: domain); |
| 700 | QVERIFY(u.isValid()); |
| 701 | QCOMPARE(u.host(), unicode + suffix); |
| 702 | QCOMPARE(u.host(QUrl::EncodeUnicode), toace + suffix); |
| 703 | QCOMPARE(u.toEncoded(), "//" + toace.toLatin1() + suffix); |
| 704 | QCOMPARE(u.toDisplayString(), "//" + unicode + suffix); |
| 705 | |
| 706 | domain = in + (suffix ? ".troll.No" : "" ); |
| 707 | QCOMPARE(QString::fromLatin1(QUrl::toAce(domain)), toace + suffix); |
| 708 | if (fromace != "." ) |
| 709 | QCOMPARE(QUrl::fromAce(domain.toLatin1()), fromace + suffix); |
| 710 | QCOMPARE(QUrl::fromAce(QUrl::toAce(domain)), unicode + suffix); |
| 711 | |
| 712 | domain = in + (suffix ? ".troll.NO" : "" ); |
| 713 | QCOMPARE(QString::fromLatin1(QUrl::toAce(domain)), toace + suffix); |
| 714 | if (fromace != "." ) |
| 715 | QCOMPARE(QUrl::fromAce(domain.toLatin1()), fromace + suffix); |
| 716 | QCOMPARE(QUrl::fromAce(QUrl::toAce(domain)), unicode + suffix); |
| 717 | } |
| 718 | |
| 719 | void tst_QUrlInternal::std3violations_data() |
| 720 | { |
| 721 | QTest::addColumn<QString>(name: "source" ); |
| 722 | QTest::addColumn<bool>(name: "validUrl" ); |
| 723 | |
| 724 | QTest::newRow(dataTag: "too-long" ) << "this-domain-is-far-too-long-for-its-own-good-and-should-have-been-limited-to-63-chars" << false; |
| 725 | QTest::newRow(dataTag: "dash-begin" ) << "-x-foo" << false; |
| 726 | QTest::newRow(dataTag: "dash-end" ) << "x-foo-" << false; |
| 727 | QTest::newRow(dataTag: "dash-begin-end" ) << "-foo-" << false; |
| 728 | |
| 729 | QTest::newRow(dataTag: "control" ) << "\033foo" << false; |
| 730 | QTest::newRow(dataTag: "bang" ) << "foo!" << false; |
| 731 | QTest::newRow(dataTag: "plus" ) << "foo+bar" << false; |
| 732 | QTest::newRow(dataTag: "dot" ) << "foo.bar" ; |
| 733 | QTest::newRow(dataTag: "startingdot" ) << ".bar" << false; |
| 734 | QTest::newRow(dataTag: "startingdot2" ) << ".example.com" << false; |
| 735 | QTest::newRow(dataTag: "slash" ) << "foo/bar" << true; |
| 736 | QTest::newRow(dataTag: "colon" ) << "foo:80" << true; |
| 737 | QTest::newRow(dataTag: "question" ) << "foo?bar" << true; |
| 738 | QTest::newRow(dataTag: "at" ) << "foo@bar" << true; |
| 739 | QTest::newRow(dataTag: "backslash" ) << "foo\\bar" << false; |
| 740 | |
| 741 | // these characters are transformed by NFKC to non-LDH characters |
| 742 | QTest::newRow(dataTag: "dot-like" ) << QString::fromUtf8(str: "foo\342\200\244bar" ) << false; // U+2024 ONE DOT LEADER |
| 743 | QTest::newRow(dataTag: "slash-like" ) << QString::fromUtf8(str: "foo\357\274\217bar" ) << false; // U+FF0F FULLWIDTH SOLIDUS |
| 744 | |
| 745 | // The following should be invalid but isn't |
| 746 | // the DIVISON SLASH doesn't case-fold to a slash |
| 747 | // is this a problem with RFC 3490? |
| 748 | //QTest::newRow("slash-like2") << QString::fromUtf8("foo\342\210\225bar") << false; // U+2215 DIVISION SLASH |
| 749 | } |
| 750 | |
| 751 | void tst_QUrlInternal::std3violations() |
| 752 | { |
| 753 | QFETCH(QString, source); |
| 754 | |
| 755 | #ifdef QT_BUILD_INTERNAL |
| 756 | { |
| 757 | QString prepped = source; |
| 758 | qt_nameprep(source: &prepped, from: 0); |
| 759 | QVERIFY(!qt_check_std3rules(prepped.constData(), prepped.length())); |
| 760 | } |
| 761 | #endif |
| 762 | |
| 763 | if (source.contains(c: '.')) |
| 764 | return; // this test ends here |
| 765 | |
| 766 | QUrl url; |
| 767 | url.setHost(host: source); |
| 768 | QVERIFY(url.host().isEmpty()); |
| 769 | |
| 770 | QFETCH(bool, validUrl); |
| 771 | if (validUrl) |
| 772 | return; // test ends here for these cases |
| 773 | |
| 774 | url = QUrl("http://" + source + "/some/path" ); |
| 775 | QVERIFY(!url.isValid()); |
| 776 | } |
| 777 | |
| 778 | void tst_QUrlInternal::std3deviations_data() |
| 779 | { |
| 780 | QTest::addColumn<QString>(name: "source" ); |
| 781 | |
| 782 | QTest::newRow(dataTag: "ending-dot" ) << "example.com." ; |
| 783 | QTest::newRow(dataTag: "ending-dot3002" ) << QString("example.com" ) + QChar(0x3002); |
| 784 | QTest::newRow(dataTag: "underline" ) << "foo_bar" ; //QTBUG-7434 |
| 785 | } |
| 786 | |
| 787 | void tst_QUrlInternal::std3deviations() |
| 788 | { |
| 789 | QFETCH(QString, source); |
| 790 | QVERIFY(!QUrl::toAce(source).isEmpty()); |
| 791 | |
| 792 | QUrl url; |
| 793 | url.setHost(host: source); |
| 794 | QVERIFY(!url.host().isEmpty()); |
| 795 | } |
| 796 | |
| 797 | void tst_QUrlInternal::correctEncodedMistakes_data() |
| 798 | { |
| 799 | QTest::addColumn<QString>(name: "input" ); |
| 800 | QTest::addColumn<QString>(name: "expected" ); |
| 801 | |
| 802 | QTest::newRow(dataTag: "empty" ) << "" << "" ; |
| 803 | |
| 804 | // these contain one invalid percent |
| 805 | QTest::newRow(dataTag: "%" ) << QString("%" ) << QString("%25" ); |
| 806 | QTest::newRow(dataTag: "3%" ) << QString("3%" ) << QString("3%25" ); |
| 807 | QTest::newRow(dataTag: "13%" ) << QString("13%" ) << QString("13%25" ); |
| 808 | QTest::newRow(dataTag: "13%!" ) << QString("13%!" ) << QString("13%25!" ); |
| 809 | QTest::newRow(dataTag: "13%!!" ) << QString("13%!!" ) << QString("13%25!!" ); |
| 810 | QTest::newRow(dataTag: "13%a" ) << QString("13%a" ) << QString("13%25a" ); |
| 811 | QTest::newRow(dataTag: "13%az" ) << QString("13%az" ) << QString("13%25az" ); |
| 812 | |
| 813 | // two invalid percents |
| 814 | QTest::newRow(dataTag: "13%%" ) << "13%%" << "13%25%25" ; |
| 815 | QTest::newRow(dataTag: "13%a%a" ) << "13%a%a" << "13%25a%25a" ; |
| 816 | QTest::newRow(dataTag: "13%az%az" ) << "13%az%az" << "13%25az%25az" ; |
| 817 | |
| 818 | // these are correct (idempotent) |
| 819 | QTest::newRow(dataTag: "13%25" ) << QString("13%25" ) << QString("13%25" ); |
| 820 | QTest::newRow(dataTag: "13%25%25" ) << QString("13%25%25" ) << QString("13%25%25" ); |
| 821 | |
| 822 | // these contain one invalid and one valid |
| 823 | // the code assumes they are all invalid |
| 824 | QTest::newRow(dataTag: "13%13..%" ) << "13%13..%" << "13%2513..%25" ; |
| 825 | QTest::newRow(dataTag: "13%..%13" ) << "13%..%13" << "13%25..%2513" ; |
| 826 | |
| 827 | // three percents, one invalid |
| 828 | QTest::newRow(dataTag: "%01%02%3" ) << "%01%02%3" << "%2501%2502%253" ; |
| 829 | |
| 830 | // now mix bad percents with Unicode decoding |
| 831 | QTest::newRow(dataTag: "%C2%" ) << "%C2%" << "%25C2%25" ; |
| 832 | QTest::newRow(dataTag: "%C2%A" ) << "%C2%A" << "%25C2%25A" ; |
| 833 | QTest::newRow(dataTag: "%C2%Az" ) << "%C2%Az" << "%25C2%25Az" ; |
| 834 | QTest::newRow(dataTag: "%E2%A0%" ) << "%E2%A0%" << "%25E2%25A0%25" ; |
| 835 | QTest::newRow(dataTag: "%E2%A0%A" ) << "%E2%A0%A" << "%25E2%25A0%25A" ; |
| 836 | QTest::newRow(dataTag: "%E2%A0%Az" ) << "%E2%A0%Az" << "%25E2%25A0%25Az" ; |
| 837 | QTest::newRow(dataTag: "%F2%A0%A0%" ) << "%F2%A0%A0%" << "%25F2%25A0%25A0%25" ; |
| 838 | QTest::newRow(dataTag: "%F2%A0%A0%A" ) << "%F2%A0%A0%A" << "%25F2%25A0%25A0%25A" ; |
| 839 | QTest::newRow(dataTag: "%F2%A0%A0%Az" ) << "%F2%A0%A0%Az" << "%25F2%25A0%25A0%25Az" ; |
| 840 | } |
| 841 | |
| 842 | void tst_QUrlInternal::correctEncodedMistakes() |
| 843 | { |
| 844 | QFETCH(QString, input); |
| 845 | QFETCH(QString, expected); |
| 846 | |
| 847 | // prepend some data to be sure that it remains there |
| 848 | QString dataTag = QTest::currentDataTag(); |
| 849 | QString output = dataTag; |
| 850 | |
| 851 | if (!qt_urlRecode(appendTo&: output, begin: input.constData(), end: input.constData() + input.length(), encoding: { })) |
| 852 | output += input; |
| 853 | QCOMPARE(output, dataTag + expected); |
| 854 | |
| 855 | // now try the full decode mode |
| 856 | output = dataTag; |
| 857 | QString expected2 = QUrl::fromPercentEncoding(expected.toLatin1()); |
| 858 | |
| 859 | if (!qt_urlRecode(appendTo&: output, begin: input.constData(), end: input.constData() + input.length(), encoding: QUrl::FullyDecoded)) |
| 860 | output += input; |
| 861 | QCOMPARE(output, dataTag + expected2); |
| 862 | } |
| 863 | |
| 864 | static void addUtf8Data(const char *name, const char *data) |
| 865 | { |
| 866 | QString encoded = QByteArray(data).toPercentEncoding(); |
| 867 | QString decoded = QString::fromUtf8(str: data); |
| 868 | |
| 869 | // this data contains invaild UTF-8 sequences, so FullyDecoded doesn't work (by design) |
| 870 | // use PrettyDecoded instead |
| 871 | QTest::newRow(dataTag: QByteArray("decode-" ) + name) << encoded << QUrl::ComponentFormattingOptions(QUrl::PrettyDecoded) << decoded; |
| 872 | QTest::newRow(dataTag: QByteArray("encode-" ) + name) << decoded << QUrl::ComponentFormattingOptions(QUrl::FullyEncoded) << encoded; |
| 873 | } |
| 874 | |
| 875 | void tst_QUrlInternal::encodingRecode_data() |
| 876 | { |
| 877 | typedef QUrl::ComponentFormattingOptions F; |
| 878 | QTest::addColumn<QString>(name: "input" ); |
| 879 | QTest::addColumn<F>(name: "encodingMode" ); |
| 880 | QTest::addColumn<QString>(name: "expected" ); |
| 881 | |
| 882 | // -- idempotent tests -- |
| 883 | static int modes[] = { QUrl::PrettyDecoded, |
| 884 | QUrl::EncodeSpaces, |
| 885 | QUrl::EncodeSpaces | QUrl::EncodeUnicode, |
| 886 | QUrl::EncodeSpaces | QUrl::EncodeUnicode | QUrl::EncodeDelimiters, |
| 887 | QUrl::EncodeSpaces | QUrl::EncodeUnicode | QUrl::EncodeDelimiters | QUrl::EncodeReserved, |
| 888 | QUrl::EncodeSpaces | QUrl::EncodeUnicode | QUrl::EncodeDelimiters | QUrl::DecodeReserved, |
| 889 | QUrl::EncodeSpaces | QUrl::EncodeUnicode | QUrl::EncodeReserved, |
| 890 | QUrl::EncodeSpaces | QUrl::EncodeUnicode | QUrl::DecodeReserved, |
| 891 | QUrl::EncodeSpaces | QUrl::EncodeDelimiters, |
| 892 | QUrl::EncodeSpaces | QUrl::EncodeDelimiters | QUrl::EncodeReserved, |
| 893 | QUrl::EncodeSpaces | QUrl::EncodeDelimiters | QUrl::DecodeReserved, |
| 894 | QUrl::EncodeSpaces | QUrl::EncodeReserved, |
| 895 | QUrl::EncodeSpaces | QUrl::DecodeReserved, |
| 896 | |
| 897 | QUrl::EncodeUnicode, |
| 898 | QUrl::EncodeUnicode | QUrl::EncodeDelimiters, |
| 899 | QUrl::EncodeUnicode | QUrl::EncodeDelimiters | QUrl::EncodeReserved, |
| 900 | QUrl::EncodeUnicode | QUrl::EncodeDelimiters | QUrl::DecodeReserved, |
| 901 | QUrl::EncodeUnicode | QUrl::EncodeReserved, |
| 902 | |
| 903 | QUrl::EncodeDelimiters, |
| 904 | QUrl::EncodeDelimiters | QUrl::EncodeReserved, |
| 905 | QUrl::EncodeDelimiters | QUrl::DecodeReserved, |
| 906 | QUrl::EncodeReserved, |
| 907 | QUrl::DecodeReserved }; |
| 908 | for (uint i = 0; i < sizeof(modes)/sizeof(modes[0]); ++i) { |
| 909 | QByteArray code = QByteArray::number(modes[i], base: 16); |
| 910 | F mode = QUrl::ComponentFormattingOption(modes[i]); |
| 911 | |
| 912 | QTest::newRow(dataTag: "null-0x" + code) << QString() << mode << QString(); |
| 913 | QTest::newRow(dataTag: "empty-0x" + code) << "" << mode << "" ; |
| 914 | |
| 915 | // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" |
| 916 | // Unreserved characters are never encoded |
| 917 | QTest::newRow(dataTag: "alpha-0x" + code) << "abcABCZZzz" << mode << "abcABCZZzz" ; |
| 918 | QTest::newRow(dataTag: "digits-0x" + code) << "01234567890" << mode << "01234567890" ; |
| 919 | QTest::newRow(dataTag: "otherunreserved-0x" + code) << "-._~" << mode << "-._~" ; |
| 920 | |
| 921 | // Control characters are always encoded |
| 922 | // Use uppercase because the output is also uppercased |
| 923 | QTest::newRow(dataTag: "control-nul-0x" + code) << "%00" << mode << "%00" ; |
| 924 | QTest::newRow(dataTag: "control-0x" + code) << "%0D%0A%1F%1A%7F" << mode << "%0D%0A%1F%1A%7F" ; |
| 925 | |
| 926 | // The percent is always encoded |
| 927 | QTest::newRow(dataTag: "percent-0x" + code) << "25%2525" << mode << "25%2525" ; |
| 928 | |
| 929 | // mixed control and unreserved |
| 930 | QTest::newRow(dataTag: "control-unreserved-0x" + code) << "Foo%00Bar%0D%0Abksp%7F" << mode << "Foo%00Bar%0D%0Abksp%7F" ; |
| 931 | } |
| 932 | |
| 933 | // however, control characters and the percent *are* decoded in FullyDecoded mode |
| 934 | // this is the only exception |
| 935 | QTest::newRow(dataTag: "control-nul-fullydecoded" ) << "%00" << F(QUrl::FullyDecoded) << QStringLiteral("\0" ); |
| 936 | QTest::newRow(dataTag: "control-fullydecoded" ) << "%0D%0A%1F%1A%7F" << F(QUrl::FullyDecoded) << "\r\n\x1f\x1a\x7f" ; |
| 937 | QTest::newRow(dataTag: "percent-fullydecoded" ) << "25%2525" << F(QUrl::FullyDecoded) << "25%25" ; |
| 938 | QTest::newRow(dataTag: "control-unreserved-fullydecoded" ) << "Foo%00Bar%0D%0Abksp%7F" << F(QUrl::FullyDecoded) |
| 939 | << QStringLiteral("Foo\0Bar\r\nbksp\x7F" ); |
| 940 | |
| 941 | // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" |
| 942 | // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" |
| 943 | // / "*" / "+" / "," / ";" / "=" |
| 944 | // in the default operation, delimiters don't get encoded or decoded |
| 945 | static const char delimiters[] = ":/?#[]@" "!$&'()*+,;=" ; |
| 946 | for (const char *c = delimiters; *c; ++c) { |
| 947 | QByteArray code = QByteArray::number(*c, base: 16); |
| 948 | QString encoded = QString("abc%" ) + code.toUpper() + "def" ; |
| 949 | QString decoded = QString("abc" ) + *c + "def" ; |
| 950 | QTest::newRow(dataTag: "delimiter-encoded-" + code) << encoded << F(QUrl::FullyEncoded) << encoded; |
| 951 | QTest::newRow(dataTag: "delimiter-decoded-" + code) << decoded << F(QUrl::FullyEncoded) << decoded; |
| 952 | } |
| 953 | |
| 954 | // encode control characters |
| 955 | QTest::newRow(dataTag: "encode-control" ) << "\1abc\2\033esc" << F(QUrl::PrettyDecoded) << "%01abc%02%1Besc" ; |
| 956 | QTest::newRow(dataTag: "encode-nul" ) << QString::fromLatin1(str: "abc\0def" , size: 7) << F(QUrl::PrettyDecoded) << "abc%00def" ; |
| 957 | |
| 958 | // space |
| 959 | QTest::newRow(dataTag: "space-leave-decoded" ) << "Hello World " << F(QUrl::PrettyDecoded) << "Hello World " ; |
| 960 | QTest::newRow(dataTag: "space-leave-encoded" ) << "Hello%20World%20" << F(QUrl::FullyEncoded) << "Hello%20World%20" ; |
| 961 | QTest::newRow(dataTag: "space-encode" ) << "Hello World " << F(QUrl::FullyEncoded) << "Hello%20World%20" ; |
| 962 | QTest::newRow(dataTag: "space-decode" ) << "Hello%20World%20" << F(QUrl::PrettyDecoded) << "Hello World " ; |
| 963 | |
| 964 | // decode unreserved |
| 965 | QTest::newRow(dataTag: "unreserved-decode" ) << "%66%6f%6f%42a%72" << F(QUrl::FullyEncoded) << "fooBar" ; |
| 966 | |
| 967 | // mix encoding with decoding |
| 968 | QTest::newRow(dataTag: "encode-control-decode-space" ) << "\1\2%200" << F(QUrl::PrettyDecoded) << "%01%02 0" ; |
| 969 | QTest::newRow(dataTag: "decode-space-encode-control" ) << "%20\1\2" << F(QUrl::PrettyDecoded) << " %01%02" ; |
| 970 | |
| 971 | // decode and encode valid UTF-8 data |
| 972 | // invalid is tested in encodingRecodeInvalidUtf8 |
| 973 | addUtf8Data(name: "utf8-2char-1" , data: "\xC2\x80" ); // U+0080 |
| 974 | addUtf8Data(name: "utf8-2char-2" , data: "\xDF\xBF" ); // U+07FF |
| 975 | addUtf8Data(name: "utf8-3char-1" , data: "\xE0\xA0\x80" ); // U+0800 |
| 976 | addUtf8Data(name: "utf8-3char-2" , data: "\xED\x9F\xBF" ); // U+D7FF |
| 977 | addUtf8Data(name: "utf8-3char-3" , data: "\xEE\x80\x80" ); // U+E000 |
| 978 | addUtf8Data(name: "utf8-3char-4" , data: "\xEF\xBF\xBD" ); // U+FFFD |
| 979 | addUtf8Data(name: "utf8-4char-1" , data: "\xF0\x90\x80\x80" ); // U+10000 |
| 980 | addUtf8Data(name: "utf8-4char-2" , data: "\xF4\x8F\xBF\xBD" ); // U+10FFFD |
| 981 | |
| 982 | // longer UTF-8 sequences, mixed with unreserved |
| 983 | addUtf8Data(name: "utf8-string-1" , data: "R\xc3\xa9sum\xc3\xa9" ); |
| 984 | addUtf8Data(name: "utf8-string-2" , data: "\xDF\xBF\xE0\xA0\x80" "A" ); |
| 985 | addUtf8Data(name: "utf8-string-3" , data: "\xE0\xA0\x80\xDF\xBF..." ); |
| 986 | |
| 987 | QTest::newRow(dataTag: "encode-unicode-noncharacter" ) << QString(QChar(0xffff)) << F(QUrl::FullyEncoded) << "%EF%BF%BF" ; |
| 988 | QTest::newRow(dataTag: "decode-unicode-noncharacter" ) << QString(QChar(0xffff)) << F(QUrl::PrettyDecoded) << QString::fromUtf8(str: "\xEF\xBF\xBF" ); |
| 989 | |
| 990 | // special cases: stuff we can encode, but not decode |
| 991 | QTest::newRow(dataTag: "unicode-lo-surrogate" ) << QString(QChar(0xD800)) << F(QUrl::FullyEncoded) << "%ED%A0%80" ; |
| 992 | QTest::newRow(dataTag: "unicode-hi-surrogate" ) << QString(QChar(0xDC00)) << F(QUrl::FullyEncoded) << "%ED%B0%80" ; |
| 993 | |
| 994 | // a couple of Unicode strings with leading spaces |
| 995 | QTest::newRow(dataTag: "space-unicode" ) << QString::fromUtf8(str: " \xc2\xa0" ) << F(QUrl::FullyEncoded) << "%20%C2%A0" ; |
| 996 | QTest::newRow(dataTag: "space-space-unicode" ) << QString::fromUtf8(str: " \xc2\xa0" ) << F(QUrl::FullyEncoded) << "%20%20%C2%A0" ; |
| 997 | QTest::newRow(dataTag: "space-space-space-unicode" ) << QString::fromUtf8(str: " \xc2\xa0" ) << F(QUrl::FullyEncoded) << "%20%20%20%C2%A0" ; |
| 998 | |
| 999 | // hex case testing |
| 1000 | QTest::newRow(dataTag: "FF" ) << "%FF" << F(QUrl::FullyEncoded) << "%FF" ; |
| 1001 | QTest::newRow(dataTag: "Ff" ) << "%Ff" << F(QUrl::FullyEncoded) << "%FF" ; |
| 1002 | QTest::newRow(dataTag: "fF" ) << "%fF" << F(QUrl::FullyEncoded) << "%FF" ; |
| 1003 | QTest::newRow(dataTag: "ff" ) << "%ff" << F(QUrl::FullyEncoded) << "%FF" ; |
| 1004 | |
| 1005 | // decode UTF-8 mixed with non-UTF-8 and unreserved |
| 1006 | QTest::newRow(dataTag: "utf8-mix-1" ) << "%80%C2%80" << F(QUrl::PrettyDecoded) << QString::fromUtf8(str: "%80\xC2\x80" ); |
| 1007 | QTest::newRow(dataTag: "utf8-mix-2" ) << "%C2%C2%80" << F(QUrl::PrettyDecoded) << QString::fromUtf8(str: "%C2\xC2\x80" ); |
| 1008 | QTest::newRow(dataTag: "utf8-mix-3" ) << "%E0%C2%80" << F(QUrl::PrettyDecoded) << QString::fromUtf8(str: "%E0\xC2\x80" ); |
| 1009 | QTest::newRow(dataTag: "utf8-mix-3" ) << "A%C2%80" << F(QUrl::PrettyDecoded) << QString::fromUtf8(str: "A\xC2\x80" ); |
| 1010 | QTest::newRow(dataTag: "utf8-mix-3" ) << "%C2%80A" << F(QUrl::PrettyDecoded) << QString::fromUtf8(str: "\xC2\x80" "A" ); |
| 1011 | } |
| 1012 | |
| 1013 | void tst_QUrlInternal::encodingRecode() |
| 1014 | { |
| 1015 | QFETCH(QString, input); |
| 1016 | QFETCH(QString, expected); |
| 1017 | QFETCH(QUrl::ComponentFormattingOptions, encodingMode); |
| 1018 | |
| 1019 | // prepend some data to be sure that it remains there |
| 1020 | QString output = QTest::currentDataTag(); |
| 1021 | expected.prepend(s: output); |
| 1022 | |
| 1023 | if (!qt_urlRecode(appendTo&: output, begin: input.constData(), end: input.constData() + input.length(), encoding: encodingMode)) |
| 1024 | output += input; |
| 1025 | QCOMPARE(output, expected); |
| 1026 | } |
| 1027 | |
| 1028 | void tst_QUrlInternal::encodingRecodeInvalidUtf8_data() |
| 1029 | { |
| 1030 | QTest::addColumn<QByteArray>(name: "utf8" ); |
| 1031 | QTest::addColumn<QString>(name: "utf16" ); |
| 1032 | |
| 1033 | extern void loadInvalidUtf8Rows(); |
| 1034 | extern void loadNonCharactersRows(); |
| 1035 | loadInvalidUtf8Rows(); |
| 1036 | loadNonCharactersRows(); |
| 1037 | |
| 1038 | QTest::newRow(dataTag: "utf8-mix-4" ) << QByteArray("\xE0.A2\x80" ); |
| 1039 | QTest::newRow(dataTag: "utf8-mix-5" ) << QByteArray("\xE0\xA2.80" ); |
| 1040 | QTest::newRow(dataTag: "utf8-mix-6" ) << QByteArray("\xE0\xA2\x33" ); |
| 1041 | } |
| 1042 | |
| 1043 | void tst_QUrlInternal::encodingRecodeInvalidUtf8() |
| 1044 | { |
| 1045 | QFETCH(QByteArray, utf8); |
| 1046 | QString input = utf8.toPercentEncoding(); |
| 1047 | |
| 1048 | // prepend some data to be sure that it remains there |
| 1049 | QString output = QTest::currentDataTag(); |
| 1050 | |
| 1051 | if (!qt_urlRecode(appendTo&: output, begin: input.constData(), end: input.constData() + input.length(), encoding: QUrl::PrettyDecoded)) |
| 1052 | output += input; |
| 1053 | QCOMPARE(output, QTest::currentDataTag() + input); |
| 1054 | |
| 1055 | // this is just control |
| 1056 | output = QTest::currentDataTag(); |
| 1057 | if (!qt_urlRecode(appendTo&: output, begin: input.constData(), end: input.constData() + input.length(), encoding: QUrl::FullyEncoded)) |
| 1058 | output += input; |
| 1059 | QCOMPARE(output, QTest::currentDataTag() + input); |
| 1060 | |
| 1061 | // verify for security reasons that all bad UTF-8 data got replaced by QChar::ReplacementCharacter |
| 1062 | output = QTest::currentDataTag(); |
| 1063 | if (!qt_urlRecode(appendTo&: output, begin: input.constData(), end: input.constData() + input.length(), encoding: QUrl::FullyEncoded)) |
| 1064 | output += input; |
| 1065 | for (int i = int(strlen(s: QTest::currentDataTag())); i < output.length(); ++i) { |
| 1066 | QVERIFY2(output.at(i).unicode() < 0x80 || output.at(i) == QChar::ReplacementCharacter, |
| 1067 | qPrintable(QString("Character at i == %1 was U+%2" ).arg(i).arg(output.at(i).unicode(), 4, 16, QLatin1Char('0')))); |
| 1068 | } |
| 1069 | } |
| 1070 | |
| 1071 | void tst_QUrlInternal::recodeByteArray_data() |
| 1072 | { |
| 1073 | QTest::addColumn<QByteArray>(name: "input" ); |
| 1074 | QTest::addColumn<QString>(name: "expected" ); |
| 1075 | |
| 1076 | QTest::newRow(dataTag: "null" ) << QByteArray() << QString(); |
| 1077 | QTest::newRow(dataTag: "empty" ) << QByteArray("" ) << QString("" ); |
| 1078 | QTest::newRow(dataTag: "normal" ) << QByteArray("Hello" ) << "Hello" ; |
| 1079 | QTest::newRow(dataTag: "valid-utf8" ) << QByteArray("\xc3\xa9" ) << "%C3%A9" ; |
| 1080 | QTest::newRow(dataTag: "percent-encoded" ) << QByteArray("%C3%A9%00%C0%80" ) << "%C3%A9%00%C0%80" ; |
| 1081 | QTest::newRow(dataTag: "invalid-utf8-1" ) << QByteArray("\xc3\xc3" ) << "%C3%C3" ; |
| 1082 | QTest::newRow(dataTag: "invalid-utf8-2" ) << QByteArray("\xc0\x80" ) << "%C0%80" ; |
| 1083 | |
| 1084 | // note: percent-encoding the control characters ("\0" -> "%00") would also |
| 1085 | // be correct, but it's unnecessary for this function |
| 1086 | QTest::newRow(dataTag: "binary" ) << QByteArray("\0\x1f" , 2) << QString::fromLatin1(str: "\0\x1f" , size: 2);; |
| 1087 | QTest::newRow(dataTag: "binary+percent-encoded" ) << QByteArray("\0%25" , 4) << QString::fromLatin1(str: "\0%25" , size: 4); |
| 1088 | } |
| 1089 | |
| 1090 | void tst_QUrlInternal::recodeByteArray() |
| 1091 | { |
| 1092 | QFETCH(QByteArray, input); |
| 1093 | QFETCH(QString, expected); |
| 1094 | QString output = qt_urlRecodeByteArray(ba: input); |
| 1095 | |
| 1096 | QCOMPARE(output.isNull(), input.isNull()); |
| 1097 | QCOMPARE(output.isEmpty(), input.isEmpty()); |
| 1098 | QCOMPARE(output, expected); |
| 1099 | } |
| 1100 | |
| 1101 | QTEST_APPLESS_MAIN(tst_QUrlInternal) |
| 1102 | |