1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Copyright (C) 2016 Intel Corporation.
5** Contact: https://www.qt.io/licensing/
6**
7** This file is part of the test suite of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:GPL-EXCEPT$
10** Commercial License Usage
11** Licensees holding valid commercial Qt licenses may use this file in
12** accordance with the commercial license agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and The Qt Company. For licensing terms
15** and conditions see https://www.qt.io/terms-conditions. For further
16** information use the contact form at https://www.qt.io/contact-us.
17**
18** GNU General Public License Usage
19** Alternatively, this file may be used under the terms of the GNU
20** General Public License version 3 as published by the Free Software
21** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
22** included in the packaging of this file. Please review the following
23** information to ensure the GNU General Public License requirements will
24** be met: https://www.gnu.org/licenses/gpl-3.0.html.
25**
26** $QT_END_LICENSE$
27**
28****************************************************************************/
29
30#include <QtCore/QUrl>
31#include <QtTest/QtTest>
32
33#include "private/qtldurl_p.h"
34#include "private/qurl_p.h"
35
36// For testsuites
37#define IDNA_ACE_PREFIX "xn--"
38#define IDNA_SUCCESS 1
39#define STRINGPREP_NO_UNASSIGNED 1
40#define STRINGPREP_CONTAINS_UNASSIGNED 2
41#define STRINGPREP_CONTAINS_PROHIBITED 3
42#define STRINGPREP_BIDI_BOTH_L_AND_RAL 4
43#define STRINGPREP_BIDI_LEADTRAIL_NOT_RAL 5
44
/*
    Fixed-capacity buffer of 16-bit code units, used as a QTest data column
    type (see the Q_DECLARE_METATYPE that follows this struct).

    The converting constructor copies a whole C array into `points`. The
    element count is not stored: callers keep track of it separately.
*/
struct ushortarray {
    // Leaves every slot at zero thanks to the member initializer below.
    ushortarray() {}

    // Copies all N elements of \a array into the start of `points`.
    // Arrays larger than the buffer are rejected at compile time instead of
    // silently writing past the end of `points`.
    template <size_t N>
    ushortarray(unsigned short (&array)[N])
    {
        static_assert(N <= sizeof(points) / sizeof(points[0]),
                      "ushortarray: source array does not fit into points[100]");
        memcpy(points, array, sizeof(array));
    }

    // Zero-initialised so that slots past the copied data hold a defined value.
    unsigned short points[100] = {};
};
55
56Q_DECLARE_METATYPE(ushortarray)
57Q_DECLARE_METATYPE(QUrl::FormattingOptions)
58Q_DECLARE_METATYPE(QUrl::ComponentFormattingOptions)
59
60class tst_QUrlInternal : public QObject
61{
62 Q_OBJECT
63
64private Q_SLOTS:
65 // IDNA internals
66#ifdef QT_BUILD_INTERNAL
67 void idna_testsuite_data();
68 void idna_testsuite();
69 void nameprep_testsuite_data();
70 void nameprep_testsuite();
71 void nameprep_highcodes_data();
72 void nameprep_highcodes();
73#endif
74 void ace_testsuite_data();
75 void ace_testsuite();
76 void std3violations_data();
77 void std3violations();
78 void std3deviations_data();
79 void std3deviations();
80
81 // percent-encoding internals
82 void correctEncodedMistakes_data();
83 void correctEncodedMistakes();
84 void encodingRecode_data();
85 void encodingRecode();
86 void encodingRecodeInvalidUtf8_data();
87 void encodingRecodeInvalidUtf8();
88 void recodeByteArray_data();
89 void recodeByteArray();
90};
91#include "tst_qurlinternal.moc"
92
93#ifdef QT_BUILD_INTERNAL
94void tst_QUrlInternal::idna_testsuite_data()
95{
96 QTest::addColumn<int>("numchars");
97 QTest::addColumn<ushortarray>("unicode");
98 QTest::addColumn<QByteArray>("punycode");
99 QTest::addColumn<int>("allowunassigned");
100 QTest::addColumn<int>("usestd3asciirules");
101 QTest::addColumn<int>("toasciirc");
102 QTest::addColumn<int>("tounicoderc");
103
104 unsigned short d1[] = { 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
105 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
106 0x061F };
107 QTest::newRow(dataTag: "Arabic (Egyptian)") << 17 << ushortarray(d1)
108 << QByteArray(IDNA_ACE_PREFIX "egbpdaj6bu4bxfgehfvwxn")
109 << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
110
111 unsigned short d2[] = { 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D,
112 0x6587 };
113 QTest::newRow(dataTag: "Chinese (simplified)") << 9 << ushortarray(d2)
114 << QByteArray(IDNA_ACE_PREFIX "ihqwcrb4cv8a8dqg056pqjye")
115 << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
116
117 unsigned short d3[] = { 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D,
118 0x6587 };
119 QTest::newRow(dataTag: "Chinese (traditional)") << 9 << ushortarray(d3)
120 << QByteArray(IDNA_ACE_PREFIX "ihqwctvzc91f659drss3x8bo0yb")
121 << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
122
123 unsigned short d4[] = { 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
124 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
125 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079 };
126 QTest::newRow(dataTag: "Czech") << 22 << ushortarray(d4)
127 << QByteArray(IDNA_ACE_PREFIX "Proprostnemluvesky-uyb24dma41a")
128 << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
129
130 unsigned short d5[] = { 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
131 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
132 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA };
133 QTest::newRow(dataTag: "Hebrew") << 22 << ushortarray(d5)
134 << QByteArray(IDNA_ACE_PREFIX "4dbcagdahymbxekheh6e0a7fei0b")
135 << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
136
137 unsigned short d6[] = { 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
138 0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
139 0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
140 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902 };
141 QTest::newRow(dataTag: "Hindi (Devanagari)") << 30 << ushortarray(d6)
142 << QByteArray(IDNA_ACE_PREFIX "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd")
143 << 0 << 0 << IDNA_SUCCESS;
144
145 unsigned short d7[] = { 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
146 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
147 0x306E, 0x304B };
148 QTest::newRow(dataTag: "Japanese (kanji and hiragana)") << 18 << ushortarray(d7)
149 << QByteArray(IDNA_ACE_PREFIX "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa")
150 << 0 << 0 << IDNA_SUCCESS;
151
152 unsigned short d8[] = { 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
153 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
154 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
155 0x0441, 0x0441, 0x043A, 0x0438 };
156 QTest::newRow(dataTag: "Russian (Cyrillic)") << 28 << ushortarray(d8)
157 << QByteArray(IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l")
158 << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
159
160 unsigned short d9[] = { 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
161 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
162 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
163 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
164 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C };
165 QTest::newRow(dataTag: "Spanish") << 40 << ushortarray(d9)
166 << QByteArray(IDNA_ACE_PREFIX "PorqunopuedensimplementehablarenEspaol-fmd56a")
167 << 0 << 0 << IDNA_SUCCESS;
168
169 unsigned short d10[] = { 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
170 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
171 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
172 0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074 };
173 QTest::newRow(dataTag: "Vietnamese") << 31 << ushortarray(d10)
174 << QByteArray(IDNA_ACE_PREFIX "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g")
175 << 0 << 0 << IDNA_SUCCESS;
176
177 unsigned short d11[] = { 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F };
178 QTest::newRow(dataTag: "Japanese") << 8 << ushortarray(d11)
179 << QByteArray(IDNA_ACE_PREFIX "3B-ww4c5e180e575a65lsy2b")
180 << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
181
182 // this test does NOT include nameprepping, so the capitals will remain
183 unsigned short d12[] = { 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
184 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
185 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053 };
186 QTest::newRow(dataTag: "Japanese2") << 24 << ushortarray(d12)
187 << QByteArray(IDNA_ACE_PREFIX "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n")
188 << 0 << 0 << IDNA_SUCCESS;
189
190 unsigned short d13[] = { 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
191 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
192 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
193 0x6240 };
194 QTest::newRow(dataTag: "Japanese3") << 25 << ushortarray(d13)
195 << QByteArray(IDNA_ACE_PREFIX "Hello-Another-Way--fc4qua05auwb3674vfr0b")
196 << 0 << 0 << IDNA_SUCCESS;
197
198 unsigned short d14[] = { 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032 };
199 QTest::newRow(dataTag: "Japanese4") << 8 << ushortarray(d14)
200 << QByteArray(IDNA_ACE_PREFIX "2-u9tlzr9756bt3uc0v")
201 << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
202
203 unsigned short d15[] = { 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
204 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D };
205 QTest::newRow(dataTag: "Japanese5") << 13 << ushortarray(d15)
206 << QByteArray(IDNA_ACE_PREFIX "MajiKoi5-783gue6qz075azm5e")
207 << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
208
209 unsigned short d16[] = { 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0 };
210 QTest::newRow(dataTag: "Japanese6") << 9 << ushortarray(d16)
211 << QByteArray(IDNA_ACE_PREFIX "de-jg4avhby1noc0d")
212 << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
213
214 unsigned short d17[] = { 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067 };
215 QTest::newRow(dataTag: "Japanese7") << 7 << ushortarray(d17)
216 << QByteArray(IDNA_ACE_PREFIX "d9juau41awczczp")
217 << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
218
219 unsigned short d18[] = { 0x03b5, 0x03bb, 0x03bb, 0x03b7, 0x03bd, 0x03b9, 0x03ba, 0x03ac };
220 QTest::newRow(dataTag: "Greek") << 8 << ushortarray(d18)
221 << QByteArray(IDNA_ACE_PREFIX "hxargifdar")
222 << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
223
224 unsigned short d19[] = { 0x0062, 0x006f, 0x006e, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
225 0x0127, 0x0061 };
226 QTest::newRow(dataTag: "Maltese (Malti)") << 10 << ushortarray(d19)
227 << QByteArray(IDNA_ACE_PREFIX "bonusaa-5bb1da")
228 << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
229
230 unsigned short d20[] = {0x043f, 0x043e, 0x0447, 0x0435, 0x043c, 0x0443, 0x0436, 0x0435,
231 0x043e, 0x043d, 0x0438, 0x043d, 0x0435, 0x0433, 0x043e, 0x0432,
232 0x043e, 0x0440, 0x044f, 0x0442, 0x043f, 0x043e, 0x0440, 0x0443,
233 0x0441, 0x0441, 0x043a, 0x0438 };
234 QTest::newRow(dataTag: "Russian (Cyrillic)") << 28 << ushortarray(d20)
235 << QByteArray(IDNA_ACE_PREFIX "b1abfaaepdrnnbgefbadotcwatmq2g4l")
236 << 0 << 0 << IDNA_SUCCESS << IDNA_SUCCESS;
237}
238#endif
239
240#ifdef QT_BUILD_INTERNAL
241void tst_QUrlInternal::idna_testsuite()
242{
243 QFETCH(int, numchars);
244 QFETCH(ushortarray, unicode);
245 QFETCH(QByteArray, punycode);
246
247 QString result;
248 qt_punycodeEncoder(s: (QChar*)unicode.points, ucLength: numchars, output: &result);
249 QCOMPARE(result.toLatin1(), punycode);
250 QCOMPARE(qt_punycodeDecoder(result), QString::fromUtf16(unicode.points, numchars));
251}
252#endif
253
254#ifdef QT_BUILD_INTERNAL
255void tst_QUrlInternal::nameprep_testsuite_data()
256{
257 QTest::addColumn<QString>(name: "in");
258 QTest::addColumn<QString>(name: "out");
259 QTest::addColumn<QString>(name: "profile");
260 QTest::addColumn<int>(name: "flags");
261 QTest::addColumn<int>(name: "rc");
262
263 QTest::newRow(dataTag: "Map to nothing")
264 << QString::fromUtf8(str: "foo\xC2\xAD\xCD\x8F\xE1\xA0\x86\xE1\xA0\x8B"
265 "bar""\xE2\x80\x8B\xE2\x81\xA0""baz\xEF\xB8\x80\xEF\xB8\x88"
266 "\xEF\xB8\x8F\xEF\xBB\xBF")
267 << QString::fromUtf8(str: "foobarbaz")
268 << QString() << 0 << 0;
269
270 QTest::newRow(dataTag: "Case folding ASCII U+0043 U+0041 U+0046 U+0045")
271 << QString::fromUtf8(str: "CAFE")
272 << QString::fromUtf8(str: "cafe")
273 << QString() << 0 << 0;
274
275 QTest::newRow(dataTag: "Case folding 8bit U+00DF (german sharp s)")
276 << QString::fromUtf8(str: "\xC3\x9F")
277 << QString("ss")
278 << QString() << 0 << 0;
279
280 QTest::newRow(dataTag: "Case folding U+0130 (turkish capital I with dot)")
281 << QString::fromUtf8(str: "\xC4\xB0")
282 << QString::fromUtf8(str: "i\xcc\x87")
283 << QString() << 0 << 0;
284
285 QTest::newRow(dataTag: "Case folding multibyte U+0143 U+037A")
286 << QString::fromUtf8(str: "\xC5\x83\xCD\xBA")
287 << QString::fromUtf8(str: "\xC5\x84 \xCE\xB9")
288 << QString() << 0 << 0;
289
290 QTest::newRow(dataTag: "Case folding U+2121 U+33C6 U+1D7BB")
291 << QString::fromUtf8(str: "\xE2\x84\xA1\xE3\x8F\x86\xF0\x9D\x9E\xBB")
292 << QString::fromUtf8(str: "telc\xE2\x88\x95""kg\xCF\x83")
293 << QString() << 0 << 0;
294
295 QTest::newRow(dataTag: "Normalization of U+006a U+030c U+00A0 U+00AA")
296 << QString::fromUtf8(str: "\x6A\xCC\x8C\xC2\xA0\xC2\xAA")
297 << QString::fromUtf8(str: "\xC7\xB0 a")
298 << QString() << 0 << 0;
299
300 QTest::newRow(dataTag: "Case folding U+1FB7 and normalization")
301 << QString::fromUtf8(str: "\xE1\xBE\xB7")
302 << QString::fromUtf8(str: "\xE1\xBE\xB6\xCE\xB9")
303 << QString() << 0 << 0;
304
305 QTest::newRow(dataTag: "Self-reverting case folding U+01F0 and normalization")
306// << QString::fromUtf8("\xC7\xF0") ### typo in the original testsuite
307 << QString::fromUtf8(str: "\xC7\xB0")
308 << QString::fromUtf8(str: "\xC7\xB0")
309 << QString() << 0 << 0;
310
311 QTest::newRow(dataTag: "Self-reverting case folding U+0390 and normalization")
312 << QString::fromUtf8(str: "\xCE\x90")
313 << QString::fromUtf8(str: "\xCE\x90")
314 << QString() << 0 << 0;
315
316 QTest::newRow(dataTag: "Self-reverting case folding U+03B0 and normalization")
317 << QString::fromUtf8(str: "\xCE\xB0")
318 << QString::fromUtf8(str: "\xCE\xB0")
319 << QString() << 0 << 0;
320
321 QTest::newRow(dataTag: "Self-reverting case folding U+1E96 and normalization")
322 << QString::fromUtf8(str: "\xE1\xBA\x96")
323 << QString::fromUtf8(str: "\xE1\xBA\x96")
324 << QString() << 0 << 0;
325
326 QTest::newRow(dataTag: "Self-reverting case folding U+1F56 and normalization")
327 << QString::fromUtf8(str: "\xE1\xBD\x96")
328 << QString::fromUtf8(str: "\xE1\xBD\x96")
329 << QString() << 0 << 0;
330
331 QTest::newRow(dataTag: "ASCII space character U+0020")
332 << QString::fromUtf8(str: "\x20")
333 << QString::fromUtf8(str: "\x20")
334 << QString() << 0 << 0;
335
336 QTest::newRow(dataTag: "Non-ASCII 8bit space character U+00A0")
337 << QString::fromUtf8(str: "\xC2\xA0")
338 << QString::fromUtf8(str: "\x20")
339 << QString() << 0 << 0;
340
341 QTest::newRow(dataTag: "Non-ASCII multibyte space character U+1680")
342 << QString::fromUtf8(str: "x\xE1\x9A\x80x")
343 << QString()
344 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
345
346 QTest::newRow(dataTag: "Non-ASCII multibyte space character U+2000")
347 << QString::fromUtf8(str: "\xE2\x80\x80")
348 << QString::fromUtf8(str: "\x20")
349 << QString() << 0 << 0;
350
351 QTest::newRow(dataTag: "Zero Width Space U+200b")
352 << QString::fromUtf8(str: "\xE2\x80\x8b")
353 << QString()
354 << QString() << 0 << 0;
355
356 QTest::newRow(dataTag: "Non-ASCII multibyte space character U+3000")
357 << QString::fromUtf8(str: "\xE3\x80\x80")
358 << QString::fromUtf8(str: "\x20")
359 << QString() << 0 << 0;
360
361 QTest::newRow(dataTag: "ASCII control characters U+0010 U+007F")
362 << QString::fromUtf8(str: "\x10\x7F")
363 << QString::fromUtf8(str: "\x10\x7F")
364 << QString() << 0 << 0;
365
366 QTest::newRow(dataTag: "Non-ASCII 8bit control character U+0080")
367 << QString::fromUtf8(str: "x\xC2\x80x")
368 << QString()
369 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
370
371 QTest::newRow(dataTag: "Non-ASCII 8bit control character U+0085")
372 << QString::fromUtf8(str: "x\xC2\x85x")
373 << QString()
374 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
375
376 QTest::newRow(dataTag: "Non-ASCII multibyte control character U+180E")
377 << QString::fromUtf8(str: "x\xE1\xA0\x8Ex")
378 << QString()
379 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
380
381 QTest::newRow(dataTag: "Zero Width No-Break Space U+FEFF")
382 << QString::fromUtf8(str: "\xEF\xBB\xBF")
383 << QString()
384 << QString() << 0 << 0;
385
386 QTest::newRow(dataTag: "Non-ASCII control character U+1D175")
387 << QString::fromUtf8(str: "x\xF0\x9D\x85\xB5x")
388 << QString()
389 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
390
391 QTest::newRow(dataTag: "Plane 0 private use character U+F123")
392 << QString::fromUtf8(str: "x\xEF\x84\xA3x")
393 << QString()
394 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
395
396 QTest::newRow(dataTag: "Plane 15 private use character U+F1234")
397 << QString::fromUtf8(str: "x\xF3\xB1\x88\xB4x")
398 << QString()
399 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
400
401 QTest::newRow(dataTag: "Plane 16 private use character U+10F234")
402 << QString::fromUtf8(str: "x\xF4\x8F\x88\xB4x")
403 << QString()
404 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
405
406 QTest::newRow(dataTag: "Non-character code point U+8FFFE")
407 << QString::fromUtf8(str: "x\xF2\x8F\xBF\xBEx")
408 << QString()
409 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
410
411 QTest::newRow(dataTag: "Non-character code point U+10FFFF")
412 << QString::fromUtf8(str: "x\xF4\x8F\xBF\xBFx")
413 << QString()
414 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
415
416 QTest::newRow(dataTag: "Surrogate code U+DF42")
417 << QString::fromUtf8(str: "x\xED\xBD\x82x")
418 << QString()
419 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
420
421 QTest::newRow(dataTag: "Non-plain text character U+FFFD")
422 << QString::fromUtf8(str: "x\xEF\xBF\xBDx")
423 << QString()
424 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
425
426 QTest::newRow(dataTag: "Ideographic description character U+2FF5")
427 << QString::fromUtf8(str: "x\xE2\xBF\xB5x")
428 << QString()
429 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
430
431 QTest::newRow(dataTag: "Display property character U+0341")
432 << QString::fromUtf8(str: "\xCD\x81")
433 << QString::fromUtf8(str: "\xCC\x81")
434 << QString() << 0 << 0;
435
436 QTest::newRow(dataTag: "Left-to-right mark U+200E")
437 << QString::fromUtf8(str: "x\xE2\x80\x8Ex")
438 << QString()
439 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
440
441 QTest::newRow(dataTag: "Deprecated U+202A")
442 << QString::fromUtf8(str: "x\xE2\x80\xAA")
443 << QString()
444 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
445
446 QTest::newRow(dataTag: "Language tagging character U+E0001")
447 << QString::fromUtf8(str: "x\xF3\xA0\x80\x81x")
448 << QString()
449 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
450
451 QTest::newRow(dataTag: "Language tagging character U+E0042")
452 << QString::fromUtf8(str: "x\xF3\xA0\x81\x82x")
453 << QString()
454 << QString("Nameprep") << 0 << STRINGPREP_CONTAINS_PROHIBITED;
455
456 QTest::newRow(dataTag: "Bidi: RandALCat character U+05BE and LCat characters")
457 << QString::fromUtf8(str: "foo\xD6\xBE""bar")
458 << QString()
459 << QString("Nameprep") << 0 << STRINGPREP_BIDI_BOTH_L_AND_RAL;
460
461 QTest::newRow(dataTag: "Bidi: RandALCat character U+FD50 and LCat characters")
462 << QString::fromUtf8(str: "foo\xEF\xB5\x90""bar")
463 << QString()
464 << QString("Nameprep") << 0 << STRINGPREP_BIDI_BOTH_L_AND_RAL;
465
466 QTest::newRow(dataTag: "Bidi: RandALCat character U+FB38 and LCat characters")
467 << QString::fromUtf8(str: "foo\xEF\xB9\xB6""bar")
468 << QString::fromUtf8(str: "foo \xd9\x8e""bar")
469 << QString() << 0 << 0;
470
471 QTest::newRow(dataTag: "Bidi: RandALCat without trailing RandALCat U+0627 U+0031")
472 << QString::fromUtf8(str: "\xD8\xA7\x31")
473 << QString()
474 << QString("Nameprep") << 0 << STRINGPREP_BIDI_LEADTRAIL_NOT_RAL;
475
476 QTest::newRow(dataTag: "Bidi: RandALCat character U+0627 U+0031 U+0628")
477 << QString::fromUtf8(str: "\xD8\xA7\x31\xD8\xA8")
478 << QString::fromUtf8(str: "\xD8\xA7\x31\xD8\xA8")
479 << QString() << 0 << 0;
480
481 QTest::newRow(dataTag: "Unassigned code point U+E0002")
482 << QString::fromUtf8(str: "\xF3\xA0\x80\x82")
483 << QString()
484 << QString("Nameprep") << STRINGPREP_NO_UNASSIGNED << STRINGPREP_CONTAINS_UNASSIGNED;
485
486 QTest::newRow(dataTag: "Larger test (shrinking)")
487 << QString::fromUtf8(str: "X\xC2\xAD\xC3\x9F\xC4\xB0\xE2\x84\xA1\x6a\xcc\x8c\xc2\xa0\xc2"
488 "\xaa\xce\xb0\xe2\x80\x80")
489 << QString::fromUtf8(str: "xssi\xcc\x87""tel\xc7\xb0 a\xce\xb0 ")
490 << QString("Nameprep") << 0 << 0;
491
492 QTest::newRow(dataTag: "Larger test (expanding)")
493 << QString::fromUtf8(str: "X\xC3\x9F\xe3\x8c\x96\xC4\xB0\xE2\x84\xA1\xE2\x92\x9F\xE3\x8c\x80")
494 << QString::fromUtf8(str: "xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\x88"
495 "\xe3\x83\xab""i\xcc\x87""tel\x28""d\x29\xe3\x82\xa2\xe3\x83\x91"
496 "\xe3\x83\xbc\xe3\x83\x88")
497 << QString() << 0 << 0;
498}
499#endif
500
501#ifdef QT_BUILD_INTERNAL
502void tst_QUrlInternal::nameprep_testsuite()
503{
504 QFETCH(QString, in);
505 QFETCH(QString, out);
506 QFETCH(QString, profile);
507
508 qt_nameprep(source: &in, from: 0);
509 QCOMPARE(in, out);
510}
511#endif
512
513#ifdef QT_BUILD_INTERNAL
514void tst_QUrlInternal::nameprep_highcodes_data()
515{
516 QTest::addColumn<QString>(name: "in");
517 QTest::addColumn<QString>(name: "out");
518 QTest::addColumn<QString>(name: "profile");
519 QTest::addColumn<int>(name: "flags");
520 QTest::addColumn<int>(name: "rc");
521
522 {
523 QChar st[] = { '-', 0xd801, 0xdc1d, 'a' };
524 QChar se[] = { '-', 0xd801, 0xdc45, 'a' };
525 QTest::newRow(dataTag: "highcodes (U+1041D)")
526 << QString(st, sizeof(st)/sizeof(st[0]))
527 << QString(se, sizeof(se)/sizeof(se[0]))
528 << QString() << 0 << 0;
529 }
530 {
531 QChar st[] = { 0x011C, 0xd835, 0xdf6e, 0x0110 };
532 QChar se[] = { 0x011D, 0x03C9, 0x0111 };
533 QTest::newRow(dataTag: "highcodes (U+1D76E)")
534 << QString(st, sizeof(st)/sizeof(st[0]))
535 << QString(se, sizeof(se)/sizeof(se[0]))
536 << QString() << 0 << 0;
537 }
538 {
539 QChar st[] = { 'D', 'o', '\'', 0x2060, 'h' };
540 QChar se[] = { 'd', 'o', '\'', 'h' };
541 QTest::newRow(dataTag: "highcodes (D, o, ', U+2060, h)")
542 << QString(st, sizeof(st)/sizeof(st[0]))
543 << QString(se, sizeof(se)/sizeof(se[0]))
544 << QString() << 0 << 0;
545 }
546}
547#endif
548
549#ifdef QT_BUILD_INTERNAL
550void tst_QUrlInternal::nameprep_highcodes()
551{
552 QFETCH(QString, in);
553 QFETCH(QString, out);
554 QFETCH(QString, profile);
555
556 qt_nameprep(source: &in, from: 0);
557 QCOMPARE(in, out);
558}
559#endif
560
561void tst_QUrlInternal::ace_testsuite_data()
562{
563 QTest::addColumn<QString>(name: "in");
564 QTest::addColumn<QString>(name: "toace");
565 QTest::addColumn<QString>(name: "fromace");
566 QTest::addColumn<QString>(name: "unicode");
567
568 QTest::newRow(dataTag: "ascii-lower") << "fluke" << "fluke" << "fluke" << "fluke";
569 QTest::newRow(dataTag: "ascii-mixed") << "FLuke" << "fluke" << "fluke" << "fluke";
570 QTest::newRow(dataTag: "ascii-upper") << "FLUKE" << "fluke" << "fluke" << "fluke";
571
572 QTest::newRow(dataTag: "asciifolded") << QString::fromLatin1(str: "stra\337e") << "strasse" << "." << "strasse";
573 QTest::newRow(dataTag: "asciifolded-dotcom") << QString::fromLatin1(str: "stra\337e.example.com") << "strasse.example.com" << "." << "strasse.example.com";
574 QTest::newRow(dataTag: "greek-mu") << QString::fromLatin1(str: "\265V")
575 <<"xn--v-lmb"
576 << "."
577 << QString::fromUtf8(str: "\316\274v");
578
579 QTest::newRow(dataTag: "non-ascii-lower") << QString::fromLatin1(str: "alqualond\353")
580 << "xn--alqualond-34a"
581 << "."
582 << QString::fromLatin1(str: "alqualond\353");
583 QTest::newRow(dataTag: "non-ascii-mixed") << QString::fromLatin1(str: "Alqualond\353")
584 << "xn--alqualond-34a"
585 << "."
586 << QString::fromLatin1(str: "alqualond\353");
587 QTest::newRow(dataTag: "non-ascii-upper") << QString::fromLatin1(str: "ALQUALOND\313")
588 << "xn--alqualond-34a"
589 << "."
590 << QString::fromLatin1(str: "alqualond\353");
591
592 QTest::newRow(dataTag: "idn-lower") << "xn--alqualond-34a" << "xn--alqualond-34a"
593 << QString::fromLatin1(str: "alqualond\353")
594 << QString::fromLatin1(str: "alqualond\353");
595 QTest::newRow(dataTag: "idn-mixed") << "Xn--alqualond-34a" << "xn--alqualond-34a"
596 << QString::fromLatin1(str: "alqualond\353")
597 << QString::fromLatin1(str: "alqualond\353");
598 QTest::newRow(dataTag: "idn-mixed2") << "XN--alqualond-34a" << "xn--alqualond-34a"
599 << QString::fromLatin1(str: "alqualond\353")
600 << QString::fromLatin1(str: "alqualond\353");
601 QTest::newRow(dataTag: "idn-mixed3") << "xn--ALQUALOND-34a" << "xn--alqualond-34a"
602 << QString::fromLatin1(str: "alqualond\353")
603 << QString::fromLatin1(str: "alqualond\353");
604 QTest::newRow(dataTag: "idn-mixed4") << "xn--alqualond-34A" << "xn--alqualond-34a"
605 << QString::fromLatin1(str: "alqualond\353")
606 << QString::fromLatin1(str: "alqualond\353");
607 QTest::newRow(dataTag: "idn-upper") << "XN--ALQUALOND-34A" << "xn--alqualond-34a"
608 << QString::fromLatin1(str: "alqualond\353")
609 << QString::fromLatin1(str: "alqualond\353");
610
611 QTest::newRow(dataTag: "separator-3002") << QString::fromUtf8(str: "example\343\200\202com")
612 << "example.com" << "." << "example.com";
613
614 QString egyptianIDN =
615 QString::fromUtf8(str: "\331\210\330\262\330\247\330\261\330\251\055\330\247\331\204\330"
616 "\243\330\252\330\265\330\247\331\204\330\247\330\252.\331\205"
617 "\330\265\330\261");
618 QTest::newRow(dataTag: "egyptian-tld-ace")
619 << "xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c"
620 << "xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c"
621 << "."
622 << egyptianIDN;
623 QTest::newRow(dataTag: "egyptian-tld-unicode")
624 << egyptianIDN
625 << "xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c"
626 << "."
627 << egyptianIDN;
628 QTest::newRow(dataTag: "egyptian-tld-mix1")
629 << QString::fromUtf8(str: "\331\210\330\262\330\247\330\261\330\251\055\330\247\331\204\330"
630 "\243\330\252\330\265\330\247\331\204\330\247\330\252.xn--wgbh1c")
631 << "xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c"
632 << "."
633 << egyptianIDN;
634 QTest::newRow(dataTag: "egyptian-tld-mix2")
635 << QString::fromUtf8(str: "xn----rmckbbajlc6dj7bxne2c.\331\205\330\265\330\261")
636 << "xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c"
637 << "."
638 << egyptianIDN;
639
640 QString russianIDN = QString::fromUtf8(str: "\321\217\320\275\320\264\320\265\320\272\321\201.\321\200\321\204");
641 QTest::newRow(dataTag: "russian-tld-ace")
642 << "xn--d1acpjx3f.xn--p1ai"
643 << "xn--d1acpjx3f.xn--p1ai"
644 << "."
645 << russianIDN;
646
647 QString taiwaneseIDN = QString::fromUtf8(str: "\345\217\260\345\214\227\346\214\211\346\221\251.\345\217\260\347\201\243");
648 QTest::newRow(dataTag: "taiwanese-tld-ace")
649 << "xn--djrptm67aikb.xn--kpry57d"
650 << "xn--djrptm67aikb.xn--kpry57d"
651 << "."
652 << taiwaneseIDN;
653
654 // violations / invalids
655 QTest::newRow(dataTag: "invalid-punycode") << "xn--z" << "xn--z" << "xn--z" << "xn--z";
656
657 // U+00A0 NO-BREAK SPACE encodes to Punycode "6a"
658 // but it is prohibited and should have caused encoding failure
659 QTest::newRow(dataTag: "invalid-nameprep-prohibited") << "xn--6a" << "xn--6a" << "xn--6a" << "xn--6a";
660
661 // U+00AD SOFT HYPHEN between "a" and "b" encodes to Punycode "ab-5da"
662 // but it should have been removed in the nameprep stage
663 QTest::newRow(dataTag: "invalid-nameprep-maptonothing") << "xn-ab-5da" << "xn-ab-5da" << "xn-ab-5da" << "xn-ab-5da";
664
665 // U+00C1 LATIN CAPITAL LETTER A WITH ACUTE encodes to Punycode "4ba"
666 // but it should have nameprepped to lowercase first
667 QTest::newRow(dataTag: "invalid-nameprep-uppercase") << "xn--4ba" << "xn--4ba" << "xn--4ba" << "xn--4ba";
668
669 // U+00B5 MICRO SIGN encodes to Punycode "sba"
670 // but is should have nameprepped to NFKC U+03BC GREEK SMALL LETTER MU
671 QTest::newRow(dataTag: "invalid-nameprep-nonnfkc") << "xn--sba" << "xn--sba" << "xn--sba" << "xn--sba";
672
673 // U+04CF CYRILLIC SMALL LETTER PALOCHKA encodes to "s5a"
674 // but it's not in RFC 3454's allowed character list (Unicode 3.2)
675 QTest::newRow(dataTag: "invalid-nameprep-unassigned") << "xn--s5a" << "xn--s5a" << "xn--s5a" << "xn--s5a";
676 // same character, see QTBUG-60364
677 QTest::newRow(dataTag: "invalid-nameprep-unassigned2") << "xn--80ak6aa92e" << "xn--80ak6aa92e" << "xn--80ak6aa92e" << "xn--80ak6aa92e";
678}
679
680void tst_QUrlInternal::ace_testsuite()
681{
682 static const char canonsuffix[] = ".troll.no";
683 QFETCH(QString, in);
684 QFETCH(QString, toace);
685 QFETCH(QString, fromace);
686 QFETCH(QString, unicode);
687
688 const char *suffix = canonsuffix;
689 if (toace.contains(c: '.'))
690 suffix = 0;
691
692 QString domain = in + suffix;
693 QCOMPARE(QString::fromLatin1(QUrl::toAce(domain)), toace + suffix);
694 if (fromace != ".")
695 QCOMPARE(QUrl::fromAce(domain.toLatin1()), fromace + suffix);
696 QCOMPARE(QUrl::fromAce(QUrl::toAce(domain)), unicode + suffix);
697
698 QUrl u;
699 u.setHost(host: domain);
700 QVERIFY(u.isValid());
701 QCOMPARE(u.host(), unicode + suffix);
702 QCOMPARE(u.host(QUrl::EncodeUnicode), toace + suffix);
703 QCOMPARE(u.toEncoded(), "//" + toace.toLatin1() + suffix);
704 QCOMPARE(u.toDisplayString(), "//" + unicode + suffix);
705
706 domain = in + (suffix ? ".troll.No" : "");
707 QCOMPARE(QString::fromLatin1(QUrl::toAce(domain)), toace + suffix);
708 if (fromace != ".")
709 QCOMPARE(QUrl::fromAce(domain.toLatin1()), fromace + suffix);
710 QCOMPARE(QUrl::fromAce(QUrl::toAce(domain)), unicode + suffix);
711
712 domain = in + (suffix ? ".troll.NO" : "");
713 QCOMPARE(QString::fromLatin1(QUrl::toAce(domain)), toace + suffix);
714 if (fromace != ".")
715 QCOMPARE(QUrl::fromAce(domain.toLatin1()), fromace + suffix);
716 QCOMPARE(QUrl::fromAce(QUrl::toAce(domain)), unicode + suffix);
717}
718
719void tst_QUrlInternal::std3violations_data()
720{
721 QTest::addColumn<QString>(name: "source");
722 QTest::addColumn<bool>(name: "validUrl");
723
724 QTest::newRow(dataTag: "too-long") << "this-domain-is-far-too-long-for-its-own-good-and-should-have-been-limited-to-63-chars" << false;
725 QTest::newRow(dataTag: "dash-begin") << "-x-foo" << false;
726 QTest::newRow(dataTag: "dash-end") << "x-foo-" << false;
727 QTest::newRow(dataTag: "dash-begin-end") << "-foo-" << false;
728
729 QTest::newRow(dataTag: "control") << "\033foo" << false;
730 QTest::newRow(dataTag: "bang") << "foo!" << false;
731 QTest::newRow(dataTag: "plus") << "foo+bar" << false;
732 QTest::newRow(dataTag: "dot") << "foo.bar";
733 QTest::newRow(dataTag: "startingdot") << ".bar" << false;
734 QTest::newRow(dataTag: "startingdot2") << ".example.com" << false;
735 QTest::newRow(dataTag: "slash") << "foo/bar" << true;
736 QTest::newRow(dataTag: "colon") << "foo:80" << true;
737 QTest::newRow(dataTag: "question") << "foo?bar" << true;
738 QTest::newRow(dataTag: "at") << "foo@bar" << true;
739 QTest::newRow(dataTag: "backslash") << "foo\\bar" << false;
740
741 // these characters are transformed by NFKC to non-LDH characters
742 QTest::newRow(dataTag: "dot-like") << QString::fromUtf8(str: "foo\342\200\244bar") << false; // U+2024 ONE DOT LEADER
743 QTest::newRow(dataTag: "slash-like") << QString::fromUtf8(str: "foo\357\274\217bar") << false; // U+FF0F FULLWIDTH SOLIDUS
744
745 // The following should be invalid but isn't
746 // the DIVISON SLASH doesn't case-fold to a slash
747 // is this a problem with RFC 3490?
748 //QTest::newRow("slash-like2") << QString::fromUtf8("foo\342\210\225bar") << false; // U+2215 DIVISION SLASH
749}
750
751void tst_QUrlInternal::std3violations()
752{
753 QFETCH(QString, source);
754
755#ifdef QT_BUILD_INTERNAL
756 {
757 QString prepped = source;
758 qt_nameprep(source: &prepped, from: 0);
759 QVERIFY(!qt_check_std3rules(prepped.constData(), prepped.length()));
760 }
761#endif
762
763 if (source.contains(c: '.'))
764 return; // this test ends here
765
766 QUrl url;
767 url.setHost(host: source);
768 QVERIFY(url.host().isEmpty());
769
770 QFETCH(bool, validUrl);
771 if (validUrl)
772 return; // test ends here for these cases
773
774 url = QUrl("http://" + source + "/some/path");
775 QVERIFY(!url.isValid());
776}
777
778void tst_QUrlInternal::std3deviations_data()
779{
780 QTest::addColumn<QString>(name: "source");
781
782 QTest::newRow(dataTag: "ending-dot") << "example.com.";
783 QTest::newRow(dataTag: "ending-dot3002") << QString("example.com") + QChar(0x3002);
784 QTest::newRow(dataTag: "underline") << "foo_bar"; //QTBUG-7434
785}
786
787void tst_QUrlInternal::std3deviations()
788{
789 QFETCH(QString, source);
790 QVERIFY(!QUrl::toAce(source).isEmpty());
791
792 QUrl url;
793 url.setHost(host: source);
794 QVERIFY(!url.host().isEmpty());
795}
796
797void tst_QUrlInternal::correctEncodedMistakes_data()
798{
799 QTest::addColumn<QString>(name: "input");
800 QTest::addColumn<QString>(name: "expected");
801
802 QTest::newRow(dataTag: "empty") << "" << "";
803
804 // these contain one invalid percent
805 QTest::newRow(dataTag: "%") << QString("%") << QString("%25");
806 QTest::newRow(dataTag: "3%") << QString("3%") << QString("3%25");
807 QTest::newRow(dataTag: "13%") << QString("13%") << QString("13%25");
808 QTest::newRow(dataTag: "13%!") << QString("13%!") << QString("13%25!");
809 QTest::newRow(dataTag: "13%!!") << QString("13%!!") << QString("13%25!!");
810 QTest::newRow(dataTag: "13%a") << QString("13%a") << QString("13%25a");
811 QTest::newRow(dataTag: "13%az") << QString("13%az") << QString("13%25az");
812
813 // two invalid percents
814 QTest::newRow(dataTag: "13%%") << "13%%" << "13%25%25";
815 QTest::newRow(dataTag: "13%a%a") << "13%a%a" << "13%25a%25a";
816 QTest::newRow(dataTag: "13%az%az") << "13%az%az" << "13%25az%25az";
817
818 // these are correct (idempotent)
819 QTest::newRow(dataTag: "13%25") << QString("13%25") << QString("13%25");
820 QTest::newRow(dataTag: "13%25%25") << QString("13%25%25") << QString("13%25%25");
821
822 // these contain one invalid and one valid
823 // the code assumes they are all invalid
824 QTest::newRow(dataTag: "13%13..%") << "13%13..%" << "13%2513..%25";
825 QTest::newRow(dataTag: "13%..%13") << "13%..%13" << "13%25..%2513";
826
827 // three percents, one invalid
828 QTest::newRow(dataTag: "%01%02%3") << "%01%02%3" << "%2501%2502%253";
829
830 // now mix bad percents with Unicode decoding
831 QTest::newRow(dataTag: "%C2%") << "%C2%" << "%25C2%25";
832 QTest::newRow(dataTag: "%C2%A") << "%C2%A" << "%25C2%25A";
833 QTest::newRow(dataTag: "%C2%Az") << "%C2%Az" << "%25C2%25Az";
834 QTest::newRow(dataTag: "%E2%A0%") << "%E2%A0%" << "%25E2%25A0%25";
835 QTest::newRow(dataTag: "%E2%A0%A") << "%E2%A0%A" << "%25E2%25A0%25A";
836 QTest::newRow(dataTag: "%E2%A0%Az") << "%E2%A0%Az" << "%25E2%25A0%25Az";
837 QTest::newRow(dataTag: "%F2%A0%A0%") << "%F2%A0%A0%" << "%25F2%25A0%25A0%25";
838 QTest::newRow(dataTag: "%F2%A0%A0%A") << "%F2%A0%A0%A" << "%25F2%25A0%25A0%25A";
839 QTest::newRow(dataTag: "%F2%A0%A0%Az") << "%F2%A0%A0%Az" << "%25F2%25A0%25A0%25Az";
840}
841
842void tst_QUrlInternal::correctEncodedMistakes()
843{
844 QFETCH(QString, input);
845 QFETCH(QString, expected);
846
847 // prepend some data to be sure that it remains there
848 QString dataTag = QTest::currentDataTag();
849 QString output = dataTag;
850
851 if (!qt_urlRecode(appendTo&: output, begin: input.constData(), end: input.constData() + input.length(), encoding: { }))
852 output += input;
853 QCOMPARE(output, dataTag + expected);
854
855 // now try the full decode mode
856 output = dataTag;
857 QString expected2 = QUrl::fromPercentEncoding(expected.toLatin1());
858
859 if (!qt_urlRecode(appendTo&: output, begin: input.constData(), end: input.constData() + input.length(), encoding: QUrl::FullyDecoded))
860 output += input;
861 QCOMPARE(output, dataTag + expected2);
862}
863
864static void addUtf8Data(const char *name, const char *data)
865{
866 QString encoded = QByteArray(data).toPercentEncoding();
867 QString decoded = QString::fromUtf8(str: data);
868
869 // this data contains invaild UTF-8 sequences, so FullyDecoded doesn't work (by design)
870 // use PrettyDecoded instead
871 QTest::newRow(dataTag: QByteArray("decode-") + name) << encoded << QUrl::ComponentFormattingOptions(QUrl::PrettyDecoded) << decoded;
872 QTest::newRow(dataTag: QByteArray("encode-") + name) << decoded << QUrl::ComponentFormattingOptions(QUrl::FullyEncoded) << encoded;
873}
874
875void tst_QUrlInternal::encodingRecode_data()
876{
877 typedef QUrl::ComponentFormattingOptions F;
878 QTest::addColumn<QString>(name: "input");
879 QTest::addColumn<F>(name: "encodingMode");
880 QTest::addColumn<QString>(name: "expected");
881
882 // -- idempotent tests --
883 static int modes[] = { QUrl::PrettyDecoded,
884 QUrl::EncodeSpaces,
885 QUrl::EncodeSpaces | QUrl::EncodeUnicode,
886 QUrl::EncodeSpaces | QUrl::EncodeUnicode | QUrl::EncodeDelimiters,
887 QUrl::EncodeSpaces | QUrl::EncodeUnicode | QUrl::EncodeDelimiters | QUrl::EncodeReserved,
888 QUrl::EncodeSpaces | QUrl::EncodeUnicode | QUrl::EncodeDelimiters | QUrl::DecodeReserved,
889 QUrl::EncodeSpaces | QUrl::EncodeUnicode | QUrl::EncodeReserved,
890 QUrl::EncodeSpaces | QUrl::EncodeUnicode | QUrl::DecodeReserved,
891 QUrl::EncodeSpaces | QUrl::EncodeDelimiters,
892 QUrl::EncodeSpaces | QUrl::EncodeDelimiters | QUrl::EncodeReserved,
893 QUrl::EncodeSpaces | QUrl::EncodeDelimiters | QUrl::DecodeReserved,
894 QUrl::EncodeSpaces | QUrl::EncodeReserved,
895 QUrl::EncodeSpaces | QUrl::DecodeReserved,
896
897 QUrl::EncodeUnicode,
898 QUrl::EncodeUnicode | QUrl::EncodeDelimiters,
899 QUrl::EncodeUnicode | QUrl::EncodeDelimiters | QUrl::EncodeReserved,
900 QUrl::EncodeUnicode | QUrl::EncodeDelimiters | QUrl::DecodeReserved,
901 QUrl::EncodeUnicode | QUrl::EncodeReserved,
902
903 QUrl::EncodeDelimiters,
904 QUrl::EncodeDelimiters | QUrl::EncodeReserved,
905 QUrl::EncodeDelimiters | QUrl::DecodeReserved,
906 QUrl::EncodeReserved,
907 QUrl::DecodeReserved };
908 for (uint i = 0; i < sizeof(modes)/sizeof(modes[0]); ++i) {
909 QByteArray code = QByteArray::number(modes[i], base: 16);
910 F mode = QUrl::ComponentFormattingOption(modes[i]);
911
912 QTest::newRow(dataTag: "null-0x" + code) << QString() << mode << QString();
913 QTest::newRow(dataTag: "empty-0x" + code) << "" << mode << "";
914
915 // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
916 // Unreserved characters are never encoded
917 QTest::newRow(dataTag: "alpha-0x" + code) << "abcABCZZzz" << mode << "abcABCZZzz";
918 QTest::newRow(dataTag: "digits-0x" + code) << "01234567890" << mode << "01234567890";
919 QTest::newRow(dataTag: "otherunreserved-0x" + code) << "-._~" << mode << "-._~";
920
921 // Control characters are always encoded
922 // Use uppercase because the output is also uppercased
923 QTest::newRow(dataTag: "control-nul-0x" + code) << "%00" << mode << "%00";
924 QTest::newRow(dataTag: "control-0x" + code) << "%0D%0A%1F%1A%7F" << mode << "%0D%0A%1F%1A%7F";
925
926 // The percent is always encoded
927 QTest::newRow(dataTag: "percent-0x" + code) << "25%2525" << mode << "25%2525";
928
929 // mixed control and unreserved
930 QTest::newRow(dataTag: "control-unreserved-0x" + code) << "Foo%00Bar%0D%0Abksp%7F" << mode << "Foo%00Bar%0D%0Abksp%7F";
931 }
932
933 // however, control characters and the percent *are* decoded in FullyDecoded mode
934 // this is the only exception
935 QTest::newRow(dataTag: "control-nul-fullydecoded") << "%00" << F(QUrl::FullyDecoded) << QStringLiteral("\0");
936 QTest::newRow(dataTag: "control-fullydecoded") << "%0D%0A%1F%1A%7F" << F(QUrl::FullyDecoded) << "\r\n\x1f\x1a\x7f";
937 QTest::newRow(dataTag: "percent-fullydecoded") << "25%2525" << F(QUrl::FullyDecoded) << "25%25";
938 QTest::newRow(dataTag: "control-unreserved-fullydecoded") << "Foo%00Bar%0D%0Abksp%7F" << F(QUrl::FullyDecoded)
939 << QStringLiteral("Foo\0Bar\r\nbksp\x7F");
940
941 // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
942 // sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
943 // / "*" / "+" / "," / ";" / "="
944 // in the default operation, delimiters don't get encoded or decoded
945 static const char delimiters[] = ":/?#[]@" "!$&'()*+,;=";
946 for (const char *c = delimiters; *c; ++c) {
947 QByteArray code = QByteArray::number(*c, base: 16);
948 QString encoded = QString("abc%") + code.toUpper() + "def" ;
949 QString decoded = QString("abc") + *c + "def" ;
950 QTest::newRow(dataTag: "delimiter-encoded-" + code) << encoded << F(QUrl::FullyEncoded) << encoded;
951 QTest::newRow(dataTag: "delimiter-decoded-" + code) << decoded << F(QUrl::FullyEncoded) << decoded;
952 }
953
954 // encode control characters
955 QTest::newRow(dataTag: "encode-control") << "\1abc\2\033esc" << F(QUrl::PrettyDecoded) << "%01abc%02%1Besc";
956 QTest::newRow(dataTag: "encode-nul") << QString::fromLatin1(str: "abc\0def", size: 7) << F(QUrl::PrettyDecoded) << "abc%00def";
957
958 // space
959 QTest::newRow(dataTag: "space-leave-decoded") << "Hello World " << F(QUrl::PrettyDecoded) << "Hello World ";
960 QTest::newRow(dataTag: "space-leave-encoded") << "Hello%20World%20" << F(QUrl::FullyEncoded) << "Hello%20World%20";
961 QTest::newRow(dataTag: "space-encode") << "Hello World " << F(QUrl::FullyEncoded) << "Hello%20World%20";
962 QTest::newRow(dataTag: "space-decode") << "Hello%20World%20" << F(QUrl::PrettyDecoded) << "Hello World ";
963
964 // decode unreserved
965 QTest::newRow(dataTag: "unreserved-decode") << "%66%6f%6f%42a%72" << F(QUrl::FullyEncoded) << "fooBar";
966
967 // mix encoding with decoding
968 QTest::newRow(dataTag: "encode-control-decode-space") << "\1\2%200" << F(QUrl::PrettyDecoded) << "%01%02 0";
969 QTest::newRow(dataTag: "decode-space-encode-control") << "%20\1\2" << F(QUrl::PrettyDecoded) << " %01%02";
970
971 // decode and encode valid UTF-8 data
972 // invalid is tested in encodingRecodeInvalidUtf8
973 addUtf8Data(name: "utf8-2char-1", data: "\xC2\x80"); // U+0080
974 addUtf8Data(name: "utf8-2char-2", data: "\xDF\xBF"); // U+07FF
975 addUtf8Data(name: "utf8-3char-1", data: "\xE0\xA0\x80"); // U+0800
976 addUtf8Data(name: "utf8-3char-2", data: "\xED\x9F\xBF"); // U+D7FF
977 addUtf8Data(name: "utf8-3char-3", data: "\xEE\x80\x80"); // U+E000
978 addUtf8Data(name: "utf8-3char-4", data: "\xEF\xBF\xBD"); // U+FFFD
979 addUtf8Data(name: "utf8-4char-1", data: "\xF0\x90\x80\x80"); // U+10000
980 addUtf8Data(name: "utf8-4char-2", data: "\xF4\x8F\xBF\xBD"); // U+10FFFD
981
982 // longer UTF-8 sequences, mixed with unreserved
983 addUtf8Data(name: "utf8-string-1", data: "R\xc3\xa9sum\xc3\xa9");
984 addUtf8Data(name: "utf8-string-2", data: "\xDF\xBF\xE0\xA0\x80""A");
985 addUtf8Data(name: "utf8-string-3", data: "\xE0\xA0\x80\xDF\xBF...");
986
987 QTest::newRow(dataTag: "encode-unicode-noncharacter") << QString(QChar(0xffff)) << F(QUrl::FullyEncoded) << "%EF%BF%BF";
988 QTest::newRow(dataTag: "decode-unicode-noncharacter") << QString(QChar(0xffff)) << F(QUrl::PrettyDecoded) << QString::fromUtf8(str: "\xEF\xBF\xBF");
989
990 // special cases: stuff we can encode, but not decode
991 QTest::newRow(dataTag: "unicode-lo-surrogate") << QString(QChar(0xD800)) << F(QUrl::FullyEncoded) << "%ED%A0%80";
992 QTest::newRow(dataTag: "unicode-hi-surrogate") << QString(QChar(0xDC00)) << F(QUrl::FullyEncoded) << "%ED%B0%80";
993
994 // a couple of Unicode strings with leading spaces
995 QTest::newRow(dataTag: "space-unicode") << QString::fromUtf8(str: " \xc2\xa0") << F(QUrl::FullyEncoded) << "%20%C2%A0";
996 QTest::newRow(dataTag: "space-space-unicode") << QString::fromUtf8(str: " \xc2\xa0") << F(QUrl::FullyEncoded) << "%20%20%C2%A0";
997 QTest::newRow(dataTag: "space-space-space-unicode") << QString::fromUtf8(str: " \xc2\xa0") << F(QUrl::FullyEncoded) << "%20%20%20%C2%A0";
998
999 // hex case testing
1000 QTest::newRow(dataTag: "FF") << "%FF" << F(QUrl::FullyEncoded) << "%FF";
1001 QTest::newRow(dataTag: "Ff") << "%Ff" << F(QUrl::FullyEncoded) << "%FF";
1002 QTest::newRow(dataTag: "fF") << "%fF" << F(QUrl::FullyEncoded) << "%FF";
1003 QTest::newRow(dataTag: "ff") << "%ff" << F(QUrl::FullyEncoded) << "%FF";
1004
1005 // decode UTF-8 mixed with non-UTF-8 and unreserved
1006 QTest::newRow(dataTag: "utf8-mix-1") << "%80%C2%80" << F(QUrl::PrettyDecoded) << QString::fromUtf8(str: "%80\xC2\x80");
1007 QTest::newRow(dataTag: "utf8-mix-2") << "%C2%C2%80" << F(QUrl::PrettyDecoded) << QString::fromUtf8(str: "%C2\xC2\x80");
1008 QTest::newRow(dataTag: "utf8-mix-3") << "%E0%C2%80" << F(QUrl::PrettyDecoded) << QString::fromUtf8(str: "%E0\xC2\x80");
1009 QTest::newRow(dataTag: "utf8-mix-3") << "A%C2%80" << F(QUrl::PrettyDecoded) << QString::fromUtf8(str: "A\xC2\x80");
1010 QTest::newRow(dataTag: "utf8-mix-3") << "%C2%80A" << F(QUrl::PrettyDecoded) << QString::fromUtf8(str: "\xC2\x80""A");
1011}
1012
1013void tst_QUrlInternal::encodingRecode()
1014{
1015 QFETCH(QString, input);
1016 QFETCH(QString, expected);
1017 QFETCH(QUrl::ComponentFormattingOptions, encodingMode);
1018
1019 // prepend some data to be sure that it remains there
1020 QString output = QTest::currentDataTag();
1021 expected.prepend(s: output);
1022
1023 if (!qt_urlRecode(appendTo&: output, begin: input.constData(), end: input.constData() + input.length(), encoding: encodingMode))
1024 output += input;
1025 QCOMPARE(output, expected);
1026}
1027
1028void tst_QUrlInternal::encodingRecodeInvalidUtf8_data()
1029{
1030 QTest::addColumn<QByteArray>(name: "utf8");
1031 QTest::addColumn<QString>(name: "utf16");
1032
1033 extern void loadInvalidUtf8Rows();
1034 extern void loadNonCharactersRows();
1035 loadInvalidUtf8Rows();
1036 loadNonCharactersRows();
1037
1038 QTest::newRow(dataTag: "utf8-mix-4") << QByteArray("\xE0.A2\x80");
1039 QTest::newRow(dataTag: "utf8-mix-5") << QByteArray("\xE0\xA2.80");
1040 QTest::newRow(dataTag: "utf8-mix-6") << QByteArray("\xE0\xA2\x33");
1041}
1042
1043void tst_QUrlInternal::encodingRecodeInvalidUtf8()
1044{
1045 QFETCH(QByteArray, utf8);
1046 QString input = utf8.toPercentEncoding();
1047
1048 // prepend some data to be sure that it remains there
1049 QString output = QTest::currentDataTag();
1050
1051 if (!qt_urlRecode(appendTo&: output, begin: input.constData(), end: input.constData() + input.length(), encoding: QUrl::PrettyDecoded))
1052 output += input;
1053 QCOMPARE(output, QTest::currentDataTag() + input);
1054
1055 // this is just control
1056 output = QTest::currentDataTag();
1057 if (!qt_urlRecode(appendTo&: output, begin: input.constData(), end: input.constData() + input.length(), encoding: QUrl::FullyEncoded))
1058 output += input;
1059 QCOMPARE(output, QTest::currentDataTag() + input);
1060
1061 // verify for security reasons that all bad UTF-8 data got replaced by QChar::ReplacementCharacter
1062 output = QTest::currentDataTag();
1063 if (!qt_urlRecode(appendTo&: output, begin: input.constData(), end: input.constData() + input.length(), encoding: QUrl::FullyEncoded))
1064 output += input;
1065 for (int i = int(strlen(s: QTest::currentDataTag())); i < output.length(); ++i) {
1066 QVERIFY2(output.at(i).unicode() < 0x80 || output.at(i) == QChar::ReplacementCharacter,
1067 qPrintable(QString("Character at i == %1 was U+%2").arg(i).arg(output.at(i).unicode(), 4, 16, QLatin1Char('0'))));
1068 }
1069}
1070
1071void tst_QUrlInternal::recodeByteArray_data()
1072{
1073 QTest::addColumn<QByteArray>(name: "input");
1074 QTest::addColumn<QString>(name: "expected");
1075
1076 QTest::newRow(dataTag: "null") << QByteArray() << QString();
1077 QTest::newRow(dataTag: "empty") << QByteArray("") << QString("");
1078 QTest::newRow(dataTag: "normal") << QByteArray("Hello") << "Hello";
1079 QTest::newRow(dataTag: "valid-utf8") << QByteArray("\xc3\xa9") << "%C3%A9";
1080 QTest::newRow(dataTag: "percent-encoded") << QByteArray("%C3%A9%00%C0%80") << "%C3%A9%00%C0%80";
1081 QTest::newRow(dataTag: "invalid-utf8-1") << QByteArray("\xc3\xc3") << "%C3%C3";
1082 QTest::newRow(dataTag: "invalid-utf8-2") << QByteArray("\xc0\x80") << "%C0%80";
1083
1084 // note: percent-encoding the control characters ("\0" -> "%00") would also
1085 // be correct, but it's unnecessary for this function
1086 QTest::newRow(dataTag: "binary") << QByteArray("\0\x1f", 2) << QString::fromLatin1(str: "\0\x1f", size: 2);;
1087 QTest::newRow(dataTag: "binary+percent-encoded") << QByteArray("\0%25", 4) << QString::fromLatin1(str: "\0%25", size: 4);
1088}
1089
1090void tst_QUrlInternal::recodeByteArray()
1091{
1092 QFETCH(QByteArray, input);
1093 QFETCH(QString, expected);
1094 QString output = qt_urlRecodeByteArray(ba: input);
1095
1096 QCOMPARE(output.isNull(), input.isNull());
1097 QCOMPARE(output.isEmpty(), input.isEmpty());
1098 QCOMPARE(output, expected);
1099}
1100
// Test program entry point: per the Qt Test documentation, this macro
// implements a main() that runs all tests in tst_QUrlInternal without
// instantiating a QApplication object.
QTEST_APPLESS_MAIN(tst_QUrlInternal)
1102

// Source: qtbase/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp