1// Copyright (C) 2016 Intel Corporation.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "qurl.h"
5#include "private/qstringconverter_p.h"
6#include "private/qtools_p.h"
7#include "private/qsimd_p.h"
8
9QT_BEGIN_NAMESPACE
10
11// ### move to qurl_p.h
// What to do with a character while recoding a URL component.
// The numeric values are what the action tables in this file store
// (as plain 0/1/2), so they must not be renumbered.
enum EncodingAction {
    DecodeCharacter = 0,    // character should appear literally (decode "%XX" forms)
    LeaveCharacter = 1,     // keep the character in whichever form it was found
    EncodeCharacter = 2     // character should appear as "%XX"
};
17
// From RFC 3986, Appendix A Collected ABNF for URI
//    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
//    reserved      = gen-delims / sub-delims
//    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
//    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
//                  / "*" / "+" / "," / ";" / "="
24static const uchar defaultActionTable[96] = {
25 2, // space
26 1, // '!' (sub-delim)
27 2, // '"'
28 1, // '#' (gen-delim)
29 1, // '$' (gen-delim)
30 2, // '%' (percent)
31 1, // '&' (gen-delim)
32 1, // "'" (sub-delim)
33 1, // '(' (sub-delim)
34 1, // ')' (sub-delim)
35 1, // '*' (sub-delim)
36 1, // '+' (sub-delim)
37 1, // ',' (sub-delim)
38 0, // '-' (unreserved)
39 0, // '.' (unreserved)
40 1, // '/' (gen-delim)
41
42 0, 0, 0, 0, 0, // '0' to '4' (unreserved)
43 0, 0, 0, 0, 0, // '5' to '9' (unreserved)
44 1, // ':' (gen-delim)
45 1, // ';' (sub-delim)
46 2, // '<'
47 1, // '=' (sub-delim)
48 2, // '>'
49 1, // '?' (gen-delim)
50
51 1, // '@' (gen-delim)
52 0, 0, 0, 0, 0, // 'A' to 'E' (unreserved)
53 0, 0, 0, 0, 0, // 'F' to 'J' (unreserved)
54 0, 0, 0, 0, 0, // 'K' to 'O' (unreserved)
55 0, 0, 0, 0, 0, // 'P' to 'T' (unreserved)
56 0, 0, 0, 0, 0, 0, // 'U' to 'Z' (unreserved)
57 1, // '[' (gen-delim)
58 2, // '\'
59 1, // ']' (gen-delim)
60 2, // '^'
61 0, // '_' (unreserved)
62
63 2, // '`'
64 0, 0, 0, 0, 0, // 'a' to 'e' (unreserved)
65 0, 0, 0, 0, 0, // 'f' to 'j' (unreserved)
66 0, 0, 0, 0, 0, // 'k' to 'o' (unreserved)
67 0, 0, 0, 0, 0, // 'p' to 't' (unreserved)
68 0, 0, 0, 0, 0, 0, // 'u' to 'z' (unreserved)
69 2, // '{'
70 2, // '|'
71 2, // '}'
72 0, // '~' (unreserved)
73
74 2 // BSKP
75};
76
77// mask tables, in negative polarity
78// 0x00 if it belongs to this category
79// 0xff if it doesn't
80
81static const uchar reservedMask[96] = {
82 0xff, // space
83 0xff, // '!' (sub-delim)
84 0x00, // '"'
85 0xff, // '#' (gen-delim)
86 0xff, // '$' (gen-delim)
87 0xff, // '%' (percent)
88 0xff, // '&' (gen-delim)
89 0xff, // "'" (sub-delim)
90 0xff, // '(' (sub-delim)
91 0xff, // ')' (sub-delim)
92 0xff, // '*' (sub-delim)
93 0xff, // '+' (sub-delim)
94 0xff, // ',' (sub-delim)
95 0xff, // '-' (unreserved)
96 0xff, // '.' (unreserved)
97 0xff, // '/' (gen-delim)
98
99 0xff, 0xff, 0xff, 0xff, 0xff, // '0' to '4' (unreserved)
100 0xff, 0xff, 0xff, 0xff, 0xff, // '5' to '9' (unreserved)
101 0xff, // ':' (gen-delim)
102 0xff, // ';' (sub-delim)
103 0x00, // '<'
104 0xff, // '=' (sub-delim)
105 0x00, // '>'
106 0xff, // '?' (gen-delim)
107
108 0xff, // '@' (gen-delim)
109 0xff, 0xff, 0xff, 0xff, 0xff, // 'A' to 'E' (unreserved)
110 0xff, 0xff, 0xff, 0xff, 0xff, // 'F' to 'J' (unreserved)
111 0xff, 0xff, 0xff, 0xff, 0xff, // 'K' to 'O' (unreserved)
112 0xff, 0xff, 0xff, 0xff, 0xff, // 'P' to 'T' (unreserved)
113 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'U' to 'Z' (unreserved)
114 0xff, // '[' (gen-delim)
115 0x00, // '\'
116 0xff, // ']' (gen-delim)
117 0x00, // '^'
118 0xff, // '_' (unreserved)
119
120 0x00, // '`'
121 0xff, 0xff, 0xff, 0xff, 0xff, // 'a' to 'e' (unreserved)
122 0xff, 0xff, 0xff, 0xff, 0xff, // 'f' to 'j' (unreserved)
123 0xff, 0xff, 0xff, 0xff, 0xff, // 'k' to 'o' (unreserved)
124 0xff, 0xff, 0xff, 0xff, 0xff, // 'p' to 't' (unreserved)
125 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 'u' to 'z' (unreserved)
126 0x00, // '{'
127 0x00, // '|'
128 0x00, // '}'
129 0xff, // '~' (unreserved)
130
131 0xff // BSKP
132};
133
// Returns true if \a c is an ASCII hexadecimal digit: 0-9, a-f or A-F.
static inline bool isHex(char16_t c)
{
    return (c >= u'a' && c <= u'f') || (c >= u'A' && c <= u'F') || (c >= u'0' && c <= u'9');
}
138
// Returns true if the hex digit \a c needs no uppercasing, i.e. it is a
// decimal digit or one of 'A'-'F' (all of which are below 0x60, while
// 'a'-'f' are above).
// Precondition: \a c is a hex digit; the result is meaningless otherwise.
static inline bool isUpperHex(char16_t c)
{
    return c < 0x60;
}
144
145static inline char16_t toUpperHex(char16_t c)
146{
147 return isUpperHex(c) ? c : c - 0x20;
148}
149
150static inline ushort decodeNibble(char16_t c)
151{
152 return c >= u'a' ? c - u'a' + 0xA : c >= u'A' ? c - u'A' + 0xA : c - u'0';
153}
154
155// if the sequence at input is 2*HEXDIG, returns its decoding
156// returns -1 if it isn't.
157// assumes that the range has been checked already
158static inline char16_t decodePercentEncoding(const char16_t *input)
159{
160 char16_t c1 = input[1];
161 char16_t c2 = input[2];
162 if (!isHex(c: c1) || !isHex(c: c2))
163 return char16_t(-1);
164 return decodeNibble(c: c1) << 4 | decodeNibble(c: c2);
165}
166
167static inline char16_t encodeNibble(ushort c)
168{
169 return QtMiscUtils::toHexUpper(value: c);
170}
171
172static void ensureDetached(QString &result, char16_t *&output, const char16_t *begin, const char16_t *input, const char16_t *end,
173 int add = 0)
174{
175 if (!output) {
176 // now detach
177 // create enough space if the rest of the string needed to be percent-encoded
178 int charsProcessed = input - begin;
179 int charsRemaining = end - input;
180 int spaceNeeded = end - begin + 2 * charsRemaining + add;
181 int origSize = result.size();
182 result.resize(size: origSize + spaceNeeded);
183
184 // we know that resize() above detached, so we bypass the reference count check
185 output = const_cast<char16_t *>(reinterpret_cast<const char16_t *>(result.constData()))
186 + origSize;
187
188 // copy the chars we've already processed
189 int i;
190 for (i = 0; i < charsProcessed; ++i)
191 output[i] = begin[i];
192 output += i;
193 }
194}
195
196namespace {
197struct QUrlUtf8Traits : public QUtf8BaseTraitsNoAscii
198{
199 // From RFC 3987:
200 // iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
201 //
202 // ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
203 // / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
204 // / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
205 // / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
206 // / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
207 // / %xD0000-DFFFD / %xE1000-EFFFD
208 //
209 // iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD
210 //
211 // That RFC allows iprivate only as part of iquery, but we don't know here
212 // whether we're looking at a query or another part of an URI, so we accept
213 // them too. The definition above excludes U+FFF0 to U+FFFD from appearing
214 // unencoded, but we see no reason for its exclusion, so we allow them to
215 // be decoded (and we need U+FFFD the replacement character to indicate
216 // failure to decode).
217 //
218 // That means we must disallow:
219 // * unpaired surrogates (QUtf8Functions takes care of that for us)
220 // * non-characters
221 static const bool allowNonCharacters = false;
222
223 // override: our "bytes" are three percent-encoded UTF-16 characters
224 static void appendByte(char16_t *&ptr, uchar b)
225 {
226 // b >= 0x80, by construction, so percent-encode
227 *ptr++ = '%';
228 *ptr++ = encodeNibble(c: b >> 4);
229 *ptr++ = encodeNibble(c: b & 0xf);
230 }
231
232 static uchar peekByte(const char16_t *ptr, qsizetype n = 0)
233 {
234 // decodePercentEncoding returns char16_t(-1) if it can't decode,
235 // which means we return 0xff, which is not a valid continuation byte.
236 // If ptr[i * 3] is not '%', we'll multiply by zero and return 0,
237 // also not a valid continuation byte (if it's '%', we multiply by 1).
238 return uchar(decodePercentEncoding(input: ptr + n * 3))
239 * uchar(ptr[n * 3] == '%');
240 }
241
242 static qptrdiff availableBytes(const char16_t *ptr, const char16_t *end)
243 {
244 return (end - ptr) / 3;
245 }
246
247 static void advanceByte(const char16_t *&ptr, int n = 1)
248 {
249 ptr += n * 3;
250 }
251};
252}
253
254// returns true if we performed an UTF-8 decoding
255static bool encodedUtf8ToUtf16(QString &result, char16_t *&output, const char16_t *begin,
256 const char16_t *&input, const char16_t *end, char16_t decoded)
257{
258 char32_t ucs4 = 0, *dst = &ucs4;
259 const char16_t *src = input + 3;// skip the %XX that yielded \a decoded
260 int charsNeeded = QUtf8Functions::fromUtf8<QUrlUtf8Traits>(b: decoded, dst, src, end);
261 if (charsNeeded < 0)
262 return false;
263
264 if (!QChar::requiresSurrogates(ucs4)) {
265 // UTF-8 decoded and no surrogates are required
266 // detach if necessary
267 // possibilities are: 6 chars (%XX%XX) -> one char; 9 chars (%XX%XX%XX) -> one char
268 ensureDetached(result, output, begin, input, end, add: -3 * charsNeeded + 1);
269 *output++ = ucs4;
270 } else {
271 // UTF-8 decoded to something that requires a surrogate pair
272 // compressing from %XX%XX%XX%XX (12 chars) to two
273 ensureDetached(result, output, begin, input, end, add: -10);
274 *output++ = QChar::highSurrogate(ucs4);
275 *output++ = QChar::lowSurrogate(ucs4);
276 }
277
278 input = src - 1;
279 return true;
280}
281
282static void unicodeToEncodedUtf8(QString &result, char16_t *&output, const char16_t *begin,
283 const char16_t *&input, const char16_t *end, char16_t decoded)
284{
285 // calculate the utf8 length and ensure enough space is available
286 int utf8len = QChar::isHighSurrogate(ucs4: decoded) ? 4 : decoded >= 0x800 ? 3 : 2;
287
288 // detach
289 if (!output) {
290 // we need 3 * utf8len for the encoded UTF-8 sequence
291 // but ensureDetached already adds 3 for the char we're processing
292 ensureDetached(result, output, begin, input, end, add: 3*utf8len - 3);
293 } else {
294 // verify that there's enough space or expand
295 int charsRemaining = end - input - 1; // not including this one
296 int pos = output - reinterpret_cast<const char16_t *>(result.constData());
297 int spaceRemaining = result.size() - pos;
298 if (spaceRemaining < 3*charsRemaining + 3*utf8len) {
299 // must resize
300 result.resize(size: result.size() + 3*utf8len);
301
302 // we know that resize() above detached, so we bypass the reference count check
303 output = const_cast<char16_t *>(reinterpret_cast<const char16_t *>(result.constData()));
304 output += pos;
305 }
306 }
307
308 ++input;
309 int res = QUtf8Functions::toUtf8<QUrlUtf8Traits>(u: decoded, dst&: output, src&: input, end);
310 --input;
311 if (res < 0) {
312 // bad surrogate pair sequence
313 // we will encode bad UTF-16 to UTF-8
314 // but they don't get decoded back
315
316 // first of three bytes
317 uchar c = 0xe0 | uchar(decoded >> 12);
318 *output++ = '%';
319 *output++ = 'E';
320 *output++ = encodeNibble(c: c & 0xf);
321
322 // second byte
323 c = 0x80 | (uchar(decoded >> 6) & 0x3f);
324 *output++ = '%';
325 *output++ = encodeNibble(c: c >> 4);
326 *output++ = encodeNibble(c: c & 0xf);
327
328 // third byte
329 c = 0x80 | (decoded & 0x3f);
330 *output++ = '%';
331 *output++ = encodeNibble(c: c >> 4);
332 *output++ = encodeNibble(c: c & 0xf);
333 }
334}
335
336static int recode(QString &result, const char16_t *begin, const char16_t *end,
337 QUrl::ComponentFormattingOptions encoding, const uchar *actionTable,
338 bool retryBadEncoding)
339{
340 const int origSize = result.size();
341 const char16_t *input = begin;
342 char16_t *output = nullptr;
343
344 EncodingAction action = EncodeCharacter;
345 for ( ; input != end; ++input) {
346 char16_t c;
347 // try a run where no change is necessary
348 for ( ; input != end; ++input) {
349 c = *input;
350 if (c < 0x20U)
351 action = EncodeCharacter;
352 if (c < 0x20U || c >= 0x80U) // also: (c - 0x20 < 0x60U)
353 goto non_trivial;
354 action = EncodingAction(actionTable[c - ' ']);
355 if (action == EncodeCharacter)
356 goto non_trivial;
357 if (output)
358 *output++ = c;
359 }
360 break;
361
362non_trivial:
363 char16_t decoded;
364 if (c == '%' && retryBadEncoding) {
365 // always write "%25"
366 ensureDetached(result, output, begin, input, end);
367 *output++ = '%';
368 *output++ = '2';
369 *output++ = '5';
370 continue;
371 } else if (c == '%') {
372 // check if the input is valid
373 if (input + 2 >= end || (decoded = decodePercentEncoding(input)) == char16_t(-1)) {
374 // not valid, retry
375 result.resize(size: origSize);
376 return recode(result, begin, end, encoding, actionTable, retryBadEncoding: true);
377 }
378
379 if (decoded >= 0x80) {
380 // decode the UTF-8 sequence
381 if (!(encoding & QUrl::EncodeUnicode) &&
382 encodedUtf8ToUtf16(result, output, begin, input, end, decoded))
383 continue;
384
385 // decoding the encoded UTF-8 failed
386 action = LeaveCharacter;
387 } else if (decoded >= 0x20) {
388 action = EncodingAction(actionTable[decoded - ' ']);
389 }
390 } else {
391 decoded = c;
392 if (decoded >= 0x80 && encoding & QUrl::EncodeUnicode) {
393 // encode the UTF-8 sequence
394 unicodeToEncodedUtf8(result, output, begin, input, end, decoded);
395 continue;
396 } else if (decoded >= 0x80) {
397 if (output)
398 *output++ = c;
399 continue;
400 }
401 }
402
403 // there are six possibilities:
404 // current \ action | DecodeCharacter | LeaveCharacter | EncodeCharacter
405 // decoded | 1:leave | 2:leave | 3:encode
406 // encoded | 4:decode | 5:leave | 6:leave
407 // cases 1 and 2 were handled before this section
408
409 if (c == '%' && action != DecodeCharacter) {
410 // cases 5 and 6: it's encoded and we're leaving it as it is
411 // except we're pedantic and we'll uppercase the hex
412 if (output || !isUpperHex(c: input[1]) || !isUpperHex(c: input[2])) {
413 ensureDetached(result, output, begin, input, end);
414 *output++ = '%';
415 *output++ = toUpperHex(c: *++input);
416 *output++ = toUpperHex(c: *++input);
417 }
418 } else if (c == '%' && action == DecodeCharacter) {
419 // case 4: we need to decode
420 ensureDetached(result, output, begin, input, end);
421 *output++ = decoded;
422 input += 2;
423 } else {
424 // must be case 3: we need to encode
425 ensureDetached(result, output, begin, input, end);
426 *output++ = '%';
427 *output++ = encodeNibble(c: c >> 4);
428 *output++ = encodeNibble(c: c & 0xf);
429 }
430 }
431
432 if (output) {
433 int len = output - reinterpret_cast<const char16_t *>(result.constData());
434 result.truncate(pos: len);
435 return len - origSize;
436 }
437 return 0;
438}
439
440/*
441 * Returns true if the input it checked (if it checked anything) is not
442 * encoded. A return of false indicates there's a percent at \a input that
443 * needs to be decoded.
444 */
445#ifdef __SSE2__
446static bool simdCheckNonEncoded(QChar *&output, const char16_t *&input, const char16_t *end)
447{
448# ifdef __AVX2__
449 const __m256i percents256 = _mm256_broadcastw_epi16(_mm_cvtsi32_si128('%'));
450 const __m128i percents = _mm256_castsi256_si128(percents256);
451# else
452 const __m128i percents = _mm_set1_epi16(w: '%');
453# endif
454
455 uint idx = 0;
456 quint32 mask = 0;
457 if (input + 16 <= end) {
458 qptrdiff offset = 0;
459 for ( ; input + offset + 16 <= end; offset += 16) {
460# ifdef __AVX2__
461 // do 32 bytes at a time using AVX2
462 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(input + offset));
463 __m256i comparison = _mm256_cmpeq_epi16(data, percents256);
464 mask = _mm256_movemask_epi8(comparison);
465 _mm256_storeu_si256(reinterpret_cast<__m256i *>(output + offset), data);
466# else
467 // do 32 bytes at a time using unrolled SSE2
468 __m128i data1 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(input + offset));
469 __m128i data2 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(input + offset + 8));
470 __m128i comparison1 = _mm_cmpeq_epi16(a: data1, b: percents);
471 __m128i comparison2 = _mm_cmpeq_epi16(a: data2, b: percents);
472 uint mask1 = _mm_movemask_epi8(a: comparison1);
473 uint mask2 = _mm_movemask_epi8(a: comparison2);
474
475 _mm_storeu_si128(p: reinterpret_cast<__m128i *>(output + offset), b: data1);
476 if (!mask1)
477 _mm_storeu_si128(p: reinterpret_cast<__m128i *>(output + offset + 8), b: data2);
478 mask = mask1 | (mask2 << 16);
479# endif
480
481 if (mask) {
482 idx = qCountTrailingZeroBits(v: mask) / 2;
483 break;
484 }
485 }
486
487 input += offset;
488 if (output)
489 output += offset;
490 } else if (input + 8 <= end) {
491 // do 16 bytes at a time
492 __m128i data = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(input));
493 __m128i comparison = _mm_cmpeq_epi16(a: data, b: percents);
494 mask = _mm_movemask_epi8(a: comparison);
495 _mm_storeu_si128(p: reinterpret_cast<__m128i *>(output), b: data);
496 idx = qCountTrailingZeroBits(v: quint16(mask)) / 2;
497 } else if (input + 4 <= end) {
498 // do 8 bytes only
499 __m128i data = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(input));
500 __m128i comparison = _mm_cmpeq_epi16(a: data, b: percents);
501 mask = _mm_movemask_epi8(a: comparison) & 0xffu;
502 _mm_storel_epi64(p: reinterpret_cast<__m128i *>(output), a: data);
503 idx = qCountTrailingZeroBits(v: quint8(mask)) / 2;
504 } else {
505 // no percents found (because we didn't check)
506 return true;
507 }
508
509 // advance to the next non-encoded
510 input += idx;
511 output += idx;
512
513 return !mask;
514}
515#else
516static bool simdCheckNonEncoded(...)
517{
518 return true;
519}
520#endif
521
522/*!
523 \since 5.0
524 \internal
525
526 This function decodes a percent-encoded string located in \a in
527 by appending each character to \a appendTo. It returns the number of
528 characters appended. Each percent-encoded sequence is decoded as follows:
529
530 \list
531 \li from %00 to %7F: the exact decoded value is appended;
532 \li from %80 to %FF: QChar::ReplacementCharacter is appended;
533 \li bad encoding: original input is copied to the output, undecoded.
534 \endlist
535
536 Given the above, it's important for the input to already have all UTF-8
537 percent sequences decoded by qt_urlRecode (that is, the input should not
538 have been processed with QUrl::EncodeUnicode).
539
540 The input should also be a valid percent-encoded sequence (the output of
541 qt_urlRecode is always valid).
542*/
543static qsizetype decode(QString &appendTo, QStringView in)
544{
545 const char16_t *begin = in.utf16();
546 const char16_t *end = begin + in.size();
547
548 // fast check whether there's anything to be decoded in the first place
549 const char16_t *input = QtPrivate::qustrchr(str: in, ch: '%');
550
551 if (Q_LIKELY(input == end))
552 return 0; // nothing to do, it was already decoded!
553
554 // detach
555 const int origSize = appendTo.size();
556 appendTo.resize(size: origSize + (end - begin));
557 QChar *output = appendTo.data() + origSize;
558 memcpy(dest: static_cast<void *>(output), src: static_cast<const void *>(begin), n: (input - begin) * sizeof(QChar));
559 output += input - begin;
560
561 while (input != end) {
562 // something was encoded
563 Q_ASSERT(*input == '%');
564
565 if (Q_UNLIKELY(end - input < 3 || !isHex(input[1]) || !isHex(input[2]))) {
566 // badly-encoded data
567 appendTo.resize(size: origSize + (end - begin));
568 memcpy(dest: static_cast<void *>(appendTo.begin() + origSize),
569 src: static_cast<const void *>(begin), n: (end - begin) * sizeof(*end));
570 return end - begin;
571 }
572
573 ++input;
574 *output++ = QChar::fromUcs2(c: decodeNibble(c: input[0]) << 4 | decodeNibble(c: input[1]));
575 if (output[-1].unicode() >= 0x80)
576 output[-1] = QChar::ReplacementCharacter;
577 input += 2;
578
579 // search for the next percent, copying from input to output
580 if (simdCheckNonEncoded(output, input, end)) {
581 while (input != end) {
582 const char16_t uc = *input;
583 if (uc == '%')
584 break;
585 *output++ = uc;
586 ++input;
587 }
588 }
589 }
590
591 const qsizetype len = output - appendTo.begin();
592 appendTo.truncate(pos: len);
593 return len - origSize;
594}
595
596template <size_t N>
597static void maskTable(uchar (&table)[N], const uchar (&mask)[N])
598{
599 for (size_t i = 0; i < N; ++i)
600 table[i] &= mask[i];
601}
602
603/*!
604 \internal
605
606 Recodes the string from \a begin to \a end. If any transformations are
607 done, append them to \a appendTo and return the number of characters added.
608 If no transformations were required, return 0.
609
610 The \a encoding option modifies the default behaviour:
611 \list
612 \li QUrl::DecodeReserved: if set, reserved characters will be decoded;
613 if unset, reserved characters will be encoded
614 \li QUrl::EncodeSpaces: if set, spaces will be encoded to "%20"; if unset, they will be " "
615 \li QUrl::EncodeUnicode: if set, characters above U+0080 will be encoded to their UTF-8
616 percent-encoded form; if unset, they will be decoded to UTF-16
617 \li QUrl::FullyDecoded: if set, this function will decode all percent-encoded sequences,
618 including that of the percent character. The resulting string
619 will not be percent-encoded anymore. Use with caution!
620 In this mode, the behaviour is undefined if the input string
621 contains any percent-encoding sequences above %80.
622 Also, the function will not correct bad % sequences.
623 \endlist
624
625 Other flags are ignored (including QUrl::EncodeReserved).
626
627 The \a tableModifications argument can be used to supply extra
628 modifications to the tables, to be applied after the flags above are
629 handled. It consists of a sequence of 16-bit values, where the low 8 bits
630 indicate the character in question and the high 8 bits are either \c
631 EncodeCharacter, \c LeaveCharacter or \c DecodeCharacter.
632
633 This function corrects percent-encoded errors by interpreting every '%' as
634 meaning "%25" (all percents in the same content).
635 */
636
637Q_AUTOTEST_EXPORT qsizetype
638qt_urlRecode(QString &appendTo, QStringView in,
639 QUrl::ComponentFormattingOptions encoding, const ushort *tableModifications)
640{
641 uchar actionTable[sizeof defaultActionTable];
642 if ((encoding & QUrl::FullyDecoded) == QUrl::FullyDecoded) {
643 return decode(appendTo, in);
644 }
645
646 memcpy(dest: actionTable, src: defaultActionTable, n: sizeof actionTable);
647 if (encoding & QUrl::DecodeReserved)
648 maskTable(table&: actionTable, mask: reservedMask);
649 if (!(encoding & QUrl::EncodeSpaces))
650 actionTable[0] = DecodeCharacter; // decode
651
652 if (tableModifications) {
653 for (const ushort *p = tableModifications; *p; ++p)
654 actionTable[uchar(*p) - ' '] = *p >> 8;
655 }
656
657 return recode(result&: appendTo, begin: reinterpret_cast<const char16_t *>(in.begin()),
658 end: reinterpret_cast<const char16_t *>(in.end()), encoding, actionTable, retryBadEncoding: false);
659}
660
661QT_END_NAMESPACE
662

// source code of qtbase/src/corelib/io/qurlrecode.cpp