qurlrecode.cpp source code [qtbase/src/corelib/io/qurlrecode.cpp]

1	// Copyright (C) 2016 Intel Corporation.
2	// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4	#include "qurl.h"
5	#include "private/qstringconverter_p.h"
6	#include "private/qtools_p.h"
7	#include "private/qsimd_p.h"
8
9	QT_BEGIN_NAMESPACE
10
// ### move to qurl_p.h
// What to do with a character (or with the character a %XX sequence decodes
// to) while recoding. The numeric values are stored directly in the action
// tables, so they must not change.
enum EncodingAction {
    DecodeCharacter = 0,    // emit the character itself
    LeaveCharacter = 1,     // keep whichever form (literal or %XX) the input used
    EncodeCharacter = 2     // emit the percent-encoded form
};
17
18	// From RFC 3896, Appendix A Collected ABNF for URI
19	// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
20	// reserved = gen-delims / sub-delims
21	// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
22	// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
23	// / "" / "+" / "," / ";" / "="*
24	static const uchar defaultActionTable[`96`] = {
25	`2`, // space
26	`1`, // '!' (sub-delim)
27	`2`, // '"'
28	`1`, // '#' (gen-delim)
29	`1`, // '$' (gen-delim)
30	`2`, // '%' (percent)
31	`1`, // '&' (gen-delim)
32	`1`, // "'" (sub-delim)
33	`1`, // '(' (sub-delim)
34	`1`, // ')' (sub-delim)
35	`1`, // '' (sub-delim)*
36	`1`, // '+' (sub-delim)
37	`1`, // ',' (sub-delim)
38	`0`, // '-' (unreserved)
39	`0`, // '.' (unreserved)
40	`1`, // '/' (gen-delim)
41
42	`0`, `0`, `0`, `0`, `0`, // '0' to '4' (unreserved)
43	`0`, `0`, `0`, `0`, `0`, // '5' to '9' (unreserved)
44	`1`, // ':' (gen-delim)
45	`1`, // ';' (sub-delim)
46	`2`, // '<'
47	`1`, // '=' (sub-delim)
48	`2`, // '>'
49	`1`, // '?' (gen-delim)
50
51	`1`, // '@' (gen-delim)
52	`0`, `0`, `0`, `0`, `0`, // 'A' to 'E' (unreserved)
53	`0`, `0`, `0`, `0`, `0`, // 'F' to 'J' (unreserved)
54	`0`, `0`, `0`, `0`, `0`, // 'K' to 'O' (unreserved)
55	`0`, `0`, `0`, `0`, `0`, // 'P' to 'T' (unreserved)
56	`0`, `0`, `0`, `0`, `0`, `0`, // 'U' to 'Z' (unreserved)
57	`1`, // '[' (gen-delim)
58	`2`, // '\'
59	`1`, // ']' (gen-delim)
60	`2`, // '^'
61	`0`, // '_' (unreserved)
62
63	`2`, // '`'
64	`0`, `0`, `0`, `0`, `0`, // 'a' to 'e' (unreserved)
65	`0`, `0`, `0`, `0`, `0`, // 'f' to 'j' (unreserved)
66	`0`, `0`, `0`, `0`, `0`, // 'k' to 'o' (unreserved)
67	`0`, `0`, `0`, `0`, `0`, // 'p' to 't' (unreserved)
68	`0`, `0`, `0`, `0`, `0`, `0`, // 'u' to 'z' (unreserved)
69	`2`, // '{'
70	`2`, // '\|'
71	`2`, // '}'
72	`0`, // '~' (unreserved)
73
74	`2` // BSKP
75	};
76
77	// mask tables, in negative polarity
78	// 0x00 if it belongs to this category
79	// 0xff if it doesn't
80
81	static const uchar reservedMask[`96`] = {
82	`0xff`, // space
83	`0xff`, // '!' (sub-delim)
84	`0x00`, // '"'
85	`0xff`, // '#' (gen-delim)
86	`0xff`, // '$' (gen-delim)
87	`0xff`, // '%' (percent)
88	`0xff`, // '&' (gen-delim)
89	`0xff`, // "'" (sub-delim)
90	`0xff`, // '(' (sub-delim)
91	`0xff`, // ')' (sub-delim)
92	`0xff`, // '' (sub-delim)*
93	`0xff`, // '+' (sub-delim)
94	`0xff`, // ',' (sub-delim)
95	`0xff`, // '-' (unreserved)
96	`0xff`, // '.' (unreserved)
97	`0xff`, // '/' (gen-delim)
98
99	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, // '0' to '4' (unreserved)
100	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, // '5' to '9' (unreserved)
101	`0xff`, // ':' (gen-delim)
102	`0xff`, // ';' (sub-delim)
103	`0x00`, // '<'
104	`0xff`, // '=' (sub-delim)
105	`0x00`, // '>'
106	`0xff`, // '?' (gen-delim)
107
108	`0xff`, // '@' (gen-delim)
109	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, // 'A' to 'E' (unreserved)
110	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, // 'F' to 'J' (unreserved)
111	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, // 'K' to 'O' (unreserved)
112	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, // 'P' to 'T' (unreserved)
113	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, // 'U' to 'Z' (unreserved)
114	`0xff`, // '[' (gen-delim)
115	`0x00`, // '\'
116	`0xff`, // ']' (gen-delim)
117	`0x00`, // '^'
118	`0xff`, // '_' (unreserved)
119
120	`0x00`, // '`'
121	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, // 'a' to 'e' (unreserved)
122	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, // 'f' to 'j' (unreserved)
123	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, // 'k' to 'o' (unreserved)
124	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, // 'p' to 't' (unreserved)
125	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, // 'u' to 'z' (unreserved)
126	`0x00`, // '{'
127	`0x00`, // '\|'
128	`0x00`, // '}'
129	`0xff`, // '~' (unreserved)
130
131	`0xff` // BSKP
132	};
133
// Returns true if \a c is an ASCII hexadecimal digit (0-9, a-f or A-F).
static inline bool isHex(char16_t c)
{
    return (c >= u'a' && c <= u'f') || (c >= u'A' && c <= u'F') || (c >= u'0' && c <= u'9');
}
138
// Returns true if the hex digit \a c needs no upper-casing, i.e. it is a
// decimal digit or one of 'A'-'F' (everything below 0x60 in ASCII).
static inline bool isUpperHex(char16_t c)
{
    // undefined behaviour if c isn't an hex char!
    return c < 0x60;
}
144
145	static inline char16_t toUpperHex(char16_t c)
146	{
147	return isUpperHex(c) ? c : c - `0x20`;
148	}
149
150	static inline ushort decodeNibble(char16_t c)
151	{
152	return c >= u`'a'` ? c - u`'a'` + `0xA` : c >= u`'A'` ? c - u`'A'` + `0xA` : c - u`'0'`;
153	}
154
155	// if the sequence at input is 2HEXDIG, returns its decoding*
156	// returns -1 if it isn't.
157	// assumes that the range has been checked already
158	static inline char16_t decodePercentEncoding(const char16_t *input)
159	{
160	char16_t c1 = input[`1`];
161	char16_t c2 = input[`2`];
162	if (!isHex(c: c1) \|\| !isHex(c: c2))
163	return char16_t(-`1`);
164	return decodeNibble(c: c1) << `4` \| decodeNibble(c: c2);
165	}
166
167	static inline char16_t encodeNibble(ushort c)
168	{
169	return QtMiscUtils::toHexUpper(value: c);
170	}
171
172	static void ensureDetached(QString &result, char16_t &output, const* char16_t begin, const* char16_t input, const* char16_t *end,
173	int add = `0`)
174	{
175	if (!output) {
176	// now detach
177	// create enough space if the rest of the string needed to be percent-encoded
178	int charsProcessed = input - begin;
179	int charsRemaining = end - input;
180	int spaceNeeded = end - begin + `2` * charsRemaining + add;
181	int origSize = result.size();
182	result.resize(size: origSize + spaceNeeded);
183
184	// we know that resize() above detached, so we bypass the reference count check
185	output = const_cast<char16_t >(reinterpret_cast<const* char16_t *>(result.constData()))
186	+ origSize;
187
188	// copy the chars we've already processed
189	int i;
190	for (i = `0`; i < charsProcessed; ++i)
191	output[i] = begin[i];
192	output += i;
193	}
194	}
195
196	namespace {
197	struct QUrlUtf8Traits : public QUtf8BaseTraitsNoAscii
198	{
199	// From RFC 3987:
200	// iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
201	//
202	// ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
203	// / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
204	// / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
205	// / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
206	// / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
207	// / %xD0000-DFFFD / %xE1000-EFFFD
208	//
209	// iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD
210	//
211	// That RFC allows iprivate only as part of iquery, but we don't know here
212	// whether we're looking at a query or another part of an URI, so we accept
213	// them too. The definition above excludes U+FFF0 to U+FFFD from appearing
214	// unencoded, but we see no reason for its exclusion, so we allow them to
215	// be decoded (and we need U+FFFD the replacement character to indicate
216	// failure to decode).
217	//
218	// That means we must disallow:
219	// unpaired surrogates (QUtf8Functions takes care of that for us)*
220	// non-characters*
221	static const bool allowNonCharacters = false;
222
223	// override: our "bytes" are three percent-encoded UTF-16 characters
224	static void appendByte(char16_t *&ptr, uchar b)
225	{
226	// b >= 0x80, by construction, so percent-encode
227	*ptr++ = `'%'`;
228	*ptr++ = encodeNibble(c: b >> `4`);
229	*ptr++ = encodeNibble(c: b & `0xf`);
230	}
231
232	static uchar peekByte(const char16_t *ptr, qsizetype n = `0`)
233	{
234	// decodePercentEncoding returns char16_t(-1) if it can't decode,
235	// which means we return 0xff, which is not a valid continuation byte.
236	// If ptr[i 3] is not '%', we'll multiply by zero and return 0,*
237	// also not a valid continuation byte (if it's '%', we multiply by 1).
238	return uchar(decodePercentEncoding(input: ptr + n * `3`))
239	* uchar(ptr[n * `3`] == `'%'`);
240	}
241
242	static qptrdiff availableBytes(const char16_t ptr, const* char16_t *end)
243	{
244	return (end - ptr) / `3`;
245	}
246
247	static void advanceByte(const char16_t &ptr, int* n = `1`)
248	{
249	ptr += n * `3`;
250	}
251	};
252	}
253
254	// returns true if we performed an UTF-8 decoding
255	static bool encodedUtf8ToUtf16(QString &result, char16_t &output, const* char16_t *begin,
256	const char16_t &input, const* char16_t end, char16_t* decoded)
257	{
258	char32_t ucs4 = `0`, *dst = &ucs4;
259	const char16_t src = input + `3`;// skip the %XX that yielded \a decoded*
260	int charsNeeded = QUtf8Functions::fromUtf8<QUrlUtf8Traits>(b: decoded, dst, src, end);
261	if (charsNeeded < `0`)
262	return false;
263
264	if (!QChar::requiresSurrogates(ucs4)) {
265	// UTF-8 decoded and no surrogates are required
266	// detach if necessary
267	// possibilities are: 6 chars (%XX%XX) -> one char; 9 chars (%XX%XX%XX) -> one char
268	ensureDetached(result, output, begin, input, end, add: -`3` * charsNeeded + `1`);
269	*output++ = ucs4;
270	} else {
271	// UTF-8 decoded to something that requires a surrogate pair
272	// compressing from %XX%XX%XX%XX (12 chars) to two
273	ensureDetached(result, output, begin, input, end, add: -`10`);
274	*output++ = QChar::highSurrogate(ucs4);
275	*output++ = QChar::lowSurrogate(ucs4);
276	}
277
278	input = src - `1`;
279	return true;
280	}
281
282	static void unicodeToEncodedUtf8(QString &result, char16_t &output, const* char16_t *begin,
283	const char16_t &input, const* char16_t end, char16_t* decoded)
284	{
285	// calculate the utf8 length and ensure enough space is available
286	int utf8len = QChar::isHighSurrogate(ucs4: decoded) ? `4` : decoded >= `0x800` ? `3` : `2`;
287
288	// detach
289	if (!output) {
290	// we need 3 utf8len for the encoded UTF-8 sequence*
291	// but ensureDetached already adds 3 for the char we're processing
292	ensureDetached(result, output, begin, input, end, add: `3`*utf8len - `3`);
293	} else {
294	// verify that there's enough space or expand
295	int charsRemaining = end - input - `1`; // not including this one
296	int pos = output - reinterpret_cast<const char16_t *>(result.constData());
297	int spaceRemaining = result.size() - pos;
298	if (spaceRemaining < `3`charsRemaining + `3`utf8len) {
299	// must resize
300	result.resize(size: result.size() + `3`*utf8len);
301
302	// we know that resize() above detached, so we bypass the reference count check
303	output = const_cast<char16_t >(reinterpret_cast<const* char16_t *>(result.constData()));
304	output += pos;
305	}
306	}
307
308	++input;
309	int res = QUtf8Functions::toUtf8<QUrlUtf8Traits>(u: decoded, dst&: output, src&: input, end);
310	--input;
311	if (res < `0`) {
312	// bad surrogate pair sequence
313	// we will encode bad UTF-16 to UTF-8
314	// but they don't get decoded back
315
316	// first of three bytes
317	uchar c = `0xe0` \| uchar(decoded >> `12`);
318	*output++ = `'%'`;
319	*output++ = `'E'`;
320	*output++ = encodeNibble(c: c & `0xf`);
321
322	// second byte
323	c = `0x80` \| (uchar(decoded >> `6`) & `0x3f`);
324	*output++ = `'%'`;
325	*output++ = encodeNibble(c: c >> `4`);
326	*output++ = encodeNibble(c: c & `0xf`);
327
328	// third byte
329	c = `0x80` \| (decoded & `0x3f`);
330	*output++ = `'%'`;
331	*output++ = encodeNibble(c: c >> `4`);
332	*output++ = encodeNibble(c: c & `0xf`);
333	}
334	}
335
336	static int recode(QString &result, const char16_t begin, const* char16_t *end,
337	QUrl::ComponentFormattingOptions encoding, const uchar *actionTable,
338	bool retryBadEncoding)
339	{
340	const int origSize = result.size();
341	const char16_t *input = begin;
342	char16_t output = nullptr*;
343
344	EncodingAction action = EncodeCharacter;
345	for ( ; input != end; ++input) {
346	char16_t c;
347	// try a run where no change is necessary
348	for ( ; input != end; ++input) {
349	c = *input;
350	if (c < `0x20U`)
351	action = EncodeCharacter;
352	if (c < `0x20U` \|\| c >= `0x80U`) // also: (c - 0x20 < 0x60U)
353	goto non_trivial;
354	action = EncodingAction(actionTable[c - `' '`]);
355	if (action == EncodeCharacter)
356	goto non_trivial;
357	if (output)
358	*output++ = c;
359	}
360	break;
361
362	non_trivial:
363	char16_t decoded;
364	if (c == `'%'` && retryBadEncoding) {
365	// always write "%25"
366	ensureDetached(result, output, begin, input, end);
367	*output++ = `'%'`;
368	*output++ = `'2'`;
369	*output++ = `'5'`;
370	continue;
371	} else if (c == `'%'`) {
372	// check if the input is valid
373	if (input + `2` >= end \|\| (decoded = decodePercentEncoding(input)) == char16_t(-`1`)) {
374	// not valid, retry
375	result.resize(size: origSize);
376	return recode(result, begin, end, encoding, actionTable, retryBadEncoding: true);
377	}
378
379	if (decoded >= `0x80`) {
380	// decode the UTF-8 sequence
381	if (!(encoding & QUrl::EncodeUnicode) &&
382	encodedUtf8ToUtf16(result, output, begin, input, end, decoded))
383	continue;
384
385	// decoding the encoded UTF-8 failed
386	action = LeaveCharacter;
387	} else if (decoded >= `0x20`) {
388	action = EncodingAction(actionTable[decoded - `' '`]);
389	}
390	} else {
391	decoded = c;
392	if (decoded >= `0x80` && encoding & QUrl::EncodeUnicode) {
393	// encode the UTF-8 sequence
394	unicodeToEncodedUtf8(result, output, begin, input, end, decoded);
395	continue;
396	} else if (decoded >= `0x80`) {
397	if (output)
398	*output++ = c;
399	continue;
400	}
401	}
402
403	// there are six possibilities:
404	// current \ action \| DecodeCharacter \| LeaveCharacter \| EncodeCharacter
405	// decoded \| 1:leave \| 2:leave \| 3:encode
406	// encoded \| 4:decode \| 5:leave \| 6:leave
407	// cases 1 and 2 were handled before this section
408
409	if (c == `'%'` && action != DecodeCharacter) {
410	// cases 5 and 6: it's encoded and we're leaving it as it is
411	// except we're pedantic and we'll uppercase the hex
412	if (output \|\| !isUpperHex(c: input[`1`]) \|\| !isUpperHex(c: input[`2`])) {
413	ensureDetached(result, output, begin, input, end);
414	*output++ = `'%'`;
415	output++ = toUpperHex(c: ++input);
416	output++ = toUpperHex(c: ++input);
417	}
418	} else if (c == `'%'` && action == DecodeCharacter) {
419	// case 4: we need to decode
420	ensureDetached(result, output, begin, input, end);
421	*output++ = decoded;
422	input += `2`;
423	} else {
424	// must be case 3: we need to encode
425	ensureDetached(result, output, begin, input, end);
426	*output++ = `'%'`;
427	*output++ = encodeNibble(c: c >> `4`);
428	*output++ = encodeNibble(c: c & `0xf`);
429	}
430	}
431
432	if (output) {
433	int len = output - reinterpret_cast<const char16_t *>(result.constData());
434	result.truncate(pos: len);
435	return len - origSize;
436	}
437	return `0`;
438	}
439
440	/*
441	* Returns true if the input it checked (if it checked anything) is not
442	* encoded. A return of false indicates there's a percent at \a input that
443	* needs to be decoded.
444	*/
445	#ifdef __SSE2__
446	static bool simdCheckNonEncoded(QChar &output, const* char16_t &input, const* char16_t *end)
447	{
448	# ifdef __AVX2__
449	const __m256i percents256 = _mm256_broadcastw_epi16(_mm_cvtsi32_si128(`'%'`));
450	const __m128i percents = _mm256_castsi256_si128(percents256);
451	# else
452	const __m128i percents = _mm_set1_epi16(w: `'%'`);
453	# endif
454
455	uint idx = `0`;
456	quint32 mask = `0`;
457	if (input + `16` <= end) {
458	qptrdiff offset = `0`;
459	for ( ; input + offset + `16` <= end; offset += `16`) {
460	# ifdef __AVX2__
461	// do 32 bytes at a time using AVX2
462	__m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(input + offset));
463	__m256i comparison = _mm256_cmpeq_epi16(data, percents256);
464	mask = _mm256_movemask_epi8(comparison);
465	_mm256_storeu_si256(reinterpret_cast<__m256i *>(output + offset), data);
466	# else
467	// do 32 bytes at a time using unrolled SSE2
468	__m128i data1 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(input + offset));
469	__m128i data2 = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(input + offset + `8`));
470	__m128i comparison1 = _mm_cmpeq_epi16(a: data1, b: percents);
471	__m128i comparison2 = _mm_cmpeq_epi16(a: data2, b: percents);
472	uint mask1 = _mm_movemask_epi8(a: comparison1);
473	uint mask2 = _mm_movemask_epi8(a: comparison2);
474
475	_mm_storeu_si128(p: reinterpret_cast<__m128i *>(output + offset), b: data1);
476	if (!mask1)
477	_mm_storeu_si128(p: reinterpret_cast<__m128i *>(output + offset + `8`), b: data2);
478	mask = mask1 \| (mask2 << `16`);
479	# endif
480
481	if (mask) {
482	idx = qCountTrailingZeroBits(v: mask) / `2`;
483	break;
484	}
485	}
486
487	input += offset;
488	if (output)
489	output += offset;
490	} else if (input + `8` <= end) {
491	// do 16 bytes at a time
492	__m128i data = _mm_loadu_si128(p: reinterpret_cast<const __m128i *>(input));
493	__m128i comparison = _mm_cmpeq_epi16(a: data, b: percents);
494	mask = _mm_movemask_epi8(a: comparison);
495	_mm_storeu_si128(p: reinterpret_cast<__m128i *>(output), b: data);
496	idx = qCountTrailingZeroBits(v: quint16(mask)) / `2`;
497	} else if (input + `4` <= end) {
498	// do 8 bytes only
499	__m128i data = _mm_loadl_epi64(p: reinterpret_cast<const __m128i *>(input));
500	__m128i comparison = _mm_cmpeq_epi16(a: data, b: percents);
501	mask = _mm_movemask_epi8(a: comparison) & `0xffu`;
502	_mm_storel_epi64(p: reinterpret_cast<__m128i *>(output), a: data);
503	idx = qCountTrailingZeroBits(v: quint8(mask)) / `2`;
504	} else {
505	// no percents found (because we didn't check)
506	return true;
507	}
508
509	// advance to the next non-encoded
510	input += idx;
511	output += idx;
512
513	return !mask;
514	}
515	#else
516	static bool simdCheckNonEncoded(...)
517	{
518	return true;
519	}
520	#endif
521
522	/!*
523	\since 5.0
524	\internal
525
526	This function decodes a percent-encoded string located in \a in
527	by appending each character to \a appendTo. It returns the number of
528	characters appended. Each percent-encoded sequence is decoded as follows:
529
530	\list
531	\li from %00 to %7F: the exact decoded value is appended;
532	\li from %80 to %FF: QChar::ReplacementCharacter is appended;
533	\li bad encoding: original input is copied to the output, undecoded.
534	\endlist
535
536	Given the above, it's important for the input to already have all UTF-8
537	percent sequences decoded by qt_urlRecode (that is, the input should not
538	have been processed with QUrl::EncodeUnicode).
539
540	The input should also be a valid percent-encoded sequence (the output of
541	qt_urlRecode is always valid).
542	*/
543	static qsizetype decode(QString &appendTo, QStringView in)
544	{
545	const char16_t *begin = in.utf16();
546	const char16_t *end = begin + in.size();
547
548	// fast check whether there's anything to be decoded in the first place
549	const char16_t *input = QtPrivate::qustrchr(str: in, ch: `'%'`);
550
551	if (Q_LIKELY(input == end))
552	return `0`; // nothing to do, it was already decoded!
553
554	// detach
555	const int origSize = appendTo.size();
556	appendTo.resize(size: origSize + (end - begin));
557	QChar *output = appendTo.data() + origSize;
558	memcpy(dest: static_cast<void >(output), src: static_cast<const* void >(begin), n: (input - begin) sizeof(QChar));
559	output += input - begin;
560
561	while (input != end) {
562	// something was encoded
563	Q_ASSERT(*input == `'%'`);
564
565	if (Q_UNLIKELY(end - input < `3` \|\| !isHex(input[`1`]) \|\| !isHex(input[`2`]))) {
566	// badly-encoded data
567	appendTo.resize(size: origSize + (end - begin));
568	memcpy(dest: static_cast<void *>(appendTo.begin() + origSize),
569	src: static_cast<const void >(begin), n: (end - begin) sizeof(*end));
570	return end - begin;
571	}
572
573	++input;
574	*output++ = QChar::fromUcs2(c: decodeNibble(c: input[`0`]) << `4` \| decodeNibble(c: input[`1`]));
575	if (output[-`1`].unicode() >= `0x80`)
576	output[-`1`] = QChar::ReplacementCharacter;
577	input += `2`;
578
579	// search for the next percent, copying from input to output
580	if (simdCheckNonEncoded(output, input, end)) {
581	while (input != end) {
582	const char16_t uc = *input;
583	if (uc == `'%'`)
584	break;
585	*output++ = uc;
586	++input;
587	}
588	}
589	}
590
591	const qsizetype len = output - appendTo.begin();
592	appendTo.truncate(pos: len);
593	return len - origSize;
594	}
595
596	template <size_t N>
597	static void maskTable(uchar (&table)[N], const uchar (&mask)[N])
598	{
599	for (size_t i = `0`; i < N; ++i)
600	table[i] &= mask[i];
601	}
602
603	/!*
604	\internal
605
606	Recodes the string from \a begin to \a end. If any transformations are
607	done, append them to \a appendTo and return the number of characters added.
608	If no transformations were required, return 0.
609
610	The \a encoding option modifies the default behaviour:
611	\list
612	\li QUrl::DecodeReserved: if set, reserved characters will be decoded;
613	if unset, reserved characters will be encoded
614	\li QUrl::EncodeSpaces: if set, spaces will be encoded to "%20"; if unset, they will be " "
615	\li QUrl::EncodeUnicode: if set, characters above U+0080 will be encoded to their UTF-8
616	percent-encoded form; if unset, they will be decoded to UTF-16
617	\li QUrl::FullyDecoded: if set, this function will decode all percent-encoded sequences,
618	including that of the percent character. The resulting string
619	will not be percent-encoded anymore. Use with caution!
620	In this mode, the behaviour is undefined if the input string
621	contains any percent-encoding sequences above %80.
622	Also, the function will not correct bad % sequences.
623	\endlist
624
625	Other flags are ignored (including QUrl::EncodeReserved).
626
627	The \a tableModifications argument can be used to supply extra
628	modifications to the tables, to be applied after the flags above are
629	handled. It consists of a sequence of 16-bit values, where the low 8 bits
630	indicate the character in question and the high 8 bits are either \c
631	EncodeCharacter, \c LeaveCharacter or \c DecodeCharacter.
632
633	This function corrects percent-encoded errors by interpreting every '%' as
634	meaning "%25" (all percents in the same content).
635	*/
636
637	Q_AUTOTEST_EXPORT qsizetype
638	qt_urlRecode(QString &appendTo, QStringView in,
639	QUrl::ComponentFormattingOptions encoding, const ushort *tableModifications)
640	{
641	uchar actionTable[sizeof defaultActionTable];
642	if ((encoding & QUrl::FullyDecoded) == QUrl::FullyDecoded) {
643	return decode(appendTo, in);
644	}
645
646	memcpy(dest: actionTable, src: defaultActionTable, n: sizeof actionTable);
647	if (encoding & QUrl::DecodeReserved)
648	maskTable(table&: actionTable, mask: reservedMask);
649	if (!(encoding & QUrl::EncodeSpaces))
650	actionTable[`0`] = DecodeCharacter; // decode
651
652	if (tableModifications) {
653	for (const ushort p = tableModifications; p; ++p)
654	actionTable[uchar(p) - `' '`] = p >> `8`;
655	}
656
657	return recode(result&: appendTo, begin: reinterpret_cast<const char16_t *>(in.begin()),
658	end: reinterpret_cast<const char16_t >(in.end()), encoding, actionTable, retryBadEncoding: false*);
659	}
660
661	QT_END_NAMESPACE
662

Provided by KDAB

Definitions

source code of qtbase/src/corelib/io/qurlrecode.cpp