1 | // Copyright 2013-2016 The rust-url developers. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
6 | // option. This file may not be copied, modified, or distributed |
7 | // except according to those terms. |
8 | |
9 | //! Parser and serializer for the [`application/x-www-form-urlencoded` syntax]( |
10 | //! http://url.spec.whatwg.org/#application/x-www-form-urlencoded), |
11 | //! as used by HTML forms. |
12 | //! |
//! Converts between a string (such as a URL’s query string)
14 | //! and a sequence of (name, value) pairs. |
15 | #![no_std ] |
16 | |
17 | // For forwards compatibility |
18 | #[cfg (feature = "std" )] |
19 | extern crate std as _; |
20 | |
21 | extern crate alloc; |
22 | |
23 | #[cfg (not(feature = "alloc" ))] |
24 | compile_error!("the `alloc` feature must currently be enabled" ); |
25 | |
26 | use alloc::borrow::{Borrow, Cow, ToOwned}; |
27 | use alloc::string::String; |
28 | use core::str; |
29 | use percent_encoding::{percent_decode, percent_encode_byte}; |
30 | |
31 | /// Convert a byte string in the `application/x-www-form-urlencoded` syntax |
32 | /// into a iterator of (name, value) pairs. |
33 | /// |
34 | /// Use `parse(input.as_bytes())` to parse a `&str` string. |
35 | /// |
36 | /// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be |
37 | /// converted to `[("#first", "%try%")]`. |
38 | #[inline ] |
39 | pub fn parse(input: &[u8]) -> Parse<'_> { |
40 | Parse { input } |
41 | } |
/// The return type of `parse()`.
///
/// An iterator over the (name, value) pairs that have not been consumed yet.
/// It is cheap to copy because it only holds a slice of the original input.
#[derive(Copy, Clone, Debug)]
pub struct Parse<'a> {
    /// Input that has not been parsed yet; it shrinks from the front as
    /// pairs are yielded.
    input: &'a [u8],
}
47 | |
48 | impl<'a> Iterator for Parse<'a> { |
49 | type Item = (Cow<'a, str>, Cow<'a, str>); |
50 | |
51 | fn next(&mut self) -> Option<Self::Item> { |
52 | loop { |
53 | if self.input.is_empty() { |
54 | return None; |
55 | } |
56 | let mut split2: SplitN<'_, u8, impl Fn(&u8) -> …> = self.input.splitn(n:2, |&b: u8| b == b'&' ); |
57 | let sequence: &[u8] = split2.next().unwrap(); |
58 | self.input = split2.next().unwrap_or(&[][..]); |
59 | if sequence.is_empty() { |
60 | continue; |
61 | } |
62 | let mut split2: SplitN<'_, u8, impl Fn(&u8) -> …> = sequence.splitn(n:2, |&b: u8| b == b'=' ); |
63 | let name: &[u8] = split2.next().unwrap(); |
64 | let value: &[u8] = split2.next().unwrap_or(&[][..]); |
65 | return Some((decode(input:name), decode(input:value))); |
66 | } |
67 | } |
68 | } |
69 | |
70 | fn decode(input: &[u8]) -> Cow<'_, str> { |
71 | let replaced: Cow<'_, [u8]> = replace_plus(input); |
72 | decode_utf8_lossy(input:match percent_decode(&replaced).into() { |
73 | Cow::Owned(vec: Vec) => Cow::Owned(vec), |
74 | Cow::Borrowed(_) => replaced, |
75 | }) |
76 | } |
77 | |
/// Replace every b'+' with b' '.
///
/// The input is returned borrowed when it contains no `+`; otherwise a
/// modified copy is returned.
fn replace_plus(input: &[u8]) -> Cow<'_, [u8]> {
    if !input.contains(&b'+') {
        return Cow::Borrowed(input);
    }
    Cow::Owned(
        input
            .iter()
            .map(|&byte| if byte == b'+' { b' ' } else { byte })
            .collect(),
    )
}
94 | |
95 | impl<'a> Parse<'a> { |
96 | /// Return a new iterator that yields pairs of `String` instead of pairs of `Cow<str>`. |
97 | pub fn into_owned(self) -> ParseIntoOwned<'a> { |
98 | ParseIntoOwned { inner: self } |
99 | } |
100 | } |
101 | |
102 | /// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow<str>`. |
103 | pub struct ParseIntoOwned<'a> { |
104 | inner: Parse<'a>, |
105 | } |
106 | |
107 | impl<'a> Iterator for ParseIntoOwned<'a> { |
108 | type Item = (String, String); |
109 | |
110 | fn next(&mut self) -> Option<Self::Item> { |
111 | self.inner |
112 | .next() |
113 | .map(|(k: Cow<'_, str>, v: Cow<'_, str>)| (k.into_owned(), v.into_owned())) |
114 | } |
115 | } |
116 | |
117 | /// The [`application/x-www-form-urlencoded` byte serializer]( |
118 | /// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer). |
119 | /// |
120 | /// Return an iterator of `&str` slices. |
121 | pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> { |
122 | ByteSerialize { bytes: input } |
123 | } |
124 | |
/// The iterator returned by `byte_serialize()`.
#[derive(Debug)]
pub struct ByteSerialize<'a> {
    /// Bytes that have not been serialized yet.
    bytes: &'a [u8],
}
130 | |
/// Whether `byte` is emitted as-is by the byte serializer.
///
/// That is the case for ASCII letters and digits and for `*`, `-`, `.` and `_`.
fn byte_serialized_unchanged(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'*' | b'-' | b'.' | b'_')
}
134 | |
135 | impl<'a> Iterator for ByteSerialize<'a> { |
136 | type Item = &'a str; |
137 | |
138 | fn next(&mut self) -> Option<&'a str> { |
139 | if let Some((&first, tail)) = self.bytes.split_first() { |
140 | if !byte_serialized_unchanged(first) { |
141 | self.bytes = tail; |
142 | return Some(if first == b' ' { |
143 | "+" |
144 | } else { |
145 | percent_encode_byte(first) |
146 | }); |
147 | } |
148 | let position = tail.iter().position(|&b| !byte_serialized_unchanged(b)); |
149 | let (unchanged_slice, remaining) = match position { |
150 | // 1 for first_byte + i unchanged in tail |
151 | Some(i) => self.bytes.split_at(1 + i), |
152 | None => (self.bytes, &[][..]), |
153 | }; |
154 | self.bytes = remaining; |
155 | // This unsafe is appropriate because we have already checked these |
156 | // bytes in byte_serialized_unchanged, which checks for a subset |
157 | // of UTF-8. So we know these bytes are valid UTF-8, and doing |
158 | // another UTF-8 check would be wasteful. |
159 | Some(unsafe { str::from_utf8_unchecked(unchanged_slice) }) |
160 | } else { |
161 | None |
162 | } |
163 | } |
164 | |
165 | fn size_hint(&self) -> (usize, Option<usize>) { |
166 | if self.bytes.is_empty() { |
167 | (0, Some(0)) |
168 | } else { |
169 | (1, Some(self.bytes.len())) |
170 | } |
171 | } |
172 | } |
173 | |
174 | /// The [`application/x-www-form-urlencoded` serializer]( |
175 | /// https://url.spec.whatwg.org/#concept-urlencoded-serializer). |
176 | pub struct Serializer<'a, T: Target> { |
177 | target: Option<T>, |
178 | start_position: usize, |
179 | encoding: EncodingOverride<'a>, |
180 | } |
181 | |
/// An output destination for `Serializer`.
pub trait Target {
    /// What `finish` hands back to the caller.
    type Finished;

    /// Give mutable access to the string that serialized pairs are appended to.
    fn as_mut_string(&mut self) -> &mut String;

    /// Consume the target and produce the finished value.
    fn finish(self) -> Self::Finished;
}
187 | |
188 | impl Target for String { |
189 | fn as_mut_string(&mut self) -> &mut String { |
190 | self |
191 | } |
192 | fn finish(self) -> Self { |
193 | self |
194 | } |
195 | type Finished = Self; |
196 | } |
197 | |
198 | impl<'a> Target for &'a mut String { |
199 | fn as_mut_string(&mut self) -> &mut String { |
200 | self |
201 | } |
202 | fn finish(self) -> Self { |
203 | self |
204 | } |
205 | type Finished = Self; |
206 | } |
207 | |
208 | impl<'a, T: Target> Serializer<'a, T> { |
209 | /// Create a new `application/x-www-form-urlencoded` serializer for the given target. |
210 | /// |
211 | /// If the target is non-empty, |
212 | /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax. |
213 | pub fn new(target: T) -> Self { |
214 | Self::for_suffix(target, 0) |
215 | } |
216 | |
217 | /// Create a new `application/x-www-form-urlencoded` serializer |
218 | /// for a suffix of the given target. |
219 | /// |
220 | /// If that suffix is non-empty, |
221 | /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax. |
222 | pub fn for_suffix(mut target: T, start_position: usize) -> Self { |
223 | if target.as_mut_string().len() < start_position { |
224 | panic!( |
225 | "invalid length {} for target of length {}" , |
226 | start_position, |
227 | target.as_mut_string().len() |
228 | ); |
229 | } |
230 | |
231 | Serializer { |
232 | target: Some(target), |
233 | start_position, |
234 | encoding: None, |
235 | } |
236 | } |
237 | |
238 | /// Remove any existing name/value pair. |
239 | /// |
240 | /// Panics if called after `.finish()`. |
241 | pub fn clear(&mut self) -> &mut Self { |
242 | string(&mut self.target).truncate(self.start_position); |
243 | self |
244 | } |
245 | |
246 | /// Set the character encoding to be used for names and values before percent-encoding. |
247 | pub fn encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self { |
248 | self.encoding = new; |
249 | self |
250 | } |
251 | |
252 | /// Serialize and append a name/value pair. |
253 | /// |
254 | /// Panics if called after `.finish()`. |
255 | pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self { |
256 | append_pair( |
257 | string(&mut self.target), |
258 | self.start_position, |
259 | self.encoding, |
260 | name, |
261 | value, |
262 | ); |
263 | self |
264 | } |
265 | |
266 | /// Serialize and append a name of parameter without any value. |
267 | /// |
268 | /// Panics if called after `.finish()`. |
269 | pub fn append_key_only(&mut self, name: &str) -> &mut Self { |
270 | append_key_only( |
271 | string(&mut self.target), |
272 | self.start_position, |
273 | self.encoding, |
274 | name, |
275 | ); |
276 | self |
277 | } |
278 | |
279 | /// Serialize and append a number of name/value pairs. |
280 | /// |
281 | /// This simply calls `append_pair` repeatedly. |
282 | /// This can be more convenient, so the user doesn’t need to introduce a block |
283 | /// to limit the scope of `Serializer`’s borrow of its string. |
284 | /// |
285 | /// Panics if called after `.finish()`. |
286 | pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self |
287 | where |
288 | I: IntoIterator, |
289 | I::Item: Borrow<(K, V)>, |
290 | K: AsRef<str>, |
291 | V: AsRef<str>, |
292 | { |
293 | { |
294 | let string = string(&mut self.target); |
295 | for pair in iter { |
296 | let (k, v) = pair.borrow(); |
297 | append_pair( |
298 | string, |
299 | self.start_position, |
300 | self.encoding, |
301 | k.as_ref(), |
302 | v.as_ref(), |
303 | ); |
304 | } |
305 | } |
306 | self |
307 | } |
308 | |
309 | /// Serialize and append a number of names without values. |
310 | /// |
311 | /// This simply calls `append_key_only` repeatedly. |
312 | /// This can be more convenient, so the user doesn’t need to introduce a block |
313 | /// to limit the scope of `Serializer`’s borrow of its string. |
314 | /// |
315 | /// Panics if called after `.finish()`. |
316 | pub fn extend_keys_only<I, K>(&mut self, iter: I) -> &mut Self |
317 | where |
318 | I: IntoIterator, |
319 | I::Item: Borrow<K>, |
320 | K: AsRef<str>, |
321 | { |
322 | { |
323 | let string = string(&mut self.target); |
324 | for key in iter { |
325 | let k = key.borrow().as_ref(); |
326 | append_key_only(string, self.start_position, self.encoding, k); |
327 | } |
328 | } |
329 | self |
330 | } |
331 | |
332 | /// If this serializer was constructed with a string, take and return that string. |
333 | /// |
334 | /// ```rust |
335 | /// use form_urlencoded; |
336 | /// let encoded: String = form_urlencoded::Serializer::new(String::new()) |
337 | /// .append_pair("foo" , "bar & baz" ) |
338 | /// .append_pair("saison" , "Été+hiver" ) |
339 | /// .finish(); |
340 | /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver" ); |
341 | /// ``` |
342 | /// |
343 | /// Panics if called more than once. |
344 | pub fn finish(&mut self) -> T::Finished { |
345 | self.target |
346 | .take() |
347 | .expect("url::form_urlencoded::Serializer double finish" ) |
348 | .finish() |
349 | } |
350 | } |
351 | |
/// Append `&` to `string` if something has already been written at or after
/// `start_position`, so that the next pair is separated from the previous one.
fn append_separator_if_needed(string: &mut String, start_position: usize) {
    let has_previous_pair = start_position < string.len();
    if has_previous_pair {
        string.push('&');
    }
}
357 | |
358 | fn string<T: Target>(target: &mut Option<T>) -> &mut String { |
359 | target&mut T |
360 | .as_mut() |
361 | .expect(msg:"url::form_urlencoded::Serializer finished" ) |
362 | .as_mut_string() |
363 | } |
364 | |
365 | fn append_pair( |
366 | string: &mut String, |
367 | start_position: usize, |
368 | encoding: EncodingOverride<'_>, |
369 | name: &str, |
370 | value: &str, |
371 | ) { |
372 | append_separator_if_needed(string, start_position); |
373 | append_encoded(s:name, string, encoding); |
374 | string.push(ch:'=' ); |
375 | append_encoded(s:value, string, encoding); |
376 | } |
377 | |
378 | fn append_key_only( |
379 | string: &mut String, |
380 | start_position: usize, |
381 | encoding: EncodingOverride, |
382 | name: &str, |
383 | ) { |
384 | append_separator_if_needed(string, start_position); |
385 | append_encoded(s:name, string, encoding); |
386 | } |
387 | |
388 | fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride<'_>) { |
389 | string.extend(iter:byte_serialize(&encode(encoding_override:encoding, input:s))) |
390 | } |
391 | |
392 | pub(crate) fn encode<'a>(encoding_override: EncodingOverride<'_>, input: &'a str) -> Cow<'a, [u8]> { |
393 | if let Some(o: &dyn Fn(&str) -> Cow<'_, …>) = encoding_override { |
394 | return o(input); |
395 | } |
396 | input.as_bytes().into() |
397 | } |
398 | |
/// Convert bytes to a string, replacing invalid UTF-8 sequences with U+FFFD.
///
/// Borrowed input stays borrowed when it is already valid UTF-8. Owned input
/// that is valid UTF-8 is converted without copying: the returned `String`
/// takes over the buffer of the input vector.
pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
    // Note: This function is duplicated in `percent_encoding/lib.rs`.
    match input {
        Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
        Cow::Owned(bytes) => match String::from_utf8(bytes) {
            // Valid UTF-8: `from_utf8` validates once and reuses the
            // allocation, so no `unsafe` is needed.
            Ok(text) => Cow::Owned(text),
            // Invalid UTF-8: the error still holds the original bytes, so
            // build the replacement string from them. `from_utf8_lossy`
            // returns an owned string here, so `into_owned` does not copy.
            Err(invalid) => Cow::Owned(String::from_utf8_lossy(invalid.as_bytes()).into_owned()),
        },
    }
}
426 | |
/// An optional custom encoder used in place of UTF-8.
///
/// When present, the function receives a name or value and returns the bytes
/// that are then byte-serialized. The returned bytes may borrow from the
/// string that was passed in.
pub type EncodingOverride<'a> = Option<&'a dyn for<'s> Fn(&'s str) -> Cow<'s, [u8]>>;
428 | |