1// Copyright 2013-2016 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! Parser and serializer for the [`application/x-www-form-urlencoded` syntax](
10//! http://url.spec.whatwg.org/#application/x-www-form-urlencoded),
11//! as used by HTML forms.
12//!
//! Converts between a string (such as a URL’s query string)
14//! and a sequence of (name, value) pairs.
15#![no_std]
16
17// For forwards compatibility
18#[cfg(feature = "std")]
19extern crate std as _;
20
21extern crate alloc;
22
23#[cfg(not(feature = "alloc"))]
24compile_error!("the `alloc` feature must currently be enabled");
25
26use alloc::borrow::{Borrow, Cow, ToOwned};
27use alloc::string::String;
28use core::str;
29use percent_encoding::{percent_decode, percent_encode_byte};
30
31/// Convert a byte string in the `application/x-www-form-urlencoded` syntax
32/// into a iterator of (name, value) pairs.
33///
34/// Use `parse(input.as_bytes())` to parse a `&str` string.
35///
36/// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be
37/// converted to `[("#first", "%try%")]`.
38#[inline]
39pub fn parse(input: &[u8]) -> Parse<'_> {
40 Parse { input }
41}
/// The return type of `parse()`.
///
/// An iterator over the (name, value) pairs of an
/// `application/x-www-form-urlencoded` byte string.
#[derive(Copy, Clone, Debug)]
pub struct Parse<'a> {
    /// The part of the input that has not been consumed yet.
    input: &'a [u8],
}
47
48impl<'a> Iterator for Parse<'a> {
49 type Item = (Cow<'a, str>, Cow<'a, str>);
50
51 fn next(&mut self) -> Option<Self::Item> {
52 loop {
53 if self.input.is_empty() {
54 return None;
55 }
56 let mut split2: SplitN<'_, u8, impl Fn(&u8) -> …> = self.input.splitn(n:2, |&b: u8| b == b'&');
57 let sequence: &[u8] = split2.next().unwrap();
58 self.input = split2.next().unwrap_or(&[][..]);
59 if sequence.is_empty() {
60 continue;
61 }
62 let mut split2: SplitN<'_, u8, impl Fn(&u8) -> …> = sequence.splitn(n:2, |&b: u8| b == b'=');
63 let name: &[u8] = split2.next().unwrap();
64 let value: &[u8] = split2.next().unwrap_or(&[][..]);
65 return Some((decode(input:name), decode(input:value)));
66 }
67 }
68}
69
70fn decode(input: &[u8]) -> Cow<'_, str> {
71 let replaced: Cow<'_, [u8]> = replace_plus(input);
72 decode_utf8_lossy(input:match percent_decode(&replaced).into() {
73 Cow::Owned(vec: Vec) => Cow::Owned(vec),
74 Cow::Borrowed(_) => replaced,
75 })
76}
77
/// Replace every b'+' with b' '.
///
/// Returns the input borrowed and untouched when it contains no `+`;
/// otherwise returns an owned copy with all `+` bytes replaced.
fn replace_plus(input: &[u8]) -> Cow<'_, [u8]> {
    let first_position = match input.iter().position(|&b| b == b'+') {
        Some(index) => index,
        // Nothing to replace: avoid allocating.
        None => return Cow::Borrowed(input),
    };
    let mut replaced = input.to_owned();
    // Bytes before the first `+` cannot need replacing, so start there.
    replaced[first_position..]
        .iter_mut()
        .filter(|byte| **byte == b'+')
        .for_each(|byte| *byte = b' ');
    Cow::Owned(replaced)
}
94
95impl<'a> Parse<'a> {
96 /// Return a new iterator that yields pairs of `String` instead of pairs of `Cow<str>`.
97 pub fn into_owned(self) -> ParseIntoOwned<'a> {
98 ParseIntoOwned { inner: self }
99 }
100}
101
/// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow<str>`.
///
/// Created by `Parse::into_owned()`.
pub struct ParseIntoOwned<'a> {
    /// The borrowing iterator whose items are converted to owned strings.
    inner: Parse<'a>,
}
106
107impl<'a> Iterator for ParseIntoOwned<'a> {
108 type Item = (String, String);
109
110 fn next(&mut self) -> Option<Self::Item> {
111 self.inner
112 .next()
113 .map(|(k: Cow<'_, str>, v: Cow<'_, str>)| (k.into_owned(), v.into_owned()))
114 }
115}
116
117/// The [`application/x-www-form-urlencoded` byte serializer](
118/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
119///
120/// Return an iterator of `&str` slices.
121pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> {
122 ByteSerialize { bytes: input }
123}
124
/// Return value of `byte_serialize()`.
///
/// An iterator of `&str` slices over the serialized form of the input bytes.
#[derive(Debug)]
pub struct ByteSerialize<'a> {
    /// The input bytes that have not been serialized yet.
    bytes: &'a [u8],
}
130
/// Whether `byte` is emitted as-is by the byte serializer:
/// ASCII letters, ASCII digits, and `*`, `-`, `.`, `_`.
fn byte_serialized_unchanged(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'*' | b'-' | b'.' | b'_')
}
134
135impl<'a> Iterator for ByteSerialize<'a> {
136 type Item = &'a str;
137
138 fn next(&mut self) -> Option<&'a str> {
139 if let Some((&first, tail)) = self.bytes.split_first() {
140 if !byte_serialized_unchanged(first) {
141 self.bytes = tail;
142 return Some(if first == b' ' {
143 "+"
144 } else {
145 percent_encode_byte(first)
146 });
147 }
148 let position = tail.iter().position(|&b| !byte_serialized_unchanged(b));
149 let (unchanged_slice, remaining) = match position {
150 // 1 for first_byte + i unchanged in tail
151 Some(i) => self.bytes.split_at(1 + i),
152 None => (self.bytes, &[][..]),
153 };
154 self.bytes = remaining;
155 // This unsafe is appropriate because we have already checked these
156 // bytes in byte_serialized_unchanged, which checks for a subset
157 // of UTF-8. So we know these bytes are valid UTF-8, and doing
158 // another UTF-8 check would be wasteful.
159 Some(unsafe { str::from_utf8_unchecked(unchanged_slice) })
160 } else {
161 None
162 }
163 }
164
165 fn size_hint(&self) -> (usize, Option<usize>) {
166 if self.bytes.is_empty() {
167 (0, Some(0))
168 } else {
169 (1, Some(self.bytes.len()))
170 }
171 }
172}
173
/// The [`application/x-www-form-urlencoded` serializer](
/// https://url.spec.whatwg.org/#concept-urlencoded-serializer).
pub struct Serializer<'a, T: Target> {
    /// The output target; `None` once `finish()` has taken it.
    target: Option<T>,
    /// Byte offset in the target string where the serialized content begins.
    start_position: usize,
    /// Optional encoding applied to names and values before percent-encoding.
    encoding: EncodingOverride<'a>,
}
181
/// An output that a `Serializer` can write to.
///
/// Implementors expose a `String` to append to and decide what is handed
/// back to the caller when serialization is finished.
pub trait Target {
    /// The value returned by `finish()`.
    type Finished;

    /// Mutable access to the string that serialized output is appended to.
    fn as_mut_string(&mut self) -> &mut String;

    /// Consume the target and return the finished value.
    fn finish(self) -> Self::Finished;
}
187
188impl Target for String {
189 fn as_mut_string(&mut self) -> &mut String {
190 self
191 }
192 fn finish(self) -> Self {
193 self
194 }
195 type Finished = Self;
196}
197
198impl<'a> Target for &'a mut String {
199 fn as_mut_string(&mut self) -> &mut String {
200 self
201 }
202 fn finish(self) -> Self {
203 self
204 }
205 type Finished = Self;
206}
207
208impl<'a, T: Target> Serializer<'a, T> {
209 /// Create a new `application/x-www-form-urlencoded` serializer for the given target.
210 ///
211 /// If the target is non-empty,
212 /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
213 pub fn new(target: T) -> Self {
214 Self::for_suffix(target, 0)
215 }
216
217 /// Create a new `application/x-www-form-urlencoded` serializer
218 /// for a suffix of the given target.
219 ///
220 /// If that suffix is non-empty,
221 /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax.
222 pub fn for_suffix(mut target: T, start_position: usize) -> Self {
223 if target.as_mut_string().len() < start_position {
224 panic!(
225 "invalid length {} for target of length {}",
226 start_position,
227 target.as_mut_string().len()
228 );
229 }
230
231 Serializer {
232 target: Some(target),
233 start_position,
234 encoding: None,
235 }
236 }
237
238 /// Remove any existing name/value pair.
239 ///
240 /// Panics if called after `.finish()`.
241 pub fn clear(&mut self) -> &mut Self {
242 string(&mut self.target).truncate(self.start_position);
243 self
244 }
245
246 /// Set the character encoding to be used for names and values before percent-encoding.
247 pub fn encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self {
248 self.encoding = new;
249 self
250 }
251
252 /// Serialize and append a name/value pair.
253 ///
254 /// Panics if called after `.finish()`.
255 pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self {
256 append_pair(
257 string(&mut self.target),
258 self.start_position,
259 self.encoding,
260 name,
261 value,
262 );
263 self
264 }
265
266 /// Serialize and append a name of parameter without any value.
267 ///
268 /// Panics if called after `.finish()`.
269 pub fn append_key_only(&mut self, name: &str) -> &mut Self {
270 append_key_only(
271 string(&mut self.target),
272 self.start_position,
273 self.encoding,
274 name,
275 );
276 self
277 }
278
279 /// Serialize and append a number of name/value pairs.
280 ///
281 /// This simply calls `append_pair` repeatedly.
282 /// This can be more convenient, so the user doesn’t need to introduce a block
283 /// to limit the scope of `Serializer`’s borrow of its string.
284 ///
285 /// Panics if called after `.finish()`.
286 pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self
287 where
288 I: IntoIterator,
289 I::Item: Borrow<(K, V)>,
290 K: AsRef<str>,
291 V: AsRef<str>,
292 {
293 {
294 let string = string(&mut self.target);
295 for pair in iter {
296 let (k, v) = pair.borrow();
297 append_pair(
298 string,
299 self.start_position,
300 self.encoding,
301 k.as_ref(),
302 v.as_ref(),
303 );
304 }
305 }
306 self
307 }
308
309 /// Serialize and append a number of names without values.
310 ///
311 /// This simply calls `append_key_only` repeatedly.
312 /// This can be more convenient, so the user doesn’t need to introduce a block
313 /// to limit the scope of `Serializer`’s borrow of its string.
314 ///
315 /// Panics if called after `.finish()`.
316 pub fn extend_keys_only<I, K>(&mut self, iter: I) -> &mut Self
317 where
318 I: IntoIterator,
319 I::Item: Borrow<K>,
320 K: AsRef<str>,
321 {
322 {
323 let string = string(&mut self.target);
324 for key in iter {
325 let k = key.borrow().as_ref();
326 append_key_only(string, self.start_position, self.encoding, k);
327 }
328 }
329 self
330 }
331
332 /// If this serializer was constructed with a string, take and return that string.
333 ///
334 /// ```rust
335 /// use form_urlencoded;
336 /// let encoded: String = form_urlencoded::Serializer::new(String::new())
337 /// .append_pair("foo", "bar & baz")
338 /// .append_pair("saison", "Été+hiver")
339 /// .finish();
340 /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver");
341 /// ```
342 ///
343 /// Panics if called more than once.
344 pub fn finish(&mut self) -> T::Finished {
345 self.target
346 .take()
347 .expect("url::form_urlencoded::Serializer double finish")
348 .finish()
349 }
350}
351
/// Push a `&` separator onto `string` if anything has already been written
/// after `start_position`, i.e. if the string is longer than `start_position`.
fn append_separator_if_needed(string: &mut String, start_position: usize) {
    if string.len() > start_position {
        string.push('&');
    }
}
357
358fn string<T: Target>(target: &mut Option<T>) -> &mut String {
359 target&mut T
360 .as_mut()
361 .expect(msg:"url::form_urlencoded::Serializer finished")
362 .as_mut_string()
363}
364
365fn append_pair(
366 string: &mut String,
367 start_position: usize,
368 encoding: EncodingOverride<'_>,
369 name: &str,
370 value: &str,
371) {
372 append_separator_if_needed(string, start_position);
373 append_encoded(s:name, string, encoding);
374 string.push(ch:'=');
375 append_encoded(s:value, string, encoding);
376}
377
378fn append_key_only(
379 string: &mut String,
380 start_position: usize,
381 encoding: EncodingOverride,
382 name: &str,
383) {
384 append_separator_if_needed(string, start_position);
385 append_encoded(s:name, string, encoding);
386}
387
388fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride<'_>) {
389 string.extend(iter:byte_serialize(&encode(encoding_override:encoding, input:s)))
390}
391
392pub(crate) fn encode<'a>(encoding_override: EncodingOverride<'_>, input: &'a str) -> Cow<'a, [u8]> {
393 if let Some(o: &dyn Fn(&str) -> Cow<'_, …>) = encoding_override {
394 return o(input);
395 }
396 input.as_bytes().into()
397}
398
/// Convert bytes to a string, replacing invalid UTF-8 sequences with U+FFFD.
///
/// Borrowed input stays borrowed when it is valid UTF-8. Owned input that is
/// valid UTF-8 is turned into a `String` in place, reusing its buffer.
pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
    // Note: This function is duplicated in `percent_encoding/lib.rs`.
    let bytes = match input {
        Cow::Borrowed(bytes) => return String::from_utf8_lossy(bytes),
        Cow::Owned(bytes) => bytes,
    };

    match String::from_utf8_lossy(&bytes) {
        // Invalid sequences were found and replaced in a freshly allocated string.
        Cow::Owned(lossy) => Cow::Owned(lossy),
        Cow::Borrowed(utf8) => {
            // `from_utf8_lossy` only returns `Cow::Borrowed` when no byte had to be
            // replaced, i.e. when the original bytes were already valid UTF-8;
            // otherwise it would have had to allocate an owned string for the
            // placeholder characters.

            // Confirm in debug builds that the borrow really is our own buffer.
            let raw_utf8: *const [u8] = utf8.as_bytes();
            debug_assert!(raw_utf8 == &*bytes as *const [u8]);

            // SAFETY: as argued above, `bytes` is valid UTF-8. We own the buffer,
            // so it is reused instead of being copied.
            Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
        }
    }
}
426
/// An optional function that converts a string to bytes before percent-encoding.
///
/// `None` means no override is set.
pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<'_, [u8]>>;
428