| 1 | // Copyright 2013-2016 The rust-url developers. | 
| 2 | // | 
|---|
| 3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | 
|---|
| 4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | 
|---|
| 5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | 
|---|
| 6 | // option. This file may not be copied, modified, or distributed | 
|---|
| 7 | // except according to those terms. | 
|---|
| 8 |  | 
|---|
| 9 | //! Parser and serializer for the [`application/x-www-form-urlencoded` syntax]( | 
|---|
| 10 | //! http://url.spec.whatwg.org/#application/x-www-form-urlencoded), | 
|---|
| 11 | //! as used by HTML forms. | 
|---|
| 12 | //! | 
|---|
| 13 | //! Converts between a string (such as a URL’s query string) | 
|---|
| 14 | //! and a sequence of (name, value) pairs. | 
|---|
| 15 | #![ no_std] | 
|---|
| 16 |  | 
|---|
| 17 | // For forwards compatibility | 
|---|
| 18 | #[ cfg(feature = "std")] | 
|---|
| 19 | extern crate std as _; | 
|---|
| 20 |  | 
|---|
| 21 | extern crate alloc; | 
|---|
| 22 |  | 
|---|
| 23 | #[ cfg(not(feature = "alloc"))] | 
|---|
| 24 | compile_error!( "the `alloc` feature must currently be enabled"); | 
|---|
| 25 |  | 
|---|
| 26 | use alloc::borrow::{Borrow, Cow, ToOwned}; | 
|---|
| 27 | use alloc::string::String; | 
|---|
| 28 | use core::str; | 
|---|
| 29 | use percent_encoding::{percent_decode, percent_encode_byte}; | 
|---|
| 30 |  | 
|---|
| 31 | /// Convert a byte string in the `application/x-www-form-urlencoded` syntax | 
|---|
| 32 | /// into a iterator of (name, value) pairs. | 
|---|
| 33 | /// | 
|---|
| 34 | /// Use `parse(input.as_bytes())` to parse a `&str` string. | 
|---|
| 35 | /// | 
|---|
| 36 | /// The names and values are percent-decoded. For instance, `%23first=%25try%25` will be | 
|---|
| 37 | /// converted to `[("#first", "%try%")]`. | 
|---|
| 38 | #[ inline] | 
|---|
| 39 | pub fn parse(input: &[u8]) -> Parse<'_> { | 
|---|
| 40 | Parse { input } | 
|---|
| 41 | } | 
|---|
/// The return type of `parse()`.
///
/// An iterator over the (name, value) pairs of a byte string in
/// `application/x-www-form-urlencoded` syntax.
#[derive(Copy, Clone)]
pub struct Parse<'a> {
    /// The bytes that have not been consumed by the iterator yet.
    input: &'a [u8],
}
|---|
| 47 |  | 
|---|
| 48 | impl<'a> Iterator for Parse<'a> { | 
|---|
| 49 | type Item = (Cow<'a, str>, Cow<'a, str>); | 
|---|
| 50 |  | 
|---|
| 51 | fn next(&mut self) -> Option<Self::Item> { | 
|---|
| 52 | loop { | 
|---|
| 53 | if self.input.is_empty() { | 
|---|
| 54 | return None; | 
|---|
| 55 | } | 
|---|
| 56 | let mut split2: SplitN<'_, u8, impl FnMut(…) -> …> = self.input.splitn(n:2, |&b: u8| b == b'&'); | 
|---|
| 57 | let sequence: &[u8] = split2.next().unwrap(); | 
|---|
| 58 | self.input = split2.next().unwrap_or(&[][..]); | 
|---|
| 59 | if sequence.is_empty() { | 
|---|
| 60 | continue; | 
|---|
| 61 | } | 
|---|
| 62 | let mut split2: SplitN<'_, u8, impl FnMut(…) -> …> = sequence.splitn(n:2, |&b: u8| b == b'='); | 
|---|
| 63 | let name: &[u8] = split2.next().unwrap(); | 
|---|
| 64 | let value: &[u8] = split2.next().unwrap_or(&[][..]); | 
|---|
| 65 | return Some((decode(input:name), decode(input:value))); | 
|---|
| 66 | } | 
|---|
| 67 | } | 
|---|
| 68 | } | 
|---|
| 69 |  | 
|---|
| 70 | fn decode(input: &[u8]) -> Cow<'_, str> { | 
|---|
| 71 | let replaced: Cow<'_, [u8]> = replace_plus(input); | 
|---|
| 72 | decode_utf8_lossy(input:match percent_decode(&replaced).into() { | 
|---|
| 73 | Cow::Owned(vec: Vec) => Cow::Owned(vec), | 
|---|
| 74 | Cow::Borrowed(_) => replaced, | 
|---|
| 75 | }) | 
|---|
| 76 | } | 
|---|
| 77 |  | 
|---|
/// Replace b'+' with b' '
///
/// Returns the input borrowed, without allocating, when it contains no `+`.
/// Otherwise returns an owned copy in which every `+` has been replaced.
fn replace_plus(input: &[u8]) -> Cow<'_, [u8]> {
    match input.iter().position(|&b| b == b'+') {
        None => Cow::Borrowed(input),
        Some(first_position) => {
            let mut replaced = input.to_owned();
            // Nothing before `first_position` is a `+`, so start there.
            for byte in &mut replaced[first_position..] {
                if *byte == b'+' {
                    *byte = b' ';
                }
            }
            Cow::Owned(replaced)
        }
    }
}
|---|
| 94 |  | 
|---|
| 95 | impl<'a> Parse<'a> { | 
|---|
| 96 | /// Return a new iterator that yields pairs of `String` instead of pairs of `Cow<str>`. | 
|---|
| 97 | pub fn into_owned(self) -> ParseIntoOwned<'a> { | 
|---|
| 98 | ParseIntoOwned { inner: self } | 
|---|
| 99 | } | 
|---|
| 100 | } | 
|---|
| 101 |  | 
|---|
| 102 | /// Like `Parse`, but yields pairs of `String` instead of pairs of `Cow<str>`. | 
|---|
| 103 | pub struct ParseIntoOwned<'a> { | 
|---|
| 104 | inner: Parse<'a>, | 
|---|
| 105 | } | 
|---|
| 106 |  | 
|---|
| 107 | impl<'a> Iterator for ParseIntoOwned<'a> { | 
|---|
| 108 | type Item = (String, String); | 
|---|
| 109 |  | 
|---|
| 110 | fn next(&mut self) -> Option<Self::Item> { | 
|---|
| 111 | self.inner | 
|---|
| 112 | .next() | 
|---|
| 113 | .map(|(k: Cow<'a, str>, v: Cow<'a, str>)| (k.into_owned(), v.into_owned())) | 
|---|
| 114 | } | 
|---|
| 115 | } | 
|---|
| 116 |  | 
|---|
| 117 | /// The [`application/x-www-form-urlencoded` byte serializer]( | 
|---|
| 118 | /// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer). | 
|---|
| 119 | /// | 
|---|
| 120 | /// Return an iterator of `&str` slices. | 
|---|
| 121 | pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> { | 
|---|
| 122 | ByteSerialize { bytes: input } | 
|---|
| 123 | } | 
|---|
| 124 |  | 
|---|
/// Return value of `byte_serialize()`.
#[derive(Debug)]
pub struct ByteSerialize<'a> {
    /// The bytes that have not been serialized yet.
    bytes: &'a [u8],
}
|---|
| 130 |  | 
|---|
/// Whether the byte serializer emits `byte` as-is: ASCII alphanumerics and
/// `*`, `-`, `.`, `_`. Every other byte is escaped.
fn byte_serialized_unchanged(byte: u8) -> bool {
    matches!(byte, b'*' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'_' | b'a'..=b'z')
}
|---|
| 134 |  | 
|---|
| 135 | impl<'a> Iterator for ByteSerialize<'a> { | 
|---|
| 136 | type Item = &'a str; | 
|---|
| 137 |  | 
|---|
| 138 | fn next(&mut self) -> Option<&'a str> { | 
|---|
| 139 | if let Some((&first, tail)) = self.bytes.split_first() { | 
|---|
| 140 | if !byte_serialized_unchanged(first) { | 
|---|
| 141 | self.bytes = tail; | 
|---|
| 142 | return Some(if first == b' '{ | 
|---|
| 143 | "+" | 
|---|
| 144 | } else { | 
|---|
| 145 | percent_encode_byte(first) | 
|---|
| 146 | }); | 
|---|
| 147 | } | 
|---|
| 148 | let position = tail.iter().position(|&b| !byte_serialized_unchanged(b)); | 
|---|
| 149 | let (unchanged_slice, remaining) = match position { | 
|---|
| 150 | // 1 for first_byte + i unchanged in tail | 
|---|
| 151 | Some(i) => self.bytes.split_at(1 + i), | 
|---|
| 152 | None => (self.bytes, &[][..]), | 
|---|
| 153 | }; | 
|---|
| 154 | self.bytes = remaining; | 
|---|
| 155 | // This unsafe is appropriate because we have already checked these | 
|---|
| 156 | // bytes in byte_serialized_unchanged, which checks for a subset | 
|---|
| 157 | // of UTF-8. So we know these bytes are valid UTF-8, and doing | 
|---|
| 158 | // another UTF-8 check would be wasteful. | 
|---|
| 159 | Some(unsafe { str::from_utf8_unchecked(unchanged_slice) }) | 
|---|
| 160 | } else { | 
|---|
| 161 | None | 
|---|
| 162 | } | 
|---|
| 163 | } | 
|---|
| 164 |  | 
|---|
| 165 | fn size_hint(&self) -> (usize, Option<usize>) { | 
|---|
| 166 | if self.bytes.is_empty() { | 
|---|
| 167 | (0, Some(0)) | 
|---|
| 168 | } else { | 
|---|
| 169 | (1, Some(self.bytes.len())) | 
|---|
| 170 | } | 
|---|
| 171 | } | 
|---|
| 172 | } | 
|---|
| 173 |  | 
|---|
| 174 | /// The [`application/x-www-form-urlencoded` serializer]( | 
|---|
| 175 | /// https://url.spec.whatwg.org/#concept-urlencoded-serializer). | 
|---|
| 176 | pub struct Serializer<'a, T: Target> { | 
|---|
| 177 | target: Option<T>, | 
|---|
| 178 | start_position: usize, | 
|---|
| 179 | encoding: EncodingOverride<'a>, | 
|---|
| 180 | } | 
|---|
| 181 |  | 
|---|
/// An output that a `Serializer` can write into.
pub trait Target {
    /// Give mutable access to the string that serialized pairs are appended to.
    fn as_mut_string(&mut self) -> &mut String;
    /// Consume the target and produce the value returned by `Serializer::finish`.
    fn finish(self) -> Self::Finished;
    /// The type returned by `finish`.
    type Finished;
}
|---|
| 187 |  | 
|---|
| 188 | impl Target for String { | 
|---|
| 189 | fn as_mut_string(&mut self) -> &mut String { | 
|---|
| 190 | self | 
|---|
| 191 | } | 
|---|
| 192 | fn finish(self) -> Self { | 
|---|
| 193 | self | 
|---|
| 194 | } | 
|---|
| 195 | type Finished = Self; | 
|---|
| 196 | } | 
|---|
| 197 |  | 
|---|
| 198 | impl<'a> Target for &'a mut String { | 
|---|
| 199 | fn as_mut_string(&mut self) -> &mut String { | 
|---|
| 200 | self | 
|---|
| 201 | } | 
|---|
| 202 | fn finish(self) -> Self { | 
|---|
| 203 | self | 
|---|
| 204 | } | 
|---|
| 205 | type Finished = Self; | 
|---|
| 206 | } | 
|---|
| 207 |  | 
|---|
| 208 | impl<'a, T: Target> Serializer<'a, T> { | 
|---|
| 209 | /// Create a new `application/x-www-form-urlencoded` serializer for the given target. | 
|---|
| 210 | /// | 
|---|
| 211 | /// If the target is non-empty, | 
|---|
| 212 | /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax. | 
|---|
| 213 | pub fn new(target: T) -> Self { | 
|---|
| 214 | Self::for_suffix(target, 0) | 
|---|
| 215 | } | 
|---|
| 216 |  | 
|---|
| 217 | /// Create a new `application/x-www-form-urlencoded` serializer | 
|---|
| 218 | /// for a suffix of the given target. | 
|---|
| 219 | /// | 
|---|
| 220 | /// If that suffix is non-empty, | 
|---|
| 221 | /// its content is assumed to already be in `application/x-www-form-urlencoded` syntax. | 
|---|
| 222 | pub fn for_suffix(mut target: T, start_position: usize) -> Self { | 
|---|
| 223 | if target.as_mut_string().len() < start_position { | 
|---|
| 224 | panic!( | 
|---|
| 225 | "invalid length {}  for target of length {} ", | 
|---|
| 226 | start_position, | 
|---|
| 227 | target.as_mut_string().len() | 
|---|
| 228 | ); | 
|---|
| 229 | } | 
|---|
| 230 |  | 
|---|
| 231 | Serializer { | 
|---|
| 232 | target: Some(target), | 
|---|
| 233 | start_position, | 
|---|
| 234 | encoding: None, | 
|---|
| 235 | } | 
|---|
| 236 | } | 
|---|
| 237 |  | 
|---|
| 238 | /// Remove any existing name/value pair. | 
|---|
| 239 | /// | 
|---|
| 240 | /// Panics if called after `.finish()`. | 
|---|
| 241 | pub fn clear(&mut self) -> &mut Self { | 
|---|
| 242 | string(&mut self.target).truncate(self.start_position); | 
|---|
| 243 | self | 
|---|
| 244 | } | 
|---|
| 245 |  | 
|---|
| 246 | /// Set the character encoding to be used for names and values before percent-encoding. | 
|---|
| 247 | pub fn encoding_override(&mut self, new: EncodingOverride<'a>) -> &mut Self { | 
|---|
| 248 | self.encoding = new; | 
|---|
| 249 | self | 
|---|
| 250 | } | 
|---|
| 251 |  | 
|---|
| 252 | /// Serialize and append a name/value pair. | 
|---|
| 253 | /// | 
|---|
| 254 | /// Panics if called after `.finish()`. | 
|---|
| 255 | pub fn append_pair(&mut self, name: &str, value: &str) -> &mut Self { | 
|---|
| 256 | append_pair( | 
|---|
| 257 | string(&mut self.target), | 
|---|
| 258 | self.start_position, | 
|---|
| 259 | self.encoding, | 
|---|
| 260 | name, | 
|---|
| 261 | value, | 
|---|
| 262 | ); | 
|---|
| 263 | self | 
|---|
| 264 | } | 
|---|
| 265 |  | 
|---|
| 266 | /// Serialize and append a name of parameter without any value. | 
|---|
| 267 | /// | 
|---|
| 268 | /// Panics if called after `.finish()`. | 
|---|
| 269 | pub fn append_key_only(&mut self, name: &str) -> &mut Self { | 
|---|
| 270 | append_key_only( | 
|---|
| 271 | string(&mut self.target), | 
|---|
| 272 | self.start_position, | 
|---|
| 273 | self.encoding, | 
|---|
| 274 | name, | 
|---|
| 275 | ); | 
|---|
| 276 | self | 
|---|
| 277 | } | 
|---|
| 278 |  | 
|---|
| 279 | /// Serialize and append a number of name/value pairs. | 
|---|
| 280 | /// | 
|---|
| 281 | /// This simply calls `append_pair` repeatedly. | 
|---|
| 282 | /// This can be more convenient, so the user doesn’t need to introduce a block | 
|---|
| 283 | /// to limit the scope of `Serializer`’s borrow of its string. | 
|---|
| 284 | /// | 
|---|
| 285 | /// Panics if called after `.finish()`. | 
|---|
| 286 | pub fn extend_pairs<I, K, V>(&mut self, iter: I) -> &mut Self | 
|---|
| 287 | where | 
|---|
| 288 | I: IntoIterator, | 
|---|
| 289 | I::Item: Borrow<(K, V)>, | 
|---|
| 290 | K: AsRef<str>, | 
|---|
| 291 | V: AsRef<str>, | 
|---|
| 292 | { | 
|---|
| 293 | { | 
|---|
| 294 | let string = string(&mut self.target); | 
|---|
| 295 | for pair in iter { | 
|---|
| 296 | let (k, v) = pair.borrow(); | 
|---|
| 297 | append_pair( | 
|---|
| 298 | string, | 
|---|
| 299 | self.start_position, | 
|---|
| 300 | self.encoding, | 
|---|
| 301 | k.as_ref(), | 
|---|
| 302 | v.as_ref(), | 
|---|
| 303 | ); | 
|---|
| 304 | } | 
|---|
| 305 | } | 
|---|
| 306 | self | 
|---|
| 307 | } | 
|---|
| 308 |  | 
|---|
| 309 | /// Serialize and append a number of names without values. | 
|---|
| 310 | /// | 
|---|
| 311 | /// This simply calls `append_key_only` repeatedly. | 
|---|
| 312 | /// This can be more convenient, so the user doesn’t need to introduce a block | 
|---|
| 313 | /// to limit the scope of `Serializer`’s borrow of its string. | 
|---|
| 314 | /// | 
|---|
| 315 | /// Panics if called after `.finish()`. | 
|---|
| 316 | pub fn extend_keys_only<I, K>(&mut self, iter: I) -> &mut Self | 
|---|
| 317 | where | 
|---|
| 318 | I: IntoIterator, | 
|---|
| 319 | I::Item: Borrow<K>, | 
|---|
| 320 | K: AsRef<str>, | 
|---|
| 321 | { | 
|---|
| 322 | { | 
|---|
| 323 | let string = string(&mut self.target); | 
|---|
| 324 | for key in iter { | 
|---|
| 325 | let k = key.borrow().as_ref(); | 
|---|
| 326 | append_key_only(string, self.start_position, self.encoding, k); | 
|---|
| 327 | } | 
|---|
| 328 | } | 
|---|
| 329 | self | 
|---|
| 330 | } | 
|---|
| 331 |  | 
|---|
| 332 | /// If this serializer was constructed with a string, take and return that string. | 
|---|
| 333 | /// | 
|---|
| 334 | /// ```rust | 
|---|
| 335 | /// use form_urlencoded; | 
|---|
| 336 | /// let encoded: String = form_urlencoded::Serializer::new(String::new()) | 
|---|
| 337 | ///     .append_pair( "foo", "bar & baz") | 
|---|
| 338 | ///     .append_pair( "saison", "Été+hiver") | 
|---|
| 339 | ///     .finish(); | 
|---|
| 340 | /// assert_eq!(encoded, "foo=bar+%26+baz&saison=%C3%89t%C3%A9%2Bhiver"); | 
|---|
| 341 | /// ``` | 
|---|
| 342 | /// | 
|---|
| 343 | /// Panics if called more than once. | 
|---|
| 344 | pub fn finish(&mut self) -> T::Finished { | 
|---|
| 345 | self.target | 
|---|
| 346 | .take() | 
|---|
| 347 | .expect( "url::form_urlencoded::Serializer double finish") | 
|---|
| 348 | .finish() | 
|---|
| 349 | } | 
|---|
| 350 | } | 
|---|
| 351 |  | 
|---|
/// Push a `&` separator unless nothing has been written past `start_position` yet.
fn append_separator_if_needed(string: &mut String, start_position: usize) {
    if string.len() > start_position {
        string.push('&');
    }
}
|---|
| 357 |  | 
|---|
| 358 | fn string<T: Target>(target: &mut Option<T>) -> &mut String { | 
|---|
| 359 | target&mut T | 
|---|
| 360 | .as_mut() | 
|---|
| 361 | .expect(msg: "url::form_urlencoded::Serializer finished") | 
|---|
| 362 | .as_mut_string() | 
|---|
| 363 | } | 
|---|
| 364 |  | 
|---|
| 365 | fn append_pair( | 
|---|
| 366 | string: &mut String, | 
|---|
| 367 | start_position: usize, | 
|---|
| 368 | encoding: EncodingOverride<'_>, | 
|---|
| 369 | name: &str, | 
|---|
| 370 | value: &str, | 
|---|
| 371 | ) { | 
|---|
| 372 | append_separator_if_needed(string, start_position); | 
|---|
| 373 | append_encoded(s:name, string, encoding); | 
|---|
| 374 | string.push(ch: '='); | 
|---|
| 375 | append_encoded(s:value, string, encoding); | 
|---|
| 376 | } | 
|---|
| 377 |  | 
|---|
| 378 | fn append_key_only( | 
|---|
| 379 | string: &mut String, | 
|---|
| 380 | start_position: usize, | 
|---|
| 381 | encoding: EncodingOverride, | 
|---|
| 382 | name: &str, | 
|---|
| 383 | ) { | 
|---|
| 384 | append_separator_if_needed(string, start_position); | 
|---|
| 385 | append_encoded(s:name, string, encoding); | 
|---|
| 386 | } | 
|---|
| 387 |  | 
|---|
| 388 | fn append_encoded(s: &str, string: &mut String, encoding: EncodingOverride<'_>) { | 
|---|
| 389 | string.extend(iter:byte_serialize(&encode(encoding, input:s))) | 
|---|
| 390 | } | 
|---|
| 391 |  | 
|---|
| 392 | pub(crate) fn encode<'a>(encoding_override: EncodingOverride<'_>, input: &'a str) -> Cow<'a, [u8]> { | 
|---|
| 393 | if let Some(o: &dyn Fn(&str) -> Cow<'_, [u8]>) = encoding_override { | 
|---|
| 394 | return o(input); | 
|---|
| 395 | } | 
|---|
| 396 | input.as_bytes().into() | 
|---|
| 397 | } | 
|---|
| 398 |  | 
|---|
/// Convert bytes to a string, replacing invalid UTF-8 sequences with U+FFFD.
///
/// Borrowed input stays borrowed when it is valid UTF-8. Owned input that is
/// valid UTF-8 has its allocation reused for the returned `String`.
pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
    // Note: This function is duplicated in `percent_encoding/lib.rs`.
    match input {
        Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
        Cow::Owned(bytes) => match String::from_utf8(bytes) {
            // Valid UTF-8: `from_utf8` takes over the existing buffer, so no
            // copy and no `unsafe` is needed.
            Ok(valid) => Cow::Owned(valid),
            // Invalid UTF-8: the error hands the original bytes back; build
            // a new string with replacement characters from them.
            Err(error) => Cow::Owned(String::from_utf8_lossy(error.as_bytes()).into_owned()),
        },
    }
}
|---|
| 426 |  | 
|---|
/// An optional function that converts a name or value to bytes before it is
/// percent-encoded. `None` means the string's own UTF-8 bytes are used.
pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<'_, [u8]>>;
|---|
| 428 |  | 
|---|