1use std::fmt::{self, Display};
2use std::io::{self, Write};
3use std::ops::Range;
4use std::time;
5
6use base64::{prelude::BASE64_STANDARD, Engine};
7use log::debug;
8use url::Url;
9
10#[cfg(feature = "cookies")]
11use cookie::Cookie;
12
13use crate::agent::RedirectAuthHeaders;
14use crate::body::{self, BodySize, Payload, SizedReader};
15use crate::error::{Error, ErrorKind};
16use crate::header;
17use crate::header::{get_header, Header};
18use crate::proxy::Proto;
19use crate::resolve::ArcResolver;
20use crate::response::Response;
21use crate::stream::{self, connect_test, Stream};
22use crate::Agent;
23
/// A Unit is a fully-built Request, ready to execute.
///
/// It bundles everything `connect` needs to put one request on the wire:
/// the agent, the method, the target URL and the final header list.
///
/// *Internal API*
#[derive(Clone)]
pub(crate) struct Unit {
    /// The agent this request runs on; a clone of the agent given to `Unit::new`.
    pub agent: Agent,
    /// The HTTP method, stored as given to `Unit::new`.
    pub method: String,
    /// The URL this request is sent to.
    pub url: Url,
    /// Whether the body is sent using chunked transfer encoding.
    is_chunked: bool,
    /// Caller-supplied headers followed by the ones `Unit::new` derived.
    headers: Vec<Header>,
    /// Optional point in time associated with the request.
    // NOTE(review): presumably the overall request deadline; it is only carried
    // along in this file — confirm against the stream/response code.
    pub deadline: Option<time::Instant>,
}
36
37impl Unit {
38 //
39
40 pub(crate) fn new(
41 agent: &Agent,
42 method: &str,
43 url: &Url,
44 mut headers: Vec<Header>,
45 body: &SizedReader,
46 deadline: Option<time::Instant>,
47 ) -> Self {
48 //
49
50 let (is_transfer_encoding_set, mut is_chunked) = get_header(&headers, "transfer-encoding")
51 // if the user has set an encoding header, obey that.
52 .map(|enc| {
53 let is_transfer_encoding_set = !enc.is_empty();
54 let last_encoding = enc.split(',').last();
55 let is_chunked = last_encoding
56 .map(|last_enc| last_enc.trim() == "chunked")
57 .unwrap_or(false);
58 (is_transfer_encoding_set, is_chunked)
59 })
60 // otherwise, no chunking.
61 .unwrap_or((false, false));
62
63 let mut extra_headers = {
64 let mut extra = vec![];
65
66 // chunking and Content-Length headers are mutually exclusive
67 // also don't write this if the user has set it themselves
68 if !is_chunked && get_header(&headers, "content-length").is_none() {
69 // if the payload is of known size (everything beside an unsized reader), set
70 // Content-Length,
71 // otherwise, use the chunked Transfer-Encoding (only if no other Transfer-Encoding
72 // has been set
73 match body.size {
74 BodySize::Known(size) => {
75 extra.push(Header::new("Content-Length", &format!("{}", size)))
76 }
77 BodySize::Unknown => {
78 if !is_transfer_encoding_set {
79 extra.push(Header::new("Transfer-Encoding", "chunked"));
80 is_chunked = true;
81 }
82 }
83 BodySize::Empty => {}
84 }
85 }
86
87 let username = url.username();
88 let password = url.password().unwrap_or("");
89 if (!username.is_empty() || !password.is_empty())
90 && get_header(&headers, "authorization").is_none()
91 {
92 let encoded = BASE64_STANDARD.encode(format!("{}:{}", username, password));
93 extra.push(Header::new("Authorization", &format!("Basic {}", encoded)));
94 }
95
96 #[cfg(feature = "cookies")]
97 extra.extend(extract_cookies(agent, url).into_iter());
98
99 extra
100 };
101
102 headers.append(&mut extra_headers);
103
104 Unit {
105 agent: agent.clone(),
106 method: method.to_string(),
107 url: url.clone(),
108 is_chunked,
109 headers,
110 deadline,
111 }
112 }
113
114 pub fn resolver(&self) -> ArcResolver {
115 self.agent.state.resolver.clone()
116 }
117
118 #[cfg(test)]
119 pub fn header(&self, name: &str) -> Option<&str> {
120 header::get_header(&self.headers, name)
121 }
122 #[cfg(test)]
123 pub fn has(&self, name: &str) -> bool {
124 header::has_header(&self.headers, name)
125 }
126 #[cfg(test)]
127 pub fn all(&self, name: &str) -> Vec<&str> {
128 header::get_all_headers(&self.headers, name)
129 }
130
131 // Returns true if this request, with the provided body, is retryable.
132 pub(crate) fn is_retryable(&self, body: &SizedReader) -> bool {
133 // Per https://tools.ietf.org/html/rfc7231#section-8.1.3
134 // these methods are idempotent.
135 let idempotent = match self.method.as_str() {
136 "DELETE" | "GET" | "HEAD" | "OPTIONS" | "PUT" | "TRACE" => true,
137 _ => false,
138 };
139 // Unsized bodies aren't retryable because we can't rewind the reader.
140 // Sized bodies are retryable only if they are zero-length because of
141 // coincidences of the current implementation - the function responsible
142 // for retries doesn't have a way to replay a Payload.
143 let retryable_body = match body.size {
144 BodySize::Unknown => false,
145 BodySize::Known(0) => true,
146 BodySize::Known(_) => false,
147 BodySize::Empty => true,
148 };
149
150 idempotent && retryable_body
151 }
152}
153
154/// Perform a connection. Follows redirects.
155pub(crate) fn connect(
156 mut unit: Unit,
157 use_pooled: bool,
158 mut body: SizedReader,
159) -> Result<Response, Error> {
160 let mut history = vec![];
161 let mut resp = loop {
162 let resp = connect_inner(&unit, use_pooled, body, &history)?;
163
164 // handle redirects
165 if !(300..399).contains(&resp.status()) || unit.agent.config.redirects == 0 {
166 break resp;
167 }
168 if history.len() + 1 >= unit.agent.config.redirects as usize {
169 return Err(ErrorKind::TooManyRedirects.msg(format!(
170 "reached max redirects ({})",
171 unit.agent.config.redirects
172 )));
173 }
174 // the location header
175 let location = match resp.header("location") {
176 Some(l) => l,
177 None => break resp,
178 };
179
180 let url = &unit.url;
181 let method = &unit.method;
182 // join location header to current url in case it is relative
183 let new_url = url.join(location).map_err(|e| {
184 ErrorKind::InvalidUrl
185 .msg(format!("Bad redirection: {}", location))
186 .src(e)
187 })?;
188
189 // perform the redirect differently depending on 3xx code.
190 let new_method = match resp.status() {
191 // this is to follow how curl does it. POST, PUT etc change
192 // to GET on a redirect.
193 301 | 302 | 303 => match &method[..] {
194 "GET" | "HEAD" => unit.method,
195 _ => "GET".into(),
196 },
197 // never change the method for 307/308
198 // only resend the request if it cannot have a body
199 // NOTE: DELETE is intentionally excluded: https://stackoverflow.com/questions/299628
200 307 | 308 if ["GET", "HEAD", "OPTIONS", "TRACE"].contains(&method.as_str()) => {
201 unit.method
202 }
203 _ => break resp,
204 };
205
206 let keep_auth_header = can_propagate_authorization_on_redirect(
207 &unit.agent.config.redirect_auth_headers,
208 url,
209 &new_url,
210 );
211
212 debug!("redirect {} {} -> {}", resp.status(), url, new_url);
213 history.push(unit.url);
214 body = Payload::Empty.into_read();
215
216 // reuse the previous header vec on redirects.
217 let mut headers = unit.headers;
218
219 // on redirects we don't want to keep "content-length". we also might want to
220 // strip away "authorization" and "cookie" to ensure credentials are not leaked.
221 headers.retain(|h| {
222 !h.is_name("content-length")
223 && !h.is_name("cookie")
224 && (!h.is_name("authorization") || keep_auth_header)
225 });
226
227 // recreate the unit to get a new hostname and cookies for the new host.
228 unit = Unit::new(
229 &unit.agent,
230 &new_method,
231 &new_url,
232 headers,
233 &body,
234 unit.deadline,
235 );
236 };
237 resp.history = history;
238 Ok(resp)
239}
240
241/// Perform a connection. Does not follow redirects.
242fn connect_inner(
243 unit: &Unit,
244 use_pooled: bool,
245 body: SizedReader,
246 history: &[Url],
247) -> Result<Response, Error> {
248 let host = unit
249 .url
250 .host_str()
251 // This unwrap is ok because Request::parse_url() ensure there is always a host present.
252 .unwrap();
253 let url = &unit.url;
254 let method = &unit.method;
255 // open socket
256 let (mut stream, is_recycled) = connect_socket(unit, host, use_pooled)?;
257
258 if is_recycled {
259 debug!("sending request (reused connection) {} {}", method, url);
260 } else {
261 debug!("sending request {} {}", method, url);
262 }
263
264 let send_result = send_prelude(unit, &mut stream);
265
266 if let Err(err) = send_result {
267 if is_recycled {
268 debug!("retrying request early {} {}: {}", method, url, err);
269 // we try open a new connection, this time there will be
270 // no connection in the pool. don't use it.
271 // NOTE: this recurses at most once because `use_pooled` is `false`.
272 return connect_inner(unit, false, body, history);
273 } else {
274 // not a pooled connection, propagate the error.
275 return Err(err.into());
276 }
277 }
278 let retryable = unit.is_retryable(&body);
279
280 // send the body (which can be empty now depending on redirects)
281 body::send_body(body, unit.is_chunked, &mut stream)?;
282
283 // start reading the response to process cookies and redirects.
284 // TODO: this unit.clone() bothers me. At this stage, we're not
285 // going to use the unit (much) anymore, and it should be possible
286 // to have ownership of it and pass it into the Response.
287 let result = Response::do_from_stream(stream, unit.clone());
288
289 // https://tools.ietf.org/html/rfc7230#section-6.3.1
290 // When an inbound connection is closed prematurely, a client MAY
291 // open a new connection and automatically retransmit an aborted
292 // sequence of requests if all of those requests have idempotent
293 // methods.
294 //
295 // We choose to retry only requests that used a recycled connection
296 // from the ConnectionPool, since those are most likely to have
297 // reached a server-side timeout. Note that this means we may do
298 // up to N+1 total tries, where N is max_idle_connections_per_host.
299 let resp = match result {
300 Err(err) if err.connection_closed() && retryable && is_recycled => {
301 debug!("retrying request {} {}: {}", method, url, err);
302 let empty = Payload::Empty.into_read();
303 // NOTE: this recurses at most once because `use_pooled` is `false`.
304 return connect_inner(unit, false, empty, history);
305 }
306 Err(e) => return Err(e),
307 Ok(resp) => resp,
308 };
309
310 // squirrel away cookies
311 #[cfg(feature = "cookies")]
312 save_cookies(unit, &resp);
313
314 debug!("response {} to {} {}", resp.status(), method, url);
315
316 // release the response
317 Ok(resp)
318}
319
/// Builds the `Cookie` request header for `url` from the agent's cookie store.
///
/// Returns `None` when there is nothing to send.
#[cfg(feature = "cookies")]
fn extract_cookies(agent: &Agent, url: &Url) -> Option<Header> {
    let mut pairs: Vec<String> = Vec::new();

    for cookie in agent.state.cookie_tin.get_request_cookies(url).iter() {
        // This guards against sending rfc non-compliant cookies, even if the user has
        // "prepped" their local cookie store with such cookies.
        if is_cookie_rfc_compliant(cookie) {
            pairs.push(cookie.to_string());
        } else {
            debug!("do not send non compliant cookie: {:?}", cookie);
        }
    }

    let header_value = pairs.join(";");
    if header_value.is_empty() {
        None
    } else {
        Some(Header::new("Cookie", &header_value))
    }
}
344
345/// Connect the socket, either by using the pool or grab a new one.
346fn connect_socket(unit: &Unit, hostname: &str, use_pooled: bool) -> Result<(Stream, bool), Error> {
347 match unit.url.scheme() {
348 "http" | "https" | "test" => (),
349 scheme => return Err(ErrorKind::UnknownScheme.msg(format!("unknown scheme '{}'", scheme))),
350 };
351 if unit.url.scheme() != "https" && unit.agent.config.https_only {
352 return Err(ErrorKind::InsecureRequestHttpsOnly
353 .msg("can't perform non https request with https_only set"));
354 }
355 if use_pooled {
356 let pool = &unit.agent.state.pool;
357 let proxy = &unit.agent.config.proxy;
358 // The connection may have been closed by the server
359 // due to idle timeout while it was sitting in the pool.
360 // Loop until we find one that is still good or run out of connections.
361 while let Some(stream) = pool.try_get_connection(&unit.url, proxy.clone()) {
362 let server_closed = stream.server_closed()?;
363 if !server_closed {
364 return Ok((stream, true));
365 }
366 debug!("dropping stream from pool; closed by server: {:?}", stream);
367 }
368 }
369 let stream = match unit.url.scheme() {
370 "http" => stream::connect_http(unit, hostname),
371 "https" => stream::connect_https(unit, hostname),
372 "test" => connect_test(unit),
373 scheme => Err(ErrorKind::UnknownScheme.msg(format!("unknown scheme {}", scheme))),
374 };
375 Ok((stream?, false))
376}
377
378fn can_propagate_authorization_on_redirect(
379 redirect_auth_headers: &RedirectAuthHeaders,
380 prev_url: &Url,
381 url: &Url,
382) -> bool {
383 fn scheme_is_https(url: &Url) -> bool {
384 url.scheme() == "https" || (cfg!(test) && url.scheme() == "test")
385 }
386
387 match redirect_auth_headers {
388 RedirectAuthHeaders::Never => false,
389 RedirectAuthHeaders::SameHost => {
390 let host: Option<&str> = url.host_str();
391 let is_https: bool = scheme_is_https(url);
392
393 let prev_host: Option<&str> = prev_url.host_str();
394 let prev_is_https: bool = scheme_is_https(prev_url);
395
396 let same_scheme_or_more_secure: bool =
397 is_https == prev_is_https || (!prev_is_https && is_https);
398
399 host == prev_host && same_scheme_or_more_secure
400 }
401 }
402}
403
404/// Send request line + headers (all up until the body).
405#[allow(clippy::write_with_newline)]
406fn send_prelude(unit: &Unit, stream: &mut Stream) -> io::Result<()> {
407 // build into a buffer and send in one go.
408 let mut prelude = PreludeBuilder::new();
409
410 let path = if let Some(proxy) = &unit.agent.config.proxy {
411 // HTTP proxies require the path to be in absolute URI form
412 // https://www.rfc-editor.org/rfc/rfc7230#section-5.3.2
413 match proxy.proto {
414 Proto::HTTP => match unit.url.port() {
415 Some(port) => format!(
416 "{}://{}:{}{}",
417 unit.url.scheme(),
418 unit.url.host().unwrap(),
419 port,
420 unit.url.path()
421 ),
422 None => format!(
423 "{}://{}{}",
424 unit.url.scheme(),
425 unit.url.host().unwrap(),
426 unit.url.path()
427 ),
428 },
429 _ => unit.url.path().into(),
430 }
431 } else {
432 unit.url.path().into()
433 };
434
435 // request line
436 prelude.write_request_line(&unit.method, &path, unit.url.query().unwrap_or_default())?;
437
438 // host header if not set by user.
439 if !header::has_header(&unit.headers, "host") {
440 let host = unit.url.host().unwrap();
441 match unit.url.port() {
442 Some(port) => {
443 let scheme_default: u16 = match unit.url.scheme() {
444 "http" => 80,
445 "https" => 443,
446 _ => 0,
447 };
448 if scheme_default != 0 && scheme_default == port {
449 prelude.write_header("Host", host)?;
450 } else {
451 prelude.write_header("Host", format_args!("{}:{}", host, port))?;
452 }
453 }
454 None => {
455 prelude.write_header("Host", host)?;
456 }
457 }
458 }
459 if !header::has_header(&unit.headers, "user-agent") {
460 prelude.write_header("User-Agent", &unit.agent.config.user_agent)?;
461 }
462 if !header::has_header(&unit.headers, "accept") {
463 prelude.write_header("Accept", "*/*")?;
464 }
465
466 // other headers
467 for header in &unit.headers {
468 if let Some(v) = header.value() {
469 if is_header_sensitive(header) {
470 prelude.write_sensitive_header(header.name(), v)?;
471 } else {
472 prelude.write_header(header.name(), v)?;
473 }
474 }
475 }
476
477 // finish
478 prelude.finish()?;
479
480 debug!("writing prelude: {}", prelude);
481 // write all to the wire
482 stream.write_all(prelude.as_slice())?;
483
484 Ok(())
485}
486
487fn is_header_sensitive(header: &Header) -> bool {
488 header.is_name("Authorization") || header.is_name("Cookie")
489}
490
/// Accumulates the request line and headers into one byte buffer so the whole
/// prelude can be written to the wire in a single call.
///
/// The byte ranges of sensitive header values are recorded on the side, which
/// lets the `Display` impl mask them when the prelude is logged.
struct PreludeBuilder {
    /// The prelude bytes, exactly as they will be sent.
    prelude: Vec<u8>,
    // Sensitive information to be omitted in debug logging
    sensitive_spans: Vec<Range<usize>>,
}

impl PreludeBuilder {
    /// Creates an empty builder with a small pre-allocated buffer.
    fn new() -> Self {
        PreludeBuilder {
            prelude: Vec::with_capacity(256),
            sensitive_spans: Vec::new(),
        }
    }

    /// Appends the request line `METHOD path[?query] HTTP/1.1`.
    ///
    /// The `?query` part is left out when `query` is empty.
    fn write_request_line(&mut self, method: &str, path: &str, query: &str) -> io::Result<()> {
        write!(self.prelude, "{} {}", method, path)?;
        if !query.is_empty() {
            write!(self.prelude, "?{}", query)?;
        }
        write!(self.prelude, " HTTP/1.1\r\n")?;
        Ok(())
    }

    /// Appends a `name: value` header line.
    fn write_header(&mut self, name: &str, value: impl Display) -> io::Result<()> {
        write!(self.prelude, "{}: {}\r\n", name, value)
    }

    /// Appends a `name: value` header line and records the byte range of the
    /// value so that it is masked when the prelude is displayed.
    fn write_sensitive_header(&mut self, name: &str, value: impl Display) -> io::Result<()> {
        write!(self.prelude, "{}: ", name)?;
        let start = self.prelude.len();
        write!(self.prelude, "{}", value)?;
        let end = self.prelude.len();
        self.sensitive_spans.push(start..end);
        write!(self.prelude, "\r\n")?;
        Ok(())
    }

    /// Terminates the header section with the empty line.
    fn finish(&mut self) -> io::Result<()> {
        write!(self.prelude, "\r\n")
    }

    /// The bytes written so far.
    fn as_slice(&self) -> &[u8] {
        &self.prelude
    }
}

/// Log-safe rendering of the prelude: every recorded sensitive span is
/// replaced by `***` and trailing whitespace is trimmed.
impl fmt::Display for PreludeBuilder {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Spans are pushed in write order, so a single forward pass emits the
        // plain text between them and a mask in place of each one.
        let mut pos: usize = 0;
        for span in &self.sensitive_spans {
            write!(
                f,
                "{}",
                String::from_utf8_lossy(&self.prelude[pos..span.start])
            )?;
            write!(f, "***")?;
            pos = span.end;
        }
        write!(
            f,
            "{}",
            String::from_utf8_lossy(&self.prelude[pos..]).trim_end()
        )?;
        Ok(())
    }
}
557
/// Investigate a response for "Set-Cookie" headers.
///
/// Every header that parses into an rfc compliant cookie is handed to the
/// agent's cookie store, keyed by the url of the request.
#[cfg(feature = "cookies")]
fn save_cookies(unit: &Unit, resp: &Response) {
    let set_cookie_values = resp.all("set-cookie");
    // Avoid locking if there are no cookie headers
    if set_cookie_values.is_empty() {
        return;
    }

    let cookies = set_cookie_values.into_iter().filter_map(|raw| {
        debug!(
            "received 'set-cookie: {}' from {} {}",
            raw, unit.method, unit.url
        );
        // Headers that do not parse are silently skipped.
        let parsed = Cookie::parse(raw.to_string()).ok()?;
        // This guards against accepting rfc non-compliant cookies from a host.
        if is_cookie_rfc_compliant(&parsed) {
            Some(parsed)
        } else {
            debug!("ignore incoming non compliant cookie: {:?}", parsed);
            None
        }
    });

    let jar = &unit.agent.state.cookie_tin;
    jar.store_response_cookies(cookies, &unit.url);
}
591
/// Checks that both the name and the value of `cookie` only use the bytes
/// permitted by RFC 6265 / RFC 2616, logging the offending part at trace
/// level when they do not.
#[cfg(feature = "cookies")]
fn is_cookie_rfc_compliant(cookie: &Cookie) -> bool {
    // https://tools.ietf.org/html/rfc6265#page-9
    // set-cookie-header = "Set-Cookie:" SP set-cookie-string
    // set-cookie-string = cookie-pair *( ";" SP cookie-av )
    // cookie-pair       = cookie-name "=" cookie-value
    // cookie-name       = token
    // cookie-value      = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE )
    // cookie-octet      = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E
    //                       ; US-ASCII characters excluding CTLs,
    //                       ; whitespace DQUOTE, comma, semicolon,
    //                       ; and backslash
    // token             = <token, defined in [RFC2616], Section 2.2>

    // https://tools.ietf.org/html/rfc2616#page-17
    // CHAR           = <any US-ASCII character (octets 0 - 127)>
    // ...
    //        CTL            = <any US-ASCII control character
    //                         (octets 0 - 31) and DEL (127)>
    // ...
    //        token          = 1*<any CHAR except CTLs or separators>
    //        separators     = "(" | ")" | "<" | ">" | "@"
    //                       | "," | ";" | ":" | "\" | <">
    //                       | "/" | "[" | "]" | "?" | "="
    //                       | "{" | "}" | SP | HT

    // A name byte must be a token character.
    fn is_valid_name(b: &u8) -> bool {
        header::is_tchar(b)
    }

    // A value byte must be printable, non-whitespace ASCII and none of the
    // four characters excluded from cookie-octet.
    fn is_valid_value(b: &u8) -> bool {
        if !b.is_ascii() || b.is_ascii_control() || b.is_ascii_whitespace() {
            return false;
        }
        !matches!(*b, b'"' | b',' | b';' | b'\\')
    }

    let name_has_bad_byte = cookie.name().as_bytes().iter().any(|b| !is_valid_name(b));
    if name_has_bad_byte {
        log::trace!("cookie name is not valid: {:?}", cookie.name());
        return false;
    }

    let value_has_bad_byte = cookie.value().as_bytes().iter().any(|b| !is_valid_value(b));
    if value_has_bad_byte {
        log::trace!("cookie value is not valid: {:?}", cookie.value());
        return false;
    }

    true
}
652
#[cfg(test)]
#[cfg(feature = "cookies")]
mod tests {
    use cookie::Cookie;
    use cookie_store::CookieStore;

    use super::*;

    use crate::Agent;
    ///////////////////// COOKIE TESTS //////////////////////////////

    /// Two stored cookies for the same host end up in a single Cookie header.
    #[test]
    fn match_cookies_returns_one_header() {
        let agent = Agent::new();
        let url: Url = "https://crates.io/".parse().unwrap();
        let stored = vec![
            "cookie1=value1; Domain=crates.io; Path=/"
                .parse::<Cookie>()
                .unwrap(),
            "cookie2=value2; Domain=crates.io; Path=/"
                .parse::<Cookie>()
                .unwrap(),
        ];
        agent
            .state
            .cookie_tin
            .store_response_cookies(stored.into_iter(), &url);

        // There's no guarantee to the order in which cookies are defined.
        // Ensure that they're either in one order or the other.
        let header = extract_cookies(&agent, &url);
        let acceptable = [
            Some(Header::new("Cookie", "cookie1=value1;cookie2=value2")),
            Some(Header::new("Cookie", "cookie2=value2;cookie1=value1")),
        ];

        assert!(acceptable.contains(&header));
    }

    /// A non-compliant cookie planted in the store must never be sent.
    #[test]
    fn not_send_illegal_cookies() {
        // This prepares a cookie store with a cookie that isn't legal
        // according to the relevant rfcs. ureq should not send this.
        let mut store = CookieStore::load_json(&b""[..]).unwrap();
        let url = Url::parse("https://mydomain.com").unwrap();
        let illegal = Cookie::new("borked///", "illegal<>//");
        store.insert_raw(&illegal, &url).unwrap();

        let agent = crate::builder().cookie_store(store).build();
        assert_eq!(extract_cookies(&agent, &url), None);
    }

    #[test]
    fn check_cookie_crate_allows_illegal() {
        // This test is there to see whether the cookie crate enforces
        // https://tools.ietf.org/html/rfc6265#page-9
        // https://tools.ietf.org/html/rfc2616#page-17
        // for cookie name or cookie value.
        // As long as it doesn't, we do additional filtering in ureq
        // to not let non-compliant cookies through.
        let parsed = Cookie::parse("borked///=illegal\\,").unwrap();
        // these should not be allowed according to the RFCs.
        assert_eq!(parsed.name(), "borked///");
        assert_eq!(parsed.value(), "illegal\\,");
    }

    #[test]
    fn illegal_cookie_name() {
        let parsed = Cookie::parse("borked/=value").unwrap();
        assert!(!is_cookie_rfc_compliant(&parsed));
    }

    #[test]
    fn illegal_cookie_value() {
        let parsed = Cookie::parse("name=borked,").unwrap();
        assert!(!is_cookie_rfc_compliant(&parsed));
    }

    #[test]
    fn legal_cookie_name_value() {
        let parsed = Cookie::parse("name=value").unwrap();
        assert!(is_cookie_rfc_compliant(&parsed));
    }
}
734