| 1 | // Copyright 2013-2014 The rust-url developers. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 6 | // option. This file may not be copied, modified, or distributed |
| 7 | // except according to those terms. |
| 8 | |
| 9 | //! Deprecated API for [*Unicode IDNA Compatibility Processing* |
| 10 | //! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/) |
| 11 | |
| 12 | #![allow (deprecated)] |
| 13 | |
| 14 | use alloc::borrow::Cow; |
| 15 | use alloc::string::String; |
| 16 | |
| 17 | use crate::uts46::*; |
| 18 | use crate::Errors; |
| 19 | |
/// Performs preprocessing equivalent to UTS 46 transitional processing
/// if `transitional` is `true`. If `transitional` is `false`, merely
/// lets the input pass through as-is (for call site convenience).
///
/// When `transitional` is `true`, the following replacements are made:
///
/// * `ß` (U+00DF) and `ẞ` (U+1E9E) become `ss`.
/// * `ς` (U+03C2) becomes `σ` (U+03C3).
/// * U+200C and U+200D are removed.
///
/// The input is returned borrowed when `transitional` is `false` or when
/// none of the characters above occur in `domain`; an owned string is
/// allocated only when at least one replacement is actually needed.
///
/// The output of this function is to be passed to [`Uts46::process`].
fn map_transitional(domain: &str, transitional: bool) -> Cow<'_, str> {
    // Characters that this function rewrites or drops.
    fn needs_mapping(c: char) -> bool {
        matches!(c, 'ß' | 'ẞ' | 'ς' | '\u{200C}' | '\u{200D}')
    }

    if !transitional {
        return Cow::Borrowed(domain);
    }
    match domain.find(needs_mapping) {
        // Nothing to rewrite: avoid allocating.
        None => Cow::Borrowed(domain),
        Some(first) => {
            // Everything before the first affected character is copied
            // verbatim; only the remainder has to be inspected per character.
            let (head, tail) = domain.split_at(first);
            // No replacement is longer (in UTF-8 bytes) than what it
            // replaces, so the input length is a sufficient capacity.
            let mut mapped = String::with_capacity(domain.len());
            mapped.push_str(head);
            for c in tail.chars() {
                match c {
                    'ß' | 'ẞ' => mapped.push_str("ss"),
                    'ς' => mapped.push('σ'),
                    '\u{200C}' | '\u{200D}' => {}
                    _ => mapped.push(c),
                }
            }
            Cow::Owned(mapped)
        }
    }
}
| 63 | |
| 64 | /// Deprecated. Use the crate-top-level functions or [`Uts46`]. |
| 65 | #[derive (Default)] |
| 66 | #[deprecated ] |
| 67 | pub struct Idna { |
| 68 | config: Config, |
| 69 | } |
| 70 | |
| 71 | impl Idna { |
| 72 | pub fn new(config: Config) -> Self { |
| 73 | Self { config } |
| 74 | } |
| 75 | |
| 76 | /// [UTS 46 ToASCII](http://www.unicode.org/reports/tr46/#ToASCII) |
| 77 | #[allow (clippy::wrong_self_convention)] // Retain old weirdness in deprecated API |
| 78 | pub fn to_ascii(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> { |
| 79 | let mapped = map_transitional(domain, self.config.transitional_processing); |
| 80 | match Uts46::new().process( |
| 81 | mapped.as_bytes(), |
| 82 | self.config.deny_list(), |
| 83 | self.config.hyphens(), |
| 84 | ErrorPolicy::FailFast, // Old code did not appear to expect the output to be useful in the error case. |
| 85 | |_, _, _| false, |
| 86 | out, |
| 87 | None, |
| 88 | ) { |
| 89 | Ok(ProcessingSuccess::Passthrough) => { |
| 90 | if self.config.verify_dns_length && !verify_dns_length(&mapped, true) { |
| 91 | return Err(crate::Errors::default()); |
| 92 | } |
| 93 | out.push_str(&mapped); |
| 94 | Ok(()) |
| 95 | } |
| 96 | Ok(ProcessingSuccess::WroteToSink) => { |
| 97 | if self.config.verify_dns_length && !verify_dns_length(out, true) { |
| 98 | return Err(crate::Errors::default()); |
| 99 | } |
| 100 | Ok(()) |
| 101 | } |
| 102 | Err(ProcessingError::ValidityError) => Err(crate::Errors::default()), |
| 103 | Err(ProcessingError::SinkError) => unreachable!(), |
| 104 | } |
| 105 | } |
| 106 | |
| 107 | /// [UTS 46 ToUnicode](http://www.unicode.org/reports/tr46/#ToUnicode) |
| 108 | #[allow (clippy::wrong_self_convention)] // Retain old weirdness in deprecated API |
| 109 | pub fn to_unicode(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> { |
| 110 | let mapped = map_transitional(domain, self.config.transitional_processing); |
| 111 | match Uts46::new().process( |
| 112 | mapped.as_bytes(), |
| 113 | self.config.deny_list(), |
| 114 | self.config.hyphens(), |
| 115 | ErrorPolicy::MarkErrors, |
| 116 | |_, _, _| true, |
| 117 | out, |
| 118 | None, |
| 119 | ) { |
| 120 | Ok(ProcessingSuccess::Passthrough) => { |
| 121 | out.push_str(&mapped); |
| 122 | Ok(()) |
| 123 | } |
| 124 | Ok(ProcessingSuccess::WroteToSink) => Ok(()), |
| 125 | Err(ProcessingError::ValidityError) => Err(crate::Errors::default()), |
| 126 | Err(ProcessingError::SinkError) => unreachable!(), |
| 127 | } |
| 128 | } |
| 129 | } |
| 130 | |
/// Deprecated configuration API.
///
/// Each option is set through a same-named builder method that consumes
/// the value and returns the updated one.
#[derive(Copy, Clone)]
#[must_use]
#[deprecated]
pub struct Config {
    /// Selects the STD3 ASCII deny list when `true`, an empty one otherwise.
    use_std3_ascii_rules: bool,
    /// Enables the transitional pre-mapping of the input.
    transitional_processing: bool,
    /// Enables DNS length verification of the `to_ascii` result.
    verify_dns_length: bool,
    /// Enables checking of hyphens in the first and last positions.
    check_hyphens: bool,
}
| 141 | |
| 142 | /// The defaults are that of _beStrict=false_ in the [WHATWG URL Standard](https://url.spec.whatwg.org/#idna) |
| 143 | impl Default for Config { |
| 144 | fn default() -> Self { |
| 145 | Config { |
| 146 | use_std3_ascii_rules: false, |
| 147 | transitional_processing: false, |
| 148 | check_hyphens: false, |
| 149 | // Only use for to_ascii, not to_unicode |
| 150 | verify_dns_length: false, |
| 151 | } |
| 152 | } |
| 153 | } |
| 154 | |
| 155 | impl Config { |
| 156 | /// Whether to enforce STD3 or WHATWG URL Standard ASCII deny list. |
| 157 | /// |
| 158 | /// `true` for STD3, `false` for no deny list. |
| 159 | /// |
| 160 | /// Note that `true` rejects pseudo-hosts used by various TXT record-based protocols. |
| 161 | #[inline ] |
| 162 | pub fn use_std3_ascii_rules(mut self, value: bool) -> Self { |
| 163 | self.use_std3_ascii_rules = value; |
| 164 | self |
| 165 | } |
| 166 | |
| 167 | /// Whether to enable (deprecated) transitional processing. |
| 168 | /// |
| 169 | /// Note that Firefox, Safari, and Chrome do not use transitional |
| 170 | /// processing. |
| 171 | #[inline ] |
| 172 | pub fn transitional_processing(mut self, value: bool) -> Self { |
| 173 | self.transitional_processing = value; |
| 174 | self |
| 175 | } |
| 176 | |
| 177 | /// Whether the _VerifyDNSLength_ operation should be performed |
| 178 | /// by `to_ascii`. |
| 179 | /// |
| 180 | /// For compatibility with previous behavior, even when set to `true`, |
| 181 | /// the trailing root label dot is allowed contrary to the spec. |
| 182 | #[inline ] |
| 183 | pub fn verify_dns_length(mut self, value: bool) -> Self { |
| 184 | self.verify_dns_length = value; |
| 185 | self |
| 186 | } |
| 187 | |
| 188 | /// Whether to enforce STD3 rules for hyphen placement. |
| 189 | /// |
| 190 | /// `true` to deny hyphens in the first and last positions. |
| 191 | /// `false` to not enforce hyphen placement. |
| 192 | /// |
| 193 | /// Note that for backward compatibility this is not the same as |
| 194 | /// UTS 46 _CheckHyphens_, which also disallows hyphens in the |
| 195 | /// third and fourth positions. |
| 196 | /// |
| 197 | /// Note that `true` rejects real-world names, including some GitHub user pages. |
| 198 | #[inline ] |
| 199 | pub fn check_hyphens(mut self, value: bool) -> Self { |
| 200 | self.check_hyphens = value; |
| 201 | self |
| 202 | } |
| 203 | |
| 204 | /// Obsolete method retained to ease migration. The argument must be `false`. |
| 205 | /// |
| 206 | /// Panics |
| 207 | /// |
| 208 | /// If the argument is `true`. |
| 209 | #[inline ] |
| 210 | #[allow (unused_mut)] |
| 211 | pub fn use_idna_2008_rules(mut self, value: bool) -> Self { |
| 212 | assert!(!value, "IDNA 2008 rules are no longer supported" ); |
| 213 | self |
| 214 | } |
| 215 | |
| 216 | /// Compute the deny list |
| 217 | fn deny_list(&self) -> AsciiDenyList { |
| 218 | if self.use_std3_ascii_rules { |
| 219 | AsciiDenyList::STD3 |
| 220 | } else { |
| 221 | AsciiDenyList::EMPTY |
| 222 | } |
| 223 | } |
| 224 | |
| 225 | /// Compute the hyphen mode |
| 226 | fn hyphens(&self) -> Hyphens { |
| 227 | if self.check_hyphens { |
| 228 | Hyphens::CheckFirstLast |
| 229 | } else { |
| 230 | Hyphens::Allow |
| 231 | } |
| 232 | } |
| 233 | |
| 234 | /// [UTS 46 ToASCII](http://www.unicode.org/reports/tr46/#ToASCII) |
| 235 | pub fn to_ascii(self, domain: &str) -> Result<String, Errors> { |
| 236 | let mut result = String::with_capacity(domain.len()); |
| 237 | let mut codec = Idna::new(self); |
| 238 | codec.to_ascii(domain, &mut result).map(|()| result) |
| 239 | } |
| 240 | |
| 241 | /// [UTS 46 ToUnicode](http://www.unicode.org/reports/tr46/#ToUnicode) |
| 242 | pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) { |
| 243 | let mut codec = Idna::new(self); |
| 244 | let mut out = String::with_capacity(domain.len()); |
| 245 | let result = codec.to_unicode(domain, &mut out); |
| 246 | (out, result) |
| 247 | } |
| 248 | } |
| 249 | |