1 | // Copyright 2013-2014 The rust-url developers. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
6 | // option. This file may not be copied, modified, or distributed |
7 | // except according to those terms. |
8 | |
9 | //! Deprecated API for [*Unicode IDNA Compatibility Processing* |
10 | //! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/) |
11 | |
12 | #![allow (deprecated)] |
13 | |
14 | use alloc::borrow::Cow; |
15 | use alloc::string::String; |
16 | |
17 | use crate::uts46::*; |
18 | use crate::Errors; |
19 | |
/// Performs preprocessing equivalent to UTS 46 transitional processing
/// if `transitional` is `true`. If `transitional` is `false`, merely
/// lets the input pass through as-is (for call site convenience).
///
/// The mapping applied when `transitional` is `true`:
///
/// * `ß` (U+00DF) and `ẞ` (U+1E9E) are replaced with `ss`;
/// * `ς` (U+03C2) is replaced with `σ` (U+03C3);
/// * U+200C ZERO WIDTH NON-JOINER and U+200D ZERO WIDTH JOINER are removed.
///
/// Returns `Cow::Borrowed(domain)` when nothing has to change (including
/// the `transitional == false` case), so only inputs that actually contain
/// one of the characters above allocate.
///
/// The output of this function is to be passed to [`Uts46::process`].
fn map_transitional(domain: &str, transitional: bool) -> Cow<'_, str> {
    /// Characters that the mapping below rewrites or drops.
    const MAPPED: &[char] = &['ß', 'ẞ', 'ς', '\u{200C}', '\u{200D}'];

    if !transitional {
        return Cow::Borrowed(domain);
    }

    // Byte offset of the first character that needs attention. `find`
    // returns an offset on a char boundary, so `split_at` cannot panic.
    let first = match domain.find(MAPPED) {
        Some(offset) => offset,
        None => return Cow::Borrowed(domain),
    };
    let (head, tail) = domain.split_at(first);

    // Every replacement is no longer (in UTF-8 bytes) than what it replaces,
    // so the input length is an upper bound for the output length.
    let mut mapped = String::with_capacity(domain.len());
    // Everything before the first hit is copied verbatim.
    mapped.push_str(head);
    for c in tail.chars() {
        match c {
            'ß' | 'ẞ' => mapped.push_str("ss"),
            'ς' => mapped.push('σ'),
            // Joiners are dropped from the output.
            '\u{200C}' | '\u{200D}' => {}
            other => mapped.push(other),
        }
    }
    Cow::Owned(mapped)
}
63 | |
64 | /// Deprecated. Use the crate-top-level functions or [`Uts46`]. |
65 | #[derive (Default)] |
66 | #[deprecated ] |
67 | pub struct Idna { |
68 | config: Config, |
69 | } |
70 | |
71 | impl Idna { |
72 | pub fn new(config: Config) -> Self { |
73 | Self { config } |
74 | } |
75 | |
76 | /// [UTS 46 ToASCII](http://www.unicode.org/reports/tr46/#ToASCII) |
77 | #[allow (clippy::wrong_self_convention)] // Retain old weirdness in deprecated API |
78 | pub fn to_ascii(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> { |
79 | let mapped = map_transitional(domain, self.config.transitional_processing); |
80 | match Uts46::new().process( |
81 | mapped.as_bytes(), |
82 | self.config.deny_list(), |
83 | self.config.hyphens(), |
84 | ErrorPolicy::FailFast, // Old code did not appear to expect the output to be useful in the error case. |
85 | |_, _, _| false, |
86 | out, |
87 | None, |
88 | ) { |
89 | Ok(ProcessingSuccess::Passthrough) => { |
90 | if self.config.verify_dns_length && !verify_dns_length(&mapped, true) { |
91 | return Err(crate::Errors::default()); |
92 | } |
93 | out.push_str(&mapped); |
94 | Ok(()) |
95 | } |
96 | Ok(ProcessingSuccess::WroteToSink) => { |
97 | if self.config.verify_dns_length && !verify_dns_length(out, true) { |
98 | return Err(crate::Errors::default()); |
99 | } |
100 | Ok(()) |
101 | } |
102 | Err(ProcessingError::ValidityError) => Err(crate::Errors::default()), |
103 | Err(ProcessingError::SinkError) => unreachable!(), |
104 | } |
105 | } |
106 | |
107 | /// [UTS 46 ToUnicode](http://www.unicode.org/reports/tr46/#ToUnicode) |
108 | #[allow (clippy::wrong_self_convention)] // Retain old weirdness in deprecated API |
109 | pub fn to_unicode(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> { |
110 | let mapped = map_transitional(domain, self.config.transitional_processing); |
111 | match Uts46::new().process( |
112 | mapped.as_bytes(), |
113 | self.config.deny_list(), |
114 | self.config.hyphens(), |
115 | ErrorPolicy::MarkErrors, |
116 | |_, _, _| true, |
117 | out, |
118 | None, |
119 | ) { |
120 | Ok(ProcessingSuccess::Passthrough) => { |
121 | out.push_str(&mapped); |
122 | Ok(()) |
123 | } |
124 | Ok(ProcessingSuccess::WroteToSink) => Ok(()), |
125 | Err(ProcessingError::ValidityError) => Err(crate::Errors::default()), |
126 | Err(ProcessingError::SinkError) => unreachable!(), |
127 | } |
128 | } |
129 | } |
130 | |
/// Deprecated configuration API.
///
/// A small `Copy` bag of switches; each one is set through the builder-style
/// method of the same name.
#[derive(Clone, Copy)]
#[must_use]
#[deprecated]
pub struct Config {
    /// Selects the STD3 ASCII deny list instead of the empty one.
    use_std3_ascii_rules: bool,
    /// Enables the transitional pre-mapping of the input.
    transitional_processing: bool,
    /// Makes `to_ascii` verify DNS length limits on its result.
    verify_dns_length: bool,
    /// Enables the first/last-position hyphen check.
    check_hyphens: bool,
}
141 | |
142 | /// The defaults are that of _beStrict=false_ in the [WHATWG URL Standard](https://url.spec.whatwg.org/#idna) |
143 | impl Default for Config { |
144 | fn default() -> Self { |
145 | Config { |
146 | use_std3_ascii_rules: false, |
147 | transitional_processing: false, |
148 | check_hyphens: false, |
149 | // Only use for to_ascii, not to_unicode |
150 | verify_dns_length: false, |
151 | } |
152 | } |
153 | } |
154 | |
155 | impl Config { |
156 | /// Whether to enforce STD3 or WHATWG URL Standard ASCII deny list. |
157 | /// |
158 | /// `true` for STD3, `false` for no deny list. |
159 | /// |
160 | /// Note that `true` rejects pseudo-hosts used by various TXT record-based protocols. |
161 | #[inline ] |
162 | pub fn use_std3_ascii_rules(mut self, value: bool) -> Self { |
163 | self.use_std3_ascii_rules = value; |
164 | self |
165 | } |
166 | |
167 | /// Whether to enable (deprecated) transitional processing. |
168 | /// |
169 | /// Note that Firefox, Safari, and Chrome do not use transitional |
170 | /// processing. |
171 | #[inline ] |
172 | pub fn transitional_processing(mut self, value: bool) -> Self { |
173 | self.transitional_processing = value; |
174 | self |
175 | } |
176 | |
177 | /// Whether the _VerifyDNSLength_ operation should be performed |
178 | /// by `to_ascii`. |
179 | /// |
180 | /// For compatibility with previous behavior, even when set to `true`, |
181 | /// the trailing root label dot is allowed contrary to the spec. |
182 | #[inline ] |
183 | pub fn verify_dns_length(mut self, value: bool) -> Self { |
184 | self.verify_dns_length = value; |
185 | self |
186 | } |
187 | |
188 | /// Whether to enforce STD3 rules for hyphen placement. |
189 | /// |
190 | /// `true` to deny hyphens in the first and last positions. |
191 | /// `false` to not enforce hyphen placement. |
192 | /// |
193 | /// Note that for backward compatibility this is not the same as |
194 | /// UTS 46 _CheckHyphens_, which also disallows hyphens in the |
195 | /// third and fourth positions. |
196 | /// |
197 | /// Note that `true` rejects real-world names, including some GitHub user pages. |
198 | #[inline ] |
199 | pub fn check_hyphens(mut self, value: bool) -> Self { |
200 | self.check_hyphens = value; |
201 | self |
202 | } |
203 | |
204 | /// Obsolete method retained to ease migration. The argument must be `false`. |
205 | /// |
206 | /// Panics |
207 | /// |
208 | /// If the argument is `true`. |
209 | #[inline ] |
210 | #[allow (unused_mut)] |
211 | pub fn use_idna_2008_rules(mut self, value: bool) -> Self { |
212 | assert!(!value, "IDNA 2008 rules are no longer supported" ); |
213 | self |
214 | } |
215 | |
216 | /// Compute the deny list |
217 | fn deny_list(&self) -> AsciiDenyList { |
218 | if self.use_std3_ascii_rules { |
219 | AsciiDenyList::STD3 |
220 | } else { |
221 | AsciiDenyList::EMPTY |
222 | } |
223 | } |
224 | |
225 | /// Compute the hyphen mode |
226 | fn hyphens(&self) -> Hyphens { |
227 | if self.check_hyphens { |
228 | Hyphens::CheckFirstLast |
229 | } else { |
230 | Hyphens::Allow |
231 | } |
232 | } |
233 | |
234 | /// [UTS 46 ToASCII](http://www.unicode.org/reports/tr46/#ToASCII) |
235 | pub fn to_ascii(self, domain: &str) -> Result<String, Errors> { |
236 | let mut result = String::with_capacity(domain.len()); |
237 | let mut codec = Idna::new(self); |
238 | codec.to_ascii(domain, &mut result).map(|()| result) |
239 | } |
240 | |
241 | /// [UTS 46 ToUnicode](http://www.unicode.org/reports/tr46/#ToUnicode) |
242 | pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) { |
243 | let mut codec = Idna::new(self); |
244 | let mut out = String::with_capacity(domain.len()); |
245 | let result = codec.to_unicode(domain, &mut out); |
246 | (out, result) |
247 | } |
248 | } |
249 | |