1// Copyright 2013-2014 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! Deprecated API for [*Unicode IDNA Compatibility Processing*
10//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
11
12#![allow(deprecated)]
13
14use alloc::borrow::Cow;
15use alloc::string::String;
16
17use crate::uts46::*;
18use crate::Errors;
19
/// Performs preprocessing equivalent to UTS 46 transitional processing
/// when `transitional` is `true`: `ß` and `ẞ` are replaced with `ss`,
/// `ς` is replaced with `σ`, and U+200C / U+200D are dropped.
/// When `transitional` is `false`, the input is handed back unchanged
/// (for call site convenience).
///
/// The input is returned borrowed unless it contains at least one of the
/// affected characters; only in that case is a new `String` allocated.
///
/// The output of this function is to be passed to [`Uts46::process`].
fn map_transitional(domain: &str, transitional: bool) -> Cow<'_, str> {
    if !transitional {
        return Cow::Borrowed(domain);
    }
    // Characters whose handling differs under transitional processing.
    let needs_mapping = |c: char| matches!(c, 'ß' | 'ẞ' | 'ς' | '\u{200C}' | '\u{200D}');
    match domain.find(needs_mapping) {
        // Nothing to rewrite, so avoid allocating.
        None => Cow::Borrowed(domain),
        Some(split) => {
            // Everything before the first affected character is copied verbatim;
            // only the remainder has to be examined character by character.
            let (untouched, rest) = domain.split_at(split);
            let mut mapped = String::with_capacity(domain.len());
            mapped.push_str(untouched);
            for c in rest.chars() {
                match c {
                    'ß' | 'ẞ' => mapped.push_str("ss"),
                    'ς' => mapped.push('σ'),
                    '\u{200C}' | '\u{200D}' => {}
                    other => mapped.push(other),
                }
            }
            Cow::Owned(mapped)
        }
    }
}
63
64/// Deprecated. Use the crate-top-level functions or [`Uts46`].
65#[derive(Default)]
66#[deprecated]
67pub struct Idna {
68 config: Config,
69}
70
71impl Idna {
72 pub fn new(config: Config) -> Self {
73 Self { config }
74 }
75
76 /// [UTS 46 ToASCII](http://www.unicode.org/reports/tr46/#ToASCII)
77 #[allow(clippy::wrong_self_convention)] // Retain old weirdness in deprecated API
78 pub fn to_ascii(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
79 let mapped = map_transitional(domain, self.config.transitional_processing);
80 match Uts46::new().process(
81 mapped.as_bytes(),
82 self.config.deny_list(),
83 self.config.hyphens(),
84 ErrorPolicy::FailFast, // Old code did not appear to expect the output to be useful in the error case.
85 |_, _, _| false,
86 out,
87 None,
88 ) {
89 Ok(ProcessingSuccess::Passthrough) => {
90 if self.config.verify_dns_length && !verify_dns_length(&mapped, true) {
91 return Err(crate::Errors::default());
92 }
93 out.push_str(&mapped);
94 Ok(())
95 }
96 Ok(ProcessingSuccess::WroteToSink) => {
97 if self.config.verify_dns_length && !verify_dns_length(out, true) {
98 return Err(crate::Errors::default());
99 }
100 Ok(())
101 }
102 Err(ProcessingError::ValidityError) => Err(crate::Errors::default()),
103 Err(ProcessingError::SinkError) => unreachable!(),
104 }
105 }
106
107 /// [UTS 46 ToUnicode](http://www.unicode.org/reports/tr46/#ToUnicode)
108 #[allow(clippy::wrong_self_convention)] // Retain old weirdness in deprecated API
109 pub fn to_unicode(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
110 let mapped = map_transitional(domain, self.config.transitional_processing);
111 match Uts46::new().process(
112 mapped.as_bytes(),
113 self.config.deny_list(),
114 self.config.hyphens(),
115 ErrorPolicy::MarkErrors,
116 |_, _, _| true,
117 out,
118 None,
119 ) {
120 Ok(ProcessingSuccess::Passthrough) => {
121 out.push_str(&mapped);
122 Ok(())
123 }
124 Ok(ProcessingSuccess::WroteToSink) => Ok(()),
125 Err(ProcessingError::ValidityError) => Err(crate::Errors::default()),
126 Err(ProcessingError::SinkError) => unreachable!(),
127 }
128 }
129}
130
/// Deprecated configuration API.
#[derive(Clone, Copy)]
#[must_use]
#[deprecated]
pub struct Config {
    /// `true` selects the STD3 ASCII deny list; `false` selects no deny list.
    use_std3_ascii_rules: bool,
    /// `true` enables the (deprecated) transitional processing mappings.
    transitional_processing: bool,
    /// `true` makes `to_ascii` perform the DNS length check.
    verify_dns_length: bool,
    /// `true` denies hyphens in the first and last positions.
    check_hyphens: bool,
}

/// The defaults are those of _beStrict=false_ in the [WHATWG URL Standard](https://url.spec.whatwg.org/#idna)
impl Default for Config {
    /// Returns a configuration with every option switched off.
    fn default() -> Self {
        Self {
            use_std3_ascii_rules: false,
            transitional_processing: false,
            // Only use for to_ascii, not to_unicode
            verify_dns_length: false,
            check_hyphens: false,
        }
    }
}
154
155impl Config {
156 /// Whether to enforce STD3 or WHATWG URL Standard ASCII deny list.
157 ///
158 /// `true` for STD3, `false` for no deny list.
159 ///
160 /// Note that `true` rejects pseudo-hosts used by various TXT record-based protocols.
161 #[inline]
162 pub fn use_std3_ascii_rules(mut self, value: bool) -> Self {
163 self.use_std3_ascii_rules = value;
164 self
165 }
166
167 /// Whether to enable (deprecated) transitional processing.
168 ///
169 /// Note that Firefox, Safari, and Chrome do not use transitional
170 /// processing.
171 #[inline]
172 pub fn transitional_processing(mut self, value: bool) -> Self {
173 self.transitional_processing = value;
174 self
175 }
176
177 /// Whether the _VerifyDNSLength_ operation should be performed
178 /// by `to_ascii`.
179 ///
180 /// For compatibility with previous behavior, even when set to `true`,
181 /// the trailing root label dot is allowed contrary to the spec.
182 #[inline]
183 pub fn verify_dns_length(mut self, value: bool) -> Self {
184 self.verify_dns_length = value;
185 self
186 }
187
188 /// Whether to enforce STD3 rules for hyphen placement.
189 ///
190 /// `true` to deny hyphens in the first and last positions.
191 /// `false` to not enforce hyphen placement.
192 ///
193 /// Note that for backward compatibility this is not the same as
194 /// UTS 46 _CheckHyphens_, which also disallows hyphens in the
195 /// third and fourth positions.
196 ///
197 /// Note that `true` rejects real-world names, including some GitHub user pages.
198 #[inline]
199 pub fn check_hyphens(mut self, value: bool) -> Self {
200 self.check_hyphens = value;
201 self
202 }
203
204 /// Obsolete method retained to ease migration. The argument must be `false`.
205 ///
206 /// Panics
207 ///
208 /// If the argument is `true`.
209 #[inline]
210 #[allow(unused_mut)]
211 pub fn use_idna_2008_rules(mut self, value: bool) -> Self {
212 assert!(!value, "IDNA 2008 rules are no longer supported");
213 self
214 }
215
216 /// Compute the deny list
217 fn deny_list(&self) -> AsciiDenyList {
218 if self.use_std3_ascii_rules {
219 AsciiDenyList::STD3
220 } else {
221 AsciiDenyList::EMPTY
222 }
223 }
224
225 /// Compute the hyphen mode
226 fn hyphens(&self) -> Hyphens {
227 if self.check_hyphens {
228 Hyphens::CheckFirstLast
229 } else {
230 Hyphens::Allow
231 }
232 }
233
234 /// [UTS 46 ToASCII](http://www.unicode.org/reports/tr46/#ToASCII)
235 pub fn to_ascii(self, domain: &str) -> Result<String, Errors> {
236 let mut result = String::with_capacity(domain.len());
237 let mut codec = Idna::new(self);
238 codec.to_ascii(domain, &mut result).map(|()| result)
239 }
240
241 /// [UTS 46 ToUnicode](http://www.unicode.org/reports/tr46/#ToUnicode)
242 pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) {
243 let mut codec = Idna::new(self);
244 let mut out = String::with_capacity(domain.len());
245 let result = codec.to_unicode(domain, &mut out);
246 (out, result)
247 }
248}
249