1 | // Copyright 2016 The rust-url developers. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
6 | // option. This file may not be copied, modified, or distributed |
7 | // except according to those terms. |
8 | |
9 | //! This Rust crate implements IDNA |
10 | //! [per the WHATWG URL Standard](https://url.spec.whatwg.org/#idna). |
11 | //! |
12 | //! It also exposes the underlying algorithms from [*Unicode IDNA Compatibility Processing* |
13 | //! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/) |
14 | //! and [Punycode (RFC 3492)](https://tools.ietf.org/html/rfc3492). |
15 | //! |
16 | //! Quoting from [UTS #46’s introduction](http://www.unicode.org/reports/tr46/#Introduction): |
17 | //! |
18 | //! > Initially, domain names were restricted to ASCII characters. |
19 | //! > A system was introduced in 2003 for internationalized domain names (IDN). |
20 | //! > This system is called Internationalizing Domain Names for Applications, |
21 | //! > or IDNA2003 for short. |
22 | //! > This mechanism supports IDNs by means of a client software transformation |
23 | //! > into a format known as Punycode. |
24 | //! > A revision of IDNA was approved in 2010 (IDNA2008). |
25 | //! > This revision has a number of incompatibilities with IDNA2003. |
26 | //! > |
27 | //! > The incompatibilities force implementers of client software, |
28 | //! > such as browsers and emailers, |
29 | //! > to face difficult choices during the transition period |
30 | //! > as registries shift from IDNA2003 to IDNA2008. |
31 | //! > This document specifies a mechanism |
32 | //! > that minimizes the impact of this transition for client software, |
33 | //! > allowing client software to access domains that are valid under either system. |
34 | #![no_std ] |
35 | |
36 | // For forwards compatibility |
37 | #[cfg (feature = "std" )] |
38 | extern crate std; |
39 | |
40 | extern crate alloc; |
41 | |
42 | #[cfg (not(feature = "alloc" ))] |
43 | compile_error!("the `alloc` feature must be enabled" ); |
44 | |
45 | // Avoid a breaking change if in the future there's a use case for |
46 | // having a Bring-Your-Own-ICU4X-Data constructor for `Uts46` and |
47 | // not also having compiled data in the binary. |
48 | #[cfg (not(feature = "compiled_data" ))] |
49 | compile_error!("the `compiled_data` feature must be enabled" ); |
50 | |
51 | use alloc::borrow::Cow; |
52 | use alloc::string::String; |
53 | pub use uts46::AsciiDenyList; |
54 | use uts46::Uts46; |
55 | |
56 | mod deprecated; |
57 | pub mod punycode; |
58 | pub mod uts46; |
59 | |
60 | #[allow (deprecated)] |
61 | pub use crate::deprecated::{Config, Idna}; |
62 | |
/// Error type signalling that UTS #46 processing encountered one or more errors.
///
/// The struct has no fields, so it only reports *that* processing failed,
/// not which check failed. Because it is `#[non_exhaustive]`, code outside
/// this crate cannot build it with a struct literal; `Errors::default()`
/// is available instead.
#[derive(Debug, Default)]
#[non_exhaustive]
pub struct Errors {}
67 | |
68 | impl From<Errors> for Result<(), Errors> { |
69 | fn from(e: Errors) -> Result<(), Errors> { |
70 | Err(e) |
71 | } |
72 | } |
73 | |
74 | #[cfg (feature = "std" )] |
75 | impl std::error::Error for Errors {} |
76 | |
77 | #[cfg (not(feature = "std" ))] |
78 | impl core::error::Error for Errors {} |
79 | |
80 | impl core::fmt::Display for Errors { |
81 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
82 | core::fmt::Debug::fmt(self, f) |
83 | } |
84 | } |
85 | |
86 | /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm; |
87 | /// version returning a `Cow`. |
88 | /// |
89 | /// Most applications should be using this function rather than the sibling functions, |
90 | /// and most applications should pass [`AsciiDenyList::URL`] as the second argument. |
91 | /// Passing [`AsciiDenyList::URL`] as the second argument makes this function also |
92 | /// perform the [forbidden domain code point](https://url.spec.whatwg.org/#forbidden-domain-code-point) |
93 | /// check in addition to the [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) |
94 | /// algorithm. |
95 | /// |
96 | /// Returns the ASCII representation a domain name, |
97 | /// normalizing characters (upper-case to lower-case and other kinds of equivalence) |
98 | /// and using Punycode as necessary. |
99 | /// |
100 | /// This process may fail. |
101 | /// |
102 | /// If you have a `&str` instead of `&[u8]`, just call `.to_bytes()` on it before |
103 | /// passing it to this function. It's still preferable to use this function over |
104 | /// the sibling functions that take `&str`. |
105 | pub fn domain_to_ascii_cow( |
106 | domain: &[u8], |
107 | ascii_deny_list: AsciiDenyList, |
108 | ) -> Result<Cow<'_, str>, Errors> { |
109 | Uts46::new().to_ascii( |
110 | domain, |
111 | ascii_deny_list, |
112 | uts46::Hyphens::Allow, |
113 | uts46::DnsLength::Ignore, |
114 | ) |
115 | } |
116 | |
117 | /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm; |
118 | /// version returning `String` and no ASCII deny list (i.e. _UseSTD3ASCIIRules=false_). |
119 | /// |
120 | /// This function exists for backward-compatibility. Consider using [`domain_to_ascii_cow`] |
121 | /// instead. |
122 | /// |
123 | /// Return the ASCII representation a domain name, |
124 | /// normalizing characters (upper-case to lower-case and other kinds of equivalence) |
125 | /// and using Punycode as necessary. |
126 | /// |
127 | /// This process may fail. |
128 | pub fn domain_to_ascii(domain: &str) -> Result<String, Errors> { |
129 | domain_to_ascii_cow(domain.as_bytes(), AsciiDenyList::EMPTY).map(|cow: Cow<'_, str>| cow.into_owned()) |
130 | } |
131 | |
132 | /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm, |
133 | /// with the `beStrict` flag set. |
134 | /// |
135 | /// Note that this rejects various real-world names including: |
136 | /// * YouTube CDN nodes |
137 | /// * Some GitHub user pages |
138 | /// * Pseudo-hosts used by various TXT record-based protocols. |
139 | pub fn domain_to_ascii_strict(domain: &str) -> Result<String, Errors> { |
140 | Uts46::new() |
141 | .to_ascii( |
142 | domain.as_bytes(), |
143 | uts46::AsciiDenyList::STD3, |
144 | uts46::Hyphens::Check, |
145 | uts46::DnsLength::Verify, |
146 | ) |
147 | .map(|cow: Cow<'_, str>| cow.into_owned()) |
148 | } |
149 | |
150 | /// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm; |
151 | /// version returning `String` and no ASCII deny list (i.e. _UseSTD3ASCIIRules=false_). |
152 | /// |
153 | /// This function exists for backward-compatibility. Consider using [`Uts46::to_user_interface`] |
154 | /// or [`Uts46::to_unicode`]. |
155 | /// |
156 | /// Return the Unicode representation of a domain name, |
157 | /// normalizing characters (upper-case to lower-case and other kinds of equivalence) |
158 | /// and decoding Punycode as necessary. |
159 | /// |
160 | /// If the second item of the tuple indicates an error, the first item of the tuple |
161 | /// denotes errors using the REPLACEMENT CHARACTERs in order to be able to illustrate |
162 | /// errors to the user. When the second item of the return tuple signals an error, |
163 | /// the first item of the tuple must not be used in a network protocol. |
164 | pub fn domain_to_unicode(domain: &str) -> (String, Result<(), Errors>) { |
165 | let (cow: Cow<'_, str>, result: Result<(), Errors>) = Uts46::new().to_unicode( |
166 | domain_name:domain.as_bytes(), |
167 | ascii_deny_list:uts46::AsciiDenyList::EMPTY, |
168 | uts46::Hyphens::Allow, |
169 | ); |
170 | (cow.into_owned(), result) |
171 | } |
172 | |