| 1 | // Copyright 2016 The rust-url developers. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 6 | // option. This file may not be copied, modified, or distributed |
| 7 | // except according to those terms. |
| 8 | |
| 9 | //! This Rust crate implements IDNA |
| 10 | //! [per the WHATWG URL Standard](https://url.spec.whatwg.org/#idna). |
| 11 | //! |
| 12 | //! It also exposes the underlying algorithms from [*Unicode IDNA Compatibility Processing* |
| 13 | //! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/) |
| 14 | //! and [Punycode (RFC 3492)](https://tools.ietf.org/html/rfc3492). |
| 15 | //! |
| 16 | //! Quoting from [UTS #46’s introduction](http://www.unicode.org/reports/tr46/#Introduction): |
| 17 | //! |
| 18 | //! > Initially, domain names were restricted to ASCII characters. |
| 19 | //! > A system was introduced in 2003 for internationalized domain names (IDN). |
| 20 | //! > This system is called Internationalizing Domain Names for Applications, |
| 21 | //! > or IDNA2003 for short. |
| 22 | //! > This mechanism supports IDNs by means of a client software transformation |
| 23 | //! > into a format known as Punycode. |
| 24 | //! > A revision of IDNA was approved in 2010 (IDNA2008). |
| 25 | //! > This revision has a number of incompatibilities with IDNA2003. |
| 26 | //! > |
| 27 | //! > The incompatibilities force implementers of client software, |
| 28 | //! > such as browsers and emailers, |
| 29 | //! > to face difficult choices during the transition period |
| 30 | //! > as registries shift from IDNA2003 to IDNA2008. |
| 31 | //! > This document specifies a mechanism |
| 32 | //! > that minimizes the impact of this transition for client software, |
| 33 | //! > allowing client software to access domains that are valid under either system. |
| 34 | #![no_std ] |
| 35 | |
| 36 | // For forwards compatibility |
| 37 | #[cfg (feature = "std" )] |
| 38 | extern crate std; |
| 39 | |
| 40 | extern crate alloc; |
| 41 | |
| 42 | #[cfg (not(feature = "alloc" ))] |
| 43 | compile_error!("the `alloc` feature must be enabled" ); |
| 44 | |
| 45 | // Avoid a breaking change if in the future there's a use case for |
| 46 | // having a Bring-Your-Own-ICU4X-Data constructor for `Uts46` and |
| 47 | // not also having compiled data in the binary. |
| 48 | #[cfg (not(feature = "compiled_data" ))] |
| 49 | compile_error!("the `compiled_data` feature must be enabled" ); |
| 50 | |
| 51 | use alloc::borrow::Cow; |
| 52 | use alloc::string::String; |
| 53 | pub use uts46::AsciiDenyList; |
| 54 | use uts46::Uts46; |
| 55 | |
| 56 | mod deprecated; |
| 57 | pub mod punycode; |
| 58 | pub mod uts46; |
| 59 | |
| 60 | #[allow (deprecated)] |
| 61 | pub use crate::deprecated::{Config, Idna}; |
| 62 | |
/// Error value signalling that UTS #46 processing did not succeed.
///
/// The struct has no fields, so it reports only *that* processing failed,
/// not which check failed. It is `#[non_exhaustive]`, so code outside this
/// crate cannot build it with a struct literal; use [`Default`] instead.
#[non_exhaustive]
#[derive(Debug, Default)]
pub struct Errors {}
| 67 | |
| 68 | impl From<Errors> for Result<(), Errors> { |
| 69 | fn from(e: Errors) -> Result<(), Errors> { |
| 70 | Err(e) |
| 71 | } |
| 72 | } |
| 73 | |
| 74 | #[cfg (feature = "std" )] |
| 75 | impl std::error::Error for Errors {} |
| 76 | |
| 77 | #[cfg (not(feature = "std" ))] |
| 78 | impl core::error::Error for Errors {} |
| 79 | |
| 80 | impl core::fmt::Display for Errors { |
| 81 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
| 82 | core::fmt::Debug::fmt(self, f) |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm; |
| 87 | /// version returning a `Cow`. |
| 88 | /// |
| 89 | /// Most applications should be using this function rather than the sibling functions, |
| 90 | /// and most applications should pass [`AsciiDenyList::URL`] as the second argument. |
| 91 | /// Passing [`AsciiDenyList::URL`] as the second argument makes this function also |
| 92 | /// perform the [forbidden domain code point](https://url.spec.whatwg.org/#forbidden-domain-code-point) |
| 93 | /// check in addition to the [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) |
| 94 | /// algorithm. |
| 95 | /// |
| 96 | /// Returns the ASCII representation a domain name, |
| 97 | /// normalizing characters (upper-case to lower-case and other kinds of equivalence) |
| 98 | /// and using Punycode as necessary. |
| 99 | /// |
| 100 | /// This process may fail. |
| 101 | /// |
| 102 | /// If you have a `&str` instead of `&[u8]`, just call `.to_bytes()` on it before |
| 103 | /// passing it to this function. It's still preferable to use this function over |
| 104 | /// the sibling functions that take `&str`. |
| 105 | pub fn domain_to_ascii_cow( |
| 106 | domain: &[u8], |
| 107 | ascii_deny_list: AsciiDenyList, |
| 108 | ) -> Result<Cow<'_, str>, Errors> { |
| 109 | Uts46::new().to_ascii( |
| 110 | domain, |
| 111 | ascii_deny_list, |
| 112 | uts46::Hyphens::Allow, |
| 113 | uts46::DnsLength::Ignore, |
| 114 | ) |
| 115 | } |
| 116 | |
| 117 | /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm; |
| 118 | /// version returning `String` and no ASCII deny list (i.e. _UseSTD3ASCIIRules=false_). |
| 119 | /// |
| 120 | /// This function exists for backward-compatibility. Consider using [`domain_to_ascii_cow`] |
| 121 | /// instead. |
| 122 | /// |
| 123 | /// Return the ASCII representation a domain name, |
| 124 | /// normalizing characters (upper-case to lower-case and other kinds of equivalence) |
| 125 | /// and using Punycode as necessary. |
| 126 | /// |
| 127 | /// This process may fail. |
| 128 | pub fn domain_to_ascii(domain: &str) -> Result<String, Errors> { |
| 129 | domain_to_ascii_cow(domain.as_bytes(), AsciiDenyList::EMPTY).map(|cow: Cow<'_, str>| cow.into_owned()) |
| 130 | } |
| 131 | |
| 132 | /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm, |
| 133 | /// with the `beStrict` flag set. |
| 134 | /// |
| 135 | /// Note that this rejects various real-world names including: |
| 136 | /// * YouTube CDN nodes |
| 137 | /// * Some GitHub user pages |
| 138 | /// * Pseudo-hosts used by various TXT record-based protocols. |
| 139 | pub fn domain_to_ascii_strict(domain: &str) -> Result<String, Errors> { |
| 140 | Uts46::new() |
| 141 | .to_ascii( |
| 142 | domain.as_bytes(), |
| 143 | uts46::AsciiDenyList::STD3, |
| 144 | uts46::Hyphens::Check, |
| 145 | uts46::DnsLength::Verify, |
| 146 | ) |
| 147 | .map(|cow: Cow<'_, str>| cow.into_owned()) |
| 148 | } |
| 149 | |
| 150 | /// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm; |
| 151 | /// version returning `String` and no ASCII deny list (i.e. _UseSTD3ASCIIRules=false_). |
| 152 | /// |
| 153 | /// This function exists for backward-compatibility. Consider using [`Uts46::to_user_interface`] |
| 154 | /// or [`Uts46::to_unicode`]. |
| 155 | /// |
| 156 | /// Return the Unicode representation of a domain name, |
| 157 | /// normalizing characters (upper-case to lower-case and other kinds of equivalence) |
| 158 | /// and decoding Punycode as necessary. |
| 159 | /// |
| 160 | /// If the second item of the tuple indicates an error, the first item of the tuple |
| 161 | /// denotes errors using the REPLACEMENT CHARACTERs in order to be able to illustrate |
| 162 | /// errors to the user. When the second item of the return tuple signals an error, |
| 163 | /// the first item of the tuple must not be used in a network protocol. |
| 164 | pub fn domain_to_unicode(domain: &str) -> (String, Result<(), Errors>) { |
| 165 | let (cow: Cow<'_, str>, result: Result<(), Errors>) = Uts46::new().to_unicode( |
| 166 | domain_name:domain.as_bytes(), |
| 167 | ascii_deny_list:uts46::AsciiDenyList::EMPTY, |
| 168 | uts46::Hyphens::Allow, |
| 169 | ); |
| 170 | (cow.into_owned(), result) |
| 171 | } |
| 172 | |