| 1 | // Copyright The rust-url developers. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 6 | // option. This file may not be copied, modified, or distributed |
| 7 | // except according to those terms. |
| 8 | |
| 9 | //! This crate abstracts over a Unicode back end for the [`idna`][1] |
| 10 | //! crate. |
| 11 | //! |
| 12 | //! To work around the lack of [`global-features`][2] in Cargo, this |
| 13 | //! crate allows the top level `Cargo.lock` to choose an alternative |
| 14 | //! Unicode back end for the `idna` crate by pinning a version of this |
| 15 | //! crate. |
| 16 | //! |
| 17 | //! See the [README of the latest version][3] for more details. |
| 18 | //! |
| 19 | //! [1]: https://docs.rs/crate/idna/latest |
| 20 | //! [2]: https://internals.rust-lang.org/t/pre-rfc-mutually-excusive-global-features/19618 |
| 21 | //! [3]: https://docs.rs/crate/idna_adapter/latest |
| 22 | |
| 23 | #![no_std ] |
| 24 | |
| 25 | use icu_normalizer::properties::CanonicalCombiningClassMap; |
| 26 | use icu_normalizer::uts46::Uts46Mapper; |
| 27 | use icu_properties::maps::CodePointMapDataBorrowed; |
| 28 | use icu_properties::CanonicalCombiningClass; |
| 29 | use icu_properties::GeneralCategory; |
| 30 | |
| 31 | /// Turns a joining type into a mask for comparing with multiple type at once. |
| 32 | const fn joining_type_to_mask(jt: icu_properties::JoiningType) -> u32 { |
| 33 | 1u32 << jt.0 |
| 34 | } |
| 35 | |
| 36 | /// Mask for checking for both left and dual joining. |
| 37 | pub const LEFT_OR_DUAL_JOINING_MASK: JoiningTypeMask = JoiningTypeMask( |
| 38 | joining_type_to_mask(jt:icu_properties::JoiningType::LeftJoining) |
| 39 | | joining_type_to_mask(jt:icu_properties::JoiningType::DualJoining), |
| 40 | ); |
| 41 | |
| 42 | /// Mask for checking for both left and dual joining. |
| 43 | pub const RIGHT_OR_DUAL_JOINING_MASK: JoiningTypeMask = JoiningTypeMask( |
| 44 | joining_type_to_mask(jt:icu_properties::JoiningType::RightJoining) |
| 45 | | joining_type_to_mask(jt:icu_properties::JoiningType::DualJoining), |
| 46 | ); |
| 47 | |
| 48 | /// Turns a bidi class into a mask for comparing with multiple classes at once. |
| 49 | const fn bidi_class_to_mask(bc: icu_properties::BidiClass) -> u32 { |
| 50 | 1u32 << bc.0 |
| 51 | } |
| 52 | |
| 53 | /// Mask for checking if the domain is a bidi domain. |
| 54 | pub const RTL_MASK: BidiClassMask = BidiClassMask( |
| 55 | bidi_class_to_mask(bc:icu_properties::BidiClass::RightToLeft) |
| 56 | | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicLetter) |
| 57 | | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicNumber), |
| 58 | ); |
| 59 | |
| 60 | /// Mask for allowable bidi classes in the first character of a label |
| 61 | /// (either LTR or RTL) in a bidi domain. |
| 62 | pub const FIRST_BC_MASK: BidiClassMask = BidiClassMask( |
| 63 | bidi_class_to_mask(bc:icu_properties::BidiClass::LeftToRight) |
| 64 | | bidi_class_to_mask(bc:icu_properties::BidiClass::RightToLeft) |
| 65 | | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicLetter), |
| 66 | ); |
| 67 | |
| 68 | // Mask for allowable bidi classes of the last (non-Non-Spacing Mark) |
| 69 | // character in an LTR label in a bidi domain. |
| 70 | pub const LAST_LTR_MASK: BidiClassMask = BidiClassMask( |
| 71 | bidi_class_to_mask(bc:icu_properties::BidiClass::LeftToRight) |
| 72 | | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanNumber), |
| 73 | ); |
| 74 | |
| 75 | // Mask for allowable bidi classes of the last (non-Non-Spacing Mark) |
| 76 | // character in an RTL label in a bidi domain. |
| 77 | pub const LAST_RTL_MASK: BidiClassMask = BidiClassMask( |
| 78 | bidi_class_to_mask(bc:icu_properties::BidiClass::RightToLeft) |
| 79 | | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicLetter) |
| 80 | | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanNumber) |
| 81 | | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicNumber), |
| 82 | ); |
| 83 | |
| 84 | // Mask for allowable bidi classes of the middle characters in an LTR label in a bidi domain. |
| 85 | pub const MIDDLE_LTR_MASK: BidiClassMask = BidiClassMask( |
| 86 | bidi_class_to_mask(bc:icu_properties::BidiClass::LeftToRight) |
| 87 | | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanNumber) |
| 88 | | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanSeparator) |
| 89 | | bidi_class_to_mask(bc:icu_properties::BidiClass::CommonSeparator) |
| 90 | | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanTerminator) |
| 91 | | bidi_class_to_mask(bc:icu_properties::BidiClass::OtherNeutral) |
| 92 | | bidi_class_to_mask(bc:icu_properties::BidiClass::BoundaryNeutral) |
| 93 | | bidi_class_to_mask(bc:icu_properties::BidiClass::NonspacingMark), |
| 94 | ); |
| 95 | |
| 96 | // Mask for allowable bidi classes of the middle characters in an RTL label in a bidi domain. |
| 97 | pub const MIDDLE_RTL_MASK: BidiClassMask = BidiClassMask( |
| 98 | bidi_class_to_mask(bc:icu_properties::BidiClass::RightToLeft) |
| 99 | | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicLetter) |
| 100 | | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicNumber) |
| 101 | | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanNumber) |
| 102 | | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanSeparator) |
| 103 | | bidi_class_to_mask(bc:icu_properties::BidiClass::CommonSeparator) |
| 104 | | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanTerminator) |
| 105 | | bidi_class_to_mask(bc:icu_properties::BidiClass::OtherNeutral) |
| 106 | | bidi_class_to_mask(bc:icu_properties::BidiClass::BoundaryNeutral) |
| 107 | | bidi_class_to_mask(bc:icu_properties::BidiClass::NonspacingMark), |
| 108 | ); |
| 109 | |
| 110 | /// Turns a genecal category into a mask for comparing with multiple categories at once. |
| 111 | const fn general_category_to_mask(gc: GeneralCategory) -> u32 { |
| 112 | 1 << (gc as u32) |
| 113 | } |
| 114 | |
| 115 | /// Mask for the disallowed general categories of the first character in a label. |
| 116 | const MARK_MASK: u32 = general_category_to_mask(gc:GeneralCategory::NonspacingMark) |
| 117 | | general_category_to_mask(gc:GeneralCategory::SpacingMark) |
| 118 | | general_category_to_mask(gc:GeneralCategory::EnclosingMark); |
| 119 | |
| 120 | /// Value for the Joining_Type Unicode property. |
| 121 | #[repr (transparent)] |
| 122 | #[derive (Clone, Copy)] |
| 123 | pub struct JoiningType(icu_properties::JoiningType); |
| 124 | |
| 125 | impl JoiningType { |
| 126 | /// Returns the corresponding `JoiningTypeMask`. |
| 127 | #[inline (always)] |
| 128 | pub fn to_mask(self) -> JoiningTypeMask { |
| 129 | JoiningTypeMask(joining_type_to_mask(self.0)) |
| 130 | } |
| 131 | |
| 132 | // `true` iff this value is the Transparent value. |
| 133 | #[inline (always)] |
| 134 | pub fn is_transparent(self) -> bool { |
| 135 | self.0 == icu_properties::JoiningType::Transparent |
| 136 | } |
| 137 | } |
| 138 | |
/// A mask representing potentially multiple `JoiningType`
/// values, stored as one bit per joining type.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct JoiningTypeMask(u32);

impl JoiningTypeMask {
    /// `true` iff both masks have at least one `JoiningType` in common.
    #[inline(always)]
    pub fn intersects(self, other: JoiningTypeMask) -> bool {
        let shared_bits = self.0 & other.0;
        shared_bits != 0
    }
}
| 152 | |
| 153 | /// Value for the Bidi_Class Unicode property. |
| 154 | #[repr (transparent)] |
| 155 | #[derive (Clone, Copy)] |
| 156 | pub struct BidiClass(icu_properties::BidiClass); |
| 157 | |
| 158 | impl BidiClass { |
| 159 | /// Returns the corresponding `BidiClassMask`. |
| 160 | #[inline (always)] |
| 161 | pub fn to_mask(self) -> BidiClassMask { |
| 162 | BidiClassMask(bidi_class_to_mask(self.0)) |
| 163 | } |
| 164 | |
| 165 | /// `true` iff this value is Left_To_Right |
| 166 | #[inline (always)] |
| 167 | pub fn is_ltr(self) -> bool { |
| 168 | self.0 == icu_properties::BidiClass::LeftToRight |
| 169 | } |
| 170 | |
| 171 | /// `true` iff this value is Nonspacing_Mark |
| 172 | #[inline (always)] |
| 173 | pub fn is_nonspacing_mark(self) -> bool { |
| 174 | self.0 == icu_properties::BidiClass::NonspacingMark |
| 175 | } |
| 176 | |
| 177 | /// `true` iff this value is European_Number |
| 178 | #[inline (always)] |
| 179 | pub fn is_european_number(self) -> bool { |
| 180 | self.0 == icu_properties::BidiClass::EuropeanNumber |
| 181 | } |
| 182 | |
| 183 | /// `true` iff this value is Arabic_Number |
| 184 | #[inline (always)] |
| 185 | pub fn is_arabic_number(self) -> bool { |
| 186 | self.0 == icu_properties::BidiClass::ArabicNumber |
| 187 | } |
| 188 | } |
| 189 | |
/// A mask representing potentially multiple `BidiClass`
/// values, stored as one bit per bidi class.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct BidiClassMask(u32);

impl BidiClassMask {
    /// `true` iff both masks have at least one `BidiClass` in common.
    #[inline(always)]
    pub fn intersects(self, other: BidiClassMask) -> bool {
        let shared_bits = self.0 & other.0;
        shared_bits != 0
    }
}
| 203 | |
| 204 | /// An adapter between a Unicode back end an the `idna` crate. |
| 205 | pub struct Adapter { |
| 206 | mapper: Uts46Mapper, |
| 207 | canonical_combining_class: CanonicalCombiningClassMap, |
| 208 | general_category: CodePointMapDataBorrowed<'static, GeneralCategory>, |
| 209 | bidi_class: CodePointMapDataBorrowed<'static, icu_properties::BidiClass>, |
| 210 | joining_type: CodePointMapDataBorrowed<'static, icu_properties::JoiningType>, |
| 211 | } |
| 212 | |
/// Builds the adapter from compiled data; equivalent to `Adapter::new()`.
#[cfg(feature = "compiled_data")]
impl Default for Adapter {
    fn default() -> Self {
        Adapter::new()
    }
}
| 219 | |
| 220 | impl Adapter { |
| 221 | /// Constructor using data compiled into the binary. |
| 222 | #[cfg (feature = "compiled_data" )] |
| 223 | #[inline (always)] |
| 224 | pub const fn new() -> Self { |
| 225 | Self { |
| 226 | mapper: Uts46Mapper::new(), |
| 227 | canonical_combining_class: CanonicalCombiningClassMap::new(), |
| 228 | general_category: icu_properties::maps::general_category(), |
| 229 | bidi_class: icu_properties::maps::bidi_class(), |
| 230 | joining_type: icu_properties::maps::joining_type(), |
| 231 | } |
| 232 | } |
| 233 | |
| 234 | /// `true` iff the Canonical_Combining_Class of `c` is Virama. |
| 235 | #[inline (always)] |
| 236 | pub fn is_virama(&self, c: char) -> bool { |
| 237 | self.canonical_combining_class.get(c) == CanonicalCombiningClass::Virama |
| 238 | } |
| 239 | |
| 240 | /// `true` iff the General_Category of `c` is Mark, i.e. any of Nonspacing_Mark, |
| 241 | /// Spacing_Mark, or Enclosing_Mark. |
| 242 | #[inline (always)] |
| 243 | pub fn is_mark(&self, c: char) -> bool { |
| 244 | (general_category_to_mask(self.general_category.get(c)) & MARK_MASK) != 0 |
| 245 | } |
| 246 | |
| 247 | /// Returns the Bidi_Class of `c`. |
| 248 | #[inline (always)] |
| 249 | pub fn bidi_class(&self, c: char) -> BidiClass { |
| 250 | BidiClass(self.bidi_class.get(c)) |
| 251 | } |
| 252 | |
| 253 | /// Returns the Joining_Type of `c`. |
| 254 | #[inline (always)] |
| 255 | pub fn joining_type(&self, c: char) -> JoiningType { |
| 256 | JoiningType(self.joining_type.get(c)) |
| 257 | } |
| 258 | |
| 259 | /// See the [method of the same name in `icu_normalizer`][1] for the |
| 260 | /// exact semantics. |
| 261 | /// |
| 262 | /// [1]: https://docs.rs/icu_normalizer/latest/icu_normalizer/uts46/struct.Uts46Mapper.html#method.map_normalize |
| 263 | #[inline (always)] |
| 264 | pub fn map_normalize<'delegate, I: Iterator<Item = char> + 'delegate>( |
| 265 | &'delegate self, |
| 266 | iter: I, |
| 267 | ) -> impl Iterator<Item = char> + 'delegate { |
| 268 | self.mapper.map_normalize(iter) |
| 269 | } |
| 270 | |
| 271 | /// See the [method of the same name in `icu_normalizer`][1] for the |
| 272 | /// exact semantics. |
| 273 | /// |
| 274 | /// [1]: https://docs.rs/icu_normalizer/latest/icu_normalizer/uts46/struct.Uts46Mapper.html#method.normalize_validate |
| 275 | #[inline (always)] |
| 276 | pub fn normalize_validate<'delegate, I: Iterator<Item = char> + 'delegate>( |
| 277 | &'delegate self, |
| 278 | iter: I, |
| 279 | ) -> impl Iterator<Item = char> + 'delegate { |
| 280 | self.mapper.normalize_validate(iter) |
| 281 | } |
| 282 | } |
| 283 | |