1// Copyright The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! This crate abstracts over a Unicode back end for the [`idna`][1]
10//! crate.
11//!
12//! To work around the lack of [`global-features`][2] in Cargo, this
13//! crate allows the top level `Cargo.lock` to choose an alternative
14//! Unicode back end for the `idna` crate by pinning a version of this
15//! crate.
16//!
17//! See the [README of the latest version][3] for more details.
18//!
19//! [1]: https://docs.rs/crate/idna/latest
20//! [2]: https://internals.rust-lang.org/t/pre-rfc-mutually-excusive-global-features/19618
21//! [3]: https://docs.rs/crate/idna_adapter/latest
22
23#![no_std]
24
25use icu_normalizer::properties::CanonicalCombiningClassMap;
26use icu_normalizer::uts46::Uts46Mapper;
27use icu_properties::maps::CodePointMapDataBorrowed;
28use icu_properties::CanonicalCombiningClass;
29use icu_properties::GeneralCategory;
30
31/// Turns a joining type into a mask for comparing with multiple type at once.
32const fn joining_type_to_mask(jt: icu_properties::JoiningType) -> u32 {
33 1u32 << jt.0
34}
35
36/// Mask for checking for both left and dual joining.
37pub const LEFT_OR_DUAL_JOINING_MASK: JoiningTypeMask = JoiningTypeMask(
38 joining_type_to_mask(jt:icu_properties::JoiningType::LeftJoining)
39 | joining_type_to_mask(jt:icu_properties::JoiningType::DualJoining),
40);
41
42/// Mask for checking for both left and dual joining.
43pub const RIGHT_OR_DUAL_JOINING_MASK: JoiningTypeMask = JoiningTypeMask(
44 joining_type_to_mask(jt:icu_properties::JoiningType::RightJoining)
45 | joining_type_to_mask(jt:icu_properties::JoiningType::DualJoining),
46);
47
48/// Turns a bidi class into a mask for comparing with multiple classes at once.
49const fn bidi_class_to_mask(bc: icu_properties::BidiClass) -> u32 {
50 1u32 << bc.0
51}
52
53/// Mask for checking if the domain is a bidi domain.
54pub const RTL_MASK: BidiClassMask = BidiClassMask(
55 bidi_class_to_mask(bc:icu_properties::BidiClass::RightToLeft)
56 | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicLetter)
57 | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicNumber),
58);
59
60/// Mask for allowable bidi classes in the first character of a label
61/// (either LTR or RTL) in a bidi domain.
62pub const FIRST_BC_MASK: BidiClassMask = BidiClassMask(
63 bidi_class_to_mask(bc:icu_properties::BidiClass::LeftToRight)
64 | bidi_class_to_mask(bc:icu_properties::BidiClass::RightToLeft)
65 | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicLetter),
66);
67
68// Mask for allowable bidi classes of the last (non-Non-Spacing Mark)
69// character in an LTR label in a bidi domain.
70pub const LAST_LTR_MASK: BidiClassMask = BidiClassMask(
71 bidi_class_to_mask(bc:icu_properties::BidiClass::LeftToRight)
72 | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanNumber),
73);
74
75// Mask for allowable bidi classes of the last (non-Non-Spacing Mark)
76// character in an RTL label in a bidi domain.
77pub const LAST_RTL_MASK: BidiClassMask = BidiClassMask(
78 bidi_class_to_mask(bc:icu_properties::BidiClass::RightToLeft)
79 | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicLetter)
80 | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanNumber)
81 | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicNumber),
82);
83
84// Mask for allowable bidi classes of the middle characters in an LTR label in a bidi domain.
85pub const MIDDLE_LTR_MASK: BidiClassMask = BidiClassMask(
86 bidi_class_to_mask(bc:icu_properties::BidiClass::LeftToRight)
87 | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanNumber)
88 | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanSeparator)
89 | bidi_class_to_mask(bc:icu_properties::BidiClass::CommonSeparator)
90 | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanTerminator)
91 | bidi_class_to_mask(bc:icu_properties::BidiClass::OtherNeutral)
92 | bidi_class_to_mask(bc:icu_properties::BidiClass::BoundaryNeutral)
93 | bidi_class_to_mask(bc:icu_properties::BidiClass::NonspacingMark),
94);
95
96// Mask for allowable bidi classes of the middle characters in an RTL label in a bidi domain.
97pub const MIDDLE_RTL_MASK: BidiClassMask = BidiClassMask(
98 bidi_class_to_mask(bc:icu_properties::BidiClass::RightToLeft)
99 | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicLetter)
100 | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicNumber)
101 | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanNumber)
102 | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanSeparator)
103 | bidi_class_to_mask(bc:icu_properties::BidiClass::CommonSeparator)
104 | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanTerminator)
105 | bidi_class_to_mask(bc:icu_properties::BidiClass::OtherNeutral)
106 | bidi_class_to_mask(bc:icu_properties::BidiClass::BoundaryNeutral)
107 | bidi_class_to_mask(bc:icu_properties::BidiClass::NonspacingMark),
108);
109
110/// Turns a genecal category into a mask for comparing with multiple categories at once.
111const fn general_category_to_mask(gc: GeneralCategory) -> u32 {
112 1 << (gc as u32)
113}
114
115/// Mask for the disallowed general categories of the first character in a label.
116const MARK_MASK: u32 = general_category_to_mask(gc:GeneralCategory::NonspacingMark)
117 | general_category_to_mask(gc:GeneralCategory::SpacingMark)
118 | general_category_to_mask(gc:GeneralCategory::EnclosingMark);
119
120/// Value for the Joining_Type Unicode property.
121#[repr(transparent)]
122#[derive(Clone, Copy)]
123pub struct JoiningType(icu_properties::JoiningType);
124
125impl JoiningType {
126 /// Returns the corresponding `JoiningTypeMask`.
127 #[inline(always)]
128 pub fn to_mask(self) -> JoiningTypeMask {
129 JoiningTypeMask(joining_type_to_mask(self.0))
130 }
131
132 // `true` iff this value is the Transparent value.
133 #[inline(always)]
134 pub fn is_transparent(self) -> bool {
135 self.0 == icu_properties::JoiningType::Transparent
136 }
137}
138
/// A mask representing potentially multiple `JoiningType`
/// values.
///
/// Each set bit of the wrapped `u32` stands for one joining type.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct JoiningTypeMask(u32);

impl JoiningTypeMask {
    /// `true` iff both masks have at least one `JoiningType` in common.
    ///
    /// An empty mask never intersects anything, including itself.
    #[inline(always)]
    #[must_use]
    pub const fn intersects(self, other: JoiningTypeMask) -> bool {
        (self.0 & other.0) != 0
    }
}
152
153/// Value for the Bidi_Class Unicode property.
154#[repr(transparent)]
155#[derive(Clone, Copy)]
156pub struct BidiClass(icu_properties::BidiClass);
157
158impl BidiClass {
159 /// Returns the corresponding `BidiClassMask`.
160 #[inline(always)]
161 pub fn to_mask(self) -> BidiClassMask {
162 BidiClassMask(bidi_class_to_mask(self.0))
163 }
164
165 /// `true` iff this value is Left_To_Right
166 #[inline(always)]
167 pub fn is_ltr(self) -> bool {
168 self.0 == icu_properties::BidiClass::LeftToRight
169 }
170
171 /// `true` iff this value is Nonspacing_Mark
172 #[inline(always)]
173 pub fn is_nonspacing_mark(self) -> bool {
174 self.0 == icu_properties::BidiClass::NonspacingMark
175 }
176
177 /// `true` iff this value is European_Number
178 #[inline(always)]
179 pub fn is_european_number(self) -> bool {
180 self.0 == icu_properties::BidiClass::EuropeanNumber
181 }
182
183 /// `true` iff this value is Arabic_Number
184 #[inline(always)]
185 pub fn is_arabic_number(self) -> bool {
186 self.0 == icu_properties::BidiClass::ArabicNumber
187 }
188}
189
/// A mask representing potentially multiple `BidiClass`
/// values.
///
/// Each set bit of the wrapped `u32` stands for one bidi class.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct BidiClassMask(u32);

impl BidiClassMask {
    /// `true` iff both masks have at least one `BidiClass` in common.
    ///
    /// An empty mask never intersects anything, including itself.
    #[inline(always)]
    #[must_use]
    pub const fn intersects(self, other: BidiClassMask) -> bool {
        (self.0 & other.0) != 0
    }
}
203
204/// An adapter between a Unicode back end an the `idna` crate.
205pub struct Adapter {
206 mapper: Uts46Mapper,
207 canonical_combining_class: CanonicalCombiningClassMap,
208 general_category: CodePointMapDataBorrowed<'static, GeneralCategory>,
209 bidi_class: CodePointMapDataBorrowed<'static, icu_properties::BidiClass>,
210 joining_type: CodePointMapDataBorrowed<'static, icu_properties::JoiningType>,
211}
212
/// `Default` is only available with the `compiled_data` feature, because it
/// forwards to [`Adapter::new`], which is gated on that feature.
#[cfg(feature = "compiled_data")]
impl Default for Adapter {
    fn default() -> Self {
        Adapter::new()
    }
}
219
220impl Adapter {
221 /// Constructor using data compiled into the binary.
222 #[cfg(feature = "compiled_data")]
223 #[inline(always)]
224 pub const fn new() -> Self {
225 Self {
226 mapper: Uts46Mapper::new(),
227 canonical_combining_class: CanonicalCombiningClassMap::new(),
228 general_category: icu_properties::maps::general_category(),
229 bidi_class: icu_properties::maps::bidi_class(),
230 joining_type: icu_properties::maps::joining_type(),
231 }
232 }
233
234 /// `true` iff the Canonical_Combining_Class of `c` is Virama.
235 #[inline(always)]
236 pub fn is_virama(&self, c: char) -> bool {
237 self.canonical_combining_class.get(c) == CanonicalCombiningClass::Virama
238 }
239
240 /// `true` iff the General_Category of `c` is Mark, i.e. any of Nonspacing_Mark,
241 /// Spacing_Mark, or Enclosing_Mark.
242 #[inline(always)]
243 pub fn is_mark(&self, c: char) -> bool {
244 (general_category_to_mask(self.general_category.get(c)) & MARK_MASK) != 0
245 }
246
247 /// Returns the Bidi_Class of `c`.
248 #[inline(always)]
249 pub fn bidi_class(&self, c: char) -> BidiClass {
250 BidiClass(self.bidi_class.get(c))
251 }
252
253 /// Returns the Joining_Type of `c`.
254 #[inline(always)]
255 pub fn joining_type(&self, c: char) -> JoiningType {
256 JoiningType(self.joining_type.get(c))
257 }
258
259 /// See the [method of the same name in `icu_normalizer`][1] for the
260 /// exact semantics.
261 ///
262 /// [1]: https://docs.rs/icu_normalizer/latest/icu_normalizer/uts46/struct.Uts46Mapper.html#method.map_normalize
263 #[inline(always)]
264 pub fn map_normalize<'delegate, I: Iterator<Item = char> + 'delegate>(
265 &'delegate self,
266 iter: I,
267 ) -> impl Iterator<Item = char> + 'delegate {
268 self.mapper.map_normalize(iter)
269 }
270
271 /// See the [method of the same name in `icu_normalizer`][1] for the
272 /// exact semantics.
273 ///
274 /// [1]: https://docs.rs/icu_normalizer/latest/icu_normalizer/uts46/struct.Uts46Mapper.html#method.normalize_validate
275 #[inline(always)]
276 pub fn normalize_validate<'delegate, I: Iterator<Item = char> + 'delegate>(
277 &'delegate self,
278 iter: I,
279 ) -> impl Iterator<Item = char> + 'delegate {
280 self.mapper.normalize_validate(iter)
281 }
282}
283