1 | // Copyright The rust-url developers. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
6 | // option. This file may not be copied, modified, or distributed |
7 | // except according to those terms. |
8 | |
9 | //! This crate abstracts over a Unicode back end for the [`idna`][1] |
10 | //! crate. |
11 | //! |
12 | //! To work around the lack of [`global-features`][2] in Cargo, this |
13 | //! crate allows the top level `Cargo.lock` to choose an alternative |
14 | //! Unicode back end for the `idna` crate by pinning a version of this |
15 | //! crate. |
16 | //! |
17 | //! See the [README of the latest version][3] for more details. |
18 | //! |
19 | //! [1]: https://docs.rs/crate/idna/latest |
20 | //! [2]: https://internals.rust-lang.org/t/pre-rfc-mutually-excusive-global-features/19618 |
21 | //! [3]: https://docs.rs/crate/idna_adapter/latest |
22 | |
23 | #![no_std ] |
24 | |
25 | use icu_normalizer::properties::CanonicalCombiningClassMap; |
26 | use icu_normalizer::uts46::Uts46Mapper; |
27 | use icu_properties::maps::CodePointMapDataBorrowed; |
28 | use icu_properties::CanonicalCombiningClass; |
29 | use icu_properties::GeneralCategory; |
30 | |
31 | /// Turns a joining type into a mask for comparing with multiple type at once. |
32 | const fn joining_type_to_mask(jt: icu_properties::JoiningType) -> u32 { |
33 | 1u32 << jt.0 |
34 | } |
35 | |
36 | /// Mask for checking for both left and dual joining. |
37 | pub const LEFT_OR_DUAL_JOINING_MASK: JoiningTypeMask = JoiningTypeMask( |
38 | joining_type_to_mask(jt:icu_properties::JoiningType::LeftJoining) |
39 | | joining_type_to_mask(jt:icu_properties::JoiningType::DualJoining), |
40 | ); |
41 | |
42 | /// Mask for checking for both left and dual joining. |
43 | pub const RIGHT_OR_DUAL_JOINING_MASK: JoiningTypeMask = JoiningTypeMask( |
44 | joining_type_to_mask(jt:icu_properties::JoiningType::RightJoining) |
45 | | joining_type_to_mask(jt:icu_properties::JoiningType::DualJoining), |
46 | ); |
47 | |
48 | /// Turns a bidi class into a mask for comparing with multiple classes at once. |
49 | const fn bidi_class_to_mask(bc: icu_properties::BidiClass) -> u32 { |
50 | 1u32 << bc.0 |
51 | } |
52 | |
53 | /// Mask for checking if the domain is a bidi domain. |
54 | pub const RTL_MASK: BidiClassMask = BidiClassMask( |
55 | bidi_class_to_mask(bc:icu_properties::BidiClass::RightToLeft) |
56 | | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicLetter) |
57 | | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicNumber), |
58 | ); |
59 | |
60 | /// Mask for allowable bidi classes in the first character of a label |
61 | /// (either LTR or RTL) in a bidi domain. |
62 | pub const FIRST_BC_MASK: BidiClassMask = BidiClassMask( |
63 | bidi_class_to_mask(bc:icu_properties::BidiClass::LeftToRight) |
64 | | bidi_class_to_mask(bc:icu_properties::BidiClass::RightToLeft) |
65 | | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicLetter), |
66 | ); |
67 | |
68 | // Mask for allowable bidi classes of the last (non-Non-Spacing Mark) |
69 | // character in an LTR label in a bidi domain. |
70 | pub const LAST_LTR_MASK: BidiClassMask = BidiClassMask( |
71 | bidi_class_to_mask(bc:icu_properties::BidiClass::LeftToRight) |
72 | | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanNumber), |
73 | ); |
74 | |
75 | // Mask for allowable bidi classes of the last (non-Non-Spacing Mark) |
76 | // character in an RTL label in a bidi domain. |
77 | pub const LAST_RTL_MASK: BidiClassMask = BidiClassMask( |
78 | bidi_class_to_mask(bc:icu_properties::BidiClass::RightToLeft) |
79 | | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicLetter) |
80 | | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanNumber) |
81 | | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicNumber), |
82 | ); |
83 | |
84 | // Mask for allowable bidi classes of the middle characters in an LTR label in a bidi domain. |
85 | pub const MIDDLE_LTR_MASK: BidiClassMask = BidiClassMask( |
86 | bidi_class_to_mask(bc:icu_properties::BidiClass::LeftToRight) |
87 | | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanNumber) |
88 | | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanSeparator) |
89 | | bidi_class_to_mask(bc:icu_properties::BidiClass::CommonSeparator) |
90 | | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanTerminator) |
91 | | bidi_class_to_mask(bc:icu_properties::BidiClass::OtherNeutral) |
92 | | bidi_class_to_mask(bc:icu_properties::BidiClass::BoundaryNeutral) |
93 | | bidi_class_to_mask(bc:icu_properties::BidiClass::NonspacingMark), |
94 | ); |
95 | |
96 | // Mask for allowable bidi classes of the middle characters in an RTL label in a bidi domain. |
97 | pub const MIDDLE_RTL_MASK: BidiClassMask = BidiClassMask( |
98 | bidi_class_to_mask(bc:icu_properties::BidiClass::RightToLeft) |
99 | | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicLetter) |
100 | | bidi_class_to_mask(bc:icu_properties::BidiClass::ArabicNumber) |
101 | | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanNumber) |
102 | | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanSeparator) |
103 | | bidi_class_to_mask(bc:icu_properties::BidiClass::CommonSeparator) |
104 | | bidi_class_to_mask(bc:icu_properties::BidiClass::EuropeanTerminator) |
105 | | bidi_class_to_mask(bc:icu_properties::BidiClass::OtherNeutral) |
106 | | bidi_class_to_mask(bc:icu_properties::BidiClass::BoundaryNeutral) |
107 | | bidi_class_to_mask(bc:icu_properties::BidiClass::NonspacingMark), |
108 | ); |
109 | |
110 | /// Turns a genecal category into a mask for comparing with multiple categories at once. |
111 | const fn general_category_to_mask(gc: GeneralCategory) -> u32 { |
112 | 1 << (gc as u32) |
113 | } |
114 | |
115 | /// Mask for the disallowed general categories of the first character in a label. |
116 | const MARK_MASK: u32 = general_category_to_mask(gc:GeneralCategory::NonspacingMark) |
117 | | general_category_to_mask(gc:GeneralCategory::SpacingMark) |
118 | | general_category_to_mask(gc:GeneralCategory::EnclosingMark); |
119 | |
120 | /// Value for the Joining_Type Unicode property. |
121 | #[repr (transparent)] |
122 | #[derive (Clone, Copy)] |
123 | pub struct JoiningType(icu_properties::JoiningType); |
124 | |
125 | impl JoiningType { |
126 | /// Returns the corresponding `JoiningTypeMask`. |
127 | #[inline (always)] |
128 | pub fn to_mask(self) -> JoiningTypeMask { |
129 | JoiningTypeMask(joining_type_to_mask(self.0)) |
130 | } |
131 | |
132 | // `true` iff this value is the Transparent value. |
133 | #[inline (always)] |
134 | pub fn is_transparent(self) -> bool { |
135 | self.0 == icu_properties::JoiningType::Transparent |
136 | } |
137 | } |
138 | |
/// A mask representing potentially multiple `JoiningType`
/// values, stored as bits of a `u32`.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct JoiningTypeMask(u32);

impl JoiningTypeMask {
    /// `true` iff both masks have at least one `JoiningType` in common.
    #[inline(always)]
    pub fn intersects(self, other: JoiningTypeMask) -> bool {
        // Any bit surviving the AND is a value present in both masks.
        let shared = self.0 & other.0;
        shared != 0
    }
}
152 | |
153 | /// Value for the Bidi_Class Unicode property. |
154 | #[repr (transparent)] |
155 | #[derive (Clone, Copy)] |
156 | pub struct BidiClass(icu_properties::BidiClass); |
157 | |
158 | impl BidiClass { |
159 | /// Returns the corresponding `BidiClassMask`. |
160 | #[inline (always)] |
161 | pub fn to_mask(self) -> BidiClassMask { |
162 | BidiClassMask(bidi_class_to_mask(self.0)) |
163 | } |
164 | |
165 | /// `true` iff this value is Left_To_Right |
166 | #[inline (always)] |
167 | pub fn is_ltr(self) -> bool { |
168 | self.0 == icu_properties::BidiClass::LeftToRight |
169 | } |
170 | |
171 | /// `true` iff this value is Nonspacing_Mark |
172 | #[inline (always)] |
173 | pub fn is_nonspacing_mark(self) -> bool { |
174 | self.0 == icu_properties::BidiClass::NonspacingMark |
175 | } |
176 | |
177 | /// `true` iff this value is European_Number |
178 | #[inline (always)] |
179 | pub fn is_european_number(self) -> bool { |
180 | self.0 == icu_properties::BidiClass::EuropeanNumber |
181 | } |
182 | |
183 | /// `true` iff this value is Arabic_Number |
184 | #[inline (always)] |
185 | pub fn is_arabic_number(self) -> bool { |
186 | self.0 == icu_properties::BidiClass::ArabicNumber |
187 | } |
188 | } |
189 | |
/// A mask representing potentially multiple `BidiClass`
/// values, stored as bits of a `u32`.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct BidiClassMask(u32);

impl BidiClassMask {
    /// `true` iff both masks have at least one `BidiClass` in common.
    #[inline(always)]
    pub fn intersects(self, other: BidiClassMask) -> bool {
        // Any bit surviving the AND is a value present in both masks.
        let shared = self.0 & other.0;
        shared != 0
    }
}
203 | |
204 | /// An adapter between a Unicode back end an the `idna` crate. |
205 | pub struct Adapter { |
206 | mapper: Uts46Mapper, |
207 | canonical_combining_class: CanonicalCombiningClassMap, |
208 | general_category: CodePointMapDataBorrowed<'static, GeneralCategory>, |
209 | bidi_class: CodePointMapDataBorrowed<'static, icu_properties::BidiClass>, |
210 | joining_type: CodePointMapDataBorrowed<'static, icu_properties::JoiningType>, |
211 | } |
212 | |
213 | #[cfg (feature = "compiled_data" )] |
214 | impl Default for Adapter { |
215 | fn default() -> Self { |
216 | Self::new() |
217 | } |
218 | } |
219 | |
220 | impl Adapter { |
221 | /// Constructor using data compiled into the binary. |
222 | #[cfg (feature = "compiled_data" )] |
223 | #[inline (always)] |
224 | pub const fn new() -> Self { |
225 | Self { |
226 | mapper: Uts46Mapper::new(), |
227 | canonical_combining_class: CanonicalCombiningClassMap::new(), |
228 | general_category: icu_properties::maps::general_category(), |
229 | bidi_class: icu_properties::maps::bidi_class(), |
230 | joining_type: icu_properties::maps::joining_type(), |
231 | } |
232 | } |
233 | |
234 | /// `true` iff the Canonical_Combining_Class of `c` is Virama. |
235 | #[inline (always)] |
236 | pub fn is_virama(&self, c: char) -> bool { |
237 | self.canonical_combining_class.get(c) == CanonicalCombiningClass::Virama |
238 | } |
239 | |
240 | /// `true` iff the General_Category of `c` is Mark, i.e. any of Nonspacing_Mark, |
241 | /// Spacing_Mark, or Enclosing_Mark. |
242 | #[inline (always)] |
243 | pub fn is_mark(&self, c: char) -> bool { |
244 | (general_category_to_mask(self.general_category.get(c)) & MARK_MASK) != 0 |
245 | } |
246 | |
247 | /// Returns the Bidi_Class of `c`. |
248 | #[inline (always)] |
249 | pub fn bidi_class(&self, c: char) -> BidiClass { |
250 | BidiClass(self.bidi_class.get(c)) |
251 | } |
252 | |
253 | /// Returns the Joining_Type of `c`. |
254 | #[inline (always)] |
255 | pub fn joining_type(&self, c: char) -> JoiningType { |
256 | JoiningType(self.joining_type.get(c)) |
257 | } |
258 | |
259 | /// See the [method of the same name in `icu_normalizer`][1] for the |
260 | /// exact semantics. |
261 | /// |
262 | /// [1]: https://docs.rs/icu_normalizer/latest/icu_normalizer/uts46/struct.Uts46Mapper.html#method.map_normalize |
263 | #[inline (always)] |
264 | pub fn map_normalize<'delegate, I: Iterator<Item = char> + 'delegate>( |
265 | &'delegate self, |
266 | iter: I, |
267 | ) -> impl Iterator<Item = char> + 'delegate { |
268 | self.mapper.map_normalize(iter) |
269 | } |
270 | |
271 | /// See the [method of the same name in `icu_normalizer`][1] for the |
272 | /// exact semantics. |
273 | /// |
274 | /// [1]: https://docs.rs/icu_normalizer/latest/icu_normalizer/uts46/struct.Uts46Mapper.html#method.normalize_validate |
275 | #[inline (always)] |
276 | pub fn normalize_validate<'delegate, I: Iterator<Item = char> + 'delegate>( |
277 | &'delegate self, |
278 | iter: I, |
279 | ) -> impl Iterator<Item = char> + 'delegate { |
280 | self.mapper.normalize_validate(iter) |
281 | } |
282 | } |
283 | |