1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | //! A collection of property definitions shared across contexts |
6 | //! (ex: representing trie values). |
7 | //! |
8 | //! This module defines enums / newtypes for enumerated properties. |
9 | //! String properties are represented as newtypes if their |
10 | //! values represent code points. |
11 | |
12 | use crate::provider::{names::*, *}; |
13 | use crate::PropertiesError; |
14 | use core::marker::PhantomData; |
15 | use icu_collections::codepointtrie::TrieValue; |
16 | use icu_provider::prelude::*; |
17 | use zerovec::ule::VarULE; |
18 | |
19 | #[cfg (feature = "serde" )] |
20 | use serde::{Deserialize, Serialize}; |
21 | |
22 | /// Private marker type for PropertyValueNameToEnumMapper |
23 | /// to work for all properties at once |
24 | #[derive (Clone, Copy, PartialEq, Eq, Hash, Debug)] |
25 | pub(crate) struct ErasedNameToEnumMapV1Marker; |
26 | impl DataMarker for ErasedNameToEnumMapV1Marker { |
27 | type Yokeable = PropertyValueNameToEnumMapV1<'static>; |
28 | } |
29 | |
30 | /// A struct capable of looking up a property value from a string name. |
31 | /// Access its data by calling [`Self::as_borrowed()`] and using the methods on |
32 | /// [`PropertyValueNameToEnumMapperBorrowed`]. |
33 | /// |
34 | /// The name can be a short name (`Lu`), a long name(`Uppercase_Letter`), |
35 | /// or an alias. |
36 | /// |
37 | /// Property names can be looked up using "strict" matching (looking for a name |
38 | /// that matches exactly), or "loose matching", where the name is allowed to deviate |
39 | /// in terms of ASCII casing, whitespace, underscores, and hyphens. |
40 | /// |
41 | /// # Example |
42 | /// |
43 | /// ``` |
44 | /// use icu::properties::GeneralCategory; |
45 | /// |
46 | /// let lookup = GeneralCategory::name_to_enum_mapper(); |
47 | /// // short name for value |
48 | /// assert_eq!( |
49 | /// lookup.get_strict("Lu" ), |
50 | /// Some(GeneralCategory::UppercaseLetter) |
51 | /// ); |
52 | /// assert_eq!( |
53 | /// lookup.get_strict("Pd" ), |
54 | /// Some(GeneralCategory::DashPunctuation) |
55 | /// ); |
56 | /// // long name for value |
57 | /// assert_eq!( |
58 | /// lookup.get_strict("Uppercase_Letter" ), |
59 | /// Some(GeneralCategory::UppercaseLetter) |
60 | /// ); |
61 | /// assert_eq!( |
62 | /// lookup.get_strict("Dash_Punctuation" ), |
63 | /// Some(GeneralCategory::DashPunctuation) |
64 | /// ); |
65 | /// // name has incorrect casing |
66 | /// assert_eq!(lookup.get_strict("dashpunctuation" ), None); |
67 | /// // loose matching of name |
68 | /// assert_eq!( |
69 | /// lookup.get_loose("dash-punctuation" ), |
70 | /// Some(GeneralCategory::DashPunctuation) |
71 | /// ); |
72 | /// // fake property |
73 | /// assert_eq!(lookup.get_strict("Animated_Gif" ), None); |
74 | /// ``` |
75 | #[derive (Debug)] |
76 | pub struct PropertyValueNameToEnumMapper<T> { |
77 | map: DataPayload<ErasedNameToEnumMapV1Marker>, |
78 | markers: PhantomData<fn() -> T>, |
79 | } |
80 | |
81 | /// A borrowed wrapper around property value name-to-enum data, returned by |
82 | /// [`PropertyValueNameToEnumMapper::as_borrowed()`]. More efficient to query. |
83 | #[derive (Debug, Copy, Clone)] |
84 | pub struct PropertyValueNameToEnumMapperBorrowed<'a, T> { |
85 | map: &'a PropertyValueNameToEnumMapV1<'a>, |
86 | markers: PhantomData<fn() -> T>, |
87 | } |
88 | |
89 | impl<T: TrieValue> PropertyValueNameToEnumMapper<T> { |
90 | /// Construct a borrowed version of this type that can be queried. |
91 | /// |
92 | /// This avoids a potential small underlying cost per API call (like `get_strict()`) by consolidating it |
93 | /// up front. |
94 | #[inline ] |
95 | pub fn as_borrowed(&self) -> PropertyValueNameToEnumMapperBorrowed<'_, T> { |
96 | PropertyValueNameToEnumMapperBorrowed { |
97 | map: self.map.get(), |
98 | markers: PhantomData, |
99 | } |
100 | } |
101 | |
102 | pub(crate) fn from_data<M>(data: DataPayload<M>) -> Self |
103 | where |
104 | M: DataMarker<Yokeable = PropertyValueNameToEnumMapV1<'static>>, |
105 | { |
106 | Self { |
107 | map: data.cast(), |
108 | markers: PhantomData, |
109 | } |
110 | } |
111 | |
112 | #[doc (hidden)] // used by FFI code |
113 | pub fn erase(self) -> PropertyValueNameToEnumMapper<u16> { |
114 | PropertyValueNameToEnumMapper { |
115 | map: self.map.cast(), |
116 | markers: PhantomData, |
117 | } |
118 | } |
119 | } |
120 | |
121 | impl<T: TrieValue> PropertyValueNameToEnumMapperBorrowed<'_, T> { |
122 | /// Get the property value as a u16, doing a strict search looking for |
123 | /// names that match exactly |
124 | /// |
125 | /// # Example |
126 | /// |
127 | /// ``` |
128 | /// use icu::properties::GeneralCategory; |
129 | /// |
130 | /// let lookup = GeneralCategory::name_to_enum_mapper(); |
131 | /// assert_eq!( |
132 | /// lookup.get_strict_u16("Lu" ), |
133 | /// Some(GeneralCategory::UppercaseLetter as u16) |
134 | /// ); |
135 | /// assert_eq!( |
136 | /// lookup.get_strict_u16("Uppercase_Letter" ), |
137 | /// Some(GeneralCategory::UppercaseLetter as u16) |
138 | /// ); |
139 | /// // does not do loose matching |
140 | /// assert_eq!(lookup.get_strict_u16("UppercaseLetter" ), None); |
141 | /// ``` |
142 | #[inline ] |
143 | pub fn get_strict_u16(&self, name: &str) -> Option<u16> { |
144 | get_strict_u16(self.map, name) |
145 | } |
146 | |
147 | /// Get the property value as a `T`, doing a strict search looking for |
148 | /// names that match exactly |
149 | /// |
150 | /// # Example |
151 | /// |
152 | /// ``` |
153 | /// use icu::properties::GeneralCategory; |
154 | /// |
155 | /// let lookup = GeneralCategory::name_to_enum_mapper(); |
156 | /// assert_eq!( |
157 | /// lookup.get_strict("Lu" ), |
158 | /// Some(GeneralCategory::UppercaseLetter) |
159 | /// ); |
160 | /// assert_eq!( |
161 | /// lookup.get_strict("Uppercase_Letter" ), |
162 | /// Some(GeneralCategory::UppercaseLetter) |
163 | /// ); |
164 | /// // does not do loose matching |
165 | /// assert_eq!(lookup.get_strict("UppercaseLetter" ), None); |
166 | /// ``` |
167 | #[inline ] |
168 | pub fn get_strict(&self, name: &str) -> Option<T> { |
169 | T::try_from_u32(self.get_strict_u16(name)? as u32).ok() |
170 | } |
171 | |
172 | /// Get the property value as a u16, doing a loose search looking for |
173 | /// names that match case-insensitively, ignoring ASCII hyphens, underscores, and |
174 | /// whitespaces. |
175 | /// |
176 | /// # Example |
177 | /// |
178 | /// ``` |
179 | /// use icu::properties::GeneralCategory; |
180 | /// |
181 | /// let lookup = GeneralCategory::name_to_enum_mapper(); |
182 | /// assert_eq!( |
183 | /// lookup.get_loose_u16("Lu" ), |
184 | /// Some(GeneralCategory::UppercaseLetter as u16) |
185 | /// ); |
186 | /// assert_eq!( |
187 | /// lookup.get_loose_u16("Uppercase_Letter" ), |
188 | /// Some(GeneralCategory::UppercaseLetter as u16) |
189 | /// ); |
190 | /// // does do loose matching |
191 | /// assert_eq!( |
192 | /// lookup.get_loose_u16("UppercaseLetter" ), |
193 | /// Some(GeneralCategory::UppercaseLetter as u16) |
194 | /// ); |
195 | /// ``` |
196 | #[inline ] |
197 | pub fn get_loose_u16(&self, name: &str) -> Option<u16> { |
198 | get_loose_u16(self.map, name) |
199 | } |
200 | |
201 | /// Get the property value as a `T`, doing a loose search looking for |
202 | /// names that match case-insensitively, ignoring ASCII hyphens, underscores, and |
203 | /// whitespaces. |
204 | /// |
205 | /// # Example |
206 | /// |
207 | /// ``` |
208 | /// use icu::properties::GeneralCategory; |
209 | /// |
210 | /// let lookup = GeneralCategory::name_to_enum_mapper(); |
211 | /// assert_eq!( |
212 | /// lookup.get_loose("Lu" ), |
213 | /// Some(GeneralCategory::UppercaseLetter) |
214 | /// ); |
215 | /// assert_eq!( |
216 | /// lookup.get_loose("Uppercase_Letter" ), |
217 | /// Some(GeneralCategory::UppercaseLetter) |
218 | /// ); |
219 | /// // does do loose matching |
220 | /// assert_eq!( |
221 | /// lookup.get_loose("UppercaseLetter" ), |
222 | /// Some(GeneralCategory::UppercaseLetter) |
223 | /// ); |
224 | /// ``` |
225 | #[inline ] |
226 | pub fn get_loose(&self, name: &str) -> Option<T> { |
227 | T::try_from_u32(self.get_loose_u16(name)? as u32).ok() |
228 | } |
229 | } |
230 | |
231 | impl<T: TrieValue> PropertyValueNameToEnumMapperBorrowed<'static, T> { |
232 | /// Cheaply converts a [`PropertyValueNameToEnumMapperBorrowed<'static>`] into a [`PropertyValueNameToEnumMapper`]. |
233 | /// |
234 | /// Note: Due to branching and indirection, using [`PropertyValueNameToEnumMapper`] might inhibit some |
235 | /// compile-time optimizations that are possible with [`PropertyValueNameToEnumMapperBorrowed`]. |
236 | pub const fn static_to_owned(self) -> PropertyValueNameToEnumMapper<T> { |
237 | PropertyValueNameToEnumMapper { |
238 | map: DataPayload::from_static_ref(self.map), |
239 | markers: PhantomData, |
240 | } |
241 | } |
242 | } |
243 | |
244 | /// Avoid monomorphizing multiple copies of this function |
245 | fn get_strict_u16(payload: &PropertyValueNameToEnumMapV1<'_>, name: &str) -> Option<u16> { |
246 | // NormalizedPropertyName has no invariants so this should be free, but |
247 | // avoid introducing a panic regardless |
248 | let name: &NormalizedPropertyNameStr = NormalizedPropertyNameStr::parse_byte_slice(name.as_bytes()).ok()?; |
249 | payload.map.get_copied(key:name) |
250 | } |
251 | |
252 | /// Avoid monomorphizing multiple copies of this function |
253 | fn get_loose_u16(payload: &PropertyValueNameToEnumMapV1<'_>, name: &str) -> Option<u16> { |
254 | // NormalizedPropertyName has no invariants so this should be free, but |
255 | // avoid introducing a panic regardless |
256 | let name: &NormalizedPropertyNameStr = NormalizedPropertyNameStr::parse_byte_slice(name.as_bytes()).ok()?; |
257 | payload.map.get_copied_by(|p: &NormalizedPropertyNameStr| p.cmp_loose(name)) |
258 | } |
259 | |
260 | /// Private marker type for PropertyEnumToValueNameSparseMapper |
261 | /// to work for all properties at once |
262 | #[derive (Clone, Copy, PartialEq, Eq, Hash, Debug)] |
263 | pub(crate) struct ErasedEnumToValueNameSparseMapV1Marker; |
264 | impl DataMarker for ErasedEnumToValueNameSparseMapV1Marker { |
265 | type Yokeable = PropertyEnumToValueNameSparseMapV1<'static>; |
266 | } |
267 | |
268 | /// A struct capable of looking up a property name from a value |
269 | /// Access its data by calling [`Self::as_borrowed()`] and using the methods on |
270 | /// [`PropertyEnumToValueNameSparseMapperBorrowed`]. |
271 | /// |
272 | /// This mapper is used for properties with sparse values, like [`CanonicalCombiningClass`]. |
273 | /// It may be obtained using methods like [`CanonicalCombiningClass::get_enum_to_long_name_mapper()`]. |
274 | /// |
275 | /// The name returned may be a short (`"KV"`) or long (`"Kana_Voicing"`) name, depending |
276 | /// on the constructor used. |
277 | /// |
278 | /// # Example |
279 | /// |
280 | /// ``` |
281 | /// use icu::properties::CanonicalCombiningClass; |
282 | /// |
283 | /// let lookup = CanonicalCombiningClass::enum_to_long_name_mapper(); |
284 | /// assert_eq!( |
285 | /// lookup.get(CanonicalCombiningClass::KanaVoicing), |
286 | /// Some("Kana_Voicing" ) |
287 | /// ); |
288 | /// assert_eq!( |
289 | /// lookup.get(CanonicalCombiningClass::AboveLeft), |
290 | /// Some("Above_Left" ) |
291 | /// ); |
292 | /// ``` |
293 | #[derive (Debug)] |
294 | pub struct PropertyEnumToValueNameSparseMapper<T> { |
295 | map: DataPayload<ErasedEnumToValueNameSparseMapV1Marker>, |
296 | markers: PhantomData<fn(T) -> ()>, |
297 | } |
298 | |
299 | /// A borrowed wrapper around property value name-to-enum data, returned by |
300 | /// [`PropertyEnumToValueNameSparseMapper::as_borrowed()`]. More efficient to query. |
301 | #[derive (Debug, Copy, Clone)] |
302 | pub struct PropertyEnumToValueNameSparseMapperBorrowed<'a, T> { |
303 | map: &'a PropertyEnumToValueNameSparseMapV1<'a>, |
304 | markers: PhantomData<fn(T) -> ()>, |
305 | } |
306 | |
307 | impl<T: TrieValue> PropertyEnumToValueNameSparseMapper<T> { |
308 | /// Construct a borrowed version of this type that can be queried. |
309 | /// |
310 | /// This avoids a potential small underlying cost per API call (like `get_static()`) by consolidating it |
311 | /// up front. |
312 | #[inline ] |
313 | pub fn as_borrowed(&self) -> PropertyEnumToValueNameSparseMapperBorrowed<'_, T> { |
314 | PropertyEnumToValueNameSparseMapperBorrowed { |
315 | map: self.map.get(), |
316 | markers: PhantomData, |
317 | } |
318 | } |
319 | |
320 | /// Construct a new one from loaded data |
321 | /// |
322 | /// Typically it is preferable to use methods on individual property value types |
323 | /// (like [`Script::TBD()`]) instead. |
324 | pub(crate) fn from_data<M>(data: DataPayload<M>) -> Self |
325 | where |
326 | M: DataMarker<Yokeable = PropertyEnumToValueNameSparseMapV1<'static>>, |
327 | { |
328 | Self { |
329 | map: data.cast(), |
330 | markers: PhantomData, |
331 | } |
332 | } |
333 | } |
334 | |
335 | impl<T: TrieValue> PropertyEnumToValueNameSparseMapperBorrowed<'_, T> { |
336 | /// Get the property name given a value |
337 | /// |
338 | /// # Example |
339 | /// |
340 | /// ```rust |
341 | /// use icu::properties::CanonicalCombiningClass; |
342 | /// |
343 | /// let lookup = CanonicalCombiningClass::enum_to_long_name_mapper(); |
344 | /// assert_eq!( |
345 | /// lookup.get(CanonicalCombiningClass::KanaVoicing), |
346 | /// Some("Kana_Voicing" ) |
347 | /// ); |
348 | /// assert_eq!( |
349 | /// lookup.get(CanonicalCombiningClass::AboveLeft), |
350 | /// Some("Above_Left" ) |
351 | /// ); |
352 | /// ``` |
353 | #[inline ] |
354 | pub fn get(&self, property: T) -> Option<&str> { |
355 | let prop: u16 = u16::try_from(property.to_u32()).ok()?; |
356 | self.map.map.get(&prop) |
357 | } |
358 | } |
359 | |
360 | impl<T: TrieValue> PropertyEnumToValueNameSparseMapperBorrowed<'static, T> { |
361 | /// Cheaply converts a [`PropertyEnumToValueNameSparseMapperBorrowed<'static>`] into a [`PropertyEnumToValueNameSparseMapper`]. |
362 | /// |
363 | /// Note: Due to branching and indirection, using [`PropertyEnumToValueNameSparseMapper`] might inhibit some |
364 | /// compile-time optimizations that are possible with [`PropertyEnumToValueNameSparseMapperBorrowed`]. |
365 | pub const fn static_to_owned(self) -> PropertyEnumToValueNameSparseMapper<T> { |
366 | PropertyEnumToValueNameSparseMapper { |
367 | map: DataPayload::from_static_ref(self.map), |
368 | markers: PhantomData, |
369 | } |
370 | } |
371 | } |
372 | |
373 | /// Private marker type for PropertyEnumToValueNameLinearMapper |
374 | /// to work for all properties at once |
375 | #[derive (Clone, Copy, PartialEq, Eq, Hash, Debug)] |
376 | pub(crate) struct ErasedEnumToValueNameLinearMapV1Marker; |
377 | impl DataMarker for ErasedEnumToValueNameLinearMapV1Marker { |
378 | type Yokeable = PropertyEnumToValueNameLinearMapV1<'static>; |
379 | } |
380 | |
381 | /// A struct capable of looking up a property name from a value |
382 | /// Access its data by calling [`Self::as_borrowed()`] and using the methods on |
383 | /// [`PropertyEnumToValueNameLinearMapperBorrowed`]. |
384 | /// |
385 | /// This mapper is used for properties with sequential values, like [`GeneralCategory`]. |
386 | /// It may be obtained using methods like [`GeneralCategory::get_enum_to_long_name_mapper()`]. |
387 | /// |
388 | /// The name returned may be a short (`"Lu"`) or long (`"Uppercase_Letter"`) name, depending |
389 | /// on the constructor used. |
390 | /// |
391 | /// # Example |
392 | /// |
393 | /// ``` |
394 | /// use icu::properties::GeneralCategory; |
395 | /// |
396 | /// let lookup = GeneralCategory::enum_to_long_name_mapper(); |
397 | /// assert_eq!( |
398 | /// lookup.get(GeneralCategory::UppercaseLetter), |
399 | /// Some("Uppercase_Letter" ) |
400 | /// ); |
401 | /// assert_eq!( |
402 | /// lookup.get(GeneralCategory::DashPunctuation), |
403 | /// Some("Dash_Punctuation" ) |
404 | /// ); |
405 | /// ``` |
406 | #[derive (Debug)] |
407 | pub struct PropertyEnumToValueNameLinearMapper<T> { |
408 | map: DataPayload<ErasedEnumToValueNameLinearMapV1Marker>, |
409 | markers: PhantomData<fn(T) -> ()>, |
410 | } |
411 | |
412 | /// A borrowed wrapper around property value name-to-enum data, returned by |
413 | /// [`PropertyEnumToValueNameLinearMapper::as_borrowed()`]. More efficient to query. |
414 | #[derive (Debug, Copy, Clone)] |
415 | pub struct PropertyEnumToValueNameLinearMapperBorrowed<'a, T> { |
416 | map: &'a PropertyEnumToValueNameLinearMapV1<'a>, |
417 | markers: PhantomData<fn(T) -> ()>, |
418 | } |
419 | |
420 | impl<T: TrieValue> PropertyEnumToValueNameLinearMapper<T> { |
421 | /// Construct a borrowed version of this type that can be queried. |
422 | /// |
423 | /// This avoids a potential small underlying cost per API call (like `get_static()`) by consolidating it |
424 | /// up front. |
425 | #[inline ] |
426 | pub fn as_borrowed(&self) -> PropertyEnumToValueNameLinearMapperBorrowed<'_, T> { |
427 | PropertyEnumToValueNameLinearMapperBorrowed { |
428 | map: self.map.get(), |
429 | markers: PhantomData, |
430 | } |
431 | } |
432 | |
433 | /// Construct a new one from loaded data |
434 | /// |
435 | /// Typically it is preferable to use methods on individual property value types |
436 | /// (like [`Script::TBD()`]) instead. |
437 | pub(crate) fn from_data<M>(data: DataPayload<M>) -> Self |
438 | where |
439 | M: DataMarker<Yokeable = PropertyEnumToValueNameLinearMapV1<'static>>, |
440 | { |
441 | Self { |
442 | map: data.cast(), |
443 | markers: PhantomData, |
444 | } |
445 | } |
446 | } |
447 | |
448 | impl<T: TrieValue> PropertyEnumToValueNameLinearMapperBorrowed<'_, T> { |
449 | /// Get the property name given a value |
450 | /// |
451 | /// # Example |
452 | /// |
453 | /// ```rust |
454 | /// use icu::properties::GeneralCategory; |
455 | /// |
456 | /// let lookup = GeneralCategory::enum_to_short_name_mapper(); |
457 | /// assert_eq!(lookup.get(GeneralCategory::UppercaseLetter), Some("Lu" )); |
458 | /// assert_eq!(lookup.get(GeneralCategory::DashPunctuation), Some("Pd" )); |
459 | /// ``` |
460 | #[inline ] |
461 | pub fn get(&self, property: T) -> Option<&str> { |
462 | let prop: usize = usize::try_from(property.to_u32()).ok()?; |
463 | self.map.map.get(idx:prop).filter(|x: &&str| !x.is_empty()) |
464 | } |
465 | } |
466 | |
467 | impl<T: TrieValue> PropertyEnumToValueNameLinearMapperBorrowed<'static, T> { |
468 | /// Cheaply converts a [`PropertyEnumToValueNameLinearMapperBorrowed<'static>`] into a [`PropertyEnumToValueNameLinearMapper`]. |
469 | /// |
470 | /// Note: Due to branching and indirection, using [`PropertyEnumToValueNameLinearMapper`] might inhibit some |
471 | /// compile-time optimizations that are possible with [`PropertyEnumToValueNameLinearMapperBorrowed`]. |
472 | pub const fn static_to_owned(self) -> PropertyEnumToValueNameLinearMapper<T> { |
473 | PropertyEnumToValueNameLinearMapper { |
474 | map: DataPayload::from_static_ref(self.map), |
475 | markers: PhantomData, |
476 | } |
477 | } |
478 | } |
479 | |
480 | /// Private marker type for PropertyEnumToValueNameLinearTiny4Mapper |
481 | /// to work for all properties at once |
482 | #[derive (Clone, Copy, PartialEq, Eq, Hash, Debug)] |
483 | pub(crate) struct ErasedEnumToValueNameLinearTiny4MapV1Marker; |
484 | impl DataMarker for ErasedEnumToValueNameLinearTiny4MapV1Marker { |
485 | type Yokeable = PropertyEnumToValueNameLinearTiny4MapV1<'static>; |
486 | } |
487 | |
488 | /// A struct capable of looking up a property name from a value |
489 | /// Access its data by calling [`Self::as_borrowed()`] and using the methods on |
490 | /// [`PropertyEnumToValueNameLinearTiny4MapperBorrowed`]. |
491 | /// |
492 | /// This mapper is used for properties with sequential values and names with four or fewer characters, |
493 | /// like the [`Script`] short names. |
494 | /// It may be obtained using methods like [`Script::get_enum_to_short_name_mapper()`]. |
495 | /// |
496 | /// # Example |
497 | /// |
498 | /// ``` |
499 | /// use icu::properties::Script; |
500 | /// use tinystr::tinystr; |
501 | /// |
502 | /// let lookup = Script::enum_to_short_name_mapper(); |
503 | /// assert_eq!(lookup.get(Script::Brahmi), Some(tinystr!(4, "Brah" ))); |
504 | /// assert_eq!(lookup.get(Script::Hangul), Some(tinystr!(4, "Hang" ))); |
505 | /// ``` |
506 | #[derive (Debug)] |
507 | pub struct PropertyEnumToValueNameLinearTiny4Mapper<T> { |
508 | map: DataPayload<ErasedEnumToValueNameLinearTiny4MapV1Marker>, |
509 | markers: PhantomData<fn(T) -> ()>, |
510 | } |
511 | |
512 | /// A borrowed wrapper around property value name-to-enum data, returned by |
513 | /// [`PropertyEnumToValueNameLinearTiny4Mapper::as_borrowed()`]. More efficient to query. |
514 | #[derive (Debug, Copy, Clone)] |
515 | pub struct PropertyEnumToValueNameLinearTiny4MapperBorrowed<'a, T> { |
516 | map: &'a PropertyEnumToValueNameLinearTiny4MapV1<'a>, |
517 | markers: PhantomData<fn(T) -> ()>, |
518 | } |
519 | |
520 | impl<T: TrieValue> PropertyEnumToValueNameLinearTiny4Mapper<T> { |
521 | /// Construct a borrowed version of this type that can be queried. |
522 | /// |
523 | /// This avoids a potential small underlying cost per API call (like `get_static()`) by consolidating it |
524 | /// up front. |
525 | #[inline ] |
526 | pub fn as_borrowed(&self) -> PropertyEnumToValueNameLinearTiny4MapperBorrowed<'_, T> { |
527 | PropertyEnumToValueNameLinearTiny4MapperBorrowed { |
528 | map: self.map.get(), |
529 | markers: PhantomData, |
530 | } |
531 | } |
532 | |
533 | /// Construct a new one from loaded data |
534 | /// |
535 | /// Typically it is preferable to use methods on individual property value types |
536 | /// (like [`Script::TBD()`]) instead. |
537 | pub(crate) fn from_data<M>(data: DataPayload<M>) -> Self |
538 | where |
539 | M: DataMarker<Yokeable = PropertyEnumToValueNameLinearTiny4MapV1<'static>>, |
540 | { |
541 | Self { |
542 | map: data.cast(), |
543 | markers: PhantomData, |
544 | } |
545 | } |
546 | } |
547 | |
548 | impl<T: TrieValue> PropertyEnumToValueNameLinearTiny4MapperBorrowed<'_, T> { |
549 | /// Get the property name given a value |
550 | /// |
551 | /// # Example |
552 | /// |
553 | /// ```rust |
554 | /// use icu::properties::Script; |
555 | /// use tinystr::tinystr; |
556 | /// |
557 | /// let lookup = Script::enum_to_short_name_mapper(); |
558 | /// assert_eq!(lookup.get(Script::Brahmi), Some(tinystr!(4, "Brah" ))); |
559 | /// assert_eq!(lookup.get(Script::Hangul), Some(tinystr!(4, "Hang" ))); |
560 | /// ``` |
561 | #[inline ] |
562 | pub fn get(&self, property: T) -> Option<tinystr::TinyStr4> { |
563 | let prop: usize = usize::try_from(property.to_u32()).ok()?; |
564 | self.map.map.get(index:prop).filter(|x: &TinyAsciiStr<4>| !x.is_empty()) |
565 | } |
566 | } |
567 | |
568 | impl<T: TrieValue> PropertyEnumToValueNameLinearTiny4MapperBorrowed<'static, T> { |
569 | /// Cheaply converts a [`PropertyEnumToValueNameLinearTiny4MapperBorrowed<'static>`] into a [`PropertyEnumToValueNameLinearTiny4Mapper`]. |
570 | /// |
571 | /// Note: Due to branching and indirection, using [`PropertyEnumToValueNameLinearTiny4Mapper`] might inhibit some |
572 | /// compile-time optimizations that are possible with [`PropertyEnumToValueNameLinearTiny4MapperBorrowed`]. |
573 | pub const fn static_to_owned(self) -> PropertyEnumToValueNameLinearTiny4Mapper<T> { |
574 | PropertyEnumToValueNameLinearTiny4Mapper { |
575 | map: DataPayload::from_static_ref(self.map), |
576 | markers: PhantomData, |
577 | } |
578 | } |
579 | } |
580 | |
// Generates the name-lookup constructors on a property value type.
//
// The name-to-enum mapper is always generated; the enum-to-short-name and
// enum-to-long-name mappers are generated only when their markers and
// function signatures are supplied. Each mapper gets a `const fn` backed by
// compiled data (behind the `compiled_data` feature) and a constructor that
// loads its data from a caller-supplied `DataProvider`.
macro_rules! impl_value_getter {
    (
        // marker type / baked singleton for each names lookup
        // (name_to_enum, then optionally enum_to_short_name and enum_to_long_name)
        markers: $marker_n2e:ident / $singleton_n2e:ident $(, $marker_e2sn:ident / $singleton_e2sn:ident, $marker_e2ln:ident / $singleton_e2ln:ident)?;
        impl $ty:ident {
            $(#[$attr_n2e:meta])*
            $vis_n2e:vis fn $name_n2e:ident() / $cname_n2e:ident();
            $(

                $(#[$attr_e2sn:meta])*
                $vis_e2sn:vis fn $name_e2sn:ident() / $cname_e2sn:ident() -> $mapper_e2sn:ident / $mapper_e2snb:ident;
                $(#[$attr_e2ln:meta])*
                $vis_e2ln:vis fn $name_e2ln:ident() / $cname_e2ln:ident() -> $mapper_e2ln:ident / $mapper_e2lnb:ident;
            )?
        }
    ) => {
        impl $ty {
            // name -> enum, compiled data
            $(#[$attr_n2e])*
            #[cfg(feature = "compiled_data")]
            $vis_n2e const fn $cname_n2e() -> PropertyValueNameToEnumMapperBorrowed<'static, $ty> {
                PropertyValueNameToEnumMapperBorrowed {
                    map: crate::provider::Baked::$singleton_n2e,
                    markers: PhantomData,
                }
            }

            // name -> enum, custom provider
            #[doc = concat!("A version of [`", stringify!($ty), "::", stringify!($cname_n2e), "()`] that uses custom data provided by a [`DataProvider`].")]
            ///
            /// [📚 Help choosing a constructor](icu_provider::constructors)
            $vis_n2e fn $name_n2e(
                provider: &(impl DataProvider<$marker_n2e> + ?Sized)
            ) -> Result<PropertyValueNameToEnumMapper<$ty>, PropertiesError> {
                let payload = provider
                    .load(Default::default())
                    .and_then(DataResponse::take_payload)?;
                Ok(PropertyValueNameToEnumMapper::from_data(payload))
            }

            $(
                // enum -> short name, compiled data
                $(#[$attr_e2sn])*
                #[cfg(feature = "compiled_data")]
                $vis_e2sn const fn $cname_e2sn() -> $mapper_e2snb<'static, $ty> {
                    $mapper_e2snb {
                        map: crate::provider::Baked::$singleton_e2sn,
                        markers: PhantomData,
                    }
                }

                // enum -> short name, custom provider
                #[doc = concat!("A version of [`", stringify!($ty), "::", stringify!($cname_e2sn), "()`] that uses custom data provided by a [`DataProvider`].")]
                ///
                /// [📚 Help choosing a constructor](icu_provider::constructors)
                $vis_e2sn fn $name_e2sn(
                    provider: &(impl DataProvider<$marker_e2sn> + ?Sized)
                ) -> Result<$mapper_e2sn<$ty>, PropertiesError> {
                    let payload = provider
                        .load(Default::default())
                        .and_then(DataResponse::take_payload)?;
                    Ok($mapper_e2sn::from_data(payload))
                }

                // enum -> long name, compiled data
                $(#[$attr_e2ln])*
                #[cfg(feature = "compiled_data")]
                $vis_e2ln const fn $cname_e2ln() -> $mapper_e2lnb<'static, $ty> {
                    $mapper_e2lnb {
                        map: crate::provider::Baked::$singleton_e2ln,
                        markers: PhantomData,
                    }
                }

                // enum -> long name, custom provider
                #[doc = concat!("A version of [`", stringify!($ty), "::", stringify!($cname_e2ln), "()`] that uses custom data provided by a [`DataProvider`].")]
                ///
                /// [📚 Help choosing a constructor](icu_provider::constructors)
                $vis_e2ln fn $name_e2ln(
                    provider: &(impl DataProvider<$marker_e2ln> + ?Sized)
                ) -> Result<$mapper_e2ln<$ty>, PropertiesError> {
                    let payload = provider
                        .load(Default::default())
                        .and_then(DataResponse::take_payload)?;
                    Ok($mapper_e2ln::from_data(payload))
                }
            )?
        }
    }
}
656 | |
/// See [`test_enumerated_property_completeness`] for usage.
///
/// Takes an `impl` block consisting solely of associated constants, e.g.:
/// ```ignore
/// impl EastAsianWidth {
///     pub const Neutral: EastAsianWidth = EastAsianWidth(0);
///     pub const Ambiguous: EastAsianWidth = EastAsianWidth(1);
///     ...
/// }
/// ```
/// The block is emitted unchanged, and in test builds it additionally gets
/// `const ALL_CONSTS = &[("Neutral", 0u16), ...];`, built by explicitly
/// casting the first field of each constant to u16.
macro_rules! create_const_array {
    (
        $( #[$impl_attr:meta] )*
        impl $enum_ty:ident {
            $( $(#[$const_attr:meta])* $const_vis:vis const $const_name:ident: $const_ty:ty = $const_value:expr; )*
        }
    ) => {
        $( #[$impl_attr] )*
        impl $enum_ty {
            // Re-emit every constant exactly as written by the caller.
            $(
                $(#[$const_attr])*
                $const_vis const $const_name: $const_ty = $const_value;
            )*

            // (name, raw value) pairs for every constant, available to tests only.
            #[cfg(test)]
            const ALL_CONSTS: &'static [(&'static str, u16)] = &[
                $((stringify!($const_name), $enum_ty::$const_name.0 as u16)),*
            ];
        }
    }
}
689 | |
690 | /// Enumerated property Bidi_Class |
691 | /// |
692 | /// These are the categories required by the Unicode Bidirectional Algorithm. |
693 | /// For the property values, see [Bidirectional Class Values](https://unicode.org/reports/tr44/#Bidi_Class_Values). |
694 | /// For more information, see [Unicode Standard Annex #9](https://unicode.org/reports/tr41/tr41-28.html#UAX9). |
695 | #[derive (Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
696 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
697 | #[cfg_attr (feature = "datagen" , derive(databake::Bake))] |
698 | #[cfg_attr (feature = "datagen" , databake(path = icu_properties))] |
699 | #[allow (clippy::exhaustive_structs)] // newtype |
700 | #[repr (transparent)] |
701 | #[zerovec::make_ule (BidiClassULE)] |
702 | pub struct BidiClass(pub u8); |
703 | |
704 | create_const_array! { |
705 | #[allow (non_upper_case_globals)] |
706 | impl BidiClass { |
707 | /// (`L`) any strong left-to-right character |
708 | pub const LeftToRight: BidiClass = BidiClass(0); |
709 | /// (`R`) any strong right-to-left (non-Arabic-type) character |
710 | pub const RightToLeft: BidiClass = BidiClass(1); |
711 | /// (`EN`) any ASCII digit or Eastern Arabic-Indic digit |
712 | pub const EuropeanNumber: BidiClass = BidiClass(2); |
713 | /// (`ES`) plus and minus signs |
714 | pub const EuropeanSeparator: BidiClass = BidiClass(3); |
715 | /// (`ET`) a terminator in a numeric format context, includes currency signs |
716 | pub const EuropeanTerminator: BidiClass = BidiClass(4); |
717 | /// (`AN`) any Arabic-Indic digit |
718 | pub const ArabicNumber: BidiClass = BidiClass(5); |
719 | /// (`CS`) commas, colons, and slashes |
720 | pub const CommonSeparator: BidiClass = BidiClass(6); |
721 | /// (`B`) various newline characters |
722 | pub const ParagraphSeparator: BidiClass = BidiClass(7); |
723 | /// (`S`) various segment-related control codes |
724 | pub const SegmentSeparator: BidiClass = BidiClass(8); |
725 | /// (`WS`) spaces |
726 | pub const WhiteSpace: BidiClass = BidiClass(9); |
727 | /// (`ON`) most other symbols and punctuation marks |
728 | pub const OtherNeutral: BidiClass = BidiClass(10); |
729 | /// (`LRE`) U+202A: the LR embedding control |
730 | pub const LeftToRightEmbedding: BidiClass = BidiClass(11); |
731 | /// (`LRO`) U+202D: the LR override control |
732 | pub const LeftToRightOverride: BidiClass = BidiClass(12); |
733 | /// (`AL`) any strong right-to-left (Arabic-type) character |
734 | pub const ArabicLetter: BidiClass = BidiClass(13); |
735 | /// (`RLE`) U+202B: the RL embedding control |
736 | pub const RightToLeftEmbedding: BidiClass = BidiClass(14); |
737 | /// (`RLO`) U+202E: the RL override control |
738 | pub const RightToLeftOverride: BidiClass = BidiClass(15); |
739 | /// (`PDF`) U+202C: terminates an embedding or override control |
740 | pub const PopDirectionalFormat: BidiClass = BidiClass(16); |
741 | /// (`NSM`) any nonspacing mark |
742 | pub const NonspacingMark: BidiClass = BidiClass(17); |
743 | /// (`BN`) most format characters, control codes, or noncharacters |
744 | pub const BoundaryNeutral: BidiClass = BidiClass(18); |
745 | /// (`FSI`) U+2068: the first strong isolate control |
746 | pub const FirstStrongIsolate: BidiClass = BidiClass(19); |
747 | /// (`LRI`) U+2066: the LR isolate control |
748 | pub const LeftToRightIsolate: BidiClass = BidiClass(20); |
749 | /// (`RLI`) U+2067: the RL isolate control |
750 | pub const RightToLeftIsolate: BidiClass = BidiClass(21); |
751 | /// (`PDI`) U+2069: terminates an isolate control |
752 | pub const PopDirectionalIsolate: BidiClass = BidiClass(22); |
753 | } |
754 | } |
755 | |
756 | impl_value_getter! { |
757 | markers: BidiClassNameToValueV1Marker / SINGLETON_PROPNAMES_FROM_BC_V1, BidiClassValueToShortNameV1Marker / SINGLETON_PROPNAMES_TO_SHORT_LINEAR_BC_V1, BidiClassValueToLongNameV1Marker / SINGLETON_PROPNAMES_TO_LONG_LINEAR_BC_V1; |
758 | impl BidiClass { |
759 | /// Return a [`PropertyValueNameToEnumMapper`], capable of looking up values |
760 | /// from strings for the `Bidi_Class` enumerated property |
761 | /// |
762 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
763 | /// |
764 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
765 | /// |
766 | /// # Example |
767 | /// |
768 | /// ``` |
769 | /// use icu::properties::BidiClass; |
770 | /// |
771 | /// let lookup = BidiClass::name_to_enum_mapper(); |
772 | /// // short name for value |
773 | /// assert_eq!(lookup.get_strict("AN"), Some(BidiClass::ArabicNumber)); |
774 | /// assert_eq!(lookup.get_strict("NSM"), Some(BidiClass::NonspacingMark)); |
775 | /// // long name for value |
776 | /// assert_eq!(lookup.get_strict("Arabic_Number"), Some(BidiClass::ArabicNumber)); |
777 | /// assert_eq!(lookup.get_strict("Nonspacing_Mark"), Some(BidiClass::NonspacingMark)); |
778 | /// // name has incorrect casing |
779 | /// assert_eq!(lookup.get_strict("arabicnumber"), None); |
780 | /// // loose matching of name |
781 | /// assert_eq!(lookup.get_loose("arabicnumber"), Some(BidiClass::ArabicNumber)); |
782 | /// // fake property |
783 | /// assert_eq!(lookup.get_strict("Upside_Down_Vertical_Backwards_Mirrored"), None); |
784 | /// ``` |
785 | pub fn get_name_to_enum_mapper() / name_to_enum_mapper(); |
786 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up short names |
787 | /// for values of the `Bidi_Class` enumerated property |
788 | /// |
789 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
790 | /// |
791 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
792 | /// |
793 | /// # Example |
794 | /// |
795 | /// ``` |
796 | /// use icu::properties::BidiClass; |
797 | /// |
798 | /// let lookup = BidiClass::enum_to_short_name_mapper(); |
799 | /// assert_eq!(lookup.get(BidiClass::ArabicNumber), Some("AN")); |
800 | /// assert_eq!(lookup.get(BidiClass::NonspacingMark), Some("NSM")); |
801 | /// ``` |
802 | pub fn get_enum_to_short_name_mapper() / enum_to_short_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
803 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up long names |
804 | /// for values of the `Bidi_Class` enumerated property |
805 | /// |
806 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
807 | /// |
808 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
809 | /// |
810 | /// # Example |
811 | /// |
812 | /// ``` |
813 | /// use icu::properties::BidiClass; |
814 | /// |
815 | /// let lookup = BidiClass::enum_to_long_name_mapper(); |
816 | /// assert_eq!(lookup.get(BidiClass::ArabicNumber), Some("Arabic_Number")); |
817 | /// assert_eq!(lookup.get(BidiClass::NonspacingMark), Some("Nonspacing_Mark")); |
818 | /// ``` |
819 | pub fn get_enum_to_long_name_mapper() / enum_to_long_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
820 | } |
821 | } |
822 | |
823 | /// Enumerated property General_Category. |
824 | /// |
825 | /// General_Category specifies the most general classification of a code point, usually |
826 | /// determined based on the primary characteristic of the assigned character. For example, is the |
827 | /// character a letter, a mark, a number, punctuation, or a symbol, and if so, of what type? |
828 | /// |
829 | /// GeneralCategory only supports specific subcategories (eg `UppercaseLetter`). |
830 | /// It does not support grouped categories (eg `Letter`). For grouped categories, use [`GeneralCategoryGroup`]. |
831 | #[derive (Copy, Clone, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)] |
832 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
833 | #[cfg_attr (feature = "datagen" , derive(databake::Bake))] |
834 | #[cfg_attr (feature = "datagen" , databake(path = icu_properties))] |
835 | #[allow (clippy::exhaustive_enums)] // this type is stable |
836 | #[zerovec::make_ule (GeneralCategoryULE)] |
837 | #[repr (u8)] |
838 | pub enum GeneralCategory { |
839 | /// (`Cn`) A reserved unassigned code point or a noncharacter |
840 | Unassigned = 0, |
841 | |
842 | /// (`Lu`) An uppercase letter |
843 | UppercaseLetter = 1, |
844 | /// (`Ll`) A lowercase letter |
845 | LowercaseLetter = 2, |
846 | /// (`Lt`) A digraphic letter, with first part uppercase |
847 | TitlecaseLetter = 3, |
848 | /// (`Lm`) A modifier letter |
849 | ModifierLetter = 4, |
850 | /// (`Lo`) Other letters, including syllables and ideographs |
851 | OtherLetter = 5, |
852 | |
853 | /// (`Mn`) A nonspacing combining mark (zero advance width) |
854 | NonspacingMark = 6, |
855 | /// (`Mc`) A spacing combining mark (positive advance width) |
856 | SpacingMark = 8, |
857 | /// (`Me`) An enclosing combining mark |
858 | EnclosingMark = 7, |
859 | |
860 | /// (`Nd`) A decimal digit |
861 | DecimalNumber = 9, |
862 | /// (`Nl`) A letterlike numeric character |
863 | LetterNumber = 10, |
864 | /// (`No`) A numeric character of other type |
865 | OtherNumber = 11, |
866 | |
867 | /// (`Zs`) A space character (of various non-zero widths) |
868 | SpaceSeparator = 12, |
869 | /// (`Zl`) U+2028 LINE SEPARATOR only |
870 | LineSeparator = 13, |
871 | /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only |
872 | ParagraphSeparator = 14, |
873 | |
874 | /// (`Cc`) A C0 or C1 control code |
875 | Control = 15, |
876 | /// (`Cf`) A format control character |
877 | Format = 16, |
878 | /// (`Co`) A private-use character |
879 | PrivateUse = 17, |
880 | /// (`Cs`) A surrogate code point |
881 | Surrogate = 18, |
882 | |
883 | /// (`Pd`) A dash or hyphen punctuation mark |
884 | DashPunctuation = 19, |
885 | /// (`Ps`) An opening punctuation mark (of a pair) |
886 | OpenPunctuation = 20, |
887 | /// (`Pe`) A closing punctuation mark (of a pair) |
888 | ClosePunctuation = 21, |
889 | /// (`Pc`) A connecting punctuation mark, like a tie |
890 | ConnectorPunctuation = 22, |
891 | /// (`Pi`) An initial quotation mark |
892 | InitialPunctuation = 28, |
893 | /// (`Pf`) A final quotation mark |
894 | FinalPunctuation = 29, |
895 | /// (`Po`) A punctuation mark of other type |
896 | OtherPunctuation = 23, |
897 | |
898 | /// (`Sm`) A symbol of mathematical use |
899 | MathSymbol = 24, |
900 | /// (`Sc`) A currency sign |
901 | CurrencySymbol = 25, |
902 | /// (`Sk`) A non-letterlike modifier symbol |
903 | ModifierSymbol = 26, |
904 | /// (`So`) A symbol of other type |
905 | OtherSymbol = 27, |
906 | } |
907 | |
908 | impl_value_getter! { |
909 | markers: GeneralCategoryNameToValueV1Marker / SINGLETON_PROPNAMES_FROM_GC_V1, GeneralCategoryValueToShortNameV1Marker / SINGLETON_PROPNAMES_TO_SHORT_LINEAR_GC_V1, GeneralCategoryValueToLongNameV1Marker / SINGLETON_PROPNAMES_TO_LONG_LINEAR_GC_V1; |
910 | impl GeneralCategory { |
911 | /// Return a [`PropertyValueNameToEnumMapper`], capable of looking up values |
912 | /// from strings for the `General_Category` enumerated property. |
913 | /// |
914 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
915 | /// |
916 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
917 | /// |
918 | /// # Example |
919 | /// |
920 | /// ``` |
921 | /// use icu::properties::GeneralCategory; |
922 | /// |
923 | /// let lookup = GeneralCategory::name_to_enum_mapper(); |
924 | /// // short name for value |
925 | /// assert_eq!(lookup.get_strict("Lu"), Some(GeneralCategory::UppercaseLetter)); |
926 | /// assert_eq!(lookup.get_strict("Pd"), Some(GeneralCategory::DashPunctuation)); |
927 | /// // long name for value |
928 | /// assert_eq!(lookup.get_strict("Uppercase_Letter"), Some(GeneralCategory::UppercaseLetter)); |
929 | /// assert_eq!(lookup.get_strict("Dash_Punctuation"), Some(GeneralCategory::DashPunctuation)); |
930 | /// // name has incorrect casing |
931 | /// assert_eq!(lookup.get_strict("dashpunctuation"), None); |
932 | /// // loose matching of name |
933 | /// assert_eq!(lookup.get_loose("dash-punctuation"), Some(GeneralCategory::DashPunctuation)); |
934 | /// // fake property |
935 | /// assert_eq!(lookup.get_loose("Animated_Gif"), None); |
936 | /// ``` |
937 | pub fn get_name_to_enum_mapper() / name_to_enum_mapper(); |
938 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up short names |
939 | /// for values of the `General_Category` enumerated property. |
940 | /// |
941 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
942 | /// |
943 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
944 | /// |
945 | /// # Example |
946 | /// |
947 | /// ``` |
948 | /// use icu::properties::GeneralCategory; |
949 | /// |
950 | /// let lookup = GeneralCategory::enum_to_short_name_mapper(); |
951 | /// assert_eq!(lookup.get(GeneralCategory::UppercaseLetter), Some("Lu")); |
952 | /// assert_eq!(lookup.get(GeneralCategory::DashPunctuation), Some("Pd")); |
953 | /// assert_eq!(lookup.get(GeneralCategory::FinalPunctuation), Some("Pf")); |
954 | /// ``` |
955 | pub fn get_enum_to_short_name_mapper() / enum_to_short_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
956 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up long names |
957 | /// for values of the `General_Category` enumerated property. |
958 | /// |
959 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
960 | /// |
961 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
962 | /// |
963 | /// # Example |
964 | /// |
965 | /// ``` |
966 | /// use icu::properties::GeneralCategory; |
967 | /// |
968 | /// let lookup = GeneralCategory::enum_to_long_name_mapper(); |
969 | /// assert_eq!(lookup.get(GeneralCategory::UppercaseLetter), Some("Uppercase_Letter")); |
970 | /// assert_eq!(lookup.get(GeneralCategory::DashPunctuation), Some("Dash_Punctuation")); |
971 | /// assert_eq!(lookup.get(GeneralCategory::FinalPunctuation), Some("Final_Punctuation")); |
972 | /// ``` |
973 | pub fn get_enum_to_long_name_mapper() / enum_to_long_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
974 | } |
975 | } |
976 | |
/// Error value for `impl TryFrom<u8> for GeneralCategory`.
///
/// Returned when the integer passed to `try_from` does not correspond to any
/// [`GeneralCategory`] value. The type carries no further information.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Default)]
pub struct GeneralCategoryTryFromError;
979 | |
980 | impl TryFrom<u8> for GeneralCategory { |
981 | type Error = GeneralCategoryTryFromError; |
982 | /// Construct this [`GeneralCategory`] from an integer, returning |
983 | /// an error if it is out of bounds |
984 | fn try_from(val: u8) -> Result<Self, GeneralCategoryTryFromError> { |
985 | GeneralCategory::new_from_u8(val).ok_or(GeneralCategoryTryFromError) |
986 | } |
987 | } |
988 | |
/// Groupings of multiple General_Category property values.
///
/// Instances of `GeneralCategoryGroup` represent the defined multi-category
/// values that are useful for users in certain contexts, such as regex. In
/// other words, unlike [`GeneralCategory`], this supports groups of general
/// categories: for example, `Letter` is the union of `UppercaseLetter`,
/// `LowercaseLetter`, etc.
///
/// See <https://www.unicode.org/reports/tr44/>.
///
/// The value is a bit mask stored in a `u32`. The discriminants correspond to the
/// `U_GC_XX_MASK` constants in ICU4C.
///
/// See `UCharCategory` and `U_GET_GC_MASK` in ICU4C.
#[derive(Copy, Clone, PartialEq, Debug, Eq)]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct GeneralCategoryGroup(pub(crate) u32);
1008 | |
1009 | use GeneralCategory as GC; |
1010 | use GeneralCategoryGroup as GCG; |
1011 | |
1012 | #[allow (non_upper_case_globals)] |
1013 | impl GeneralCategoryGroup { |
1014 | /// (`Lu`) An uppercase letter |
1015 | pub const UppercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32)); |
1016 | /// (`Ll`) A lowercase letter |
1017 | pub const LowercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::LowercaseLetter as u32)); |
1018 | /// (`Lt`) A digraphic letter, with first part uppercase |
1019 | pub const TitlecaseLetter: GeneralCategoryGroup = GCG(1 << (GC::TitlecaseLetter as u32)); |
1020 | /// (`Lm`) A modifier letter |
1021 | pub const ModifierLetter: GeneralCategoryGroup = GCG(1 << (GC::ModifierLetter as u32)); |
1022 | /// (`Lo`) Other letters, including syllables and ideographs |
1023 | pub const OtherLetter: GeneralCategoryGroup = GCG(1 << (GC::OtherLetter as u32)); |
1024 | /// (`LC`) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter |
1025 | pub const CasedLetter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32) |
1026 | | 1 << (GC::LowercaseLetter as u32) |
1027 | | 1 << (GC::TitlecaseLetter as u32)); |
1028 | /// (`L`) The union of all letter categories |
1029 | pub const Letter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32) |
1030 | | 1 << (GC::LowercaseLetter as u32) |
1031 | | 1 << (GC::TitlecaseLetter as u32) |
1032 | | 1 << (GC::ModifierLetter as u32) |
1033 | | 1 << (GC::OtherLetter as u32)); |
1034 | |
1035 | /// (`Mn`) A nonspacing combining mark (zero advance width) |
1036 | pub const NonspacingMark: GeneralCategoryGroup = GCG(1 << (GC::NonspacingMark as u32)); |
1037 | /// (`Mc`) A spacing combining mark (positive advance width) |
1038 | pub const EnclosingMark: GeneralCategoryGroup = GCG(1 << (GC::EnclosingMark as u32)); |
1039 | /// (`Me`) An enclosing combining mark |
1040 | pub const SpacingMark: GeneralCategoryGroup = GCG(1 << (GC::SpacingMark as u32)); |
1041 | /// (`M`) The union of all mark categories |
1042 | pub const Mark: GeneralCategoryGroup = GCG(1 << (GC::NonspacingMark as u32) |
1043 | | 1 << (GC::EnclosingMark as u32) |
1044 | | 1 << (GC::SpacingMark as u32)); |
1045 | |
1046 | /// (`Nd`) A decimal digit |
1047 | pub const DecimalNumber: GeneralCategoryGroup = GCG(1 << (GC::DecimalNumber as u32)); |
1048 | /// (`Nl`) A letterlike numeric character |
1049 | pub const LetterNumber: GeneralCategoryGroup = GCG(1 << (GC::LetterNumber as u32)); |
1050 | /// (`No`) A numeric character of other type |
1051 | pub const OtherNumber: GeneralCategoryGroup = GCG(1 << (GC::OtherNumber as u32)); |
1052 | /// (`N`) The union of all number categories |
1053 | pub const Number: GeneralCategoryGroup = GCG(1 << (GC::DecimalNumber as u32) |
1054 | | 1 << (GC::LetterNumber as u32) |
1055 | | 1 << (GC::OtherNumber as u32)); |
1056 | |
1057 | /// (`Zs`) A space character (of various non-zero widths) |
1058 | pub const SpaceSeparator: GeneralCategoryGroup = GCG(1 << (GC::SpaceSeparator as u32)); |
1059 | /// (`Zl`) U+2028 LINE SEPARATOR only |
1060 | pub const LineSeparator: GeneralCategoryGroup = GCG(1 << (GC::LineSeparator as u32)); |
1061 | /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only |
1062 | pub const ParagraphSeparator: GeneralCategoryGroup = GCG(1 << (GC::ParagraphSeparator as u32)); |
1063 | /// (`Z`) The union of all separator categories |
1064 | pub const Separator: GeneralCategoryGroup = GCG(1 << (GC::SpaceSeparator as u32) |
1065 | | 1 << (GC::LineSeparator as u32) |
1066 | | 1 << (GC::ParagraphSeparator as u32)); |
1067 | |
1068 | /// (`Cc`) A C0 or C1 control code |
1069 | pub const Control: GeneralCategoryGroup = GCG(1 << (GC::Control as u32)); |
1070 | /// (`Cf`) A format control character |
1071 | pub const Format: GeneralCategoryGroup = GCG(1 << (GC::Format as u32)); |
1072 | /// (`Co`) A private-use character |
1073 | pub const PrivateUse: GeneralCategoryGroup = GCG(1 << (GC::PrivateUse as u32)); |
1074 | /// (`Cs`) A surrogate code point |
1075 | pub const Surrogate: GeneralCategoryGroup = GCG(1 << (GC::Surrogate as u32)); |
1076 | /// (`Cn`) A reserved unassigned code point or a noncharacter |
1077 | pub const Unassigned: GeneralCategoryGroup = GCG(1 << (GC::Unassigned as u32)); |
1078 | /// (`C`) The union of all control code, reserved, and unassigned categories |
1079 | pub const Other: GeneralCategoryGroup = GCG(1 << (GC::Control as u32) |
1080 | | 1 << (GC::Format as u32) |
1081 | | 1 << (GC::PrivateUse as u32) |
1082 | | 1 << (GC::Surrogate as u32) |
1083 | | 1 << (GC::Unassigned as u32)); |
1084 | |
1085 | /// (`Pd`) A dash or hyphen punctuation mark |
1086 | pub const DashPunctuation: GeneralCategoryGroup = GCG(1 << (GC::DashPunctuation as u32)); |
1087 | /// (`Ps`) An opening punctuation mark (of a pair) |
1088 | pub const OpenPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OpenPunctuation as u32)); |
1089 | /// (`Pe`) A closing punctuation mark (of a pair) |
1090 | pub const ClosePunctuation: GeneralCategoryGroup = GCG(1 << (GC::ClosePunctuation as u32)); |
1091 | /// (`Pc`) A connecting punctuation mark, like a tie |
1092 | pub const ConnectorPunctuation: GeneralCategoryGroup = |
1093 | GCG(1 << (GC::ConnectorPunctuation as u32)); |
1094 | /// (`Pi`) An initial quotation mark |
1095 | pub const InitialPunctuation: GeneralCategoryGroup = GCG(1 << (GC::InitialPunctuation as u32)); |
1096 | /// (`Pf`) A final quotation mark |
1097 | pub const FinalPunctuation: GeneralCategoryGroup = GCG(1 << (GC::FinalPunctuation as u32)); |
1098 | /// (`Po`) A punctuation mark of other type |
1099 | pub const OtherPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OtherPunctuation as u32)); |
1100 | /// (`P`) The union of all punctuation categories |
1101 | pub const Punctuation: GeneralCategoryGroup = GCG(1 << (GC::DashPunctuation as u32) |
1102 | | 1 << (GC::OpenPunctuation as u32) |
1103 | | 1 << (GC::ClosePunctuation as u32) |
1104 | | 1 << (GC::ConnectorPunctuation as u32) |
1105 | | 1 << (GC::OtherPunctuation as u32) |
1106 | | 1 << (GC::InitialPunctuation as u32) |
1107 | | 1 << (GC::FinalPunctuation as u32)); |
1108 | |
1109 | /// (`Sm`) A symbol of mathematical use |
1110 | pub const MathSymbol: GeneralCategoryGroup = GCG(1 << (GC::MathSymbol as u32)); |
1111 | /// (`Sc`) A currency sign |
1112 | pub const CurrencySymbol: GeneralCategoryGroup = GCG(1 << (GC::CurrencySymbol as u32)); |
1113 | /// (`Sk`) A non-letterlike modifier symbol |
1114 | pub const ModifierSymbol: GeneralCategoryGroup = GCG(1 << (GC::ModifierSymbol as u32)); |
1115 | /// (`So`) A symbol of other type |
1116 | pub const OtherSymbol: GeneralCategoryGroup = GCG(1 << (GC::OtherSymbol as u32)); |
1117 | /// (`S`) The union of all symbol categories |
1118 | pub const Symbol: GeneralCategoryGroup = GCG(1 << (GC::MathSymbol as u32) |
1119 | | 1 << (GC::CurrencySymbol as u32) |
1120 | | 1 << (GC::ModifierSymbol as u32) |
1121 | | 1 << (GC::OtherSymbol as u32)); |
1122 | |
1123 | const ALL: u32 = (1 << (GC::FinalPunctuation as u32 + 1)) - 1; |
1124 | |
1125 | /// Return whether the code point belongs in the provided multi-value category. |
1126 | /// |
1127 | /// ``` |
1128 | /// use icu::properties::{maps, GeneralCategory, GeneralCategoryGroup}; |
1129 | /// |
1130 | /// let gc = maps::general_category(); |
1131 | /// |
1132 | /// assert_eq!(gc.get('A' ), GeneralCategory::UppercaseLetter); |
1133 | /// assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A' ))); |
1134 | /// |
1135 | /// // U+0B1E ORIYA LETTER NYA |
1136 | /// assert_eq!(gc.get('ଞ' ), GeneralCategory::OtherLetter); |
1137 | /// assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ' ))); |
1138 | /// assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ' ))); |
1139 | /// |
1140 | /// // U+0301 COMBINING ACUTE ACCENT |
1141 | /// assert_eq!(gc.get32(0x0301), GeneralCategory::NonspacingMark); |
1142 | /// assert!(GeneralCategoryGroup::Mark.contains(gc.get32(0x0301))); |
1143 | /// assert!(!GeneralCategoryGroup::Letter.contains(gc.get32(0x0301))); |
1144 | /// |
1145 | /// assert_eq!(gc.get('0' ), GeneralCategory::DecimalNumber); |
1146 | /// assert!(GeneralCategoryGroup::Number.contains(gc.get('0' ))); |
1147 | /// assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0' ))); |
1148 | /// |
1149 | /// assert_eq!(gc.get('(' ), GeneralCategory::OpenPunctuation); |
1150 | /// assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('(' ))); |
1151 | /// assert!(!GeneralCategoryGroup::Number.contains(gc.get('(' ))); |
1152 | /// |
1153 | /// // U+2713 CHECK MARK |
1154 | /// assert_eq!(gc.get('✓' ), GeneralCategory::OtherSymbol); |
1155 | /// assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓' ))); |
1156 | /// assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓' ))); |
1157 | /// |
1158 | /// assert_eq!(gc.get(' ' ), GeneralCategory::SpaceSeparator); |
1159 | /// assert!(GeneralCategoryGroup::Separator.contains(gc.get(' ' ))); |
1160 | /// assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' ' ))); |
1161 | /// |
1162 | /// // U+E007F CANCEL TAG |
1163 | /// assert_eq!(gc.get32(0xE007F), GeneralCategory::Format); |
1164 | /// assert!(GeneralCategoryGroup::Other.contains(gc.get32(0xE007F))); |
1165 | /// assert!(!GeneralCategoryGroup::Separator.contains(gc.get32(0xE007F))); |
1166 | /// ``` |
1167 | pub const fn contains(&self, val: GeneralCategory) -> bool { |
1168 | 0 != (1 << (val as u32)) & self.0 |
1169 | } |
1170 | |
1171 | /// Produce a GeneralCategoryGroup that is the inverse of this one |
1172 | /// |
1173 | /// # Example |
1174 | /// |
1175 | /// ```rust |
1176 | /// use icu::properties::{GeneralCategory, GeneralCategoryGroup}; |
1177 | /// |
1178 | /// let letter = GeneralCategoryGroup::Letter; |
1179 | /// let not_letter = letter.complement(); |
1180 | /// |
1181 | /// assert!(not_letter.contains(GeneralCategory::MathSymbol)); |
1182 | /// assert!(!letter.contains(GeneralCategory::MathSymbol)); |
1183 | /// assert!(not_letter.contains(GeneralCategory::OtherPunctuation)); |
1184 | /// assert!(!letter.contains(GeneralCategory::OtherPunctuation)); |
1185 | /// assert!(!not_letter.contains(GeneralCategory::UppercaseLetter)); |
1186 | /// assert!(letter.contains(GeneralCategory::UppercaseLetter)); |
1187 | /// ``` |
1188 | pub const fn complement(self) -> Self { |
1189 | // Mask off things not in Self::ALL to guarantee the mask |
1190 | // values stay in-range |
1191 | GeneralCategoryGroup(!self.0 & Self::ALL) |
1192 | } |
1193 | |
1194 | /// Return the group representing all GeneralCategory values |
1195 | /// |
1196 | /// # Example |
1197 | /// |
1198 | /// ```rust |
1199 | /// use icu::properties::{GeneralCategory, GeneralCategoryGroup}; |
1200 | /// |
1201 | /// let all = GeneralCategoryGroup::all(); |
1202 | /// |
1203 | /// assert!(all.contains(GeneralCategory::MathSymbol)); |
1204 | /// assert!(all.contains(GeneralCategory::OtherPunctuation)); |
1205 | /// assert!(all.contains(GeneralCategory::UppercaseLetter)); |
1206 | /// ``` |
1207 | pub const fn all() -> Self { |
1208 | Self(Self::ALL) |
1209 | } |
1210 | |
1211 | /// Return the empty group |
1212 | /// |
1213 | /// # Example |
1214 | /// |
1215 | /// ```rust |
1216 | /// use icu::properties::{GeneralCategory, GeneralCategoryGroup}; |
1217 | /// |
1218 | /// let empty = GeneralCategoryGroup::empty(); |
1219 | /// |
1220 | /// assert!(!empty.contains(GeneralCategory::MathSymbol)); |
1221 | /// assert!(!empty.contains(GeneralCategory::OtherPunctuation)); |
1222 | /// assert!(!empty.contains(GeneralCategory::UppercaseLetter)); |
1223 | /// ``` |
1224 | pub const fn empty() -> Self { |
1225 | Self(0) |
1226 | } |
1227 | |
1228 | /// Take the union of two groups |
1229 | /// |
1230 | /// # Example |
1231 | /// |
1232 | /// ```rust |
1233 | /// use icu::properties::{GeneralCategory, GeneralCategoryGroup}; |
1234 | /// |
1235 | /// let letter = GeneralCategoryGroup::Letter; |
1236 | /// let symbol = GeneralCategoryGroup::Symbol; |
1237 | /// let union = letter.union(symbol); |
1238 | /// |
1239 | /// assert!(union.contains(GeneralCategory::MathSymbol)); |
1240 | /// assert!(!union.contains(GeneralCategory::OtherPunctuation)); |
1241 | /// assert!(union.contains(GeneralCategory::UppercaseLetter)); |
1242 | /// ``` |
1243 | pub const fn union(self, other: Self) -> Self { |
1244 | Self(self.0 | other.0) |
1245 | } |
1246 | |
1247 | /// Take the intersection of two groups |
1248 | /// |
1249 | /// # Example |
1250 | /// |
1251 | /// ```rust |
1252 | /// use icu::properties::{GeneralCategory, GeneralCategoryGroup}; |
1253 | /// |
1254 | /// let letter = GeneralCategoryGroup::Letter; |
1255 | /// let lu = GeneralCategoryGroup::UppercaseLetter; |
1256 | /// let intersection = letter.intersection(lu); |
1257 | /// |
1258 | /// assert!(!intersection.contains(GeneralCategory::MathSymbol)); |
1259 | /// assert!(!intersection.contains(GeneralCategory::OtherPunctuation)); |
1260 | /// assert!(intersection.contains(GeneralCategory::UppercaseLetter)); |
1261 | /// assert!(!intersection.contains(GeneralCategory::LowercaseLetter)); |
1262 | /// ``` |
1263 | pub const fn intersection(self, other: Self) -> Self { |
1264 | Self(self.0 & other.0) |
1265 | } |
1266 | } |
1267 | |
1268 | impl_value_getter! { |
1269 | markers: GeneralCategoryMaskNameToValueV1Marker / SINGLETON_PROPNAMES_FROM_GCM_V1; |
1270 | impl GeneralCategoryGroup { |
1271 | /// Return a [`PropertyValueNameToEnumMapper`], capable of looking up values |
1272 | /// from strings for the `General_Category_Mask` mask property. |
1273 | /// |
1274 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1275 | /// |
1276 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1277 | /// |
1278 | /// # Example |
1279 | /// |
1280 | /// ``` |
1281 | /// use icu::properties::GeneralCategoryGroup; |
1282 | /// |
1283 | /// let lookup = GeneralCategoryGroup::name_to_enum_mapper(); |
1284 | /// // short name for value |
1285 | /// assert_eq!(lookup.get_strict("L"), Some(GeneralCategoryGroup::Letter)); |
1286 | /// assert_eq!(lookup.get_strict("LC"), Some(GeneralCategoryGroup::CasedLetter)); |
1287 | /// assert_eq!(lookup.get_strict("Lu"), Some(GeneralCategoryGroup::UppercaseLetter)); |
1288 | /// assert_eq!(lookup.get_strict("Zp"), Some(GeneralCategoryGroup::ParagraphSeparator)); |
1289 | /// assert_eq!(lookup.get_strict("P"), Some(GeneralCategoryGroup::Punctuation)); |
1290 | /// // long name for value |
1291 | /// assert_eq!(lookup.get_strict("Letter"), Some(GeneralCategoryGroup::Letter)); |
1292 | /// assert_eq!(lookup.get_strict("Cased_Letter"), Some(GeneralCategoryGroup::CasedLetter)); |
1293 | /// assert_eq!(lookup.get_strict("Uppercase_Letter"), Some(GeneralCategoryGroup::UppercaseLetter)); |
1294 | /// // alias name |
1295 | /// assert_eq!(lookup.get_strict("punct"), Some(GeneralCategoryGroup::Punctuation)); |
1296 | /// // name has incorrect casing |
1297 | /// assert_eq!(lookup.get_strict("letter"), None); |
1298 | /// // loose matching of name |
1299 | /// assert_eq!(lookup.get_loose("letter"), Some(GeneralCategoryGroup::Letter)); |
1300 | /// // fake property |
1301 | /// assert_eq!(lookup.get_strict("EverythingLol"), None); |
1302 | /// ``` |
1303 | pub fn get_name_to_enum_mapper() / name_to_enum_mapper(); |
1304 | } |
1305 | } |
1306 | |
1307 | impl From<GeneralCategory> for GeneralCategoryGroup { |
1308 | fn from(subcategory: GeneralCategory) -> Self { |
1309 | GeneralCategoryGroup(1 << (subcategory as u32)) |
1310 | } |
1311 | } |
1312 | impl From<u32> for GeneralCategoryGroup { |
1313 | fn from(mask: u32) -> Self { |
1314 | // Mask off things not in Self::ALL to guarantee the mask |
1315 | // values stay in-range |
1316 | GeneralCategoryGroup(mask & Self::ALL) |
1317 | } |
1318 | } |
1319 | impl From<GeneralCategoryGroup> for u32 { |
1320 | fn from(group: GeneralCategoryGroup) -> Self { |
1321 | group.0 |
1322 | } |
1323 | } |
1324 | /// Enumerated property Script. |
1325 | /// |
1326 | /// This is used with both the Script and Script_Extensions Unicode properties. |
1327 | /// Each character is assigned a single Script, but characters that are used in |
1328 | /// a particular subset of scripts will be in more than one Script_Extensions set. |
1329 | /// For example, DEVANAGARI DIGIT NINE has Script=Devanagari, but is also in the |
1330 | /// Script_Extensions set for Dogra, Kaithi, and Mahajani. |
1331 | /// |
1332 | /// For more information, see UAX #24: <http://www.unicode.org/reports/tr24/>. |
1333 | /// See `UScriptCode` in ICU4C. |
1334 | #[derive (Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
1335 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
1336 | #[cfg_attr (feature = "datagen" , derive(databake::Bake))] |
1337 | #[cfg_attr (feature = "datagen" , databake(path = icu_properties))] |
1338 | #[allow (clippy::exhaustive_structs)] // newtype |
1339 | #[repr (transparent)] |
1340 | #[zerovec::make_ule (ScriptULE)] |
1341 | pub struct Script(pub u16); |
1342 | |
1343 | #[allow (missing_docs)] // These constants don't need individual documentation. |
1344 | #[allow (non_upper_case_globals)] |
1345 | impl Script { |
1346 | pub const Adlam: Script = Script(167); |
1347 | pub const Ahom: Script = Script(161); |
1348 | pub const AnatolianHieroglyphs: Script = Script(156); |
1349 | pub const Arabic: Script = Script(2); |
1350 | pub const Armenian: Script = Script(3); |
1351 | pub const Avestan: Script = Script(117); |
1352 | pub const Balinese: Script = Script(62); |
1353 | pub const Bamum: Script = Script(130); |
1354 | pub const BassaVah: Script = Script(134); |
1355 | pub const Batak: Script = Script(63); |
1356 | pub const Bengali: Script = Script(4); |
1357 | pub const Bhaiksuki: Script = Script(168); |
1358 | pub const Bopomofo: Script = Script(5); |
1359 | pub const Brahmi: Script = Script(65); |
1360 | pub const Braille: Script = Script(46); |
1361 | pub const Buginese: Script = Script(55); |
1362 | pub const Buhid: Script = Script(44); |
1363 | pub const CanadianAboriginal: Script = Script(40); |
1364 | pub const Carian: Script = Script(104); |
1365 | pub const CaucasianAlbanian: Script = Script(159); |
1366 | pub const Chakma: Script = Script(118); |
1367 | pub const Cham: Script = Script(66); |
1368 | pub const Cherokee: Script = Script(6); |
1369 | pub const Chorasmian: Script = Script(189); |
1370 | pub const Common: Script = Script(0); |
1371 | pub const Coptic: Script = Script(7); |
1372 | pub const Cuneiform: Script = Script(101); |
1373 | pub const Cypriot: Script = Script(47); |
1374 | pub const CyproMinoan: Script = Script(193); |
1375 | pub const Cyrillic: Script = Script(8); |
1376 | pub const Deseret: Script = Script(9); |
1377 | pub const Devanagari: Script = Script(10); |
1378 | pub const DivesAkuru: Script = Script(190); |
1379 | pub const Dogra: Script = Script(178); |
1380 | pub const Duployan: Script = Script(135); |
1381 | pub const EgyptianHieroglyphs: Script = Script(71); |
1382 | pub const Elbasan: Script = Script(136); |
1383 | pub const Elymaic: Script = Script(185); |
1384 | pub const Ethiopian: Script = Script(11); |
1385 | pub const Georgian: Script = Script(12); |
1386 | pub const Glagolitic: Script = Script(56); |
1387 | pub const Gothic: Script = Script(13); |
1388 | pub const Grantha: Script = Script(137); |
1389 | pub const Greek: Script = Script(14); |
1390 | pub const Gujarati: Script = Script(15); |
1391 | pub const GunjalaGondi: Script = Script(179); |
1392 | pub const Gurmukhi: Script = Script(16); |
1393 | pub const Han: Script = Script(17); |
1394 | pub const Hangul: Script = Script(18); |
1395 | pub const HanifiRohingya: Script = Script(182); |
1396 | pub const Hanunoo: Script = Script(43); |
1397 | pub const Hatran: Script = Script(162); |
1398 | pub const Hebrew: Script = Script(19); |
1399 | pub const Hiragana: Script = Script(20); |
1400 | pub const ImperialAramaic: Script = Script(116); |
1401 | pub const Inherited: Script = Script(1); |
1402 | pub const InscriptionalPahlavi: Script = Script(122); |
1403 | pub const InscriptionalParthian: Script = Script(125); |
1404 | pub const Javanese: Script = Script(78); |
1405 | pub const Kaithi: Script = Script(120); |
1406 | pub const Kannada: Script = Script(21); |
1407 | pub const Katakana: Script = Script(22); |
1408 | pub const Kawi: Script = Script(198); |
1409 | pub const KayahLi: Script = Script(79); |
1410 | pub const Kharoshthi: Script = Script(57); |
1411 | pub const KhitanSmallScript: Script = Script(191); |
1412 | pub const Khmer: Script = Script(23); |
1413 | pub const Khojki: Script = Script(157); |
1414 | pub const Khudawadi: Script = Script(145); |
1415 | pub const Lao: Script = Script(24); |
1416 | pub const Latin: Script = Script(25); |
1417 | pub const Lepcha: Script = Script(82); |
1418 | pub const Limbu: Script = Script(48); |
1419 | pub const LinearA: Script = Script(83); |
1420 | pub const LinearB: Script = Script(49); |
1421 | pub const Lisu: Script = Script(131); |
1422 | pub const Lycian: Script = Script(107); |
1423 | pub const Lydian: Script = Script(108); |
1424 | pub const Mahajani: Script = Script(160); |
1425 | pub const Makasar: Script = Script(180); |
1426 | pub const Malayalam: Script = Script(26); |
1427 | pub const Mandaic: Script = Script(84); |
1428 | pub const Manichaean: Script = Script(121); |
1429 | pub const Marchen: Script = Script(169); |
1430 | pub const MasaramGondi: Script = Script(175); |
1431 | pub const Medefaidrin: Script = Script(181); |
1432 | pub const MeeteiMayek: Script = Script(115); |
1433 | pub const MendeKikakui: Script = Script(140); |
1434 | pub const MeroiticCursive: Script = Script(141); |
1435 | pub const MeroiticHieroglyphs: Script = Script(86); |
1436 | pub const Miao: Script = Script(92); |
1437 | pub const Modi: Script = Script(163); |
1438 | pub const Mongolian: Script = Script(27); |
1439 | pub const Mro: Script = Script(149); |
1440 | pub const Multani: Script = Script(164); |
1441 | pub const Myanmar: Script = Script(28); |
1442 | pub const Nabataean: Script = Script(143); |
1443 | pub const NagMundari: Script = Script(199); |
1444 | pub const Nandinagari: Script = Script(187); |
1445 | pub const Nastaliq: Script = Script(200); |
1446 | pub const NewTaiLue: Script = Script(59); |
1447 | pub const Newa: Script = Script(170); |
1448 | pub const Nko: Script = Script(87); |
1449 | pub const Nushu: Script = Script(150); |
1450 | pub const NyiakengPuachueHmong: Script = Script(186); |
1451 | pub const Ogham: Script = Script(29); |
1452 | pub const OlChiki: Script = Script(109); |
1453 | pub const OldHungarian: Script = Script(76); |
1454 | pub const OldItalic: Script = Script(30); |
1455 | pub const OldNorthArabian: Script = Script(142); |
1456 | pub const OldPermic: Script = Script(89); |
1457 | pub const OldPersian: Script = Script(61); |
1458 | pub const OldSogdian: Script = Script(184); |
1459 | pub const OldSouthArabian: Script = Script(133); |
1460 | pub const OldTurkic: Script = Script(88); |
1461 | pub const OldUyghur: Script = Script(194); |
1462 | pub const Oriya: Script = Script(31); |
1463 | pub const Osage: Script = Script(171); |
1464 | pub const Osmanya: Script = Script(50); |
1465 | pub const PahawhHmong: Script = Script(75); |
1466 | pub const Palmyrene: Script = Script(144); |
1467 | pub const PauCinHau: Script = Script(165); |
1468 | pub const PhagsPa: Script = Script(90); |
1469 | pub const Phoenician: Script = Script(91); |
1470 | pub const PsalterPahlavi: Script = Script(123); |
1471 | pub const Rejang: Script = Script(110); |
1472 | pub const Runic: Script = Script(32); |
1473 | pub const Samaritan: Script = Script(126); |
1474 | pub const Saurashtra: Script = Script(111); |
1475 | pub const Sharada: Script = Script(151); |
1476 | pub const Shavian: Script = Script(51); |
1477 | pub const Siddham: Script = Script(166); |
1478 | pub const SignWriting: Script = Script(112); |
1479 | pub const Sinhala: Script = Script(33); |
1480 | pub const Sogdian: Script = Script(183); |
1481 | pub const SoraSompeng: Script = Script(152); |
1482 | pub const Soyombo: Script = Script(176); |
1483 | pub const Sundanese: Script = Script(113); |
1484 | pub const SylotiNagri: Script = Script(58); |
1485 | pub const Syriac: Script = Script(34); |
1486 | pub const Tagalog: Script = Script(42); |
1487 | pub const Tagbanwa: Script = Script(45); |
1488 | pub const TaiLe: Script = Script(52); |
1489 | pub const TaiTham: Script = Script(106); |
1490 | pub const TaiViet: Script = Script(127); |
1491 | pub const Takri: Script = Script(153); |
1492 | pub const Tamil: Script = Script(35); |
1493 | pub const Tangsa: Script = Script(195); |
1494 | pub const Tangut: Script = Script(154); |
1495 | pub const Telugu: Script = Script(36); |
1496 | pub const Thaana: Script = Script(37); |
1497 | pub const Thai: Script = Script(38); |
1498 | pub const Tibetan: Script = Script(39); |
1499 | pub const Tifinagh: Script = Script(60); |
1500 | pub const Tirhuta: Script = Script(158); |
1501 | pub const Toto: Script = Script(196); |
1502 | pub const Ugaritic: Script = Script(53); |
1503 | pub const Unknown: Script = Script(103); |
1504 | pub const Vai: Script = Script(99); |
1505 | pub const Vithkuqi: Script = Script(197); |
1506 | pub const Wancho: Script = Script(188); |
1507 | pub const WarangCiti: Script = Script(146); |
1508 | pub const Yezidi: Script = Script(192); |
1509 | pub const Yi: Script = Script(41); |
1510 | pub const ZanabazarSquare: Script = Script(177); |
1511 | } |
1512 | |
1513 | impl_value_getter! { |
1514 | markers: ScriptNameToValueV1Marker / SINGLETON_PROPNAMES_FROM_SC_V1, ScriptValueToShortNameV1Marker / SINGLETON_PROPNAMES_TO_SHORT_LINEAR4_SC_V1, ScriptValueToLongNameV1Marker / SINGLETON_PROPNAMES_TO_LONG_LINEAR_SC_V1; |
1515 | impl Script { |
1516 | /// Return a [`PropertyValueNameToEnumMapper`], capable of looking up values |
1517 | /// from strings (short or long names, matched strictly or loosely) for the `Script` enumerated property. |
1518 | /// |
1519 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1520 | /// |
1521 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1522 | /// |
1523 | /// # Example |
1524 | /// |
1525 | /// ``` |
1526 | /// use icu::properties::Script; |
1527 | /// |
1528 | /// let lookup = Script::name_to_enum_mapper(); |
1529 | /// // short name for value |
1530 | /// assert_eq!(lookup.get_strict("Brah"), Some(Script::Brahmi)); |
1531 | /// assert_eq!(lookup.get_strict("Hang"), Some(Script::Hangul)); |
1532 | /// // long name for value |
1533 | /// assert_eq!(lookup.get_strict("Brahmi"), Some(Script::Brahmi)); |
1534 | /// assert_eq!(lookup.get_strict("Hangul"), Some(Script::Hangul)); |
1535 | /// // name has incorrect casing |
1536 | /// assert_eq!(lookup.get_strict("brahmi"), None); |
1537 | /// // loose matching of name |
1538 | /// assert_eq!(lookup.get_loose("brahmi"), Some(Script::Brahmi)); |
1539 | /// // fake property |
1540 | /// assert_eq!(lookup.get_strict("Linear_Z"), None); |
1541 | /// ``` |
1542 | pub fn get_name_to_enum_mapper() / name_to_enum_mapper(); |
1543 | /// Return a [`PropertyEnumToValueNameLinearTiny4Mapper`], capable of looking up short names |
1544 | /// for values of the `Script` enumerated property. |
1545 | /// |
1546 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1547 | /// |
1548 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1549 | /// |
1550 | /// # Example |
1551 | /// |
1552 | /// ``` |
1553 | /// use icu::properties::Script; |
1554 | /// use tinystr::tinystr; |
1555 | /// |
1556 | /// let lookup = Script::enum_to_short_name_mapper(); |
1557 | /// assert_eq!(lookup.get(Script::Brahmi), Some(tinystr!(4, "Brah"))); |
1558 | /// assert_eq!(lookup.get(Script::Hangul), Some(tinystr!(4, "Hang"))); |
1559 | /// ``` |
1560 | pub fn get_enum_to_short_name_mapper() / enum_to_short_name_mapper() -> PropertyEnumToValueNameLinearTiny4Mapper / PropertyEnumToValueNameLinearTiny4MapperBorrowed; |
1561 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up long names |
1562 | /// for values of the `Script` enumerated property. |
1563 | /// |
1564 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1565 | /// |
1566 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1567 | /// |
1568 | /// # Example |
1569 | /// |
1570 | /// ``` |
1571 | /// use icu::properties::Script; |
1572 | /// |
1573 | /// let lookup = Script::enum_to_long_name_mapper(); |
1574 | /// assert_eq!(lookup.get(Script::Brahmi), Some("Brahmi")); |
1575 | /// assert_eq!(lookup.get(Script::Hangul), Some("Hangul")); |
1576 | /// ``` |
1577 | pub fn get_enum_to_long_name_mapper() / enum_to_long_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
1578 | } |
1579 | } |
1580 | |
1581 | /// Enumerated property Hangul_Syllable_Type. |
1582 | /// |
1583 | /// The Unicode standard provides both precomposed Hangul syllables and conjoining Jamo to compose |
1584 | /// arbitrary Hangul syllables. This property provides the ontology of Hangul code points. |
1585 | /// |
1586 | /// For more information, see the [Unicode Korean FAQ](https://www.unicode.org/faq/korean.html). |
1587 | #[derive (Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
1588 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
1589 | #[cfg_attr (feature = "datagen" , derive(databake::Bake))] |
1590 | #[cfg_attr (feature = "datagen" , databake(path = icu_properties))] |
1591 | #[allow (clippy::exhaustive_structs)] // newtype |
1592 | #[repr (transparent)] |
1593 | #[zerovec::make_ule (HangulSyllableTypeULE)] |
1594 | pub struct HangulSyllableType(pub u8); |
1595 | |
1596 | create_const_array! { |
1597 | #[allow (non_upper_case_globals)] |
1598 | impl HangulSyllableType { |
1599 | /// (`NA`) not applicable (e.g. not a Hangul code point). |
1600 | pub const NotApplicable: HangulSyllableType = HangulSyllableType(0); |
1601 | /// (`L`) a conjoining leading consonant Jamo. |
1602 | pub const LeadingJamo: HangulSyllableType = HangulSyllableType(1); |
1603 | /// (`V`) a conjoining vowel Jamo. |
1604 | pub const VowelJamo: HangulSyllableType = HangulSyllableType(2); |
1605 | /// (`T`) a conjoining trailing consonant Jamo. |
1606 | pub const TrailingJamo: HangulSyllableType = HangulSyllableType(3); |
1607 | /// (`LV`) a precomposed syllable with a leading consonant and a vowel. |
1608 | pub const LeadingVowelSyllable: HangulSyllableType = HangulSyllableType(4); |
1609 | /// (`LVT`) a precomposed syllable with a leading consonant, a vowel, and a trailing consonant. |
1610 | pub const LeadingVowelTrailingSyllable: HangulSyllableType = HangulSyllableType(5); |
1611 | } |
1612 | } |
1613 | |
1614 | impl_value_getter! { |
1615 | markers: HangulSyllableTypeNameToValueV1Marker / SINGLETON_PROPNAMES_FROM_HST_V1, HangulSyllableTypeValueToShortNameV1Marker / SINGLETON_PROPNAMES_TO_SHORT_LINEAR_HST_V1, HangulSyllableTypeValueToLongNameV1Marker / SINGLETON_PROPNAMES_TO_LONG_LINEAR_HST_V1; |
1616 | impl HangulSyllableType { |
1617 | /// Return a [`PropertyValueNameToEnumMapper`], capable of looking up values |
1618 | /// from strings (short or long names, matched strictly or loosely) for the `Hangul_Syllable_Type` enumerated property. |
1619 | /// |
1620 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1621 | /// |
1622 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1623 | /// |
1624 | /// # Example |
1625 | /// |
1626 | /// ``` |
1627 | /// use icu::properties::HangulSyllableType; |
1628 | /// |
1629 | /// let lookup = HangulSyllableType::name_to_enum_mapper(); |
1630 | /// // short name for value |
1631 | /// assert_eq!(lookup.get_strict("L"), Some(HangulSyllableType::LeadingJamo)); |
1632 | /// assert_eq!(lookup.get_strict("LV"), Some(HangulSyllableType::LeadingVowelSyllable)); |
1633 | /// // long name for value |
1634 | /// assert_eq!(lookup.get_strict("Leading_Jamo"), Some(HangulSyllableType::LeadingJamo)); |
1635 | /// assert_eq!(lookup.get_strict("LV_Syllable"), Some(HangulSyllableType::LeadingVowelSyllable)); |
1636 | /// // name has incorrect casing |
1637 | /// assert_eq!(lookup.get_strict("lv"), None); |
1638 | /// // loose matching of name |
1639 | /// assert_eq!(lookup.get_loose("lv"), Some(HangulSyllableType::LeadingVowelSyllable)); |
1640 | /// // fake property |
1641 | /// assert_eq!(lookup.get_strict("LT_Syllable"), None); |
1642 | /// ``` |
1643 | pub fn get_name_to_enum_mapper() / name_to_enum_mapper(); |
1644 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up short names |
1645 | /// for values of the `Hangul_Syllable_Type` enumerated property. |
1646 | /// |
1647 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1648 | /// |
1649 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1650 | /// |
1651 | /// # Example |
1652 | /// |
1653 | /// ``` |
1654 | /// use icu::properties::HangulSyllableType; |
1655 | /// |
1656 | /// let lookup = HangulSyllableType::enum_to_short_name_mapper(); |
1657 | /// assert_eq!(lookup.get(HangulSyllableType::LeadingJamo), Some("L")); |
1658 | /// assert_eq!(lookup.get(HangulSyllableType::LeadingVowelSyllable), Some("LV")); |
1659 | /// ``` |
1660 | pub fn get_enum_to_short_name_mapper() / enum_to_short_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
1661 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up long names |
1662 | /// for values of the `Hangul_Syllable_Type` enumerated property. |
1663 | /// |
1664 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1665 | /// |
1666 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1667 | /// |
1668 | /// # Example |
1669 | /// |
1670 | /// ``` |
1671 | /// use icu::properties::HangulSyllableType; |
1672 | /// |
1673 | /// let lookup = HangulSyllableType::enum_to_long_name_mapper(); |
1674 | /// assert_eq!(lookup.get(HangulSyllableType::LeadingJamo), Some("Leading_Jamo")); |
1675 | /// assert_eq!(lookup.get(HangulSyllableType::LeadingVowelSyllable), Some("LV_Syllable")); |
1676 | /// ``` |
1677 | pub fn get_enum_to_long_name_mapper() / enum_to_long_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
1678 | } |
1679 | } |
1680 | |
1681 | /// Enumerated property East_Asian_Width. |
1682 | /// |
1683 | /// See "Definitions" in UAX #11 for the summary of each property value: |
1684 | /// <https://www.unicode.org/reports/tr11/#Definitions> |
1685 | /// |
1686 | /// The numeric value (the wrapped `u8`) is compatible with `UEastAsianWidth` in ICU4C. |
1687 | #[derive (Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
1688 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
1689 | #[cfg_attr (feature = "datagen" , derive(databake::Bake))] |
1690 | #[cfg_attr (feature = "datagen" , databake(path = icu_properties))] |
1691 | #[allow (clippy::exhaustive_structs)] // newtype |
1692 | #[repr (transparent)] |
1693 | #[zerovec::make_ule (EastAsianWidthULE)] |
1694 | pub struct EastAsianWidth(pub u8); |
1695 | |
1696 | create_const_array! { |
1697 | #[allow (missing_docs)] // These constants don't need individual documentation; each trailing comment gives the value's name. |
1698 | #[allow (non_upper_case_globals)] |
1699 | impl EastAsianWidth { |
1700 | pub const Neutral: EastAsianWidth = EastAsianWidth(0); // name="N" |
1701 | pub const Ambiguous: EastAsianWidth = EastAsianWidth(1); // name="A" |
1702 | pub const Halfwidth: EastAsianWidth = EastAsianWidth(2); // name="H" |
1703 | pub const Fullwidth: EastAsianWidth = EastAsianWidth(3); // name="F" |
1704 | pub const Narrow: EastAsianWidth = EastAsianWidth(4); // name="Na" |
1705 | pub const Wide: EastAsianWidth = EastAsianWidth(5); // name="W" |
1706 | } |
1707 | } |
1708 | |
1709 | impl_value_getter! { |
1710 | markers: EastAsianWidthNameToValueV1Marker / SINGLETON_PROPNAMES_FROM_EA_V1, EastAsianWidthValueToShortNameV1Marker / SINGLETON_PROPNAMES_TO_SHORT_LINEAR_EA_V1, EastAsianWidthValueToLongNameV1Marker / SINGLETON_PROPNAMES_TO_LONG_LINEAR_EA_V1; |
1711 | impl EastAsianWidth { |
1712 | /// Return a [`PropertyValueNameToEnumMapper`], capable of looking up values |
1713 | /// from strings (short or long names, matched strictly or loosely) for the `East_Asian_Width` enumerated property. |
1714 | /// |
1715 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1716 | /// |
1717 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1718 | /// |
1719 | /// # Example |
1720 | /// |
1721 | /// ``` |
1722 | /// use icu::properties::EastAsianWidth; |
1723 | /// |
1724 | /// let lookup = EastAsianWidth::name_to_enum_mapper(); |
1725 | /// // short name for value |
1726 | /// assert_eq!(lookup.get_strict("N"), Some(EastAsianWidth::Neutral)); |
1727 | /// assert_eq!(lookup.get_strict("H"), Some(EastAsianWidth::Halfwidth)); |
1728 | /// // long name for value |
1729 | /// assert_eq!(lookup.get_strict("Neutral"), Some(EastAsianWidth::Neutral)); |
1730 | /// assert_eq!(lookup.get_strict("Halfwidth"), Some(EastAsianWidth::Halfwidth)); |
1731 | /// // name has incorrect casing / extra hyphen |
1732 | /// assert_eq!(lookup.get_strict("half-width"), None); |
1733 | /// // loose matching of name |
1734 | /// assert_eq!(lookup.get_loose("half-width"), Some(EastAsianWidth::Halfwidth)); |
1735 | /// // fake property |
1736 | /// assert_eq!(lookup.get_strict("TwoPointFiveWidth"), None); |
1737 | /// ``` |
1738 | pub fn get_name_to_enum_mapper() / name_to_enum_mapper(); |
1739 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up short names |
1740 | /// for values of the `East_Asian_Width` enumerated property. |
1741 | /// |
1742 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1743 | /// |
1744 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1745 | /// |
1746 | /// # Example |
1747 | /// |
1748 | /// ``` |
1749 | /// use icu::properties::EastAsianWidth; |
1750 | /// |
1751 | /// let lookup = EastAsianWidth::enum_to_short_name_mapper(); |
1752 | /// assert_eq!(lookup.get(EastAsianWidth::Neutral), Some("N")); |
1753 | /// assert_eq!(lookup.get(EastAsianWidth::Halfwidth), Some("H")); |
1754 | /// ``` |
1755 | pub fn get_enum_to_short_name_mapper() / enum_to_short_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
1756 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up long names |
1757 | /// for values of the `East_Asian_Width` enumerated property. |
1758 | /// |
1759 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1760 | /// |
1761 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1762 | /// |
1763 | /// # Example |
1764 | /// |
1765 | /// ``` |
1766 | /// use icu::properties::EastAsianWidth; |
1767 | /// |
1768 | /// let lookup = EastAsianWidth::enum_to_long_name_mapper(); |
1769 | /// assert_eq!(lookup.get(EastAsianWidth::Neutral), Some("Neutral")); |
1770 | /// assert_eq!(lookup.get(EastAsianWidth::Halfwidth), Some("Halfwidth")); |
1771 | /// ``` |
1772 | pub fn get_enum_to_long_name_mapper() / enum_to_long_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
1773 | } |
1774 | } |
1775 | |
1776 | /// Enumerated property Line_Break. |
1777 | /// |
1778 | /// See "Line Breaking Properties" in UAX #14 for the summary of each property |
1779 | /// value: <https://www.unicode.org/reports/tr14/#Properties> |
1780 | /// |
1781 | /// The numeric value (the wrapped `u8`) is compatible with `ULineBreak` in ICU4C. |
1782 | #[derive (Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
1783 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
1784 | #[cfg_attr (feature = "datagen" , derive(databake::Bake))] |
1785 | #[cfg_attr (feature = "datagen" , databake(path = icu_properties))] |
1786 | #[allow (clippy::exhaustive_structs)] // newtype |
1787 | #[repr (transparent)] |
1788 | #[zerovec::make_ule (LineBreakULE)] |
1789 | pub struct LineBreak(pub u8); |
1790 | |
1791 | #[allow(missing_docs)] // Per-constant docs are unnecessary; each trailing comment gives the value's name. |
1792 | #[allow(non_upper_case_globals)] |
1793 | impl LineBreak { |
1794 | pub const Unknown: Self = Self(0); // name="XX" |
1795 | pub const Ambiguous: Self = Self(1); // name="AI" |
1796 | pub const Alphabetic: Self = Self(2); // name="AL" |
1797 | pub const BreakBoth: Self = Self(3); // name="B2" |
1798 | pub const BreakAfter: Self = Self(4); // name="BA" |
1799 | pub const BreakBefore: Self = Self(5); // name="BB" |
1800 | pub const MandatoryBreak: Self = Self(6); // name="BK" |
1801 | pub const ContingentBreak: Self = Self(7); // name="CB" |
1802 | pub const ClosePunctuation: Self = Self(8); // name="CL" |
1803 | pub const CombiningMark: Self = Self(9); // name="CM" |
1804 | pub const CarriageReturn: Self = Self(10); // name="CR" |
1805 | pub const Exclamation: Self = Self(11); // name="EX" |
1806 | pub const Glue: Self = Self(12); // name="GL" |
1807 | pub const Hyphen: Self = Self(13); // name="HY" |
1808 | pub const Ideographic: Self = Self(14); // name="ID" |
1809 | pub const Inseparable: Self = Self(15); // name="IN" |
1810 | pub const InfixNumeric: Self = Self(16); // name="IS" |
1811 | pub const LineFeed: Self = Self(17); // name="LF" |
1812 | pub const Nonstarter: Self = Self(18); // name="NS" |
1813 | pub const Numeric: Self = Self(19); // name="NU" |
1814 | pub const OpenPunctuation: Self = Self(20); // name="OP" |
1815 | pub const PostfixNumeric: Self = Self(21); // name="PO" |
1816 | pub const PrefixNumeric: Self = Self(22); // name="PR" |
1817 | pub const Quotation: Self = Self(23); // name="QU" |
1818 | pub const ComplexContext: Self = Self(24); // name="SA" |
1819 | pub const Surrogate: Self = Self(25); // name="SG" |
1820 | pub const Space: Self = Self(26); // name="SP" |
1821 | pub const BreakSymbols: Self = Self(27); // name="SY" |
1822 | pub const ZWSpace: Self = Self(28); // name="ZW" |
1823 | pub const NextLine: Self = Self(29); // name="NL" |
1824 | pub const WordJoiner: Self = Self(30); // name="WJ" |
1825 | pub const H2: Self = Self(31); // name="H2" |
1826 | pub const H3: Self = Self(32); // name="H3" |
1827 | pub const JL: Self = Self(33); // name="JL" |
1828 | pub const JT: Self = Self(34); // name="JT" |
1829 | pub const JV: Self = Self(35); // name="JV" |
1830 | pub const CloseParenthesis: Self = Self(36); // name="CP" |
1831 | pub const ConditionalJapaneseStarter: Self = Self(37); // name="CJ" |
1832 | pub const HebrewLetter: Self = Self(38); // name="HL" |
1833 | pub const RegionalIndicator: Self = Self(39); // name="RI" |
1834 | pub const EBase: Self = Self(40); // name="EB" |
1835 | pub const EModifier: Self = Self(41); // name="EM" |
1836 | pub const ZWJ: Self = Self(42); // name="ZWJ" |
1837 | |
1838 | // Values added in ICU 74: |
1839 | pub const Aksara: Self = Self(43); // name="AK" |
1840 | pub const AksaraPrebase: Self = Self(44); // name="AP" |
1841 | pub const AksaraStart: Self = Self(45); // name="AS" |
1842 | pub const ViramaFinal: Self = Self(46); // name="VF" |
1843 | pub const Virama: Self = Self(47); // name="VI" |
1844 | } |
1845 | |
1846 | impl_value_getter! { |
1847 | markers: LineBreakNameToValueV1Marker / SINGLETON_PROPNAMES_FROM_LB_V1, LineBreakValueToShortNameV1Marker / SINGLETON_PROPNAMES_TO_SHORT_LINEAR_LB_V1, LineBreakValueToLongNameV1Marker / SINGLETON_PROPNAMES_TO_LONG_LINEAR_LB_V1; |
1848 | impl LineBreak { |
1849 | /// Return a [`PropertyValueNameToEnumMapper`], capable of looking up values |
1850 | /// from strings (short or long names, matched strictly or loosely) for the `Line_Break` enumerated property. |
1851 | /// |
1852 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1853 | /// |
1854 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1855 | /// |
1856 | /// # Example |
1857 | /// |
1858 | /// ``` |
1859 | /// use icu::properties::LineBreak; |
1860 | /// |
1861 | /// let lookup = LineBreak::name_to_enum_mapper(); |
1862 | /// // short name for value |
1863 | /// assert_eq!(lookup.get_strict("BK"), Some(LineBreak::MandatoryBreak)); |
1864 | /// assert_eq!(lookup.get_strict("AL"), Some(LineBreak::Alphabetic)); |
1865 | /// // long name for value |
1866 | /// assert_eq!(lookup.get_strict("Mandatory_Break"), Some(LineBreak::MandatoryBreak)); |
1867 | /// assert_eq!(lookup.get_strict("Alphabetic"), Some(LineBreak::Alphabetic)); |
1868 | /// // name has incorrect casing and dash instead of underscore |
1869 | /// assert_eq!(lookup.get_strict("mandatory-Break"), None); |
1870 | /// // loose matching of name |
1871 | /// assert_eq!(lookup.get_loose("mandatory-Break"), Some(LineBreak::MandatoryBreak)); |
1872 | /// // fake property |
1873 | /// assert_eq!(lookup.get_strict("Stochastic_Break"), None); |
1874 | /// ``` |
1875 | pub fn get_name_to_enum_mapper() / name_to_enum_mapper(); |
1876 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up short names |
1877 | /// for values of the `Line_Break` enumerated property. |
1878 | /// |
1879 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1880 | /// |
1881 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1882 | /// |
1883 | /// # Example |
1884 | /// |
1885 | /// ``` |
1886 | /// use icu::properties::LineBreak; |
1887 | /// |
1888 | /// let lookup = LineBreak::enum_to_short_name_mapper(); |
1889 | /// assert_eq!(lookup.get(LineBreak::MandatoryBreak), Some("BK")); |
1890 | /// assert_eq!(lookup.get(LineBreak::Alphabetic), Some("AL")); |
1891 | /// ``` |
1892 | pub fn get_enum_to_short_name_mapper() / enum_to_short_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
1893 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up long names |
1894 | /// for values of the `Line_Break` enumerated property. |
1895 | /// |
1896 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1897 | /// |
1898 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1899 | /// |
1900 | /// # Example |
1901 | /// |
1902 | /// ``` |
1903 | /// use icu::properties::LineBreak; |
1904 | /// |
1905 | /// let lookup = LineBreak::enum_to_long_name_mapper(); |
1906 | /// assert_eq!(lookup.get(LineBreak::MandatoryBreak), Some("Mandatory_Break")); |
1907 | /// assert_eq!(lookup.get(LineBreak::Alphabetic), Some("Alphabetic")); |
1908 | /// ``` |
1909 | pub fn get_enum_to_long_name_mapper() / enum_to_long_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
1910 | } |
1911 | } |
1912 | |
1913 | /// Enumerated property Grapheme_Cluster_Break. |
1914 | /// |
1915 | /// See "Default Grapheme Cluster Boundary Specification" in UAX #29 for the |
1916 | /// summary of each property value: |
1917 | /// <https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table> |
1918 | /// |
1919 | /// The numeric value (the wrapped `u8`) is compatible with `UGraphemeClusterBreak` in ICU4C. |
1920 | #[derive (Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
1921 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
1922 | #[cfg_attr (feature = "datagen" , derive(databake::Bake))] |
1923 | #[cfg_attr (feature = "datagen" , databake(path = icu_properties))] |
1924 | #[allow (clippy::exhaustive_structs)] // this type is stable |
1925 | #[repr (transparent)] |
1926 | #[zerovec::make_ule (GraphemeClusterBreakULE)] |
1927 | pub struct GraphemeClusterBreak(pub u8); |
1928 | |
1929 | #[allow(missing_docs)] // Most constants need no individual docs; each trailing comment gives the value's name. |
1930 | #[allow(non_upper_case_globals)] |
1931 | impl GraphemeClusterBreak { |
1932 | pub const Other: Self = Self(0); // name="XX" |
1933 | pub const Control: Self = Self(1); // name="CN" |
1934 | pub const CR: Self = Self(2); // name="CR" |
1935 | pub const Extend: Self = Self(3); // name="EX" |
1936 | pub const L: Self = Self(4); // name="L" |
1937 | pub const LF: Self = Self(5); // name="LF" |
1938 | pub const LV: Self = Self(6); // name="LV" |
1939 | pub const LVT: Self = Self(7); // name="LVT" |
1940 | pub const T: Self = Self(8); // name="T" |
1941 | pub const V: Self = Self(9); // name="V" |
1942 | pub const SpacingMark: Self = Self(10); // name="SM" |
1943 | pub const Prepend: Self = Self(11); // name="PP" |
1944 | pub const RegionalIndicator: Self = Self(12); // name="RI" |
1945 | /// Obsolete value; it is no longer used. |
1946 | pub const EBase: Self = Self(13); // name="EB" |
1947 | /// Obsolete value; it is no longer used. |
1948 | pub const EBaseGAZ: Self = Self(14); // name="EBG" |
1949 | /// Obsolete value; it is no longer used. |
1950 | pub const EModifier: Self = Self(15); // name="EM" |
1951 | /// Obsolete value; it is no longer used. |
1952 | pub const GlueAfterZwj: Self = Self(16); // name="GAZ" |
1953 | pub const ZWJ: Self = Self(17); // name="ZWJ" |
1954 | } |
1955 | |
1956 | impl_value_getter! { |
1957 | markers: GraphemeClusterBreakNameToValueV1Marker / SINGLETON_PROPNAMES_FROM_GCB_V1, GraphemeClusterBreakValueToShortNameV1Marker / SINGLETON_PROPNAMES_TO_SHORT_LINEAR_GCB_V1, GraphemeClusterBreakValueToLongNameV1Marker / SINGLETON_PROPNAMES_TO_LONG_LINEAR_GCB_V1; |
1958 | impl GraphemeClusterBreak { |
1959 | /// Return a [`PropertyValueNameToEnumMapper`], capable of looking up values |
1960 | /// from strings (short or long names, matched strictly or loosely) for the `Grapheme_Cluster_Break` enumerated property. |
1961 | /// |
1962 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1963 | /// |
1964 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1965 | /// |
1966 | /// # Example |
1967 | /// |
1968 | /// ``` |
1969 | /// use icu::properties::GraphemeClusterBreak; |
1970 | /// |
1971 | /// let lookup = GraphemeClusterBreak::name_to_enum_mapper(); |
1972 | /// // short name for value |
1973 | /// assert_eq!(lookup.get_strict("EX"), Some(GraphemeClusterBreak::Extend)); |
1974 | /// assert_eq!(lookup.get_strict("RI"), Some(GraphemeClusterBreak::RegionalIndicator)); |
1975 | /// // long name for value |
1976 | /// assert_eq!(lookup.get_strict("Extend"), Some(GraphemeClusterBreak::Extend)); |
1977 | /// assert_eq!(lookup.get_strict("Regional_Indicator"), Some(GraphemeClusterBreak::RegionalIndicator)); |
1978 | /// // name has incorrect casing and lacks an underscore |
1979 | /// assert_eq!(lookup.get_strict("regionalindicator"), None); |
1980 | /// // loose matching of name |
1981 | /// assert_eq!(lookup.get_loose("regionalindicator"), Some(GraphemeClusterBreak::RegionalIndicator)); |
1982 | /// // fake property |
1983 | /// assert_eq!(lookup.get_strict("Regional_Indicator_Two_Point_Oh"), None); |
1984 | /// ``` |
1985 | pub fn get_name_to_enum_mapper() / name_to_enum_mapper(); |
1986 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up short names |
1987 | /// for values of the `Grapheme_Cluster_Break` enumerated property. |
1988 | /// |
1989 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1990 | /// |
1991 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1992 | /// |
1993 | /// # Example |
1994 | /// |
1995 | /// ``` |
1996 | /// use icu::properties::GraphemeClusterBreak; |
1997 | /// |
1998 | /// let lookup = GraphemeClusterBreak::enum_to_short_name_mapper(); |
1999 | /// assert_eq!(lookup.get(GraphemeClusterBreak::Extend), Some("EX")); |
2000 | /// assert_eq!(lookup.get(GraphemeClusterBreak::RegionalIndicator), Some("RI")); |
2001 | /// ``` |
2002 | pub fn get_enum_to_short_name_mapper() / enum_to_short_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
2003 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up long names |
2004 | /// for values of the `Grapheme_Cluster_Break` enumerated property. |
2005 | /// |
2006 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
2007 | /// |
2008 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
2009 | /// |
2010 | /// # Example |
2011 | /// |
2012 | /// ``` |
2013 | /// use icu::properties::GraphemeClusterBreak; |
2014 | /// |
2015 | /// let lookup = GraphemeClusterBreak::enum_to_long_name_mapper(); |
2016 | /// assert_eq!(lookup.get(GraphemeClusterBreak::Extend), Some("Extend")); |
2017 | /// assert_eq!(lookup.get(GraphemeClusterBreak::RegionalIndicator), Some("Regional_Indicator")); |
2018 | /// ``` |
2019 | pub fn get_enum_to_long_name_mapper() / enum_to_long_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
2020 | } |
2021 | } |
2022 | |
2023 | /// Enumerated property Word_Break. |
2024 | /// |
2025 | /// See "Default Word Boundary Specification" in UAX #29 for the summary of |
2026 | /// each property value: |
2027 | /// <https://www.unicode.org/reports/tr29/#Default_Word_Boundaries>. |
2028 | /// |
2029 | /// The numeric value is compatible with `UWordBreakValues` in ICU4C. |
2030 | #[derive (Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
2031 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
2032 | #[cfg_attr (feature = "datagen" , derive(databake::Bake))] |
2033 | #[cfg_attr (feature = "datagen" , databake(path = icu_properties))] |
2034 | #[allow (clippy::exhaustive_structs)] // newtype: the single public `u8` field is the raw property value |
2035 | #[repr (transparent)] |
2036 | #[zerovec::make_ule (WordBreakULE)] |
2037 | pub struct WordBreak(pub u8); |
2038 | |
2039 | create_const_array! { |
2040 | #[allow (missing_docs)] // Self-describing constants; each trailing name="..." comment notes the value's short name. |
2041 | #[allow (non_upper_case_globals)] |
2042 | impl WordBreak { |
2043 | pub const Other: WordBreak = WordBreak(0); // name="XX" |
2044 | pub const ALetter: WordBreak = WordBreak(1); // name="LE" |
2045 | pub const Format: WordBreak = WordBreak(2); // name="FO" |
2046 | pub const Katakana: WordBreak = WordBreak(3); // name="KA" |
2047 | pub const MidLetter: WordBreak = WordBreak(4); // name="ML" |
2048 | pub const MidNum: WordBreak = WordBreak(5); // name="MN" |
2049 | pub const Numeric: WordBreak = WordBreak(6); // name="NU" |
2050 | pub const ExtendNumLet: WordBreak = WordBreak(7); // name="EX" |
2051 | pub const CR: WordBreak = WordBreak(8); // name="CR" |
2052 | pub const Extend: WordBreak = WordBreak(9); // name="Extend" |
2053 | pub const LF: WordBreak = WordBreak(10); // name="LF" |
2054 | pub const MidNumLet: WordBreak = WordBreak(11); // name="MB" |
2055 | pub const Newline: WordBreak = WordBreak(12); // name="NL" |
2056 | pub const RegionalIndicator: WordBreak = WordBreak(13); // name="RI" |
2057 | pub const HebrewLetter: WordBreak = WordBreak(14); // name="HL" |
2058 | pub const SingleQuote: WordBreak = WordBreak(15); // name="SQ" |
2059 | pub const DoubleQuote: WordBreak = WordBreak(16); // name="DQ" |
2060 | /// This value is obsolete and unused. |
2061 | pub const EBase: WordBreak = WordBreak(17); // name="EB" |
2062 | /// This value is obsolete and unused. |
2063 | pub const EBaseGAZ: WordBreak = WordBreak(18); // name="EBG" |
2064 | /// This value is obsolete and unused. |
2065 | pub const EModifier: WordBreak = WordBreak(19); // name="EM" |
2066 | /// This value is obsolete and unused. |
2067 | pub const GlueAfterZwj: WordBreak = WordBreak(20); // name="GAZ" |
2068 | pub const ZWJ: WordBreak = WordBreak(21); // name="ZWJ" |
2069 | pub const WSegSpace: WordBreak = WordBreak(22); // name="WSegSpace" |
2070 | } |
2071 | } |
2072 | |
2073 | impl_value_getter! { |
2074 | markers: WordBreakNameToValueV1Marker / SINGLETON_PROPNAMES_FROM_WB_V1, WordBreakValueToShortNameV1Marker / SINGLETON_PROPNAMES_TO_SHORT_LINEAR_WB_V1, WordBreakValueToLongNameV1Marker / SINGLETON_PROPNAMES_TO_LONG_LINEAR_WB_V1; |
2075 | impl WordBreak { |
2076 | /// Return a [`PropertyValueNameToEnumMapper`], capable of looking up values |
2077 | /// from strings for the `Word_Break` enumerated property. |
2078 | /// |
2079 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
2080 | /// |
2081 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
2082 | /// |
2083 | /// # Example |
2084 | /// |
2085 | /// ``` |
2086 | /// use icu::properties::WordBreak; |
2087 | /// |
2088 | /// let lookup = WordBreak::name_to_enum_mapper(); |
2089 | /// // short name for value |
2090 | /// assert_eq!(lookup.get_strict("KA"), Some(WordBreak::Katakana)); |
2091 | /// assert_eq!(lookup.get_strict("LE"), Some(WordBreak::ALetter)); |
2092 | /// // long name for value |
2093 | /// assert_eq!(lookup.get_strict("Katakana"), Some(WordBreak::Katakana)); |
2094 | /// assert_eq!(lookup.get_strict("ALetter"), Some(WordBreak::ALetter)); |
2095 | /// // name has incorrect casing |
2096 | /// assert_eq!(lookup.get_strict("Aletter"), None); |
2097 | /// // loose matching of name |
2098 | /// assert_eq!(lookup.get_loose("Aletter"), Some(WordBreak::ALetter)); |
2099 | /// assert_eq!(lookup.get_loose("w_seg_space"), Some(WordBreak::WSegSpace)); |
2100 | /// // nonexistent value name |
2101 | /// assert_eq!(lookup.get_strict("Quadruple_Quote"), None); |
2102 | /// ``` |
2103 | pub fn get_name_to_enum_mapper() / name_to_enum_mapper(); |
2104 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up short names |
2105 | /// for values of the `Word_Break` enumerated property. |
2106 | /// |
2107 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
2108 | /// |
2109 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
2110 | /// |
2111 | /// # Example |
2112 | /// |
2113 | /// ``` |
2114 | /// use icu::properties::WordBreak; |
2115 | /// |
2116 | /// let lookup = WordBreak::enum_to_short_name_mapper(); |
2117 | /// assert_eq!(lookup.get(WordBreak::Katakana), Some("KA")); |
2118 | /// assert_eq!(lookup.get(WordBreak::ALetter), Some("LE")); |
2119 | /// assert_eq!(lookup.get(WordBreak::WSegSpace), Some("WSegSpace")); |
2120 | /// ``` |
2121 | pub fn get_enum_to_short_name_mapper() / enum_to_short_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
2122 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up long names |
2123 | /// for values of the `Word_Break` enumerated property. |
2124 | /// |
2125 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
2126 | /// |
2127 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
2128 | /// |
2129 | /// # Example |
2130 | /// |
2131 | /// ``` |
2132 | /// use icu::properties::WordBreak; |
2133 | /// |
2134 | /// let lookup = WordBreak::enum_to_long_name_mapper(); |
2135 | /// assert_eq!(lookup.get(WordBreak::Katakana), Some("Katakana")); |
2136 | /// assert_eq!(lookup.get(WordBreak::ALetter), Some("ALetter")); |
2137 | /// assert_eq!(lookup.get(WordBreak::WSegSpace), Some("WSegSpace")); |
2138 | /// ``` |
2139 | pub fn get_enum_to_long_name_mapper() / enum_to_long_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
2140 | } |
2141 | } |
2142 | |
2143 | /// Enumerated property Sentence_Break. |
2144 | /// See "Default Sentence Boundary Specification" in UAX #29 for the summary of |
2145 | /// each property value: |
2146 | /// <https://www.unicode.org/reports/tr29/#Default_Sentence_Boundaries>. |
2147 | /// |
2148 | /// The numeric value is compatible with `USentenceBreak` in ICU4C. |
2149 | #[derive (Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
2150 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
2151 | #[cfg_attr (feature = "datagen" , derive(databake::Bake))] |
2152 | #[cfg_attr (feature = "datagen" , databake(path = icu_properties))] |
2153 | #[allow (clippy::exhaustive_structs)] // newtype: the single public `u8` field is the raw property value |
2154 | #[repr (transparent)] |
2155 | #[zerovec::make_ule (SentenceBreakULE)] |
2156 | pub struct SentenceBreak(pub u8); |
2157 | |
2158 | create_const_array! { |
2159 | #[allow (missing_docs)] // Self-describing constants; each trailing name="..." comment notes the value's short name. |
2160 | #[allow (non_upper_case_globals)] |
2161 | impl SentenceBreak { |
2162 | pub const Other: SentenceBreak = SentenceBreak(0); // name="XX" |
2163 | pub const ATerm: SentenceBreak = SentenceBreak(1); // name="AT" |
2164 | pub const Close: SentenceBreak = SentenceBreak(2); // name="CL" |
2165 | pub const Format: SentenceBreak = SentenceBreak(3); // name="FO" |
2166 | pub const Lower: SentenceBreak = SentenceBreak(4); // name="LO" |
2167 | pub const Numeric: SentenceBreak = SentenceBreak(5); // name="NU" |
2168 | pub const OLetter: SentenceBreak = SentenceBreak(6); // name="LE" |
2169 | pub const Sep: SentenceBreak = SentenceBreak(7); // name="SE" |
2170 | pub const Sp: SentenceBreak = SentenceBreak(8); // name="SP" |
2171 | pub const STerm: SentenceBreak = SentenceBreak(9); // name="ST" |
2172 | pub const Upper: SentenceBreak = SentenceBreak(10); // name="UP" |
2173 | pub const CR: SentenceBreak = SentenceBreak(11); // name="CR" |
2174 | pub const Extend: SentenceBreak = SentenceBreak(12); // name="EX" |
2175 | pub const LF: SentenceBreak = SentenceBreak(13); // name="LF" |
2176 | pub const SContinue: SentenceBreak = SentenceBreak(14); // name="SC" |
2177 | } |
2178 | } |
2179 | |
2180 | impl_value_getter! { |
2181 | markers: SentenceBreakNameToValueV1Marker / SINGLETON_PROPNAMES_FROM_SB_V1, SentenceBreakValueToShortNameV1Marker / SINGLETON_PROPNAMES_TO_SHORT_LINEAR_SB_V1, SentenceBreakValueToLongNameV1Marker / SINGLETON_PROPNAMES_TO_LONG_LINEAR_SB_V1; |
2182 | impl SentenceBreak { |
2183 | /// Return a [`PropertyValueNameToEnumMapper`], capable of looking up values |
2184 | /// from strings for the `Sentence_Break` enumerated property. |
2185 | /// |
2186 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
2187 | /// |
2188 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
2189 | /// |
2190 | /// # Example |
2191 | /// |
2192 | /// ``` |
2193 | /// use icu::properties::SentenceBreak; |
2194 | /// |
2195 | /// let lookup = SentenceBreak::name_to_enum_mapper(); |
2196 | /// // short name for value |
2197 | /// assert_eq!(lookup.get_strict("FO"), Some(SentenceBreak::Format)); |
2198 | /// assert_eq!(lookup.get_strict("NU"), Some(SentenceBreak::Numeric)); |
2199 | /// // long name for value |
2200 | /// assert_eq!(lookup.get_strict("Format"), Some(SentenceBreak::Format)); |
2201 | /// assert_eq!(lookup.get_strict("Numeric"), Some(SentenceBreak::Numeric)); |
2202 | /// // name has incorrect casing |
2203 | /// assert_eq!(lookup.get_strict("fOrmat"), None); |
2204 | /// // loose matching of name |
2205 | /// assert_eq!(lookup.get_loose("fOrmat"), Some(SentenceBreak::Format)); |
2206 | /// // nonexistent value name |
2207 | /// assert_eq!(lookup.get_strict("Fixer_Upper"), None); |
2208 | /// ``` |
2209 | pub fn get_name_to_enum_mapper() / name_to_enum_mapper(); |
2210 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up short names |
2211 | /// for values of the `Sentence_Break` enumerated property. |
2212 | /// |
2213 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
2214 | /// |
2215 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
2216 | /// |
2217 | /// # Example |
2218 | /// |
2219 | /// ``` |
2220 | /// use icu::properties::SentenceBreak; |
2221 | /// |
2222 | /// let lookup = SentenceBreak::enum_to_short_name_mapper(); |
2223 | /// assert_eq!(lookup.get(SentenceBreak::Format), Some("FO")); |
2224 | /// assert_eq!(lookup.get(SentenceBreak::Numeric), Some("NU")); |
2225 | /// ``` |
2226 | pub fn get_enum_to_short_name_mapper() / enum_to_short_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
2227 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up long names |
2228 | /// for values of the `Sentence_Break` enumerated property. |
2229 | /// |
2230 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
2231 | /// |
2232 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
2233 | /// |
2234 | /// # Example |
2235 | /// |
2236 | /// ``` |
2237 | /// use icu::properties::SentenceBreak; |
2238 | /// |
2239 | /// let lookup = SentenceBreak::enum_to_long_name_mapper(); |
2240 | /// assert_eq!(lookup.get(SentenceBreak::Format), Some("Format")); |
2241 | /// assert_eq!(lookup.get(SentenceBreak::Numeric), Some("Numeric")); |
2242 | /// assert_eq!(lookup.get(SentenceBreak::SContinue), Some("SContinue")); |
2243 | /// ``` |
2244 | pub fn get_enum_to_long_name_mapper() / enum_to_long_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
2245 | } |
2246 | } |
2247 | /// Property Canonical_Combining_Class. |
2248 | /// See UAX #15: |
2249 | /// <https://www.unicode.org/reports/tr15/>. |
2250 | /// |
2251 | /// See `icu::normalizer::properties::CanonicalCombiningClassMap` for the API |
2252 | /// to look up the Canonical_Combining_Class property by scalar value. |
2253 | // |
2254 | // NOTE: The Pernosco debugger has special knowledge |
2255 | // of this struct. Please do not change the bit layout |
2256 | // or the crate-module-qualified name of this struct |
2257 | // without coordination. |
2258 | #[derive (Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
2259 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
2260 | #[cfg_attr (feature = "datagen" , derive(databake::Bake))] |
2261 | #[cfg_attr (feature = "datagen" , databake(path = icu_properties))] |
2262 | #[allow (clippy::exhaustive_structs)] // newtype: the single public `u8` field is the raw property value |
2263 | #[repr (transparent)] |
2264 | #[zerovec::make_ule (CanonicalCombiningClassULE)] |
2265 | pub struct CanonicalCombiningClass(pub u8); |
2266 | |
2267 | create_const_array! { |
2268 | // These constant names come from PropertyValueAliases.txt |
2269 | #[allow (missing_docs)] // Self-describing constants; each trailing name="..." comment notes the value's short name. |
2270 | #[allow (non_upper_case_globals)] |
2271 | impl CanonicalCombiningClass { |
2272 | pub const NotReordered: CanonicalCombiningClass = CanonicalCombiningClass(0); // name="NR" |
2273 | pub const Overlay: CanonicalCombiningClass = CanonicalCombiningClass(1); // name="OV" |
2274 | pub const HanReading: CanonicalCombiningClass = CanonicalCombiningClass(6); // name="HANR" |
2275 | pub const Nukta: CanonicalCombiningClass = CanonicalCombiningClass(7); // name="NK" |
2276 | pub const KanaVoicing: CanonicalCombiningClass = CanonicalCombiningClass(8); // name="KV" |
2277 | pub const Virama: CanonicalCombiningClass = CanonicalCombiningClass(9); // name="VR" |
2278 | pub const CCC10: CanonicalCombiningClass = CanonicalCombiningClass(10); // name="CCC10" |
2279 | pub const CCC11: CanonicalCombiningClass = CanonicalCombiningClass(11); // name="CCC11" |
2280 | pub const CCC12: CanonicalCombiningClass = CanonicalCombiningClass(12); // name="CCC12" |
2281 | pub const CCC13: CanonicalCombiningClass = CanonicalCombiningClass(13); // name="CCC13" |
2282 | pub const CCC14: CanonicalCombiningClass = CanonicalCombiningClass(14); // name="CCC14" |
2283 | pub const CCC15: CanonicalCombiningClass = CanonicalCombiningClass(15); // name="CCC15" |
2284 | pub const CCC16: CanonicalCombiningClass = CanonicalCombiningClass(16); // name="CCC16" |
2285 | pub const CCC17: CanonicalCombiningClass = CanonicalCombiningClass(17); // name="CCC17" |
2286 | pub const CCC18: CanonicalCombiningClass = CanonicalCombiningClass(18); // name="CCC18" |
2287 | pub const CCC19: CanonicalCombiningClass = CanonicalCombiningClass(19); // name="CCC19" |
2288 | pub const CCC20: CanonicalCombiningClass = CanonicalCombiningClass(20); // name="CCC20" |
2289 | pub const CCC21: CanonicalCombiningClass = CanonicalCombiningClass(21); // name="CCC21" |
2290 | pub const CCC22: CanonicalCombiningClass = CanonicalCombiningClass(22); // name="CCC22" |
2291 | pub const CCC23: CanonicalCombiningClass = CanonicalCombiningClass(23); // name="CCC23" |
2292 | pub const CCC24: CanonicalCombiningClass = CanonicalCombiningClass(24); // name="CCC24" |
2293 | pub const CCC25: CanonicalCombiningClass = CanonicalCombiningClass(25); // name="CCC25" |
2294 | pub const CCC26: CanonicalCombiningClass = CanonicalCombiningClass(26); // name="CCC26" |
2295 | pub const CCC27: CanonicalCombiningClass = CanonicalCombiningClass(27); // name="CCC27" |
2296 | pub const CCC28: CanonicalCombiningClass = CanonicalCombiningClass(28); // name="CCC28" |
2297 | pub const CCC29: CanonicalCombiningClass = CanonicalCombiningClass(29); // name="CCC29" |
2298 | pub const CCC30: CanonicalCombiningClass = CanonicalCombiningClass(30); // name="CCC30" |
2299 | pub const CCC31: CanonicalCombiningClass = CanonicalCombiningClass(31); // name="CCC31" |
2300 | pub const CCC32: CanonicalCombiningClass = CanonicalCombiningClass(32); // name="CCC32" |
2301 | pub const CCC33: CanonicalCombiningClass = CanonicalCombiningClass(33); // name="CCC33" |
2302 | pub const CCC34: CanonicalCombiningClass = CanonicalCombiningClass(34); // name="CCC34" |
2303 | pub const CCC35: CanonicalCombiningClass = CanonicalCombiningClass(35); // name="CCC35" |
2304 | pub const CCC36: CanonicalCombiningClass = CanonicalCombiningClass(36); // name="CCC36" |
2305 | pub const CCC84: CanonicalCombiningClass = CanonicalCombiningClass(84); // name="CCC84" |
2306 | pub const CCC91: CanonicalCombiningClass = CanonicalCombiningClass(91); // name="CCC91" |
2307 | pub const CCC103: CanonicalCombiningClass = CanonicalCombiningClass(103); // name="CCC103" |
2308 | pub const CCC107: CanonicalCombiningClass = CanonicalCombiningClass(107); // name="CCC107" |
2309 | pub const CCC118: CanonicalCombiningClass = CanonicalCombiningClass(118); // name="CCC118" |
2310 | pub const CCC122: CanonicalCombiningClass = CanonicalCombiningClass(122); // name="CCC122" |
2311 | pub const CCC129: CanonicalCombiningClass = CanonicalCombiningClass(129); // name="CCC129" |
2312 | pub const CCC130: CanonicalCombiningClass = CanonicalCombiningClass(130); // name="CCC130" |
2313 | pub const CCC132: CanonicalCombiningClass = CanonicalCombiningClass(132); // name="CCC132" |
2314 | pub const CCC133: CanonicalCombiningClass = CanonicalCombiningClass(133); // name="CCC133" // RESERVED |
2315 | pub const AttachedBelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(200); // name="ATBL" |
2316 | pub const AttachedBelow: CanonicalCombiningClass = CanonicalCombiningClass(202); // name="ATB" |
2317 | pub const AttachedAbove: CanonicalCombiningClass = CanonicalCombiningClass(214); // name="ATA" |
2318 | pub const AttachedAboveRight: CanonicalCombiningClass = CanonicalCombiningClass(216); // name="ATAR" |
2319 | pub const BelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(218); // name="BL" |
2320 | pub const Below: CanonicalCombiningClass = CanonicalCombiningClass(220); // name="B" |
2321 | pub const BelowRight: CanonicalCombiningClass = CanonicalCombiningClass(222); // name="BR" |
2322 | pub const Left: CanonicalCombiningClass = CanonicalCombiningClass(224); // name="L" |
2323 | pub const Right: CanonicalCombiningClass = CanonicalCombiningClass(226); // name="R" |
2324 | pub const AboveLeft: CanonicalCombiningClass = CanonicalCombiningClass(228); // name="AL" |
2325 | pub const Above: CanonicalCombiningClass = CanonicalCombiningClass(230); // name="A" |
2326 | pub const AboveRight: CanonicalCombiningClass = CanonicalCombiningClass(232); // name="AR" |
2327 | pub const DoubleBelow: CanonicalCombiningClass = CanonicalCombiningClass(233); // name="DB" |
2328 | pub const DoubleAbove: CanonicalCombiningClass = CanonicalCombiningClass(234); // name="DA" |
2329 | pub const IotaSubscript: CanonicalCombiningClass = CanonicalCombiningClass(240); // name="IS" |
2330 | } |
2331 | } |
2332 | |
2333 | impl_value_getter! { |
2334 | markers: CanonicalCombiningClassNameToValueV1Marker / SINGLETON_PROPNAMES_FROM_CCC_V1, CanonicalCombiningClassValueToShortNameV1Marker / SINGLETON_PROPNAMES_TO_SHORT_SPARSE_CCC_V1, CanonicalCombiningClassValueToLongNameV1Marker / SINGLETON_PROPNAMES_TO_LONG_SPARSE_CCC_V1; |
2335 | impl CanonicalCombiningClass { |
2336 | /// Return a [`PropertyValueNameToEnumMapper`], capable of looking up values |
2337 | /// from strings for the `Canonical_Combining_Class` enumerated property. |
2338 | /// |
2339 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
2340 | /// |
2341 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
2342 | /// |
2343 | /// # Example |
2344 | /// |
2345 | /// ``` |
2346 | /// use icu::properties::CanonicalCombiningClass; |
2347 | /// |
2348 | /// let lookup = CanonicalCombiningClass::name_to_enum_mapper(); |
2349 | /// // short name for value |
2350 | /// assert_eq!(lookup.get_strict("AL"), Some(CanonicalCombiningClass::AboveLeft)); |
2351 | /// assert_eq!(lookup.get_strict("ATBL"), Some(CanonicalCombiningClass::AttachedBelowLeft)); |
2352 | /// assert_eq!(lookup.get_strict("CCC10"), Some(CanonicalCombiningClass::CCC10)); |
2353 | /// // long name for value |
2354 | /// assert_eq!(lookup.get_strict("Above_Left"), Some(CanonicalCombiningClass::AboveLeft)); |
2355 | /// assert_eq!(lookup.get_strict("Attached_Below_Left"), Some(CanonicalCombiningClass::AttachedBelowLeft)); |
2356 | /// // name has incorrect casing and hyphens |
2357 | /// assert_eq!(lookup.get_strict("attached-below-left"), None); |
2358 | /// // loose matching of name |
2359 | /// assert_eq!(lookup.get_loose("attached-below-left"), Some(CanonicalCombiningClass::AttachedBelowLeft)); |
2360 | /// // nonexistent value name |
2361 | /// assert_eq!(lookup.get_strict("Linear_Z"), None); |
2362 | /// ``` |
2363 | pub fn get_name_to_enum_mapper() / name_to_enum_mapper(); |
2364 | /// Return a [`PropertyEnumToValueNameSparseMapper`], capable of looking up short names |
2365 | /// for values of the `Canonical_Combining_Class` enumerated property. |
2366 | /// |
2367 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
2368 | /// |
2369 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
2370 | /// |
2371 | /// # Example |
2372 | /// |
2373 | /// ``` |
2374 | /// use icu::properties::CanonicalCombiningClass; |
2375 | /// |
2376 | /// let lookup = CanonicalCombiningClass::enum_to_short_name_mapper(); |
2377 | /// assert_eq!(lookup.get(CanonicalCombiningClass::AboveLeft), Some("AL")); |
2378 | /// assert_eq!(lookup.get(CanonicalCombiningClass::AttachedBelowLeft), Some("ATBL")); |
2379 | /// assert_eq!(lookup.get(CanonicalCombiningClass::CCC10), Some("CCC10")); |
2380 | /// ``` |
2381 | pub fn get_enum_to_short_name_mapper() / enum_to_short_name_mapper() -> PropertyEnumToValueNameSparseMapper / PropertyEnumToValueNameSparseMapperBorrowed; |
2382 | /// Return a [`PropertyEnumToValueNameSparseMapper`], capable of looking up long names |
2383 | /// for values of the `Canonical_Combining_Class` enumerated property. |
2384 | /// |
2385 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
2386 | /// |
2387 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
2388 | /// |
2389 | /// # Example |
2390 | /// |
2391 | /// ``` |
2392 | /// use icu::properties::CanonicalCombiningClass; |
2393 | /// |
2394 | /// let lookup = CanonicalCombiningClass::enum_to_long_name_mapper(); |
2395 | /// assert_eq!(lookup.get(CanonicalCombiningClass::AboveLeft), Some("Above_Left")); |
2396 | /// assert_eq!(lookup.get(CanonicalCombiningClass::AttachedBelowLeft), Some("Attached_Below_Left")); |
2397 | /// assert_eq!(lookup.get(CanonicalCombiningClass::CCC10), Some("CCC10")); |
2398 | /// ``` |
2399 | pub fn get_enum_to_long_name_mapper() / enum_to_long_name_mapper() -> PropertyEnumToValueNameSparseMapper / PropertyEnumToValueNameSparseMapperBorrowed; |
2400 | } |
2401 | } |
2402 | |
2403 | /// Property Indic_Syllabic_Category. |
2404 | /// See UAX #44: |
2405 | /// <https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category>. |
2406 | /// |
2407 | /// The numeric value is compatible with `UIndicSyllabicCategory` in ICU4C. |
2408 | #[derive (Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
2409 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
2410 | #[cfg_attr (feature = "datagen" , derive(databake::Bake))] |
2411 | #[cfg_attr (feature = "datagen" , databake(path = icu_properties))] |
2412 | #[allow (clippy::exhaustive_structs)] // newtype: the single public `u8` field is the raw property value |
2413 | #[repr (transparent)] |
2414 | #[zerovec::make_ule (IndicSyllabicCategoryULE)] |
2415 | pub struct IndicSyllabicCategory(pub u8); |
2416 | |
2417 | create_const_array! { |
2418 | #[allow (missing_docs)] // Self-describing constants covering the consecutive values 0..=35; no per-item docs needed. |
2419 | #[allow (non_upper_case_globals)] |
2420 | impl IndicSyllabicCategory { |
2421 | pub const Other: IndicSyllabicCategory = IndicSyllabicCategory(0); |
2422 | pub const Avagraha: IndicSyllabicCategory = IndicSyllabicCategory(1); |
2423 | pub const Bindu: IndicSyllabicCategory = IndicSyllabicCategory(2); |
2424 | pub const BrahmiJoiningNumber: IndicSyllabicCategory = IndicSyllabicCategory(3); |
2425 | pub const CantillationMark: IndicSyllabicCategory = IndicSyllabicCategory(4); |
2426 | pub const Consonant: IndicSyllabicCategory = IndicSyllabicCategory(5); |
2427 | pub const ConsonantDead: IndicSyllabicCategory = IndicSyllabicCategory(6); |
2428 | pub const ConsonantFinal: IndicSyllabicCategory = IndicSyllabicCategory(7); |
2429 | pub const ConsonantHeadLetter: IndicSyllabicCategory = IndicSyllabicCategory(8); |
2430 | pub const ConsonantInitialPostfixed: IndicSyllabicCategory = IndicSyllabicCategory(9); |
2431 | pub const ConsonantKiller: IndicSyllabicCategory = IndicSyllabicCategory(10); |
2432 | pub const ConsonantMedial: IndicSyllabicCategory = IndicSyllabicCategory(11); |
2433 | pub const ConsonantPlaceholder: IndicSyllabicCategory = IndicSyllabicCategory(12); |
2434 | pub const ConsonantPrecedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(13); |
2435 | pub const ConsonantPrefixed: IndicSyllabicCategory = IndicSyllabicCategory(14); |
2436 | pub const ConsonantSucceedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(15); |
2437 | pub const ConsonantSubjoined: IndicSyllabicCategory = IndicSyllabicCategory(16); |
2438 | pub const ConsonantWithStacker: IndicSyllabicCategory = IndicSyllabicCategory(17); |
2439 | pub const GeminationMark: IndicSyllabicCategory = IndicSyllabicCategory(18); |
2440 | pub const InvisibleStacker: IndicSyllabicCategory = IndicSyllabicCategory(19); |
2441 | pub const Joiner: IndicSyllabicCategory = IndicSyllabicCategory(20); |
2442 | pub const ModifyingLetter: IndicSyllabicCategory = IndicSyllabicCategory(21); |
2443 | pub const NonJoiner: IndicSyllabicCategory = IndicSyllabicCategory(22); |
2444 | pub const Nukta: IndicSyllabicCategory = IndicSyllabicCategory(23); |
2445 | pub const Number: IndicSyllabicCategory = IndicSyllabicCategory(24); |
2446 | pub const NumberJoiner: IndicSyllabicCategory = IndicSyllabicCategory(25); |
2447 | pub const PureKiller: IndicSyllabicCategory = IndicSyllabicCategory(26); |
2448 | pub const RegisterShifter: IndicSyllabicCategory = IndicSyllabicCategory(27); |
2449 | pub const SyllableModifier: IndicSyllabicCategory = IndicSyllabicCategory(28); |
2450 | pub const ToneLetter: IndicSyllabicCategory = IndicSyllabicCategory(29); |
2451 | pub const ToneMark: IndicSyllabicCategory = IndicSyllabicCategory(30); |
2452 | pub const Virama: IndicSyllabicCategory = IndicSyllabicCategory(31); |
2453 | pub const Visarga: IndicSyllabicCategory = IndicSyllabicCategory(32); |
2454 | pub const Vowel: IndicSyllabicCategory = IndicSyllabicCategory(33); |
2455 | pub const VowelDependent: IndicSyllabicCategory = IndicSyllabicCategory(34); |
2456 | pub const VowelIndependent: IndicSyllabicCategory = IndicSyllabicCategory(35); |
2457 | } |
2458 | } |
2459 | |
2460 | impl_value_getter! { |
2461 | markers: IndicSyllabicCategoryNameToValueV1Marker / SINGLETON_PROPNAMES_FROM_INSC_V1, IndicSyllabicCategoryValueToShortNameV1Marker / SINGLETON_PROPNAMES_TO_SHORT_LINEAR_INSC_V1, IndicSyllabicCategoryValueToLongNameV1Marker / SINGLETON_PROPNAMES_TO_LONG_LINEAR_INSC_V1; |
2462 | impl IndicSyllabicCategory { |
2463 | /// Return a [`PropertyValueNameToEnumMapper`], capable of looking up values |
2464 | /// from strings for the `Indic_Syllabic_Category` enumerated property. |
2465 | /// |
2466 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
2467 | /// |
2468 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
2469 | /// |
2470 | /// # Example |
2471 | /// |
2472 | /// ``` |
2473 | /// use icu::properties::IndicSyllabicCategory; |
2474 | /// |
2475 | /// let lookup = IndicSyllabicCategory::name_to_enum_mapper(); |
2476 | /// // name for value (the long and short names coincide for these values) |
2477 | /// assert_eq!(lookup.get_strict("Brahmi_Joining_Number"), Some(IndicSyllabicCategory::BrahmiJoiningNumber)); |
2478 | /// assert_eq!(lookup.get_strict("Vowel_Independent"), Some(IndicSyllabicCategory::VowelIndependent)); |
2479 | /// // name has incorrect casing and hyphens |
2480 | /// assert_eq!(lookup.get_strict("brahmi-joining-number"), None); |
2481 | /// // loose matching of name |
2482 | /// assert_eq!(lookup.get_loose("brahmi-joining-number"), Some(IndicSyllabicCategory::BrahmiJoiningNumber)); |
2483 | /// // nonexistent value name |
2484 | /// assert_eq!(lookup.get_strict("Tone_Number"), None); |
2485 | /// ``` |
2486 | pub fn get_name_to_enum_mapper() / name_to_enum_mapper(); |
2487 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up short names |
2488 | /// for values of the `Indic_Syllabic_Category` enumerated property. |
2489 | /// |
2490 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
2491 | /// |
2492 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
2493 | /// |
2494 | /// # Example |
2495 | /// |
2496 | /// ``` |
2497 | /// use icu::properties::IndicSyllabicCategory; |
2498 | /// |
2499 | /// let lookup = IndicSyllabicCategory::enum_to_short_name_mapper(); |
2500 | /// assert_eq!(lookup.get(IndicSyllabicCategory::BrahmiJoiningNumber), Some("Brahmi_Joining_Number")); |
2501 | /// assert_eq!(lookup.get(IndicSyllabicCategory::VowelIndependent), Some("Vowel_Independent")); |
2502 | /// ``` |
2503 | pub fn get_enum_to_short_name_mapper() / enum_to_short_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
2504 | /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up long names |
2505 | /// for values of the `Indic_Syllabic_Category` enumerated property. |
2506 | /// |
2507 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
2508 | /// |
2509 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
2510 | /// |
2511 | /// # Example |
2512 | /// |
2513 | /// ``` |
2514 | /// use icu::properties::IndicSyllabicCategory; |
2515 | /// |
2516 | /// let lookup = IndicSyllabicCategory::enum_to_long_name_mapper(); |
2517 | /// assert_eq!(lookup.get(IndicSyllabicCategory::BrahmiJoiningNumber), Some("Brahmi_Joining_Number")); |
2518 | /// assert_eq!(lookup.get(IndicSyllabicCategory::VowelIndependent), Some("Vowel_Independent")); |
2519 | /// ``` |
2520 | pub fn get_enum_to_long_name_mapper() / enum_to_long_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed; |
2521 | } |
2522 | } |
2523 | /// Enumerated property Joining_Type. |
2524 | /// See Section 9.2, Arabic Cursive Joining in The Unicode Standard for the summary of |
2525 | /// each property value. |
2526 | /// |
2527 | /// The numeric value is compatible with `UJoiningType` in ICU4C. |
2528 | #[derive (Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] |
2529 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
2530 | #[cfg_attr (feature = "datagen" , derive(databake::Bake))] |
2531 | #[cfg_attr (feature = "datagen" , databake(path = icu_properties))] |
2532 | #[allow (clippy::exhaustive_structs)] // newtype: the single public `u8` field is the raw property value |
2533 | #[repr (transparent)] |
2534 | #[zerovec::make_ule (JoiningTypeULE)] |
2535 | pub struct JoiningType(pub u8); |
2536 | |
// Named values of the `Joining_Type` property, declared as associated constants
// on the `JoiningType` newtype. The trailing `name="…"` comment on each constant
// records the value's short property value name (e.g. `C` for `JoinCausing`).
//
// NOTE(review): the completeness test in this file reads
// `JoiningType::ALL_CONSTS`, which is presumably generated by
// `create_const_array!` from the constants listed here; confirm against the
// macro definition.
create_const_array! {
#[allow (missing_docs)] // These constants don't need individual documentation.
#[allow (non_upper_case_globals)]
impl JoiningType {
    pub const NonJoining: JoiningType = JoiningType(0); // name="U"
    pub const JoinCausing: JoiningType = JoiningType(1); // name="C"
    pub const DualJoining: JoiningType = JoiningType(2); // name="D"
    pub const LeftJoining: JoiningType = JoiningType(3); // name="L"
    pub const RightJoining: JoiningType = JoiningType(4); // name="R"
    pub const Transparent: JoiningType = JoiningType(5); // name="T"
}
}
2549 | |
// Name <-> value lookup constructors for `JoiningType`.
//
// The `markers:` line pairs each data marker with a `SINGLETON_*` constant, in
// the order: name-to-value, value-to-short-name, value-to-long-name.
//
// NOTE(review): every `pub fn a() / b()` line names two functions. The doc
// examples below call the second one (e.g. `name_to_enum_mapper()`) with no
// arguments; the exact role and signature of the `get_*` variant is decided by
// the macro definition. Confirm there.
impl_value_getter! {
    markers: JoiningTypeNameToValueV1Marker / SINGLETON_PROPNAMES_FROM_JT_V1, JoiningTypeValueToShortNameV1Marker / SINGLETON_PROPNAMES_TO_SHORT_LINEAR_JT_V1, JoiningTypeValueToLongNameV1Marker / SINGLETON_PROPNAMES_TO_LONG_LINEAR_JT_V1;
    impl JoiningType {
        /// Return a [`PropertyValueNameToEnumMapper`], capable of looking up values
        /// from strings for the `Joining_Type` enumerated property.
        ///
        /// Both strict lookups (exact name) and loose lookups (tolerant of, for
        /// example, ASCII casing differences) are available, as shown below.
        ///
        /// ✨ *Enabled with the `compiled_data` Cargo feature.*
        ///
        /// [📚 Help choosing a constructor](icu_provider::constructors)
        ///
        /// # Example
        ///
        /// ```
        /// use icu::properties::JoiningType;
        ///
        /// let lookup = JoiningType::name_to_enum_mapper();
        /// // short name for value
        /// assert_eq!(lookup.get_strict("T"), Some(JoiningType::Transparent));
        /// assert_eq!(lookup.get_strict("D"), Some(JoiningType::DualJoining));
        /// // long name for value
        /// assert_eq!(lookup.get_strict("Join_Causing"), Some(JoiningType::JoinCausing));
        /// assert_eq!(lookup.get_strict("Non_Joining"), Some(JoiningType::NonJoining));
        /// // name has incorrect casing
        /// assert_eq!(lookup.get_strict("LEFT_JOINING"), None);
        /// // loose matching of name
        /// assert_eq!(lookup.get_loose("LEFT_JOINING"), Some(JoiningType::LeftJoining));
        /// // fake property value
        /// assert_eq!(lookup.get_strict("Inner_Joining"), None);
        /// ```
        pub fn get_name_to_enum_mapper() / name_to_enum_mapper();
        /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up short names
        /// for values of the `Joining_Type` enumerated property.
        ///
        /// ✨ *Enabled with the `compiled_data` Cargo feature.*
        ///
        /// [📚 Help choosing a constructor](icu_provider::constructors)
        ///
        /// # Example
        ///
        /// ```
        /// use icu::properties::JoiningType;
        ///
        /// let lookup = JoiningType::enum_to_short_name_mapper();
        /// assert_eq!(lookup.get(JoiningType::JoinCausing), Some("C"));
        /// assert_eq!(lookup.get(JoiningType::LeftJoining), Some("L"));
        /// ```
        pub fn get_enum_to_short_name_mapper() / enum_to_short_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed;
        /// Return a [`PropertyEnumToValueNameLinearMapper`], capable of looking up long names
        /// for values of the `Joining_Type` enumerated property.
        ///
        /// ✨ *Enabled with the `compiled_data` Cargo feature.*
        ///
        /// [📚 Help choosing a constructor](icu_provider::constructors)
        ///
        /// # Example
        ///
        /// ```
        /// use icu::properties::JoiningType;
        ///
        /// let lookup = JoiningType::enum_to_long_name_mapper();
        /// assert_eq!(lookup.get(JoiningType::Transparent), Some("Transparent"));
        /// assert_eq!(lookup.get(JoiningType::NonJoining), Some("Non_Joining"));
        /// assert_eq!(lookup.get(JoiningType::RightJoining), Some("Right_Joining"));
        /// ```
        pub fn get_enum_to_long_name_mapper() / enum_to_long_name_mapper() -> PropertyEnumToValueNameLinearMapper / PropertyEnumToValueNameLinearMapperBorrowed;
    }
}
#[cfg(test)]
mod test_enumerated_property_completeness {
    use super::*;
    use alloc::collections::BTreeMap;

    /// Asserts that the numeric values found in the baked name-to-value data
    /// (`lookup`) are exactly the numeric values declared as constants
    /// (`consts`).
    ///
    /// Only the values are compared. Names are carried along purely so that a
    /// failure message can say which entry is unmatched and which side
    /// (`Data` or `Consts`) it came from.
    ///
    /// # Panics
    ///
    /// Panics if either side contains a value the other side lacks, or if a
    /// name stored in `lookup` is not valid UTF-8.
    fn check_enum<'a>(
        lookup: &PropertyValueNameToEnumMapV1<'static>,
        consts: impl IntoIterator<Item = &'a (&'static str, u16)>,
    ) {
        // Index the data side by value. Several names can share one value; a
        // later name simply replaces an earlier one, as only the value matters.
        let mut unmatched_data = BTreeMap::new();
        for (raw_name, value) in lookup.map.iter_copied_values() {
            let name = String::from_utf8(raw_name.as_byte_slice().to_vec()).unwrap();
            unmatched_data.insert(value, (name, "Data"));
        }

        // Tick off every constant against the data; constants with no
        // counterpart are recorded in declaration order.
        let mut mismatches = Vec::new();
        for (name, value) in consts {
            if unmatched_data.remove(value).is_none() {
                mismatches.push((*value, (name.to_string(), "Consts")));
            }
        }
        // Whatever is still in the map exists in the data but has no constant.
        mismatches.extend(unmatched_data);

        let report: String = mismatches
            .into_iter()
            .map(|(value, (name, source))| format!("{source}: \t{name} = {value:?} \n"))
            .collect();

        assert!(
            report.is_empty(),
            "Values defined in data do not match values defined in consts. Difference: \n{}",
            report
        );
    }

    /// `East_Asian_Width`: baked data and constants must agree.
    #[test]
    fn test_ea() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPNAMES_FROM_EA_V1,
            EastAsianWidth::ALL_CONSTS,
        );
    }

    /// `Canonical_Combining_Class`: baked data and constants must agree.
    #[test]
    fn test_ccc() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPNAMES_FROM_CCC_V1,
            CanonicalCombiningClass::ALL_CONSTS,
        );
    }

    /// `Joining_Type`: baked data and constants must agree.
    #[test]
    fn test_jt() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPNAMES_FROM_JT_V1,
            JoiningType::ALL_CONSTS,
        );
    }

    /// `Indic_Syllabic_Category`: baked data and constants must agree.
    #[test]
    fn test_insc() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPNAMES_FROM_INSC_V1,
            IndicSyllabicCategory::ALL_CONSTS,
        );
    }

    /// `Sentence_Break`: baked data and constants must agree.
    #[test]
    fn test_sb() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPNAMES_FROM_SB_V1,
            SentenceBreak::ALL_CONSTS,
        );
    }

    /// `Word_Break`: baked data and constants must agree.
    #[test]
    fn test_wb() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPNAMES_FROM_WB_V1,
            WordBreak::ALL_CONSTS,
        );
    }

    /// `Bidi_Class`: baked data and constants must agree.
    #[test]
    fn test_bc() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPNAMES_FROM_BC_V1,
            BidiClass::ALL_CONSTS,
        );
    }

    /// `Hangul_Syllable_Type`: baked data and constants must agree.
    #[test]
    fn test_hst() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPNAMES_FROM_HST_V1,
            HangulSyllableType::ALL_CONSTS,
        );
    }
}
2728 | |