1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | //! The functions in this module return a [`CodePointSetData`] containing |
6 | //! the set of characters with a particular Unicode property. |
7 | //! |
8 | //! The descriptions of most properties are taken from [`TR44`], the documentation for the |
9 | //! Unicode Character Database. Some properties are instead defined in [`TR18`], the |
10 | //! documentation for Unicode regular expressions. In particular, Annex C of this document |
11 | //! defines properties for POSIX compatibility. |
12 | //! |
13 | //! [`CodePointSetData`]: crate::sets::CodePointSetData |
14 | //! [`TR44`]: https://www.unicode.org/reports/tr44 |
15 | //! [`TR18`]: https://www.unicode.org/reports/tr18 |
16 | |
17 | use crate::error::PropertiesError; |
18 | use crate::provider::*; |
19 | use crate::*; |
20 | use core::iter::FromIterator; |
21 | use core::ops::RangeInclusive; |
22 | use icu_collections::codepointinvlist::CodePointInversionList; |
23 | use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList; |
24 | use icu_provider::prelude::*; |
25 | |
26 | // |
27 | // CodePointSet* structs, impls, & macros |
28 | // (a set with only code points) |
29 | // |
30 | |
31 | /// A wrapper around code point set data. It is returned by APIs that return Unicode |
32 | /// property data in a set-like form, ex: a set of code points sharing the same |
33 | /// value for a Unicode property. Access its data via the borrowed version, |
34 | /// [`CodePointSetDataBorrowed`]. |
35 | #[derive (Debug)] |
36 | pub struct CodePointSetData { |
37 | data: DataPayload<ErasedSetlikeMarker>, |
38 | } |
39 | |
40 | /// Private marker type for CodePointSetData |
41 | /// to work for all set properties at once |
42 | #[derive (Clone, Copy, PartialEq, Eq, Hash, Debug)] |
43 | pub(crate) struct ErasedSetlikeMarker; |
44 | impl DataMarker for ErasedSetlikeMarker { |
45 | type Yokeable = PropertyCodePointSetV1<'static>; |
46 | } |
47 | |
48 | impl CodePointSetData { |
49 | /// Construct a borrowed version of this type that can be queried. |
50 | /// |
51 | /// This owned version if returned by functions that use a runtime data provider. |
52 | #[inline ] |
53 | pub fn as_borrowed(&self) -> CodePointSetDataBorrowed<'_> { |
54 | CodePointSetDataBorrowed { |
55 | set: self.data.get(), |
56 | } |
57 | } |
58 | |
59 | /// Construct a new one from loaded data |
60 | /// |
61 | /// Typically it is preferable to use getters like [`load_ascii_hex_digit()`] instead |
62 | pub fn from_data<M>(data: DataPayload<M>) -> Self |
63 | where |
64 | M: DataMarker<Yokeable = PropertyCodePointSetV1<'static>>, |
65 | { |
66 | Self { data: data.cast() } |
67 | } |
68 | |
69 | /// Construct a new owned [`CodePointInversionList`] |
70 | pub fn from_code_point_inversion_list(set: CodePointInversionList<'static>) -> Self { |
71 | let set = PropertyCodePointSetV1::from_code_point_inversion_list(set); |
72 | CodePointSetData::from_data(DataPayload::<ErasedSetlikeMarker>::from_owned(set)) |
73 | } |
74 | |
75 | /// Convert this type to a [`CodePointInversionList`] as a borrowed value. |
76 | /// |
77 | /// The data backing this is extensible and supports multiple implementations. |
78 | /// Currently it is always [`CodePointInversionList`]; however in the future more backends may be |
79 | /// added, and users may select which at data generation time. |
80 | /// |
81 | /// This method returns an `Option` in order to return `None` when the backing data provider |
82 | /// cannot return a [`CodePointInversionList`], or cannot do so within the expected constant time |
83 | /// constraint. |
84 | pub fn as_code_point_inversion_list(&self) -> Option<&CodePointInversionList<'_>> { |
85 | self.data.get().as_code_point_inversion_list() |
86 | } |
87 | |
88 | /// Convert this type to a [`CodePointInversionList`], borrowing if possible, |
89 | /// otherwise allocating a new [`CodePointInversionList`]. |
90 | /// |
91 | /// The data backing this is extensible and supports multiple implementations. |
92 | /// Currently it is always [`CodePointInversionList`]; however in the future more backends may be |
93 | /// added, and users may select which at data generation time. |
94 | /// |
95 | /// The performance of the conversion to this specific return type will vary |
96 | /// depending on the data structure that is backing `self`. |
97 | pub fn to_code_point_inversion_list(&self) -> CodePointInversionList<'_> { |
98 | self.data.get().to_code_point_inversion_list() |
99 | } |
100 | } |
101 | |
102 | /// A borrowed wrapper around code point set data, returned by |
103 | /// [`CodePointSetData::as_borrowed()`]. More efficient to query. |
104 | #[derive (Clone, Copy, Debug)] |
105 | pub struct CodePointSetDataBorrowed<'a> { |
106 | set: &'a PropertyCodePointSetV1<'a>, |
107 | } |
108 | |
109 | impl CodePointSetDataBorrowed<'static> { |
110 | /// Cheaply converts a [`CodePointSetDataBorrowed<'static>`] into a [`CodePointSetData`]. |
111 | /// |
112 | /// Note: Due to branching and indirection, using [`CodePointSetData`] might inhibit some |
113 | /// compile-time optimizations that are possible with [`CodePointSetDataBorrowed`]. |
114 | pub const fn static_to_owned(self) -> CodePointSetData { |
115 | CodePointSetData { |
116 | data: DataPayload::from_static_ref(self.set), |
117 | } |
118 | } |
119 | } |
120 | |
121 | impl<'a> CodePointSetDataBorrowed<'a> { |
122 | /// Check if the set contains a character |
123 | /// |
124 | /// ```rust |
125 | /// use icu::properties::sets; |
126 | /// |
127 | /// let alphabetic = sets::alphabetic(); |
128 | /// |
129 | /// assert!(!alphabetic.contains('3' )); |
130 | /// assert!(!alphabetic.contains('੩' )); // U+0A69 GURMUKHI DIGIT THREE |
131 | /// assert!(alphabetic.contains('A' )); |
132 | /// assert!(alphabetic.contains('Ä' )); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS |
133 | /// ``` |
134 | #[inline ] |
135 | pub fn contains(self, ch: char) -> bool { |
136 | self.set.contains(ch) |
137 | } |
138 | |
139 | /// Check if the set contains a character as a UTF32 code unit |
140 | /// |
141 | /// ```rust |
142 | /// use icu::properties::sets; |
143 | /// |
144 | /// let alphabetic = sets::alphabetic(); |
145 | /// |
146 | /// assert!(!alphabetic.contains32(0x0A69)); // U+0A69 GURMUKHI DIGIT THREE |
147 | /// assert!(alphabetic.contains32(0x00C4)); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS |
148 | /// ``` |
149 | #[inline ] |
150 | pub fn contains32(self, ch: u32) -> bool { |
151 | self.set.contains32(ch) |
152 | } |
153 | |
154 | // Yields an [`Iterator`] returning the ranges of the code points that are |
155 | /// included in the [`CodePointSetData`] |
156 | /// |
157 | /// Ranges are returned as [`RangeInclusive`], which is inclusive of its |
158 | /// `end` bound value. An end-inclusive behavior matches the ICU4C/J |
159 | /// behavior of ranges, ex: `UnicodeSet::contains(UChar32 start, UChar32 end)`. |
160 | /// |
161 | /// # Example |
162 | /// |
163 | /// ``` |
164 | /// use icu::properties::sets; |
165 | /// |
166 | /// let alphabetic = sets::alphabetic(); |
167 | /// let mut ranges = alphabetic.iter_ranges(); |
168 | /// |
169 | /// assert_eq!(Some(0x0041..=0x005A), ranges.next()); // 'A'..'Z' |
170 | /// assert_eq!(Some(0x0061..=0x007A), ranges.next()); // 'a'..'z' |
171 | /// ``` |
172 | #[inline ] |
173 | pub fn iter_ranges(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { |
174 | self.set.iter_ranges() |
175 | } |
176 | |
177 | // Yields an [`Iterator`] returning the ranges of the code points that are |
178 | /// *not* included in the [`CodePointSetData`] |
179 | /// |
180 | /// Ranges are returned as [`RangeInclusive`], which is inclusive of its |
181 | /// `end` bound value. An end-inclusive behavior matches the ICU4C/J |
182 | /// behavior of ranges, ex: `UnicodeSet::contains(UChar32 start, UChar32 end)`. |
183 | /// |
184 | /// # Example |
185 | /// |
186 | /// ``` |
187 | /// use icu::properties::sets; |
188 | /// |
189 | /// let alphabetic = sets::alphabetic(); |
190 | /// let mut ranges = alphabetic.iter_ranges(); |
191 | /// |
192 | /// assert_eq!(Some(0x0041..=0x005A), ranges.next()); // 'A'..'Z' |
193 | /// assert_eq!(Some(0x0061..=0x007A), ranges.next()); // 'a'..'z' |
194 | /// ``` |
195 | #[inline ] |
196 | pub fn iter_ranges_complemented(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { |
197 | self.set.iter_ranges_complemented() |
198 | } |
199 | } |
200 | |
201 | // |
202 | // UnicodeSet* structs, impls, & macros |
203 | // (a set with code points + strings) |
204 | // |
205 | |
206 | /// A wrapper around `UnicodeSet` data (characters and strings) |
207 | #[derive (Debug)] |
208 | pub struct UnicodeSetData { |
209 | data: DataPayload<ErasedUnicodeSetlikeMarker>, |
210 | } |
211 | |
212 | #[derive (Clone, Copy, PartialEq, Eq, Hash, Debug)] |
213 | pub(crate) struct ErasedUnicodeSetlikeMarker; |
214 | impl DataMarker for ErasedUnicodeSetlikeMarker { |
215 | type Yokeable = PropertyUnicodeSetV1<'static>; |
216 | } |
217 | |
218 | impl UnicodeSetData { |
219 | /// Construct a borrowed version of this type that can be queried. |
220 | /// |
221 | /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it |
222 | /// up front. |
223 | #[inline ] |
224 | pub fn as_borrowed(&self) -> UnicodeSetDataBorrowed<'_> { |
225 | UnicodeSetDataBorrowed { |
226 | set: self.data.get(), |
227 | } |
228 | } |
229 | |
230 | /// Construct a new one from loaded data |
231 | /// |
232 | /// Typically it is preferable to use getters instead |
233 | pub fn from_data<M>(data: DataPayload<M>) -> Self |
234 | where |
235 | M: DataMarker<Yokeable = PropertyUnicodeSetV1<'static>>, |
236 | { |
237 | Self { data: data.cast() } |
238 | } |
239 | |
240 | /// Construct a new owned [`CodePointInversionListAndStringList`] |
241 | pub fn from_code_point_inversion_list_string_list( |
242 | set: CodePointInversionListAndStringList<'static>, |
243 | ) -> Self { |
244 | let set = PropertyUnicodeSetV1::from_code_point_inversion_list_string_list(set); |
245 | UnicodeSetData::from_data(DataPayload::<ErasedUnicodeSetlikeMarker>::from_owned(set)) |
246 | } |
247 | |
248 | /// Convert this type to a [`CodePointInversionListAndStringList`] as a borrowed value. |
249 | /// |
250 | /// The data backing this is extensible and supports multiple implementations. |
251 | /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be |
252 | /// added, and users may select which at data generation time. |
253 | /// |
254 | /// This method returns an `Option` in order to return `None` when the backing data provider |
255 | /// cannot return a [`CodePointInversionListAndStringList`], or cannot do so within the expected constant time |
256 | /// constraint. |
257 | pub fn as_code_point_inversion_list_string_list( |
258 | &self, |
259 | ) -> Option<&CodePointInversionListAndStringList<'_>> { |
260 | self.data.get().as_code_point_inversion_list_string_list() |
261 | } |
262 | |
263 | /// Convert this type to a [`CodePointInversionListAndStringList`], borrowing if possible, |
264 | /// otherwise allocating a new [`CodePointInversionListAndStringList`]. |
265 | /// |
266 | /// The data backing this is extensible and supports multiple implementations. |
267 | /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be |
268 | /// added, and users may select which at data generation time. |
269 | /// |
270 | /// The performance of the conversion to this specific return type will vary |
271 | /// depending on the data structure that is backing `self`. |
272 | pub fn to_code_point_inversion_list_string_list( |
273 | &self, |
274 | ) -> CodePointInversionListAndStringList<'_> { |
275 | self.data.get().to_code_point_inversion_list_string_list() |
276 | } |
277 | } |
278 | |
279 | /// A borrowed wrapper around code point set data, returned by |
280 | /// [`UnicodeSetData::as_borrowed()`]. More efficient to query. |
281 | #[derive (Clone, Copy, Debug)] |
282 | pub struct UnicodeSetDataBorrowed<'a> { |
283 | set: &'a PropertyUnicodeSetV1<'a>, |
284 | } |
285 | |
286 | impl<'a> UnicodeSetDataBorrowed<'a> { |
287 | /// Check if the set contains the string. Strings consisting of one character |
288 | /// are treated as a character/code point. |
289 | /// |
290 | /// This matches ICU behavior for ICU's `UnicodeSet`. |
291 | #[inline ] |
292 | pub fn contains(self, s: &str) -> bool { |
293 | self.set.contains(s) |
294 | } |
295 | |
296 | /// Check if the set contains a character as a UTF32 code unit |
297 | #[inline ] |
298 | pub fn contains32(&self, cp: u32) -> bool { |
299 | self.set.contains32(cp) |
300 | } |
301 | |
302 | /// Check if the set contains the code point corresponding to the Rust character. |
303 | #[inline ] |
304 | pub fn contains_char(&self, ch: char) -> bool { |
305 | self.set.contains_char(ch) |
306 | } |
307 | } |
308 | |
309 | impl UnicodeSetDataBorrowed<'static> { |
310 | /// Cheaply converts a [`UnicodeSetDataBorrowed<'static>`] into a [`UnicodeSetData`]. |
311 | /// |
312 | /// Note: Due to branching and indirection, using [`UnicodeSetData`] might inhibit some |
313 | /// compile-time optimizations that are possible with [`UnicodeSetDataBorrowed`]. |
314 | pub const fn static_to_owned(self) -> UnicodeSetData { |
315 | UnicodeSetData { |
316 | data: DataPayload::from_static_ref(self.set), |
317 | } |
318 | } |
319 | } |
320 | |
321 | pub(crate) fn load_set_data<M, P>(provider: &P) -> Result<CodePointSetData, PropertiesError> |
322 | where |
323 | M: KeyedDataMarker<Yokeable = PropertyCodePointSetV1<'static>>, |
324 | P: DataProvider<M> + ?Sized, |
325 | { |
326 | Ok(provider |
327 | .load(Default::default()) |
328 | .and_then(DataResponse::take_payload) |
329 | .map(op:CodePointSetData::from_data)?) |
330 | } |
331 | |
332 | // |
333 | // Binary property getter fns |
334 | // (data as code point sets) |
335 | // |
336 | |
// Generates the two getters for one binary property: a `const fn` backed by
// compiled data and a loader that reads from a caller-supplied data provider.
macro_rules! make_code_point_set_property {
    (
        // currently unused
        property: $prop_name:expr;
        // currently unused
        marker: $marker:ident;
        keyed_data_marker: $keyed:ty;
        func:
        $(#[$attr:meta])+
        $const_vis:vis const fn $const_fn:ident() => $singleton:ident;
        $load_vis:vis fn $load_fn:ident();
    ) => {
        $(#[$attr])*
        #[cfg(feature = "compiled_data")]
        $const_vis const fn $const_fn() -> CodePointSetDataBorrowed<'static> {
            CodePointSetDataBorrowed {
                set: crate::provider::Baked::$singleton,
            }
        }

        #[doc = concat!("A version of [`", stringify!($const_fn), "()`] that uses custom data provided by a [`DataProvider`].")]
        ///
        /// Note that this will return an owned version of the data. Functionality is available on
        /// the borrowed version, accessible through [`CodePointSetData::as_borrowed`].
        $load_vis fn $load_fn(
            provider: &(impl DataProvider<$keyed> + ?Sized)
        ) -> Result<CodePointSetData, PropertiesError> {
            load_set_data(provider)
        }
    }
}
368 | |
369 | make_code_point_set_property! { |
370 | property: "ASCII_Hex_Digit" ; |
371 | marker: AsciiHexDigitProperty; |
372 | keyed_data_marker: AsciiHexDigitV1Marker; |
373 | func: |
374 | /// ASCII characters commonly used for the representation of hexadecimal numbers |
375 | /// |
376 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
377 | /// |
378 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
379 | /// |
380 | /// # Example |
381 | /// |
382 | /// ``` |
383 | /// use icu::properties::sets; |
384 | /// |
385 | /// let ascii_hex_digit = sets::ascii_hex_digit(); |
386 | /// |
387 | /// assert!(ascii_hex_digit.contains('3')); |
388 | /// assert!(!ascii_hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE |
389 | /// assert!(ascii_hex_digit.contains('A')); |
390 | /// assert!(!ascii_hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS |
391 | /// ``` |
392 | pub const fn ascii_hex_digit() => SINGLETON_PROPS_AHEX_V1; |
393 | pub fn load_ascii_hex_digit(); |
394 | } |
395 | |
396 | make_code_point_set_property! { |
397 | property: "Alnum" ; |
398 | marker: AlnumProperty; |
399 | keyed_data_marker: AlnumV1Marker; |
400 | func: |
401 | /// Characters with the Alphabetic or Decimal_Number property |
402 | /// This is defined for POSIX compatibility. |
403 | |
404 | pub const fn alnum() => SINGLETON_PROPS_ALNUM_V1; |
405 | pub fn load_alnum(); |
406 | } |
407 | |
408 | make_code_point_set_property! { |
409 | property: "Alphabetic" ; |
410 | marker: AlphabeticProperty; |
411 | keyed_data_marker: AlphabeticV1Marker; |
412 | func: |
413 | /// Alphabetic characters |
414 | /// |
415 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
416 | /// |
417 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
418 | /// |
419 | /// # Example |
420 | /// |
421 | /// ``` |
422 | /// use icu::properties::sets; |
423 | /// |
424 | /// let alphabetic = sets::alphabetic(); |
425 | /// |
426 | /// assert!(!alphabetic.contains('3')); |
427 | /// assert!(!alphabetic.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE |
428 | /// assert!(alphabetic.contains('A')); |
429 | /// assert!(alphabetic.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS |
430 | /// ``` |
431 | |
432 | pub const fn alphabetic() => SINGLETON_PROPS_ALPHA_V1; |
433 | pub fn load_alphabetic(); |
434 | } |
435 | |
436 | make_code_point_set_property! { |
437 | property: "Bidi_Control" ; |
438 | marker: BidiControlProperty; |
439 | keyed_data_marker: BidiControlV1Marker; |
440 | func: |
441 | /// Format control characters which have specific functions in the Unicode Bidirectional |
442 | /// Algorithm |
443 | /// |
444 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
445 | /// |
446 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
447 | /// |
448 | /// # Example |
449 | /// |
450 | /// ``` |
451 | /// use icu::properties::sets; |
452 | /// |
453 | /// let bidi_control = sets::bidi_control(); |
454 | /// |
455 | /// assert!(bidi_control.contains32(0x200F)); // RIGHT-TO-LEFT MARK |
456 | /// assert!(!bidi_control.contains('ش')); // U+0634 ARABIC LETTER SHEEN |
457 | /// ``` |
458 | |
459 | pub const fn bidi_control() => SINGLETON_PROPS_BIDI_C_V1; |
460 | pub fn load_bidi_control(); |
461 | } |
462 | |
463 | make_code_point_set_property! { |
464 | property: "Bidi_Mirrored" ; |
465 | marker: BidiMirroredProperty; |
466 | keyed_data_marker: BidiMirroredV1Marker; |
467 | func: |
468 | /// Characters that are mirrored in bidirectional text |
469 | /// |
470 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
471 | /// |
472 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
473 | /// |
474 | /// # Example |
475 | /// |
476 | /// ``` |
477 | /// use icu::properties::sets; |
478 | /// |
479 | /// let bidi_mirrored = sets::bidi_mirrored(); |
480 | /// |
481 | /// assert!(bidi_mirrored.contains('[')); |
482 | /// assert!(bidi_mirrored.contains(']')); |
483 | /// assert!(bidi_mirrored.contains('∑')); // U+2211 N-ARY SUMMATION |
484 | /// assert!(!bidi_mirrored.contains('ཉ')); // U+0F49 TIBETAN LETTER NYA |
485 | /// ``` |
486 | |
487 | pub const fn bidi_mirrored() => SINGLETON_PROPS_BIDI_M_V1; |
488 | pub fn load_bidi_mirrored(); |
489 | } |
490 | |
491 | make_code_point_set_property! { |
492 | property: "Blank" ; |
493 | marker: BlankProperty; |
494 | keyed_data_marker: BlankV1Marker; |
495 | func: |
496 | /// Horizontal whitespace characters |
497 | |
498 | pub const fn blank() => SINGLETON_PROPS_BLANK_V1; |
499 | pub fn load_blank(); |
500 | } |
501 | |
502 | make_code_point_set_property! { |
503 | property: "Cased" ; |
504 | marker: CasedProperty; |
505 | keyed_data_marker: CasedV1Marker; |
506 | func: |
507 | /// Uppercase, lowercase, and titlecase characters |
508 | /// |
509 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
510 | /// |
511 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
512 | /// |
513 | /// # Example |
514 | /// |
515 | /// ``` |
516 | /// use icu::properties::sets; |
517 | /// |
518 | /// let cased = sets::cased(); |
519 | /// |
520 | /// assert!(cased.contains('Ꙡ')); // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE |
521 | /// assert!(!cased.contains('ދ')); // U+078B THAANA LETTER DHAALU |
522 | /// ``` |
523 | |
524 | pub const fn cased() => SINGLETON_PROPS_CASED_V1; |
525 | pub fn load_cased(); |
526 | } |
527 | |
528 | make_code_point_set_property! { |
529 | property: "Case_Ignorable" ; |
530 | marker: CaseIgnorableProperty; |
531 | keyed_data_marker: CaseIgnorableV1Marker; |
532 | func: |
533 | /// Characters which are ignored for casing purposes |
534 | /// |
535 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
536 | /// |
537 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
538 | /// |
539 | /// # Example |
540 | /// |
541 | /// ``` |
542 | /// use icu::properties::sets; |
543 | /// |
544 | /// let case_ignorable = sets::case_ignorable(); |
545 | /// |
546 | /// assert!(case_ignorable.contains(':')); |
547 | /// assert!(!case_ignorable.contains('λ')); // U+03BB GREEK SMALL LETTER LAMDA |
548 | /// ``` |
549 | |
550 | pub const fn case_ignorable() => SINGLETON_PROPS_CI_V1; |
551 | pub fn load_case_ignorable(); |
552 | } |
553 | |
554 | make_code_point_set_property! { |
555 | property: "Full_Composition_Exclusion" ; |
556 | marker: FullCompositionExclusionProperty; |
557 | keyed_data_marker: FullCompositionExclusionV1Marker; |
558 | func: |
559 | /// Characters that are excluded from composition |
560 | /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt> |
561 | |
562 | pub const fn full_composition_exclusion() => SINGLETON_PROPS_COMP_EX_V1; |
563 | pub fn load_full_composition_exclusion(); |
564 | } |
565 | |
566 | make_code_point_set_property! { |
567 | property: "Changes_When_Casefolded" ; |
568 | marker: ChangesWhenCasefoldedProperty; |
569 | keyed_data_marker: ChangesWhenCasefoldedV1Marker; |
570 | func: |
571 | /// Characters whose normalized forms are not stable under case folding |
572 | /// |
573 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
574 | /// |
575 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
576 | /// |
577 | /// # Example |
578 | /// |
579 | /// ``` |
580 | /// use icu::properties::sets; |
581 | /// |
582 | /// let changes_when_casefolded = sets::changes_when_casefolded(); |
583 | /// |
584 | /// assert!(changes_when_casefolded.contains('ß')); // U+00DF LATIN SMALL LETTER SHARP S |
585 | /// assert!(!changes_when_casefolded.contains('ᜉ')); // U+1709 TAGALOG LETTER PA |
586 | /// ``` |
587 | |
588 | pub const fn changes_when_casefolded() => SINGLETON_PROPS_CWCF_V1; |
589 | pub fn load_changes_when_casefolded(); |
590 | } |
591 | |
592 | make_code_point_set_property! { |
593 | property: "Changes_When_Casemapped" ; |
594 | marker: ChangesWhenCasemappedProperty; |
595 | keyed_data_marker: ChangesWhenCasemappedV1Marker; |
596 | func: |
597 | /// Characters which may change when they undergo case mapping |
598 | |
599 | pub const fn changes_when_casemapped() => SINGLETON_PROPS_CWCM_V1; |
600 | pub fn load_changes_when_casemapped(); |
601 | } |
602 | |
603 | make_code_point_set_property! { |
604 | property: "Changes_When_NFKC_Casefolded" ; |
605 | marker: ChangesWhenNfkcCasefoldedProperty; |
606 | keyed_data_marker: ChangesWhenNfkcCasefoldedV1Marker; |
607 | func: |
608 | /// Characters which are not identical to their NFKC_Casefold mapping |
609 | /// |
610 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
611 | /// |
612 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
613 | /// |
614 | /// # Example |
615 | /// |
616 | /// ``` |
617 | /// use icu::properties::sets; |
618 | /// |
619 | /// let changes_when_nfkc_casefolded = sets::changes_when_nfkc_casefolded(); |
620 | /// |
621 | /// assert!(changes_when_nfkc_casefolded.contains('🄵')); // U+1F135 SQUARED LATIN CAPITAL LETTER F |
622 | /// assert!(!changes_when_nfkc_casefolded.contains('f')); |
623 | /// ``` |
624 | |
625 | pub const fn changes_when_nfkc_casefolded() => SINGLETON_PROPS_CWKCF_V1; |
626 | pub fn load_changes_when_nfkc_casefolded(); |
627 | } |
628 | |
629 | make_code_point_set_property! { |
630 | property: "Changes_When_Lowercased" ; |
631 | marker: ChangesWhenLowercasedProperty; |
632 | keyed_data_marker: ChangesWhenLowercasedV1Marker; |
633 | func: |
634 | /// Characters whose normalized forms are not stable under a toLowercase mapping |
635 | /// |
636 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
637 | /// |
638 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
639 | /// |
640 | /// # Example |
641 | /// |
642 | /// ``` |
643 | /// use icu::properties::sets; |
644 | /// |
645 | /// let changes_when_lowercased = sets::changes_when_lowercased(); |
646 | /// |
647 | /// assert!(changes_when_lowercased.contains('Ⴔ')); // U+10B4 GEORGIAN CAPITAL LETTER PHAR |
648 | /// assert!(!changes_when_lowercased.contains('ფ')); // U+10E4 GEORGIAN LETTER PHAR |
649 | /// ``` |
650 | |
651 | pub const fn changes_when_lowercased() => SINGLETON_PROPS_CWL_V1; |
652 | pub fn load_changes_when_lowercased(); |
653 | } |
654 | |
655 | make_code_point_set_property! { |
656 | property: "Changes_When_Titlecased" ; |
657 | marker: ChangesWhenTitlecasedProperty; |
658 | keyed_data_marker: ChangesWhenTitlecasedV1Marker; |
659 | func: |
660 | /// Characters whose normalized forms are not stable under a toTitlecase mapping |
661 | /// |
662 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
663 | /// |
664 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
665 | /// |
666 | /// # Example |
667 | /// |
668 | /// ``` |
669 | /// use icu::properties::sets; |
670 | /// |
671 | /// let changes_when_titlecased = sets::changes_when_titlecased(); |
672 | /// |
673 | /// assert!(changes_when_titlecased.contains('æ')); // U+00E6 LATIN SMALL LETTER AE |
674 | /// assert!(!changes_when_titlecased.contains('Æ')); // U+00E6 LATIN CAPITAL LETTER AE |
675 | /// ``` |
676 | |
677 | pub const fn changes_when_titlecased() => SINGLETON_PROPS_CWT_V1; |
678 | pub fn load_changes_when_titlecased(); |
679 | } |
680 | |
681 | make_code_point_set_property! { |
682 | property: "Changes_When_Uppercased" ; |
683 | marker: ChangesWhenUppercasedProperty; |
684 | keyed_data_marker: ChangesWhenUppercasedV1Marker; |
685 | func: |
686 | /// Characters whose normalized forms are not stable under a toUppercase mapping |
687 | /// |
688 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
689 | /// |
690 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
691 | /// |
692 | /// # Example |
693 | /// |
694 | /// ``` |
695 | /// use icu::properties::sets; |
696 | /// |
697 | /// let changes_when_uppercased = sets::changes_when_uppercased(); |
698 | /// |
699 | /// assert!(changes_when_uppercased.contains('ւ')); // U+0582 ARMENIAN SMALL LETTER YIWN |
700 | /// assert!(!changes_when_uppercased.contains('Ւ')); // U+0552 ARMENIAN CAPITAL LETTER YIWN |
701 | /// ``` |
702 | |
703 | pub const fn changes_when_uppercased() => SINGLETON_PROPS_CWU_V1; |
704 | pub fn load_changes_when_uppercased(); |
705 | } |
706 | |
707 | make_code_point_set_property! { |
708 | property: "Dash" ; |
709 | marker: DashProperty; |
710 | keyed_data_marker: DashV1Marker; |
711 | func: |
712 | /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus |
713 | /// their compatibility equivalents |
714 | /// |
715 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
716 | /// |
717 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
718 | /// |
719 | /// # Example |
720 | /// |
721 | /// ``` |
722 | /// use icu::properties::sets; |
723 | /// |
724 | /// let dash = sets::dash(); |
725 | /// |
726 | /// assert!(dash.contains('⸺')); // U+2E3A TWO-EM DASH |
727 | /// assert!(dash.contains('-')); // U+002D |
728 | /// assert!(!dash.contains('=')); // U+003D |
729 | /// ``` |
730 | |
731 | pub const fn dash() => SINGLETON_PROPS_DASH_V1; |
732 | pub fn load_dash(); |
733 | } |
734 | |
735 | make_code_point_set_property! { |
736 | property: "Deprecated" ; |
737 | marker: DeprecatedProperty; |
738 | keyed_data_marker: DeprecatedV1Marker; |
739 | func: |
740 | /// Deprecated characters. No characters will ever be removed from the standard, but the |
741 | /// usage of deprecated characters is strongly discouraged. |
742 | /// |
743 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
744 | /// |
745 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
746 | /// |
747 | /// # Example |
748 | /// |
749 | /// ``` |
750 | /// use icu::properties::sets; |
751 | /// |
752 | /// let deprecated = sets::deprecated(); |
753 | /// |
754 | /// assert!(deprecated.contains('ឣ')); // U+17A3 KHMER INDEPENDENT VOWEL QAQ |
755 | /// assert!(!deprecated.contains('A')); |
756 | /// ``` |
757 | |
758 | pub const fn deprecated() => SINGLETON_PROPS_DEP_V1; |
759 | pub fn load_deprecated(); |
760 | } |
761 | |
762 | make_code_point_set_property! { |
763 | property: "Default_Ignorable_Code_Point" ; |
764 | marker: DefaultIgnorableCodePointProperty; |
765 | keyed_data_marker: DefaultIgnorableCodePointV1Marker; |
766 | func: |
767 | /// For programmatic determination of default ignorable code points. New characters that |
768 | /// should be ignored in rendering (unless explicitly supported) will be assigned in these |
769 | /// ranges, permitting programs to correctly handle the default rendering of such |
770 | /// characters when not otherwise supported. |
771 | /// |
772 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
773 | /// |
774 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
775 | /// |
776 | /// # Example |
777 | /// |
778 | /// ``` |
779 | /// use icu::properties::sets; |
780 | /// |
781 | /// let default_ignorable_code_point = sets::default_ignorable_code_point(); |
782 | /// |
783 | /// assert!(default_ignorable_code_point.contains32(0x180B)); // MONGOLIAN FREE VARIATION SELECTOR ONE |
784 | /// assert!(!default_ignorable_code_point.contains('E')); |
785 | /// ``` |
786 | |
787 | pub const fn default_ignorable_code_point() => SINGLETON_PROPS_DI_V1; |
788 | pub fn load_default_ignorable_code_point(); |
789 | } |
790 | |
791 | make_code_point_set_property! { |
792 | property: "Diacritic" ; |
793 | marker: DiacriticProperty; |
794 | keyed_data_marker: DiacriticV1Marker; |
795 | func: |
796 | /// Characters that linguistically modify the meaning of another character to which they apply |
797 | /// |
798 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
799 | /// |
800 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
801 | /// |
802 | /// # Example |
803 | /// |
804 | /// ``` |
805 | /// use icu::properties::sets; |
806 | /// |
807 | /// let diacritic = sets::diacritic(); |
808 | /// |
809 | /// assert!(diacritic.contains('\u{05B3}')); // HEBREW POINT HATAF QAMATS |
810 | /// assert!(!diacritic.contains('א')); // U+05D0 HEBREW LETTER ALEF |
811 | /// ``` |
812 | |
813 | pub const fn diacritic() => SINGLETON_PROPS_DIA_V1; |
814 | pub fn load_diacritic(); |
815 | } |
816 | |
817 | make_code_point_set_property! { |
818 | property: "Emoji_Modifier_Base" ; |
819 | marker: EmojiModifierBaseProperty; |
820 | keyed_data_marker: EmojiModifierBaseV1Marker; |
821 | func: |
822 | /// Characters that can serve as a base for emoji modifiers. |
823 | /// |
824 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
825 | /// |
826 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
827 | /// |
828 | /// # Example |
829 | /// |
830 | /// ``` |
831 | /// use icu::properties::sets; |
832 | /// |
833 | /// let emoji_modifier_base = sets::emoji_modifier_base(); |
834 | /// |
835 | /// assert!(emoji_modifier_base.contains('✊')); // U+270A RAISED FIST |
836 | /// assert!(!emoji_modifier_base.contains('⛰')); // U+26F0 MOUNTAIN |
837 | /// ``` |
838 | 
839 | pub const fn emoji_modifier_base() => SINGLETON_PROPS_EBASE_V1; |
840 | pub fn load_emoji_modifier_base(); |
841 | } |
842 | |
843 | make_code_point_set_property! { |
844 | property: "Emoji_Component" ; |
845 | marker: EmojiComponentProperty; |
846 | keyed_data_marker: EmojiComponentV1Marker; |
847 | func: |
848 | /// Characters used in emoji sequences that normally do not appear on emoji keyboards as |
849 | /// separate choices, such as base characters for emoji keycaps. |
850 | /// |
851 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
852 | /// |
853 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
854 | /// |
855 | /// # Example |
856 | /// |
857 | /// ``` |
858 | /// use icu::properties::sets; |
859 | /// |
860 | /// let emoji_component = sets::emoji_component(); |
861 | /// |
862 | /// assert!(emoji_component.contains('🇹')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T |
863 | /// assert!(emoji_component.contains32(0x20E3)); // COMBINING ENCLOSING KEYCAP |
864 | /// assert!(emoji_component.contains('7')); // U+0037 DIGIT SEVEN, a keycap base character |
865 | /// assert!(!emoji_component.contains('T')); // U+0054 LATIN CAPITAL LETTER T |
866 | /// ``` |
867 | 
868 | pub const fn emoji_component() => SINGLETON_PROPS_ECOMP_V1; |
869 | pub fn load_emoji_component(); |
870 | } |
871 | |
872 | make_code_point_set_property! { |
873 | property: "Emoji_Modifier" ; |
874 | marker: EmojiModifierProperty; |
875 | keyed_data_marker: EmojiModifierV1Marker; |
876 | func: |
877 | /// Characters that are emoji modifiers. |
878 | /// |
879 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
880 | /// |
881 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
882 | /// |
883 | /// # Example |
884 | /// |
885 | /// ``` |
886 | /// use icu::properties::sets; |
887 | /// |
888 | /// let emoji_modifier = sets::emoji_modifier(); |
889 | /// |
890 | /// assert!(emoji_modifier.contains32(0x1F3FD)); // EMOJI MODIFIER FITZPATRICK TYPE-4 |
891 | /// assert!(!emoji_modifier.contains32(0x200C)); // ZERO WIDTH NON-JOINER |
892 | /// ``` |
893 | 
894 | pub const fn emoji_modifier() => SINGLETON_PROPS_EMOD_V1; |
895 | pub fn load_emoji_modifier(); |
896 | } |
897 | |
898 | make_code_point_set_property! { |
899 | property: "Emoji" ; |
900 | marker: EmojiProperty; |
901 | keyed_data_marker: EmojiV1Marker; |
902 | func: |
903 | /// Characters that are emoji. |
904 | /// |
905 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
906 | /// |
907 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
908 | /// |
909 | /// # Example |
910 | /// |
911 | /// ``` |
912 | /// use icu::properties::sets; |
913 | /// |
914 | /// let emoji = sets::emoji(); |
915 | /// |
916 | /// assert!(emoji.contains('🔥')); // U+1F525 FIRE |
917 | /// assert!(!emoji.contains('V')); // U+0056 LATIN CAPITAL LETTER V |
918 | /// ``` |
919 | 
920 | pub const fn emoji() => SINGLETON_PROPS_EMOJI_V1; |
921 | pub fn load_emoji(); |
922 | } |
923 | |
924 | make_code_point_set_property! { |
925 | property: "Emoji_Presentation" ; |
926 | marker: EmojiPresentationProperty; |
927 | keyed_data_marker: EmojiPresentationV1Marker; |
928 | func: |
929 | /// Characters that have emoji presentation by default. |
930 | /// |
931 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
932 | /// |
933 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
934 | /// |
935 | /// # Example |
936 | /// |
937 | /// ``` |
938 | /// use icu::properties::sets; |
939 | /// |
940 | /// let emoji_presentation = sets::emoji_presentation(); |
941 | /// |
942 | /// assert!(emoji_presentation.contains('🦬')); // U+1F9AC BISON |
943 | /// assert!(!emoji_presentation.contains('♻')); // U+267B BLACK UNIVERSAL RECYCLING SYMBOL |
944 | /// ``` |
945 | 
946 | pub const fn emoji_presentation() => SINGLETON_PROPS_EPRES_V1; |
947 | pub fn load_emoji_presentation(); |
948 | } |
949 | |
950 | make_code_point_set_property! { |
951 | property: "Extender" ; |
952 | marker: ExtenderProperty; |
953 | keyed_data_marker: ExtenderV1Marker; |
954 | func: |
955 | /// Characters whose principal function is to extend the value of a preceding alphabetic |
956 | /// character or to extend the shape of adjacent characters. |
957 | /// |
958 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
959 | /// |
960 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
961 | /// |
962 | /// # Example |
963 | /// |
964 | /// ``` |
965 | /// use icu::properties::sets; |
966 | /// |
967 | /// let extender = sets::extender(); |
968 | /// |
969 | /// assert!(extender.contains('ヾ')); // U+30FE KATAKANA VOICED ITERATION MARK |
970 | /// assert!(extender.contains('ー')); // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK |
971 | /// assert!(!extender.contains('・')); // U+30FB KATAKANA MIDDLE DOT (adjacent code point, not an extender) |
972 | /// ``` |
973 | 
974 | pub const fn extender() => SINGLETON_PROPS_EXT_V1; |
975 | pub fn load_extender(); |
976 | } |
977 | |
978 | make_code_point_set_property! { |
979 | property: "Extended_Pictographic" ; |
980 | marker: ExtendedPictographicProperty; |
981 | keyed_data_marker: ExtendedPictographicV1Marker; |
982 | func: |
983 | /// Pictographic symbols, as well as reserved ranges in blocks largely associated with |
984 | /// emoji characters. |
985 | /// |
986 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
987 | /// |
988 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
989 | /// |
990 | /// # Example |
991 | /// |
992 | /// ``` |
993 | /// use icu::properties::sets; |
994 | /// |
995 | /// let extended_pictographic = sets::extended_pictographic(); |
996 | /// |
997 | /// assert!(extended_pictographic.contains('🥳')); // U+1F973 FACE WITH PARTY HORN AND PARTY HAT |
998 | /// assert!(!extended_pictographic.contains('🇪')); // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E |
999 | /// ``` |
1000 | 
1001 | pub const fn extended_pictographic() => SINGLETON_PROPS_EXTPICT_V1; |
1002 | pub fn load_extended_pictographic(); |
1003 | } |
1004 | |
1005 | make_code_point_set_property! { |
1006 | property: "Graph" ; |
1007 | marker: GraphProperty; |
1008 | keyed_data_marker: GraphV1Marker; |
1009 | func: |
1010 | /// Visible characters. |
1011 | /// This is defined for POSIX compatibility (see Annex C of TR18). |
1012 | 
1013 | pub const fn graph() => SINGLETON_PROPS_GRAPH_V1; |
1014 | pub fn load_graph(); |
1015 | } |
1016 | |
1017 | make_code_point_set_property! { |
1018 | property: "Grapheme_Base" ; |
1019 | marker: GraphemeBaseProperty; |
1020 | keyed_data_marker: GraphemeBaseV1Marker; |
1021 | func: |
1022 | /// Property used together with the definition of Standard Korean Syllable Block to define |
1023 | /// "Grapheme base". See D58 in Chapter 3, "Conformance", of the Unicode Standard. |
1024 | /// |
1025 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1026 | /// |
1027 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1028 | /// |
1029 | /// # Example |
1030 | /// |
1031 | /// ``` |
1032 | /// use icu::properties::sets; |
1033 | /// |
1034 | /// let grapheme_base = sets::grapheme_base(); |
1035 | /// |
1036 | /// assert!(grapheme_base.contains('ക')); // U+0D15 MALAYALAM LETTER KA |
1037 | /// assert!(grapheme_base.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I |
1038 | /// assert!(!grapheme_base.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA |
1039 | /// ``` |
1040 | 
1041 | pub const fn grapheme_base() => SINGLETON_PROPS_GR_BASE_V1; |
1042 | pub fn load_grapheme_base(); |
1043 | } |
1044 | |
1045 | make_code_point_set_property! { |
1046 | property: "Grapheme_Extend" ; |
1047 | marker: GraphemeExtendProperty; |
1048 | keyed_data_marker: GraphemeExtendV1Marker; |
1049 | func: |
1050 | /// Property used to define "Grapheme extender". See D59 in Chapter 3, "Conformance", of the |
1051 | /// Unicode Standard. |
1052 | /// |
1053 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1054 | /// |
1055 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1056 | /// |
1057 | /// # Example |
1058 | /// |
1059 | /// ``` |
1060 | /// use icu::properties::sets; |
1061 | /// |
1062 | /// let grapheme_extend = sets::grapheme_extend(); |
1063 | /// |
1064 | /// assert!(!grapheme_extend.contains('ക')); // U+0D15 MALAYALAM LETTER KA |
1065 | /// assert!(!grapheme_extend.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I |
1066 | /// assert!(grapheme_extend.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA |
1067 | /// ``` |
1068 | 
1069 | pub const fn grapheme_extend() => SINGLETON_PROPS_GR_EXT_V1; |
1070 | pub fn load_grapheme_extend(); |
1071 | } |
1072 | |
1073 | make_code_point_set_property! { |
1074 | property: "Grapheme_Link" ; |
1075 | marker: GraphemeLinkProperty; |
1076 | keyed_data_marker: GraphemeLinkV1Marker; |
1077 | func: |
1078 | /// Deprecated property, formerly proposed for programmatic determination of grapheme |
1079 | /// cluster boundaries. |
1080 | 
1081 | pub const fn grapheme_link() => SINGLETON_PROPS_GR_LINK_V1; |
1082 | pub fn load_grapheme_link(); |
1083 | } |
1084 | |
1085 | make_code_point_set_property! { |
1086 | property: "Hex_Digit" ; |
1087 | marker: HexDigitProperty; |
1088 | keyed_data_marker: HexDigitV1Marker; |
1089 | func: |
1090 | /// Characters commonly used for the representation of hexadecimal numbers, plus their |
1091 | /// compatibility equivalents. |
1092 | /// |
1093 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1094 | /// |
1095 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1096 | /// |
1097 | /// # Example |
1098 | /// |
1099 | /// ``` |
1100 | /// use icu::properties::sets; |
1101 | /// |
1102 | /// let hex_digit = sets::hex_digit(); |
1103 | /// |
1104 | /// assert!(hex_digit.contains('0')); |
1105 | /// assert!(!hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE |
1106 | /// assert!(hex_digit.contains('f')); // U+0066 LATIN SMALL LETTER F (ASCII) |
1107 | /// assert!(hex_digit.contains('\u{FF46}')); // U+FF46 FULLWIDTH LATIN SMALL LETTER F |
1108 | /// assert!(hex_digit.contains('\u{FF26}')); // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F |
1109 | /// assert!(!hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS |
1110 | /// ``` |
1111 | 
1112 | pub const fn hex_digit() => SINGLETON_PROPS_HEX_V1; |
1113 | pub fn load_hex_digit(); |
1114 | } |
1115 | |
1116 | make_code_point_set_property! { |
1117 | property: "Hyphen" ; |
1118 | marker: HyphenProperty; |
1119 | keyed_data_marker: HyphenV1Marker; |
1120 | func: |
1121 | /// Deprecated property. Dashes that are used to mark connections between pieces of |
1122 | /// words, plus the Katakana middle dot. |
1123 | 
1124 | pub const fn hyphen() => SINGLETON_PROPS_HYPHEN_V1; |
1125 | pub fn load_hyphen(); |
1126 | } |
1127 | |
1128 | make_code_point_set_property! { |
1129 | property: "Id_Continue" ; |
1130 | marker: IdContinueProperty; |
1131 | keyed_data_marker: IdContinueV1Marker; |
1132 | func: |
1133 | /// Characters that can come after the first character in an identifier. If using NFKC to |
1134 | /// fold differences between characters, use [`load_xid_continue`] instead. See |
1135 | /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for |
1136 | /// more details. |
1137 | /// |
1138 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1139 | /// |
1140 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1141 | /// |
1142 | /// # Example |
1143 | /// |
1144 | /// ``` |
1145 | /// use icu::properties::sets; |
1146 | /// |
1147 | /// let id_continue = sets::id_continue(); |
1148 | /// |
1149 | /// assert!(id_continue.contains('x')); |
1150 | /// assert!(id_continue.contains('1')); |
1151 | /// assert!(id_continue.contains('_')); // U+005F LOW LINE |
1152 | /// assert!(id_continue.contains('ߝ')); // U+07DD NKO LETTER FA |
1153 | /// assert!(!id_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X |
1154 | /// assert!(id_continue.contains32(0xFC5E)); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM |
1155 | /// ``` |
1156 | 
1157 | pub const fn id_continue() => SINGLETON_PROPS_IDC_V1; |
1158 | pub fn load_id_continue(); |
1159 | } |
1160 | |
1161 | make_code_point_set_property! { |
1162 | property: "Ideographic" ; |
1163 | marker: IdeographicProperty; |
1164 | keyed_data_marker: IdeographicV1Marker; |
1165 | func: |
1166 | /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese) |
1167 | /// ideographs, or related siniform ideographs. |
1168 | /// |
1169 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1170 | /// |
1171 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1172 | /// |
1173 | /// # Example |
1174 | /// |
1175 | /// ``` |
1176 | /// use icu::properties::sets; |
1177 | /// |
1178 | /// let ideographic = sets::ideographic(); |
1179 | /// |
1180 | /// assert!(ideographic.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD |
1181 | /// assert!(!ideographic.contains('밥')); // U+BC25 HANGUL SYLLABLE BAB |
1182 | /// ``` |
1183 | 
1184 | pub const fn ideographic() => SINGLETON_PROPS_IDEO_V1; |
1185 | pub fn load_ideographic(); |
1186 | } |
1187 | |
1188 | make_code_point_set_property! { |
1189 | property: "Id_Start" ; |
1190 | marker: IdStartProperty; |
1191 | keyed_data_marker: IdStartV1Marker; |
1192 | func: |
1193 | /// Characters that can begin an identifier. If using NFKC to fold differences between |
1194 | /// characters, use [`load_xid_start`] instead. See [`Unicode Standard Annex |
1195 | /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details. |
1196 | /// |
1197 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1198 | /// |
1199 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1200 | /// |
1201 | /// # Example |
1202 | /// |
1203 | /// ``` |
1204 | /// use icu::properties::sets; |
1205 | /// |
1206 | /// let id_start = sets::id_start(); |
1207 | /// |
1208 | /// assert!(id_start.contains('x')); |
1209 | /// assert!(!id_start.contains('1')); |
1210 | /// assert!(!id_start.contains('_')); // U+005F LOW LINE |
1211 | /// assert!(id_start.contains('ߝ')); // U+07DD NKO LETTER FA |
1212 | /// assert!(!id_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X |
1213 | /// assert!(id_start.contains32(0xFC5E)); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM |
1214 | /// ``` |
1215 | 
1216 | pub const fn id_start() => SINGLETON_PROPS_IDS_V1; |
1217 | pub fn load_id_start(); |
1218 | } |
1219 | |
1220 | make_code_point_set_property! { |
1221 | property: "Ids_Binary_Operator" ; |
1222 | marker: IdsBinaryOperatorProperty; |
1223 | keyed_data_marker: IdsBinaryOperatorV1Marker; |
1224 | func: |
1225 | /// Characters used in Ideographic Description Sequences. |
1226 | /// |
1227 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1228 | /// |
1229 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1230 | /// |
1231 | /// # Example |
1232 | /// |
1233 | /// ``` |
1234 | /// use icu::properties::sets; |
1235 | /// |
1236 | /// let ids_binary_operator = sets::ids_binary_operator(); |
1237 | /// |
1238 | /// assert!(ids_binary_operator.contains32(0x2FF5)); // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE |
1239 | /// assert!(!ids_binary_operator.contains32(0x3006)); // IDEOGRAPHIC CLOSING MARK |
1240 | /// ``` |
1241 | 
1242 | pub const fn ids_binary_operator() => SINGLETON_PROPS_IDSB_V1; |
1243 | pub fn load_ids_binary_operator(); |
1244 | } |
1245 | |
1246 | make_code_point_set_property! { |
1247 | property: "Ids_Trinary_Operator" ; |
1248 | marker: IdsTrinaryOperatorProperty; |
1249 | keyed_data_marker: IdsTrinaryOperatorV1Marker; |
1250 | func: |
1251 | /// Characters used in Ideographic Description Sequences. |
1252 | /// |
1253 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1254 | /// |
1255 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1256 | /// |
1257 | /// # Example |
1258 | /// |
1259 | /// ``` |
1260 | /// use icu::properties::sets; |
1261 | /// |
1262 | /// let ids_trinary_operator = sets::ids_trinary_operator(); |
1263 | /// |
1264 | /// assert!(ids_trinary_operator.contains32(0x2FF2)); // IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT |
1265 | /// assert!(ids_trinary_operator.contains32(0x2FF3)); // IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW |
1266 | /// assert!(!ids_trinary_operator.contains32(0x2FF4)); // IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND |
1267 | /// assert!(!ids_trinary_operator.contains32(0x2FF5)); // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE |
1268 | /// assert!(!ids_trinary_operator.contains32(0x3006)); // IDEOGRAPHIC CLOSING MARK |
1269 | /// ``` |
1270 | 
1271 | pub const fn ids_trinary_operator() => SINGLETON_PROPS_IDST_V1; |
1272 | pub fn load_ids_trinary_operator(); |
1273 | } |
1274 | |
1275 | make_code_point_set_property! { |
1276 | property: "Join_Control" ; |
1277 | marker: JoinControlProperty; |
1278 | keyed_data_marker: JoinControlV1Marker; |
1279 | func: |
1280 | /// Format control characters which have specific functions for control of cursive joining |
1281 | /// and ligation. |
1282 | /// |
1283 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1284 | /// |
1285 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1286 | /// |
1287 | /// # Example |
1288 | /// |
1289 | /// ``` |
1290 | /// use icu::properties::sets; |
1291 | /// |
1292 | /// let join_control = sets::join_control(); |
1293 | /// |
1294 | /// assert!(join_control.contains32(0x200C)); // ZERO WIDTH NON-JOINER |
1295 | /// assert!(join_control.contains32(0x200D)); // ZERO WIDTH JOINER |
1296 | /// assert!(!join_control.contains32(0x200E)); // LEFT-TO-RIGHT MARK |
1297 | /// ``` |
1298 | 
1299 | pub const fn join_control() => SINGLETON_PROPS_JOIN_C_V1; |
1300 | pub fn load_join_control(); |
1301 | } |
1302 | |
1303 | make_code_point_set_property! { |
1304 | property: "Logical_Order_Exception" ; |
1305 | marker: LogicalOrderExceptionProperty; |
1306 | keyed_data_marker: LogicalOrderExceptionV1Marker; |
1307 | func: |
1308 | /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts such as Thai and Lao. |
1309 | /// |
1310 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1311 | /// |
1312 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1313 | /// |
1314 | /// # Example |
1315 | /// |
1316 | /// ``` |
1317 | /// use icu::properties::sets; |
1318 | /// |
1319 | /// let logical_order_exception = sets::logical_order_exception(); |
1320 | /// |
1321 | /// assert!(logical_order_exception.contains('ແ')); // U+0EC1 LAO VOWEL SIGN EI |
1322 | /// assert!(!logical_order_exception.contains('ະ')); // U+0EB0 LAO VOWEL SIGN A |
1323 | /// ``` |
1324 | 
1325 | pub const fn logical_order_exception() => SINGLETON_PROPS_LOE_V1; |
1326 | pub fn load_logical_order_exception(); |
1327 | } |
1328 | |
1329 | make_code_point_set_property! { |
1330 | property: "Lowercase" ; |
1331 | marker: LowercaseProperty; |
1332 | keyed_data_marker: LowercaseV1Marker; |
1333 | func: |
1334 | /// Lowercase characters. |
1335 | /// |
1336 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1337 | /// |
1338 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1339 | /// |
1340 | /// # Example |
1341 | /// |
1342 | /// ``` |
1343 | /// use icu::properties::sets; |
1344 | /// |
1345 | /// let lowercase = sets::lowercase(); |
1346 | /// |
1347 | /// assert!(lowercase.contains('a')); // U+0061 LATIN SMALL LETTER A |
1348 | /// assert!(!lowercase.contains('A')); // U+0041 LATIN CAPITAL LETTER A |
1349 | /// ``` |
1350 | 
1351 | pub const fn lowercase() => SINGLETON_PROPS_LOWER_V1; |
1352 | pub fn load_lowercase(); |
1353 | } |
1354 | |
1355 | make_code_point_set_property! { |
1356 | property: "Math" ; |
1357 | marker: MathProperty; |
1358 | keyed_data_marker: MathV1Marker; |
1359 | func: |
1360 | /// Characters used in mathematical notation. |
1361 | /// |
1362 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1363 | /// |
1364 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1365 | /// |
1366 | /// # Example |
1367 | /// |
1368 | /// ``` |
1369 | /// use icu::properties::sets; |
1370 | /// |
1371 | /// let math = sets::math(); |
1372 | /// |
1373 | /// assert!(math.contains('=')); |
1374 | /// assert!(math.contains('+')); |
1375 | /// assert!(!math.contains('-')); // U+002D HYPHEN-MINUS |
1376 | /// assert!(math.contains('−')); // U+2212 MINUS SIGN |
1377 | /// assert!(!math.contains('/')); // U+002F SOLIDUS |
1378 | /// assert!(math.contains('∕')); // U+2215 DIVISION SLASH |
1379 | /// ``` |
1380 | 
1381 | pub const fn math() => SINGLETON_PROPS_MATH_V1; |
1382 | pub fn load_math(); |
1383 | } |
1384 | |
1385 | make_code_point_set_property! { |
1386 | property: "Noncharacter_Code_Point" ; |
1387 | marker: NoncharacterCodePointProperty; |
1388 | keyed_data_marker: NoncharacterCodePointV1Marker; |
1389 | func: |
1390 | /// Code points permanently reserved for internal use. |
1391 | /// |
1392 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1393 | /// |
1394 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1395 | /// |
1396 | /// # Example |
1397 | /// |
1398 | /// ``` |
1399 | /// use icu::properties::sets; |
1400 | /// |
1401 | /// let noncharacter_code_point = sets::noncharacter_code_point(); |
1402 | /// |
1403 | /// assert!(noncharacter_code_point.contains32(0xFDD0)); |
1404 | /// assert!(noncharacter_code_point.contains32(0xFFFF)); // last code point of the Basic Multilingual Plane |
1405 | /// assert!(!noncharacter_code_point.contains32(0x10000)); // LINEAR B SYLLABLE B008 A |
1406 | /// ``` |
1407 | 
1408 | pub const fn noncharacter_code_point() => SINGLETON_PROPS_NCHAR_V1; |
1409 | pub fn load_noncharacter_code_point(); |
1410 | } |
1411 | |
1412 | make_code_point_set_property! { |
1413 | property: "NFC_Inert" ; |
1414 | marker: NfcInertProperty; |
1415 | keyed_data_marker: NfcInertV1Marker; |
1416 | func: |
1417 | /// Characters that are inert under NFC, i.e., they do not interact with adjacent characters. |
1418 | 
1419 | pub const fn nfc_inert() => SINGLETON_PROPS_NFCINERT_V1; |
1420 | pub fn load_nfc_inert(); |
1421 | } |
1422 | |
1423 | make_code_point_set_property! { |
1424 | property: "NFD_Inert" ; |
1425 | marker: NfdInertProperty; |
1426 | keyed_data_marker: NfdInertV1Marker; |
1427 | func: |
1428 | /// Characters that are inert under NFD, i.e., they do not interact with adjacent characters. |
1429 | 
1430 | pub const fn nfd_inert() => SINGLETON_PROPS_NFDINERT_V1; |
1431 | pub fn load_nfd_inert(); |
1432 | } |
1433 | |
1434 | make_code_point_set_property! { |
1435 | property: "NFKC_Inert" ; |
1436 | marker: NfkcInertProperty; |
1437 | keyed_data_marker: NfkcInertV1Marker; |
1438 | func: |
1439 | /// Characters that are inert under NFKC, i.e., they do not interact with adjacent characters. |
1440 | 
1441 | pub const fn nfkc_inert() => SINGLETON_PROPS_NFKCINERT_V1; |
1442 | pub fn load_nfkc_inert(); |
1443 | } |
1444 | |
1445 | make_code_point_set_property! { |
1446 | property: "NFKD_Inert" ; |
1447 | marker: NfkdInertProperty; |
1448 | keyed_data_marker: NfkdInertV1Marker; |
1449 | func: |
1450 | /// Characters that are inert under NFKD, i.e., they do not interact with adjacent characters. |
1451 | 
1452 | pub const fn nfkd_inert() => SINGLETON_PROPS_NFKDINERT_V1; |
1453 | pub fn load_nfkd_inert(); |
1454 | } |
1455 | |
1456 | make_code_point_set_property! { |
1457 | property: "Pattern_Syntax" ; |
1458 | marker: PatternSyntaxProperty; |
1459 | keyed_data_marker: PatternSyntaxV1Marker; |
1460 | func: |
1461 | /// Characters used as syntax in patterns (such as regular expressions). See [`Unicode |
1462 | /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more |
1463 | /// details. |
1464 | /// |
1465 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1466 | /// |
1467 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1468 | /// |
1469 | /// # Example |
1470 | /// |
1471 | /// ``` |
1472 | /// use icu::properties::sets; |
1473 | /// |
1474 | /// let pattern_syntax = sets::pattern_syntax(); |
1475 | /// |
1476 | /// assert!(pattern_syntax.contains('{')); // U+007B LEFT CURLY BRACKET |
1477 | /// assert!(pattern_syntax.contains('⇒')); // U+21D2 RIGHTWARDS DOUBLE ARROW |
1478 | /// assert!(!pattern_syntax.contains('0')); // U+0030 DIGIT ZERO |
1479 | /// ``` |
1480 | 
1481 | pub const fn pattern_syntax() => SINGLETON_PROPS_PAT_SYN_V1; |
1482 | pub fn load_pattern_syntax(); |
1483 | } |
1484 | |
1485 | make_code_point_set_property! { |
1486 | property: "Pattern_White_Space" ; |
1487 | marker: PatternWhiteSpaceProperty; |
1488 | keyed_data_marker: PatternWhiteSpaceV1Marker; |
1489 | func: |
1490 | /// Characters used as whitespace in patterns (such as regular expressions). See |
1491 | /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for |
1492 | /// more details. |
1493 | /// |
1494 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1495 | /// |
1496 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1497 | /// |
1498 | /// # Example |
1499 | /// |
1500 | /// ``` |
1501 | /// use icu::properties::sets; |
1502 | /// |
1503 | /// let pattern_white_space = sets::pattern_white_space(); |
1504 | /// |
1505 | /// assert!(pattern_white_space.contains(' ')); // U+0020 SPACE |
1506 | /// assert!(pattern_white_space.contains32(0x2029)); // PARAGRAPH SEPARATOR |
1507 | /// assert!(pattern_white_space.contains32(0x000A)); // LINE FEED (new line) |
1508 | /// assert!(!pattern_white_space.contains32(0x00A0)); // NO-BREAK SPACE |
1509 | /// ``` |
1510 | 
1511 | pub const fn pattern_white_space() => SINGLETON_PROPS_PAT_WS_V1; |
1512 | pub fn load_pattern_white_space(); |
1513 | } |
1514 | |
1515 | make_code_point_set_property! { |
1516 | property: "Prepended_Concatenation_Mark" ; |
1517 | marker: PrependedConcatenationMarkProperty; |
1518 | keyed_data_marker: PrependedConcatenationMarkV1Marker; |
1519 | func: |
1520 | /// A small class of visible format controls that precede and then span a sequence of |
1521 | /// other characters, usually digits. |
1522 | 
1523 | pub const fn prepended_concatenation_mark() => SINGLETON_PROPS_PCM_V1; |
1524 | pub fn load_prepended_concatenation_mark(); |
1525 | } |
1526 | |
1527 | make_code_point_set_property! { |
1528 | property: "Print" ; |
1529 | marker: PrintProperty; |
1530 | keyed_data_marker: PrintV1Marker; |
1531 | func: |
1532 | /// Printable characters (visible characters and whitespace). |
1533 | /// This is defined for POSIX compatibility (see Annex C of TR18). |
1534 | 
1535 | pub const fn print() => SINGLETON_PROPS_PRINT_V1; |
1536 | pub fn load_print(); |
1537 | } |
1538 | |
1539 | make_code_point_set_property! { |
1540 | property: "Quotation_Mark" ; |
1541 | marker: QuotationMarkProperty; |
1542 | keyed_data_marker: QuotationMarkV1Marker; |
1543 | func: |
1544 | /// Punctuation characters that function as quotation marks. |
1545 | /// |
1546 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1547 | /// |
1548 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1549 | /// |
1550 | /// # Example |
1551 | /// |
1552 | /// ``` |
1553 | /// use icu::properties::sets; |
1554 | /// |
1555 | /// let quotation_mark = sets::quotation_mark(); |
1556 | /// |
1557 | /// assert!(quotation_mark.contains('\'')); // U+0027 APOSTROPHE |
1558 | /// assert!(quotation_mark.contains('„')); // U+201E DOUBLE LOW-9 QUOTATION MARK |
1559 | /// assert!(!quotation_mark.contains('<')); // U+003C LESS-THAN SIGN |
1560 | /// ``` |
1561 | 
1562 | pub const fn quotation_mark() => SINGLETON_PROPS_QMARK_V1; |
1563 | pub fn load_quotation_mark(); |
1564 | } |
1565 | |
1566 | make_code_point_set_property! { |
1567 | property: "Radical" ; |
1568 | marker: RadicalProperty; |
1569 | keyed_data_marker: RadicalV1Marker; |
1570 | func: |
1571 | /// Characters used in the definition of Ideographic Description Sequences. |
1572 | /// |
1573 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1574 | /// |
1575 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1576 | /// |
1577 | /// # Example |
1578 | /// |
1579 | /// ``` |
1580 | /// use icu::properties::sets; |
1581 | /// |
1582 | /// let radical = sets::radical(); |
1583 | /// |
1584 | /// assert!(radical.contains('⺆')); // U+2E86 CJK RADICAL BOX |
1585 | /// assert!(!radical.contains('\u{F95E}')); // U+F95E CJK COMPATIBILITY IDEOGRAPH-F95E (escaped: a literal would not survive NFC) |
1586 | /// ``` |
1587 | 
1588 | pub const fn radical() => SINGLETON_PROPS_RADICAL_V1; |
1589 | pub fn load_radical(); |
1590 | } |
1591 | |
1592 | make_code_point_set_property! { |
1593 | property: "Regional_Indicator" ; |
1594 | marker: RegionalIndicatorProperty; |
1595 | keyed_data_marker: RegionalIndicatorV1Marker; |
1596 | func: |
1597 | /// Regional indicator characters, U+1F1E6..U+1F1FF. |
1598 | /// |
1599 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1600 | /// |
1601 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1602 | /// |
1603 | /// # Example |
1604 | /// |
1605 | /// ``` |
1606 | /// use icu::properties::sets; |
1607 | /// |
1608 | /// let regional_indicator = sets::regional_indicator(); |
1609 | /// |
1610 | /// assert!(regional_indicator.contains('🇹')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T |
1611 | /// assert!(!regional_indicator.contains('Ⓣ')); // U+24C9 CIRCLED LATIN CAPITAL LETTER T |
1612 | /// assert!(!regional_indicator.contains('T')); // U+0054 LATIN CAPITAL LETTER T |
1613 | /// ``` |
1614 | 
1615 | pub const fn regional_indicator() => SINGLETON_PROPS_RI_V1; |
1616 | pub fn load_regional_indicator(); |
1617 | } |
1618 | |
1619 | make_code_point_set_property! { |
1620 | property: "Soft_Dotted" ; |
1621 | marker: SoftDottedProperty; |
1622 | keyed_data_marker: SoftDottedV1Marker; |
1623 | func: |
1624 | /// Characters with a "soft dot", like i or j. An accent placed on these characters causes |
1625 | /// the dot to disappear. |
1626 | /// |
1627 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1628 | /// |
1629 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1630 | /// |
1631 | /// # Example |
1632 | /// |
1633 | /// ``` |
1634 | /// use icu::properties::sets; |
1635 | /// |
1636 | /// let soft_dotted = sets::soft_dotted(); |
1637 | /// |
1638 | /// assert!(soft_dotted.contains('і')); // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I |
1639 | /// assert!(!soft_dotted.contains('ı')); // U+0131 LATIN SMALL LETTER DOTLESS I |
1640 | /// ``` |
1641 | 
1642 | pub const fn soft_dotted() => SINGLETON_PROPS_SD_V1; |
1643 | pub fn load_soft_dotted(); |
1644 | } |
1645 | |
1646 | make_code_point_set_property! { |
1647 | property: "Segment_Starter" ; |
1648 | marker: SegmentStarterProperty; |
1649 | keyed_data_marker: SegmentStarterV1Marker; |
1650 | func: |
1651 | /// Characters that are starters in terms of Unicode normalization and combining character |
1652 | /// sequences. |
1653 | 
1654 | pub const fn segment_starter() => SINGLETON_PROPS_SEGSTART_V1; |
1655 | pub fn load_segment_starter(); |
1656 | } |
1657 | |
1658 | make_code_point_set_property! { |
1659 | property: "Case_Sensitive" ; |
1660 | marker: CaseSensitiveProperty; |
1661 | keyed_data_marker: CaseSensitiveV1Marker; |
1662 | func: |
1663 | /// Characters that are either the source of a case mapping or in the target of a case |
1664 | /// mapping. |
1665 | 
1666 | pub const fn case_sensitive() => SINGLETON_PROPS_SENSITIVE_V1; |
1667 | pub fn load_case_sensitive(); |
1668 | } |
1669 | |
1670 | make_code_point_set_property! { |
1671 | property: "Sentence_Terminal" ; |
1672 | marker: SentenceTerminalProperty; |
1673 | keyed_data_marker: SentenceTerminalV1Marker; |
1674 | func: |
1675 | /// Punctuation characters that generally mark the end of sentences. |
1676 | /// |
1677 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1678 | /// |
1679 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1680 | /// |
1681 | /// # Example |
1682 | /// |
1683 | /// ``` |
1684 | /// use icu::properties::sets; |
1685 | /// |
1686 | /// let sentence_terminal = sets::sentence_terminal(); |
1687 | /// |
1688 | /// assert!(sentence_terminal.contains('.')); // U+002E FULL STOP |
1689 | /// assert!(sentence_terminal.contains('?')); // U+003F QUESTION MARK |
1690 | /// assert!(sentence_terminal.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN |
1691 | /// assert!(!sentence_terminal.contains(',')); // U+002C COMMA |
1692 | /// assert!(!sentence_terminal.contains('¿')); // U+00BF INVERTED QUESTION MARK |
1693 | /// ``` |
1694 | 
1695 | pub const fn sentence_terminal() => SINGLETON_PROPS_STERM_V1; |
1696 | pub fn load_sentence_terminal(); |
1697 | } |
1698 | |
1699 | make_code_point_set_property! { |
1700 | property: "Terminal_Punctuation" ; |
1701 | marker: TerminalPunctuationProperty; |
1702 | keyed_data_marker: TerminalPunctuationV1Marker; |
1703 | func: |
1704 | /// Punctuation characters that generally mark the end of textual units |
1705 | /// |
1706 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1707 | /// |
1708 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1709 | /// |
1710 | /// # Example |
1711 | /// |
1712 | /// ``` |
1713 | /// use icu::properties::sets; |
1714 | /// |
1715 | /// let terminal_punctuation = sets::terminal_punctuation(); |
1716 | /// |
1717 | /// assert!(terminal_punctuation.contains('.')); |
1718 | /// assert!(terminal_punctuation.contains('?')); |
1719 | /// assert!(terminal_punctuation.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN |
1720 | /// assert!(terminal_punctuation.contains(',')); |
1721 | /// assert!(!terminal_punctuation.contains('¿')); // U+00BF INVERTED QUESTION MARK |
1722 | /// ``` |
1723 | |
1724 | pub const fn terminal_punctuation() => SINGLETON_PROPS_TERM_V1; |
1725 | pub fn load_terminal_punctuation(); |
1726 | } |
1727 | |
1728 | make_code_point_set_property! { |
1729 | property: "Unified_Ideograph" ; |
1730 | marker: UnifiedIdeographProperty; |
1731 | keyed_data_marker: UnifiedIdeographV1Marker; |
1732 | func: |
1733 | /// A property which specifies the exact set of Unified CJK Ideographs in the standard |
1734 | /// |
1735 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1736 | /// |
1737 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1738 | /// |
1739 | /// # Example |
1740 | /// |
1741 | /// ``` |
1742 | /// use icu::properties::sets; |
1743 | /// |
1744 | /// let unified_ideograph = sets::unified_ideograph(); |
1745 | /// |
1746 | /// assert!(unified_ideograph.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD |
1747 | /// assert!(unified_ideograph.contains('木')); // U+6728 CJK UNIFIED IDEOGRAPH-6728 |
1748 | /// assert!(!unified_ideograph.contains('𛅸')); // U+1B178 NUSHU CHARACTER-1B178 |
1749 | /// ``` |
1750 | |
1751 | pub const fn unified_ideograph() => SINGLETON_PROPS_UIDEO_V1; |
1752 | pub fn load_unified_ideograph(); |
1753 | } |
1754 | |
1755 | make_code_point_set_property! { |
1756 | property: "Uppercase" ; |
1757 | marker: UppercaseProperty; |
1758 | keyed_data_marker: UppercaseV1Marker; |
1759 | func: |
1760 | /// Uppercase characters |
1761 | /// |
1762 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1763 | /// |
1764 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1765 | /// |
1766 | /// # Example |
1767 | /// |
1768 | /// ``` |
1769 | /// use icu::properties::sets; |
1770 | /// |
1771 | /// let uppercase = sets::uppercase(); |
1772 | /// |
1773 | /// assert!(uppercase.contains('U')); |
1774 | /// assert!(!uppercase.contains('u')); |
1775 | /// ``` |
1776 | |
1777 | pub const fn uppercase() => SINGLETON_PROPS_UPPER_V1; |
1778 | pub fn load_uppercase(); |
1779 | } |
1780 | |
1781 | make_code_point_set_property! { |
1782 | property: "Variation_Selector" ; |
1783 | marker: VariationSelectorProperty; |
1784 | keyed_data_marker: VariationSelectorV1Marker; |
1785 | func: |
1786 | /// Characters that are Variation Selectors. |
1787 | /// |
1788 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1789 | /// |
1790 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1791 | /// |
1792 | /// # Example |
1793 | /// |
1794 | /// ``` |
1795 | /// use icu::properties::sets; |
1796 | /// |
1797 | /// let variation_selector = sets::variation_selector(); |
1798 | /// |
1799 | /// assert!(variation_selector.contains32(0x180D)); // MONGOLIAN FREE VARIATION SELECTOR THREE |
1800 | /// assert!(!variation_selector.contains32(0x303E)); // IDEOGRAPHIC VARIATION INDICATOR |
1801 | /// assert!(variation_selector.contains32(0xFE0F)); // VARIATION SELECTOR-16 |
1802 | /// assert!(!variation_selector.contains32(0xFE10)); // PRESENTATION FORM FOR VERTICAL COMMA |
1803 | /// assert!(variation_selector.contains32(0xE01EF)); // VARIATION SELECTOR-256 |
1804 | /// ``` |
1805 | |
1806 | pub const fn variation_selector() => SINGLETON_PROPS_VS_V1; |
1807 | pub fn load_variation_selector(); |
1808 | } |
1809 | |
1810 | make_code_point_set_property! { |
1811 | property: "White_Space" ; |
1812 | marker: WhiteSpaceProperty; |
1813 | keyed_data_marker: WhiteSpaceV1Marker; |
1814 | func: |
1815 | /// Spaces, separator characters and other control characters which should be treated by |
1816 | /// programming languages as "white space" for the purpose of parsing elements |
1817 | /// |
1818 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1819 | /// |
1820 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1821 | /// |
1822 | /// # Example |
1823 | /// |
1824 | /// ``` |
1825 | /// use icu::properties::sets; |
1826 | /// |
1827 | /// let white_space = sets::white_space(); |
1828 | /// |
1829 | /// assert!(white_space.contains(' ')); |
1830 | /// assert!(white_space.contains32(0x000A)); // NEW LINE |
1831 | /// assert!(white_space.contains32(0x00A0)); // NO-BREAK SPACE |
1832 | /// assert!(!white_space.contains32(0x200B)); // ZERO WIDTH SPACE |
1833 | /// ``` |
1834 | |
1835 | pub const fn white_space() => SINGLETON_PROPS_WSPACE_V1; |
1836 | pub fn load_white_space(); |
1837 | } |
1838 | |
1839 | make_code_point_set_property! { |
1840 | property: "Xdigit" ; |
1841 | marker: XdigitProperty; |
1842 | keyed_data_marker: XdigitV1Marker; |
1843 | func: |
1844 | /// Hexadecimal digits |
1845 | /// This is defined for POSIX compatibility. |
1846 | |
1847 | pub const fn xdigit() => SINGLETON_PROPS_XDIGIT_V1; |
1848 | pub fn load_xdigit(); |
1849 | } |
1850 | |
1851 | make_code_point_set_property! { |
1852 | property: "XID_Continue" ; |
1853 | marker: XidContinueProperty; |
1854 | keyed_data_marker: XidContinueV1Marker; |
1855 | func: |
1856 | /// Characters that can come after the first character in an identifier. See [`Unicode Standard Annex |
1857 | /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details. |
1858 | /// |
1859 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1860 | /// |
1861 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1862 | /// |
1863 | /// # Example |
1864 | /// |
1865 | /// ``` |
1866 | /// use icu::properties::sets; |
1867 | /// |
1868 | /// let xid_continue = sets::xid_continue(); |
1869 | /// |
1870 | /// assert!(xid_continue.contains('x')); |
1871 | /// assert!(xid_continue.contains('1')); |
1872 | /// assert!(xid_continue.contains('_')); |
1873 | /// assert!(xid_continue.contains('ߝ')); // U+07DD NKO LETTER FA |
1874 | /// assert!(!xid_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X |
1875 | /// assert!(!xid_continue.contains32(0xFC5E)); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM |
1876 | /// ``` |
1877 | |
1878 | pub const fn xid_continue() => SINGLETON_PROPS_XIDC_V1; |
1879 | pub fn load_xid_continue(); |
1880 | } |
1881 | |
1882 | make_code_point_set_property! { |
1883 | property: "XID_Start" ; |
1884 | marker: XidStartProperty; |
1885 | keyed_data_marker: XidStartV1Marker; |
1886 | func: |
1887 | /// Characters that can begin an identifier. See [`Unicode |
1888 | /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more |
1889 | /// details. |
1890 | /// |
1891 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1892 | /// |
1893 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1894 | /// |
1895 | /// # Example |
1896 | /// |
1897 | /// ``` |
1898 | /// use icu::properties::sets; |
1899 | /// |
1900 | /// let xid_start = sets::xid_start(); |
1901 | /// |
1902 | /// assert!(xid_start.contains('x')); |
1903 | /// assert!(!xid_start.contains('1')); |
1904 | /// assert!(!xid_start.contains('_')); |
1905 | /// assert!(xid_start.contains('ߝ')); // U+07DD NKO LETTER FA |
1906 | /// assert!(!xid_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X |
1907 | /// assert!(!xid_start.contains32(0xFC5E)); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM |
1908 | /// ``` |
1909 | |
1910 | pub const fn xid_start() => SINGLETON_PROPS_XIDS_V1; |
1911 | pub fn load_xid_start(); |
1912 | } |
1913 | |
1914 | // |
1915 | // Binary property getter fns |
1916 | // (data as sets of strings + code points) |
1917 | // |
1918 | |
// Expands one property declaration into two getters that return a set of strings plus
// code points:
//   * `$funcname(provider)` loads the data from a caller-supplied `DataProvider`;
//   * `$constname()` (only with the `compiled_data` feature) borrows the baked singleton.
macro_rules! make_unicode_set_property {
    (
        // currently unused
        property: $property:expr;
        // currently unused
        marker: $marker_name:ident;
        keyed_data_marker: $keyed_data_marker:ty;
        func:
        $(#[$doc:meta])+
        $cvis:vis const fn $constname:ident() => $singleton:ident;
        $vis:vis fn $funcname:ident();
    ) => {
        #[doc = concat!("A version of [`", stringify!($constname), "()`] that uses custom data provided by a [`DataProvider`].")]
        $vis fn $funcname(
            provider: &(impl DataProvider<$keyed_data_marker> + ?Sized)
        ) -> Result<UnicodeSetData, PropertiesError> {
            // Each step may fail with a provider error; `?` converts it to `PropertiesError`.
            let response = provider.load(Default::default())?;
            let payload = response.take_payload()?;
            Ok(UnicodeSetData::from_data(payload))
        }
        // The caller-written doc comments are attached to the compiled-data getter.
        $(#[$doc])*
        #[cfg(feature = "compiled_data")]
        $cvis const fn $constname() -> UnicodeSetDataBorrowed<'static> {
            UnicodeSetDataBorrowed {
                set: crate::provider::Baked::$singleton
            }
        }
    }
}
1946 | |
1947 | make_unicode_set_property! { |
1948 | property: "Basic_Emoji" ; |
1949 | marker: BasicEmojiProperty; |
1950 | keyed_data_marker: BasicEmojiV1Marker; |
1951 | func: |
1952 | /// Characters and character sequences intended for general-purpose, independent, direct input. |
1953 | /// See [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/) for more |
1954 | /// details. |
1955 | /// |
1956 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
1957 | /// |
1958 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1959 | /// |
1960 | /// # Example |
1961 | /// |
1962 | /// ``` |
1963 | /// use icu::properties::sets; |
1964 | /// |
1965 | /// let basic_emoji = sets::basic_emoji(); |
1966 | /// |
1967 | /// assert!(!basic_emoji.contains32(0x0020)); |
1968 | /// assert!(!basic_emoji.contains_char('\n')); |
1969 | /// assert!(basic_emoji.contains_char('🦃')); // U+1F983 TURKEY |
1970 | /// assert!(basic_emoji.contains("\u{1F983}")); |
1971 | /// assert!(basic_emoji.contains("\u{1F6E4}\u{FE0F}")); // railway track |
1972 | /// assert!(!basic_emoji.contains("\u{0033}\u{FE0F}\u{20E3}")); // Emoji_Keycap_Sequence, keycap 3 |
1973 | /// ``` |
1974 | pub const fn basic_emoji() => SINGLETON_PROPS_BASIC_EMOJI_V1; |
1975 | pub fn load_basic_emoji(); |
1976 | } |
1977 | |
1978 | // |
1979 | // Enumerated property getter fns |
1980 | // |
1981 | |
1982 | /// A version of [`for_general_category_group()`] that uses custom data provided by a [`DataProvider`]. |
1983 | /// |
1984 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
1985 | pub fn load_for_general_category_group( |
1986 | provider: &(impl DataProvider<GeneralCategoryV1Marker> + ?Sized), |
1987 | enum_val: GeneralCategoryGroup, |
1988 | ) -> Result<CodePointSetData, PropertiesError> { |
1989 | let gc_map_payload: CodePointMapData = maps::load_general_category(provider)?; |
1990 | let gc_map: CodePointMapDataBorrowed<'_, …> = gc_map_payload.as_borrowed(); |
1991 | let matching_gc_ranges: impl Iterator- >
= gc_mapimpl Iterator- >
|
1992 | .iter_ranges() |
1993 | .filter(|cpm_range: &CodePointMapRange| (1 << cpm_range.value as u32) & enum_val.0 != 0) |
1994 | .map(|cpm_range: CodePointMapRange| cpm_range.range); |
1995 | let set: CodePointInversionList<'_> = CodePointInversionList::from_iter(matching_gc_ranges); |
1996 | Ok(CodePointSetData::from_code_point_inversion_list(set)) |
1997 | } |
1998 | |
/// Return a [`CodePointSetData`] for a value or a grouping of values of the General_Category property. See [`GeneralCategoryGroup`].
///
/// The set is built from the compiled `General_Category` map by keeping every code point
/// range whose category belongs to `enum_val`.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
#[cfg(feature = "compiled_data")]
pub fn for_general_category_group(enum_val: GeneralCategoryGroup) -> CodePointSetData {
    let matching_gc_ranges = maps::general_category()
        .iter_ranges()
        // The category value is used as a bit index into the group's mask.
        .filter(|cpm_range| (1 << cpm_range.value as u32) & enum_val.0 != 0)
        .map(|cpm_range| cpm_range.range);
    let set = CodePointInversionList::from_iter(matching_gc_ranges);
    CodePointSetData::from_code_point_inversion_list(set)
}
2013 | |
/// Returns a type capable of looking up values for a property specified as a string, as long as it is a
/// [binary property listed in ECMA-262][ecma], using strict matching on the names in the spec.
///
/// This handles every property required by ECMA-262 `/u` regular expressions, except for:
///
/// - `Script` and `General_Category`: handle these directly with [`maps::load_script()`] and
///   [`maps::load_general_category()`], using property values parsed via
///   [`Script::get_name_to_enum_mapper()`] and [`GeneralCategory::get_name_to_enum_mapper()`]
///   if necessary.
/// - `Script_Extensions`: handle this directly using APIs from [`crate::script`], like [`script::load_script_with_extensions_unstable()`]
/// - `General_Category` mask values: Handle this alongside `General_Category` using [`GeneralCategoryGroup`],
///   using property values parsed via [`GeneralCategoryGroup::get_name_to_enum_mapper()`] if necessary
/// - `Assigned`, `Any`, and `ASCII` pseudoproperties: Handle these using their equivalent sets:
///   - `Any` can be expressed as the range `[\u{0}-\u{10FFFF}]`
///   - `Assigned` can be expressed as the inverse of the set `gc=Cn` (i.e., `\P{gc=Cn}`).
///   - `ASCII` can be expressed as the range `[\u{0}-\u{7F}]`
/// - `General_Category` property values can themselves be treated like properties using a shorthand in ECMA262,
///   simply create the corresponding `GeneralCategory` set.
///
/// # Errors
///
/// Returns [`PropertiesError::UnexpectedPropertyName`] when `name` is not recognized, or when
/// it names a property that is not one of the binary properties handled here.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let emoji = sets::load_for_ecma262("Emoji").expect("loading data failed");
///
/// assert!(emoji.contains('🔥')); // U+1F525 FIRE
/// assert!(!emoji.contains('V'));
/// ```
///
/// [ecma]: https://tc39.es/ecma262/#table-binary-unicode-properties
#[cfg(feature = "compiled_data")]
pub fn load_for_ecma262(name: &str) -> Result<CodePointSetDataBorrowed<'static>, PropertiesError> {
    use crate::runtime::UnicodeProperty;

    let prop = UnicodeProperty::parse_ecma262_name(name)
        .ok_or(PropertiesError::UnexpectedPropertyName)?;
    Ok(match prop {
        UnicodeProperty::AsciiHexDigit => ascii_hex_digit(),
        UnicodeProperty::Alphabetic => alphabetic(),
        UnicodeProperty::BidiControl => bidi_control(),
        UnicodeProperty::BidiMirrored => bidi_mirrored(),
        UnicodeProperty::CaseIgnorable => case_ignorable(),
        UnicodeProperty::Cased => cased(),
        UnicodeProperty::ChangesWhenCasefolded => changes_when_casefolded(),
        UnicodeProperty::ChangesWhenCasemapped => changes_when_casemapped(),
        UnicodeProperty::ChangesWhenLowercased => changes_when_lowercased(),
        UnicodeProperty::ChangesWhenNfkcCasefolded => changes_when_nfkc_casefolded(),
        UnicodeProperty::ChangesWhenTitlecased => changes_when_titlecased(),
        UnicodeProperty::ChangesWhenUppercased => changes_when_uppercased(),
        UnicodeProperty::Dash => dash(),
        UnicodeProperty::DefaultIgnorableCodePoint => default_ignorable_code_point(),
        UnicodeProperty::Deprecated => deprecated(),
        UnicodeProperty::Diacritic => diacritic(),
        UnicodeProperty::Emoji => emoji(),
        UnicodeProperty::EmojiComponent => emoji_component(),
        UnicodeProperty::EmojiModifier => emoji_modifier(),
        UnicodeProperty::EmojiModifierBase => emoji_modifier_base(),
        UnicodeProperty::EmojiPresentation => emoji_presentation(),
        UnicodeProperty::ExtendedPictographic => extended_pictographic(),
        UnicodeProperty::Extender => extender(),
        UnicodeProperty::GraphemeBase => grapheme_base(),
        UnicodeProperty::GraphemeExtend => grapheme_extend(),
        UnicodeProperty::HexDigit => hex_digit(),
        UnicodeProperty::IdsBinaryOperator => ids_binary_operator(),
        UnicodeProperty::IdsTrinaryOperator => ids_trinary_operator(),
        UnicodeProperty::IdContinue => id_continue(),
        UnicodeProperty::IdStart => id_start(),
        UnicodeProperty::Ideographic => ideographic(),
        UnicodeProperty::JoinControl => join_control(),
        UnicodeProperty::LogicalOrderException => logical_order_exception(),
        UnicodeProperty::Lowercase => lowercase(),
        UnicodeProperty::Math => math(),
        UnicodeProperty::NoncharacterCodePoint => noncharacter_code_point(),
        UnicodeProperty::PatternSyntax => pattern_syntax(),
        UnicodeProperty::PatternWhiteSpace => pattern_white_space(),
        UnicodeProperty::QuotationMark => quotation_mark(),
        UnicodeProperty::Radical => radical(),
        UnicodeProperty::RegionalIndicator => regional_indicator(),
        UnicodeProperty::SentenceTerminal => sentence_terminal(),
        UnicodeProperty::SoftDotted => soft_dotted(),
        UnicodeProperty::TerminalPunctuation => terminal_punctuation(),
        UnicodeProperty::UnifiedIdeograph => unified_ideograph(),
        UnicodeProperty::Uppercase => uppercase(),
        UnicodeProperty::VariationSelector => variation_selector(),
        UnicodeProperty::WhiteSpace => white_space(),
        UnicodeProperty::XidContinue => xid_continue(),
        UnicodeProperty::XidStart => xid_start(),
        // The name parsed, but it is not one of the binary properties handled above.
        _ => return Err(PropertiesError::UnexpectedPropertyName),
    })
}
2110 | |
// Constructor boilerplate for `load_for_ecma262`: the functions take a property name
// (`name: &str`) instead of a locale (`locale: skip`) and return an owned `CodePointSetData`.
// NOTE(review): `#[cfg(skip)]` presumably suppresses the macro's own compiled-data
// constructor, since `load_for_ecma262` is written by hand in this file; confirm against
// the macro definition in `icu_provider`.
icu_provider::gen_any_buffer_data_constructors!(
    locale: skip,
    name: &str,
    result: Result<CodePointSetData, PropertiesError>,
    #[cfg(skip)]
    functions: [
        load_for_ecma262,
        load_for_ecma262_with_any_provider,
        load_for_ecma262_with_buffer_provider,
        load_for_ecma262_unstable,
    ]
);
2123 | |
2124 | #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, load_for_ecma262)] |
2125 | pub fn load_for_ecma262_unstable<P>( |
2126 | provider: &P, |
2127 | name: &str, |
2128 | ) -> Result<CodePointSetData, PropertiesError> |
2129 | where |
2130 | P: ?Sized |
2131 | + DataProvider<AsciiHexDigitV1Marker> |
2132 | + DataProvider<AlphabeticV1Marker> |
2133 | + DataProvider<BidiControlV1Marker> |
2134 | + DataProvider<BidiMirroredV1Marker> |
2135 | + DataProvider<CaseIgnorableV1Marker> |
2136 | + DataProvider<CasedV1Marker> |
2137 | + DataProvider<ChangesWhenCasefoldedV1Marker> |
2138 | + DataProvider<ChangesWhenCasemappedV1Marker> |
2139 | + DataProvider<ChangesWhenLowercasedV1Marker> |
2140 | + DataProvider<ChangesWhenNfkcCasefoldedV1Marker> |
2141 | + DataProvider<ChangesWhenTitlecasedV1Marker> |
2142 | + DataProvider<ChangesWhenUppercasedV1Marker> |
2143 | + DataProvider<DashV1Marker> |
2144 | + DataProvider<DefaultIgnorableCodePointV1Marker> |
2145 | + DataProvider<DeprecatedV1Marker> |
2146 | + DataProvider<DiacriticV1Marker> |
2147 | + DataProvider<EmojiV1Marker> |
2148 | + DataProvider<EmojiComponentV1Marker> |
2149 | + DataProvider<EmojiModifierV1Marker> |
2150 | + DataProvider<EmojiModifierBaseV1Marker> |
2151 | + DataProvider<EmojiPresentationV1Marker> |
2152 | + DataProvider<ExtendedPictographicV1Marker> |
2153 | + DataProvider<ExtenderV1Marker> |
2154 | + DataProvider<GraphemeBaseV1Marker> |
2155 | + DataProvider<GraphemeExtendV1Marker> |
2156 | + DataProvider<HexDigitV1Marker> |
2157 | + DataProvider<IdsBinaryOperatorV1Marker> |
2158 | + DataProvider<IdsTrinaryOperatorV1Marker> |
2159 | + DataProvider<IdContinueV1Marker> |
2160 | + DataProvider<IdStartV1Marker> |
2161 | + DataProvider<IdeographicV1Marker> |
2162 | + DataProvider<JoinControlV1Marker> |
2163 | + DataProvider<LogicalOrderExceptionV1Marker> |
2164 | + DataProvider<LowercaseV1Marker> |
2165 | + DataProvider<MathV1Marker> |
2166 | + DataProvider<NoncharacterCodePointV1Marker> |
2167 | + DataProvider<PatternSyntaxV1Marker> |
2168 | + DataProvider<PatternWhiteSpaceV1Marker> |
2169 | + DataProvider<QuotationMarkV1Marker> |
2170 | + DataProvider<RadicalV1Marker> |
2171 | + DataProvider<RegionalIndicatorV1Marker> |
2172 | + DataProvider<SentenceTerminalV1Marker> |
2173 | + DataProvider<SoftDottedV1Marker> |
2174 | + DataProvider<TerminalPunctuationV1Marker> |
2175 | + DataProvider<UnifiedIdeographV1Marker> |
2176 | + DataProvider<UppercaseV1Marker> |
2177 | + DataProvider<VariationSelectorV1Marker> |
2178 | + DataProvider<WhiteSpaceV1Marker> |
2179 | + DataProvider<XidContinueV1Marker> |
2180 | + DataProvider<XidStartV1Marker>, |
2181 | { |
2182 | use crate::runtime::UnicodeProperty; |
2183 | |
2184 | let prop = if let Some(prop) = UnicodeProperty::parse_ecma262_name(name) { |
2185 | prop |
2186 | } else { |
2187 | return Err(PropertiesError::UnexpectedPropertyName); |
2188 | }; |
2189 | match prop { |
2190 | UnicodeProperty::AsciiHexDigit => load_ascii_hex_digit(provider), |
2191 | UnicodeProperty::Alphabetic => load_alphabetic(provider), |
2192 | UnicodeProperty::BidiControl => load_bidi_control(provider), |
2193 | UnicodeProperty::BidiMirrored => load_bidi_mirrored(provider), |
2194 | UnicodeProperty::CaseIgnorable => load_case_ignorable(provider), |
2195 | UnicodeProperty::Cased => load_cased(provider), |
2196 | UnicodeProperty::ChangesWhenCasefolded => load_changes_when_casefolded(provider), |
2197 | UnicodeProperty::ChangesWhenCasemapped => load_changes_when_casemapped(provider), |
2198 | UnicodeProperty::ChangesWhenLowercased => load_changes_when_lowercased(provider), |
2199 | UnicodeProperty::ChangesWhenNfkcCasefolded => load_changes_when_nfkc_casefolded(provider), |
2200 | UnicodeProperty::ChangesWhenTitlecased => load_changes_when_titlecased(provider), |
2201 | UnicodeProperty::ChangesWhenUppercased => load_changes_when_uppercased(provider), |
2202 | UnicodeProperty::Dash => load_dash(provider), |
2203 | UnicodeProperty::DefaultIgnorableCodePoint => load_default_ignorable_code_point(provider), |
2204 | UnicodeProperty::Deprecated => load_deprecated(provider), |
2205 | UnicodeProperty::Diacritic => load_diacritic(provider), |
2206 | UnicodeProperty::Emoji => load_emoji(provider), |
2207 | UnicodeProperty::EmojiComponent => load_emoji_component(provider), |
2208 | UnicodeProperty::EmojiModifier => load_emoji_modifier(provider), |
2209 | UnicodeProperty::EmojiModifierBase => load_emoji_modifier_base(provider), |
2210 | UnicodeProperty::EmojiPresentation => load_emoji_presentation(provider), |
2211 | UnicodeProperty::ExtendedPictographic => load_extended_pictographic(provider), |
2212 | UnicodeProperty::Extender => load_extender(provider), |
2213 | UnicodeProperty::GraphemeBase => load_grapheme_base(provider), |
2214 | UnicodeProperty::GraphemeExtend => load_grapheme_extend(provider), |
2215 | UnicodeProperty::HexDigit => load_hex_digit(provider), |
2216 | UnicodeProperty::IdsBinaryOperator => load_ids_binary_operator(provider), |
2217 | UnicodeProperty::IdsTrinaryOperator => load_ids_trinary_operator(provider), |
2218 | UnicodeProperty::IdContinue => load_id_continue(provider), |
2219 | UnicodeProperty::IdStart => load_id_start(provider), |
2220 | UnicodeProperty::Ideographic => load_ideographic(provider), |
2221 | UnicodeProperty::JoinControl => load_join_control(provider), |
2222 | UnicodeProperty::LogicalOrderException => load_logical_order_exception(provider), |
2223 | UnicodeProperty::Lowercase => load_lowercase(provider), |
2224 | UnicodeProperty::Math => load_math(provider), |
2225 | UnicodeProperty::NoncharacterCodePoint => load_noncharacter_code_point(provider), |
2226 | UnicodeProperty::PatternSyntax => load_pattern_syntax(provider), |
2227 | UnicodeProperty::PatternWhiteSpace => load_pattern_white_space(provider), |
2228 | UnicodeProperty::QuotationMark => load_quotation_mark(provider), |
2229 | UnicodeProperty::Radical => load_radical(provider), |
2230 | UnicodeProperty::RegionalIndicator => load_regional_indicator(provider), |
2231 | UnicodeProperty::SentenceTerminal => load_sentence_terminal(provider), |
2232 | UnicodeProperty::SoftDotted => load_soft_dotted(provider), |
2233 | UnicodeProperty::TerminalPunctuation => load_terminal_punctuation(provider), |
2234 | UnicodeProperty::UnifiedIdeograph => load_unified_ideograph(provider), |
2235 | UnicodeProperty::Uppercase => load_uppercase(provider), |
2236 | UnicodeProperty::VariationSelector => load_variation_selector(provider), |
2237 | UnicodeProperty::WhiteSpace => load_white_space(provider), |
2238 | UnicodeProperty::XidContinue => load_xid_continue(provider), |
2239 | UnicodeProperty::XidStart => load_xid_start(provider), |
2240 | _ => Err(PropertiesError::UnexpectedPropertyName), |
2241 | } |
2242 | } |
2243 | |
#[cfg(test)]
mod tests {

    /// A General_Category group set contains digits from several scripts but not letters.
    #[test]
    fn test_general_category() {
        use icu::properties::sets;
        use icu::properties::GeneralCategoryGroup;

        let digits_data = sets::for_general_category_group(GeneralCategoryGroup::Number);
        let digits = digits_data.as_borrowed();

        assert!(digits.contains('5'));
        assert!(digits.contains('\u{0665}')); // U+0665 ARABIC-INDIC DIGIT FIVE
        assert!(digits.contains('\u{096b}')); // U+096B DEVANAGARI DIGIT FIVE

        assert!(!digits.contains('A'));
    }

    /// The set derived from the Script map for `Thai` contains Thai letters and digits only.
    #[test]
    fn test_script() {
        use icu::properties::maps;
        use icu::properties::Script;

        let thai_data = maps::script().get_set_for_value(Script::Thai);
        let thai = thai_data.as_borrowed();

        assert!(thai.contains('\u{0e01}')); // U+0E01 THAI CHARACTER KO KAI
        assert!(thai.contains('\u{0e50}')); // U+0E50 THAI DIGIT ZERO

        assert!(!thai.contains('A'));
        assert!(!thai.contains('\u{0e3f}')); // U+0E3F THAI CURRENCY SYMBOL BAHT
    }

    /// Each General_Category group must equal the union of the sets of its subcategories.
    #[test]
    fn test_gc_groupings() {
        use icu::properties::{maps, sets};
        use icu::properties::{GeneralCategory, GeneralCategoryGroup};
        use icu_collections::codepointinvlist::CodePointInversionListBuilder;

        let test_group = |category: GeneralCategoryGroup, subcategories: &[GeneralCategory]| {
            let category_set = sets::for_general_category_group(category);
            let category_set = category_set
                .as_code_point_inversion_list()
                .expect("The data should be valid");

            // Rebuild the group by hand from the per-subcategory sets.
            let mut builder = CodePointInversionListBuilder::new();
            for subcategory in subcategories {
                let gc_set_data = &maps::general_category().get_set_for_value(*subcategory);
                let gc_set = gc_set_data.as_borrowed();
                for range in gc_set.iter_ranges() {
                    builder.add_range32(&range);
                }
            }
            let combined_set = builder.build();
            println!("{category:?} {subcategories:?}");
            assert_eq!(
                category_set.get_inversion_list_vec(),
                combined_set.get_inversion_list_vec()
            );
        };

        test_group(
            GeneralCategoryGroup::Letter,
            &[
                GeneralCategory::UppercaseLetter,
                GeneralCategory::LowercaseLetter,
                GeneralCategory::TitlecaseLetter,
                GeneralCategory::ModifierLetter,
                GeneralCategory::OtherLetter,
            ],
        );
        test_group(
            GeneralCategoryGroup::Other,
            &[
                GeneralCategory::Control,
                GeneralCategory::Format,
                GeneralCategory::Unassigned,
                GeneralCategory::PrivateUse,
                GeneralCategory::Surrogate,
            ],
        );
        test_group(
            GeneralCategoryGroup::Mark,
            &[
                GeneralCategory::SpacingMark,
                GeneralCategory::EnclosingMark,
                GeneralCategory::NonspacingMark,
            ],
        );
        test_group(
            GeneralCategoryGroup::Number,
            &[
                GeneralCategory::DecimalNumber,
                GeneralCategory::LetterNumber,
                GeneralCategory::OtherNumber,
            ],
        );
        test_group(
            GeneralCategoryGroup::Punctuation,
            &[
                GeneralCategory::ConnectorPunctuation,
                GeneralCategory::DashPunctuation,
                GeneralCategory::ClosePunctuation,
                GeneralCategory::FinalPunctuation,
                GeneralCategory::InitialPunctuation,
                GeneralCategory::OtherPunctuation,
                GeneralCategory::OpenPunctuation,
            ],
        );
        test_group(
            GeneralCategoryGroup::Symbol,
            &[
                GeneralCategory::CurrencySymbol,
                GeneralCategory::ModifierSymbol,
                GeneralCategory::MathSymbol,
                GeneralCategory::OtherSymbol,
            ],
        );
        test_group(
            GeneralCategoryGroup::Separator,
            &[
                GeneralCategory::LineSeparator,
                GeneralCategory::ParagraphSeparator,
                GeneralCategory::SpaceSeparator,
            ],
        );
    }

    /// Surrogate code points are not `char`s, so they are probed with `contains32`.
    #[test]
    fn test_gc_surrogate() {
        use icu::properties::maps;
        use icu::properties::GeneralCategory;

        let surrogates_data =
            maps::general_category().get_set_for_value(GeneralCategory::Surrogate);
        let surrogates = surrogates_data.as_borrowed();

        assert!(surrogates.contains32(0xd800));
        assert!(surrogates.contains32(0xd900));
        assert!(surrogates.contains32(0xdfff));

        assert!(!surrogates.contains('A'));
    }
}
2388 | |