| 1 | // This file is part of ICU4X. For terms of use, please see the file |
| 2 | // called LICENSE at the top level of the ICU4X source tree |
| 3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
| 4 | |
| 5 | //! The functions in this module return a [`CodePointSetData`] containing |
| 6 | //! the set of characters with a particular Unicode property. |
| 7 | //! |
| 8 | //! The descriptions of most properties are taken from [`TR44`], the documentation for the |
| 9 | //! Unicode Character Database. Some properties are instead defined in [`TR18`], the |
| 10 | //! documentation for Unicode regular expressions. In particular, Annex C of this document |
| 11 | //! defines properties for POSIX compatibility. |
| 12 | //! |
| 13 | //! [`CodePointSetData`]: crate::sets::CodePointSetData |
| 14 | //! [`TR44`]: https://www.unicode.org/reports/tr44 |
| 15 | //! [`TR18`]: https://www.unicode.org/reports/tr18 |
| 16 | |
| 17 | use crate::error::PropertiesError; |
| 18 | use crate::provider::*; |
| 19 | use crate::*; |
| 20 | use core::iter::FromIterator; |
| 21 | use core::ops::RangeInclusive; |
| 22 | use icu_collections::codepointinvlist::CodePointInversionList; |
| 23 | use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList; |
| 24 | use icu_provider::prelude::*; |
| 25 | |
| 26 | // |
| 27 | // CodePointSet* structs, impls, & macros |
| 28 | // (a set with only code points) |
| 29 | // |
| 30 | |
/// A wrapper around code point set data. It is returned by APIs that return Unicode
/// property data in a set-like form, ex: a set of code points sharing the same
/// value for a Unicode property. Access its data via the borrowed version,
/// [`CodePointSetDataBorrowed`].
///
/// The payload is held under a single crate-private, type-erased marker, so this one
/// wrapper type is shared by every code point set property.
#[derive(Debug)]
pub struct CodePointSetData {
    // Payload whose property-specific marker was erased by `from_data()`.
    data: DataPayload<ErasedSetlikeMarker>,
}
| 39 | |
/// Private marker type for `CodePointSetData`
/// to work for all set properties at once.
///
/// `CodePointSetData::from_data()` casts any payload whose marker has the same
/// `Yokeable` to this marker, which drops the identity of the individual property.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub(crate) struct ErasedSetlikeMarker;
impl DataMarker for ErasedSetlikeMarker {
    // Must match the `Yokeable` required of markers accepted by `CodePointSetData::from_data()`.
    type Yokeable = PropertyCodePointSetV1<'static>;
}
| 47 | |
| 48 | impl CodePointSetData { |
| 49 | /// Construct a borrowed version of this type that can be queried. |
| 50 | /// |
| 51 | /// This owned version if returned by functions that use a runtime data provider. |
| 52 | #[inline ] |
| 53 | pub fn as_borrowed(&self) -> CodePointSetDataBorrowed<'_> { |
| 54 | CodePointSetDataBorrowed { |
| 55 | set: self.data.get(), |
| 56 | } |
| 57 | } |
| 58 | |
| 59 | /// Construct a new one from loaded data |
| 60 | /// |
| 61 | /// Typically it is preferable to use getters like [`load_ascii_hex_digit()`] instead |
| 62 | pub fn from_data<M>(data: DataPayload<M>) -> Self |
| 63 | where |
| 64 | M: DataMarker<Yokeable = PropertyCodePointSetV1<'static>>, |
| 65 | { |
| 66 | Self { data: data.cast() } |
| 67 | } |
| 68 | |
| 69 | /// Construct a new owned [`CodePointInversionList`] |
| 70 | pub fn from_code_point_inversion_list(set: CodePointInversionList<'static>) -> Self { |
| 71 | let set = PropertyCodePointSetV1::from_code_point_inversion_list(set); |
| 72 | CodePointSetData::from_data(DataPayload::<ErasedSetlikeMarker>::from_owned(set)) |
| 73 | } |
| 74 | |
| 75 | /// Convert this type to a [`CodePointInversionList`] as a borrowed value. |
| 76 | /// |
| 77 | /// The data backing this is extensible and supports multiple implementations. |
| 78 | /// Currently it is always [`CodePointInversionList`]; however in the future more backends may be |
| 79 | /// added, and users may select which at data generation time. |
| 80 | /// |
| 81 | /// This method returns an `Option` in order to return `None` when the backing data provider |
| 82 | /// cannot return a [`CodePointInversionList`], or cannot do so within the expected constant time |
| 83 | /// constraint. |
| 84 | pub fn as_code_point_inversion_list(&self) -> Option<&CodePointInversionList<'_>> { |
| 85 | self.data.get().as_code_point_inversion_list() |
| 86 | } |
| 87 | |
| 88 | /// Convert this type to a [`CodePointInversionList`], borrowing if possible, |
| 89 | /// otherwise allocating a new [`CodePointInversionList`]. |
| 90 | /// |
| 91 | /// The data backing this is extensible and supports multiple implementations. |
| 92 | /// Currently it is always [`CodePointInversionList`]; however in the future more backends may be |
| 93 | /// added, and users may select which at data generation time. |
| 94 | /// |
| 95 | /// The performance of the conversion to this specific return type will vary |
| 96 | /// depending on the data structure that is backing `self`. |
| 97 | pub fn to_code_point_inversion_list(&self) -> CodePointInversionList<'_> { |
| 98 | self.data.get().to_code_point_inversion_list() |
| 99 | } |
| 100 | } |
| 101 | |
/// A borrowed wrapper around code point set data, returned by
/// [`CodePointSetData::as_borrowed()`]. More efficient to query.
///
/// The type is `Copy`, so its query methods take `self` by value.
#[derive(Clone, Copy, Debug)]
pub struct CodePointSetDataBorrowed<'a> {
    // Reference to the set data that every query method delegates to.
    set: &'a PropertyCodePointSetV1<'a>,
}
| 108 | |
| 109 | impl CodePointSetDataBorrowed<'static> { |
| 110 | /// Cheaply converts a [`CodePointSetDataBorrowed<'static>`] into a [`CodePointSetData`]. |
| 111 | /// |
| 112 | /// Note: Due to branching and indirection, using [`CodePointSetData`] might inhibit some |
| 113 | /// compile-time optimizations that are possible with [`CodePointSetDataBorrowed`]. |
| 114 | pub const fn static_to_owned(self) -> CodePointSetData { |
| 115 | CodePointSetData { |
| 116 | data: DataPayload::from_static_ref(self.set), |
| 117 | } |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | impl<'a> CodePointSetDataBorrowed<'a> { |
| 122 | /// Check if the set contains a character |
| 123 | /// |
| 124 | /// ```rust |
| 125 | /// use icu::properties::sets; |
| 126 | /// |
| 127 | /// let alphabetic = sets::alphabetic(); |
| 128 | /// |
| 129 | /// assert!(!alphabetic.contains('3' )); |
| 130 | /// assert!(!alphabetic.contains('੩' )); // U+0A69 GURMUKHI DIGIT THREE |
| 131 | /// assert!(alphabetic.contains('A' )); |
| 132 | /// assert!(alphabetic.contains('Ä' )); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS |
| 133 | /// ``` |
| 134 | #[inline ] |
| 135 | pub fn contains(self, ch: char) -> bool { |
| 136 | self.set.contains(ch) |
| 137 | } |
| 138 | |
| 139 | /// Check if the set contains a character as a UTF32 code unit |
| 140 | /// |
| 141 | /// ```rust |
| 142 | /// use icu::properties::sets; |
| 143 | /// |
| 144 | /// let alphabetic = sets::alphabetic(); |
| 145 | /// |
| 146 | /// assert!(!alphabetic.contains32(0x0A69)); // U+0A69 GURMUKHI DIGIT THREE |
| 147 | /// assert!(alphabetic.contains32(0x00C4)); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS |
| 148 | /// ``` |
| 149 | #[inline ] |
| 150 | pub fn contains32(self, ch: u32) -> bool { |
| 151 | self.set.contains32(ch) |
| 152 | } |
| 153 | |
| 154 | // Yields an [`Iterator`] returning the ranges of the code points that are |
| 155 | /// included in the [`CodePointSetData`] |
| 156 | /// |
| 157 | /// Ranges are returned as [`RangeInclusive`], which is inclusive of its |
| 158 | /// `end` bound value. An end-inclusive behavior matches the ICU4C/J |
| 159 | /// behavior of ranges, ex: `UnicodeSet::contains(UChar32 start, UChar32 end)`. |
| 160 | /// |
| 161 | /// # Example |
| 162 | /// |
| 163 | /// ``` |
| 164 | /// use icu::properties::sets; |
| 165 | /// |
| 166 | /// let alphabetic = sets::alphabetic(); |
| 167 | /// let mut ranges = alphabetic.iter_ranges(); |
| 168 | /// |
| 169 | /// assert_eq!(Some(0x0041..=0x005A), ranges.next()); // 'A'..'Z' |
| 170 | /// assert_eq!(Some(0x0061..=0x007A), ranges.next()); // 'a'..'z' |
| 171 | /// ``` |
| 172 | #[inline ] |
| 173 | pub fn iter_ranges(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { |
| 174 | self.set.iter_ranges() |
| 175 | } |
| 176 | |
| 177 | // Yields an [`Iterator`] returning the ranges of the code points that are |
| 178 | /// *not* included in the [`CodePointSetData`] |
| 179 | /// |
| 180 | /// Ranges are returned as [`RangeInclusive`], which is inclusive of its |
| 181 | /// `end` bound value. An end-inclusive behavior matches the ICU4C/J |
| 182 | /// behavior of ranges, ex: `UnicodeSet::contains(UChar32 start, UChar32 end)`. |
| 183 | /// |
| 184 | /// # Example |
| 185 | /// |
| 186 | /// ``` |
| 187 | /// use icu::properties::sets; |
| 188 | /// |
| 189 | /// let alphabetic = sets::alphabetic(); |
| 190 | /// let mut ranges = alphabetic.iter_ranges(); |
| 191 | /// |
| 192 | /// assert_eq!(Some(0x0041..=0x005A), ranges.next()); // 'A'..'Z' |
| 193 | /// assert_eq!(Some(0x0061..=0x007A), ranges.next()); // 'a'..'z' |
| 194 | /// ``` |
| 195 | #[inline ] |
| 196 | pub fn iter_ranges_complemented(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { |
| 197 | self.set.iter_ranges_complemented() |
| 198 | } |
| 199 | } |
| 200 | |
| 201 | // |
| 202 | // UnicodeSet* structs, impls, & macros |
| 203 | // (a set with code points + strings) |
| 204 | // |
| 205 | |
/// A wrapper around `UnicodeSet` data (characters and strings)
///
/// Query it through the borrowed version, [`UnicodeSetDataBorrowed`], obtained with
/// [`UnicodeSetData::as_borrowed()`].
#[derive(Debug)]
pub struct UnicodeSetData {
    // Payload whose property-specific marker was erased by `from_data()`.
    data: DataPayload<ErasedUnicodeSetlikeMarker>,
}
| 211 | |
/// Private marker type that lets `UnicodeSetData` hold the payload of any property
/// whose `Yokeable` is `PropertyUnicodeSetV1`; `UnicodeSetData::from_data()` casts to it.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub(crate) struct ErasedUnicodeSetlikeMarker;
impl DataMarker for ErasedUnicodeSetlikeMarker {
    // Must match the `Yokeable` required of markers accepted by `UnicodeSetData::from_data()`.
    type Yokeable = PropertyUnicodeSetV1<'static>;
}
| 217 | |
| 218 | impl UnicodeSetData { |
| 219 | /// Construct a borrowed version of this type that can be queried. |
| 220 | /// |
| 221 | /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it |
| 222 | /// up front. |
| 223 | #[inline ] |
| 224 | pub fn as_borrowed(&self) -> UnicodeSetDataBorrowed<'_> { |
| 225 | UnicodeSetDataBorrowed { |
| 226 | set: self.data.get(), |
| 227 | } |
| 228 | } |
| 229 | |
| 230 | /// Construct a new one from loaded data |
| 231 | /// |
| 232 | /// Typically it is preferable to use getters instead |
| 233 | pub fn from_data<M>(data: DataPayload<M>) -> Self |
| 234 | where |
| 235 | M: DataMarker<Yokeable = PropertyUnicodeSetV1<'static>>, |
| 236 | { |
| 237 | Self { data: data.cast() } |
| 238 | } |
| 239 | |
| 240 | /// Construct a new owned [`CodePointInversionListAndStringList`] |
| 241 | pub fn from_code_point_inversion_list_string_list( |
| 242 | set: CodePointInversionListAndStringList<'static>, |
| 243 | ) -> Self { |
| 244 | let set = PropertyUnicodeSetV1::from_code_point_inversion_list_string_list(set); |
| 245 | UnicodeSetData::from_data(DataPayload::<ErasedUnicodeSetlikeMarker>::from_owned(set)) |
| 246 | } |
| 247 | |
| 248 | /// Convert this type to a [`CodePointInversionListAndStringList`] as a borrowed value. |
| 249 | /// |
| 250 | /// The data backing this is extensible and supports multiple implementations. |
| 251 | /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be |
| 252 | /// added, and users may select which at data generation time. |
| 253 | /// |
| 254 | /// This method returns an `Option` in order to return `None` when the backing data provider |
| 255 | /// cannot return a [`CodePointInversionListAndStringList`], or cannot do so within the expected constant time |
| 256 | /// constraint. |
| 257 | pub fn as_code_point_inversion_list_string_list( |
| 258 | &self, |
| 259 | ) -> Option<&CodePointInversionListAndStringList<'_>> { |
| 260 | self.data.get().as_code_point_inversion_list_string_list() |
| 261 | } |
| 262 | |
| 263 | /// Convert this type to a [`CodePointInversionListAndStringList`], borrowing if possible, |
| 264 | /// otherwise allocating a new [`CodePointInversionListAndStringList`]. |
| 265 | /// |
| 266 | /// The data backing this is extensible and supports multiple implementations. |
| 267 | /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be |
| 268 | /// added, and users may select which at data generation time. |
| 269 | /// |
| 270 | /// The performance of the conversion to this specific return type will vary |
| 271 | /// depending on the data structure that is backing `self`. |
| 272 | pub fn to_code_point_inversion_list_string_list( |
| 273 | &self, |
| 274 | ) -> CodePointInversionListAndStringList<'_> { |
| 275 | self.data.get().to_code_point_inversion_list_string_list() |
| 276 | } |
| 277 | } |
| 278 | |
/// A borrowed wrapper around `UnicodeSet` data (characters and strings), returned by
/// [`UnicodeSetData::as_borrowed()`]. More efficient to query.
#[derive(Clone, Copy, Debug)]
pub struct UnicodeSetDataBorrowed<'a> {
    // Reference to the set data that every query method delegates to.
    set: &'a PropertyUnicodeSetV1<'a>,
}
| 285 | |
| 286 | impl<'a> UnicodeSetDataBorrowed<'a> { |
| 287 | /// Check if the set contains the string. Strings consisting of one character |
| 288 | /// are treated as a character/code point. |
| 289 | /// |
| 290 | /// This matches ICU behavior for ICU's `UnicodeSet`. |
| 291 | #[inline ] |
| 292 | pub fn contains(self, s: &str) -> bool { |
| 293 | self.set.contains(s) |
| 294 | } |
| 295 | |
| 296 | /// Check if the set contains a character as a UTF32 code unit |
| 297 | #[inline ] |
| 298 | pub fn contains32(&self, cp: u32) -> bool { |
| 299 | self.set.contains32(cp) |
| 300 | } |
| 301 | |
| 302 | /// Check if the set contains the code point corresponding to the Rust character. |
| 303 | #[inline ] |
| 304 | pub fn contains_char(&self, ch: char) -> bool { |
| 305 | self.set.contains_char(ch) |
| 306 | } |
| 307 | } |
| 308 | |
| 309 | impl UnicodeSetDataBorrowed<'static> { |
| 310 | /// Cheaply converts a [`UnicodeSetDataBorrowed<'static>`] into a [`UnicodeSetData`]. |
| 311 | /// |
| 312 | /// Note: Due to branching and indirection, using [`UnicodeSetData`] might inhibit some |
| 313 | /// compile-time optimizations that are possible with [`UnicodeSetDataBorrowed`]. |
| 314 | pub const fn static_to_owned(self) -> UnicodeSetData { |
| 315 | UnicodeSetData { |
| 316 | data: DataPayload::from_static_ref(self.set), |
| 317 | } |
| 318 | } |
| 319 | } |
| 320 | |
| 321 | pub(crate) fn load_set_data<M, P>(provider: &P) -> Result<CodePointSetData, PropertiesError> |
| 322 | where |
| 323 | M: KeyedDataMarker<Yokeable = PropertyCodePointSetV1<'static>>, |
| 324 | P: DataProvider<M> + ?Sized, |
| 325 | { |
| 326 | Ok(provider |
| 327 | .load(Default::default()) |
| 328 | .and_then(DataResponse::take_payload) |
| 329 | .map(op:CodePointSetData::from_data)?) |
| 330 | } |
| 331 | |
| 332 | // |
| 333 | // Binary property getter fns |
| 334 | // (data as code point sets) |
| 335 | // |
| 336 | |
// Generates the two accessors for one binary (code point set) property:
//   * `$funcname(provider)` loads the set from a caller-supplied `DataProvider` through
//     `load_set_data` and returns an owned `CodePointSetData`;
//   * `$constname()` returns a `CodePointSetDataBorrowed<'static>` that points at the baked
//     singleton `$singleton_name`; it is only compiled with the `compiled_data` feature.
// The doc attributes given after `func:` are attached to `$constname` only; `$funcname`
// receives a generated doc line that refers back to `$constname`.
macro_rules! make_code_point_set_property {
    (
        // currently unused
        property: $property:expr;
        // currently unused
        marker: $marker_name:ident;
        keyed_data_marker: $keyed_data_marker:ty;
        func:
        // At least one doc attribute is required by the `+` repetition.
        $(#[$doc:meta])+
        $cvis:vis const fn $constname:ident() => $singleton_name:ident;
        $vis:vis fn $funcname:ident();
    ) => {
        #[doc = concat!("A version of [`", stringify!($constname), "()`] that uses custom data provided by a [`DataProvider`].")]
        ///
        /// Note that this will return an owned version of the data. Functionality is available on
        /// the borrowed version, accessible through [`CodePointSetData::as_borrowed`].
        $vis fn $funcname(
            provider: &(impl DataProvider<$keyed_data_marker> + ?Sized)
        ) -> Result<CodePointSetData, PropertiesError> {
            load_set_data(provider)
        }

        $(#[$doc])*
        #[cfg(feature = "compiled_data")]
        $cvis const fn $constname() -> CodePointSetDataBorrowed<'static> {
            CodePointSetDataBorrowed {
                set: crate::provider::Baked::$singleton_name,
            }
        }
    }
}
| 368 | |
// ASCII_Hex_Digit: expands to `ascii_hex_digit()` (compiled data) and
// `load_ascii_hex_digit()` (custom data provider).
make_code_point_set_property! {
    property: "ASCII_Hex_Digit";
    marker: AsciiHexDigitProperty;
    keyed_data_marker: AsciiHexDigitV1Marker;
    func:
    /// ASCII characters commonly used for the representation of hexadecimal numbers
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let ascii_hex_digit = sets::ascii_hex_digit();
    ///
    /// assert!(ascii_hex_digit.contains('3'));
    /// assert!(!ascii_hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(ascii_hex_digit.contains('A'));
    /// assert!(!ascii_hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
    pub const fn ascii_hex_digit() => SINGLETON_PROPS_AHEX_V1;
    pub fn load_ascii_hex_digit();
}
| 395 | |
// Alnum: expands to `alnum()` (compiled data) and `load_alnum()` (custom data provider).
make_code_point_set_property! {
    property: "Alnum";
    marker: AlnumProperty;
    keyed_data_marker: AlnumV1Marker;
    func:
    /// Characters with the Alphabetic or Decimal_Number property.
    /// This is defined for POSIX compatibility.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)

    pub const fn alnum() => SINGLETON_PROPS_ALNUM_V1;
    pub fn load_alnum();
}
| 407 | |
// Alphabetic: expands to `alphabetic()` (compiled data) and `load_alphabetic()`
// (custom data provider).
make_code_point_set_property! {
    property: "Alphabetic";
    marker: AlphabeticProperty;
    keyed_data_marker: AlphabeticV1Marker;
    func:
    /// Alphabetic characters
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let alphabetic = sets::alphabetic();
    ///
    /// assert!(!alphabetic.contains('3'));
    /// assert!(!alphabetic.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(alphabetic.contains('A'));
    /// assert!(alphabetic.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```

    pub const fn alphabetic() => SINGLETON_PROPS_ALPHA_V1;
    pub fn load_alphabetic();
}
| 435 | |
// Bidi_Control: expands to `bidi_control()` (compiled data) and `load_bidi_control()`
// (custom data provider).
make_code_point_set_property! {
    property: "Bidi_Control";
    marker: BidiControlProperty;
    keyed_data_marker: BidiControlV1Marker;
    func:
    /// Format control characters which have specific functions in the Unicode Bidirectional
    /// Algorithm
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let bidi_control = sets::bidi_control();
    ///
    /// assert!(bidi_control.contains32(0x200F));  // RIGHT-TO-LEFT MARK
    /// assert!(!bidi_control.contains('ش'));  // U+0634 ARABIC LETTER SHEEN
    /// ```

    pub const fn bidi_control() => SINGLETON_PROPS_BIDI_C_V1;
    pub fn load_bidi_control();
}
| 462 | |
// Bidi_Mirrored: expands to `bidi_mirrored()` (compiled data) and `load_bidi_mirrored()`
// (custom data provider).
make_code_point_set_property! {
    property: "Bidi_Mirrored";
    marker: BidiMirroredProperty;
    keyed_data_marker: BidiMirroredV1Marker;
    func:
    /// Characters that are mirrored in bidirectional text
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let bidi_mirrored = sets::bidi_mirrored();
    ///
    /// assert!(bidi_mirrored.contains('['));
    /// assert!(bidi_mirrored.contains(']'));
    /// assert!(bidi_mirrored.contains('∑'));  // U+2211 N-ARY SUMMATION
    /// assert!(!bidi_mirrored.contains('ཉ'));  // U+0F49 TIBETAN LETTER NYA
    /// ```

    pub const fn bidi_mirrored() => SINGLETON_PROPS_BIDI_M_V1;
    pub fn load_bidi_mirrored();
}
| 490 | |
// Blank: expands to `blank()` (compiled data) and `load_blank()` (custom data provider).
make_code_point_set_property! {
    property: "Blank";
    marker: BlankProperty;
    keyed_data_marker: BlankV1Marker;
    func:
    /// Horizontal whitespace characters
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)

    pub const fn blank() => SINGLETON_PROPS_BLANK_V1;
    pub fn load_blank();
}
| 501 | |
// Cased: expands to `cased()` (compiled data) and `load_cased()` (custom data provider).
make_code_point_set_property! {
    property: "Cased";
    marker: CasedProperty;
    keyed_data_marker: CasedV1Marker;
    func:
    /// Uppercase, lowercase, and titlecase characters
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let cased = sets::cased();
    ///
    /// assert!(cased.contains('Ꙡ'));  // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE
    /// assert!(!cased.contains('ދ'));  // U+078B THAANA LETTER DHAALU
    /// ```

    pub const fn cased() => SINGLETON_PROPS_CASED_V1;
    pub fn load_cased();
}
| 527 | |
// Case_Ignorable: expands to `case_ignorable()` (compiled data) and
// `load_case_ignorable()` (custom data provider).
make_code_point_set_property! {
    property: "Case_Ignorable";
    marker: CaseIgnorableProperty;
    keyed_data_marker: CaseIgnorableV1Marker;
    func:
    /// Characters which are ignored for casing purposes
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let case_ignorable = sets::case_ignorable();
    ///
    /// assert!(case_ignorable.contains(':'));
    /// assert!(!case_ignorable.contains('λ'));  // U+03BB GREEK SMALL LETTER LAMDA
    /// ```

    pub const fn case_ignorable() => SINGLETON_PROPS_CI_V1;
    pub fn load_case_ignorable();
}
| 553 | |
// Full_Composition_Exclusion: expands to `full_composition_exclusion()` (compiled data)
// and `load_full_composition_exclusion()` (custom data provider).
make_code_point_set_property! {
    property: "Full_Composition_Exclusion";
    marker: FullCompositionExclusionProperty;
    keyed_data_marker: FullCompositionExclusionV1Marker;
    func:
    /// Characters that are excluded from composition.
    /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt>
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)

    pub const fn full_composition_exclusion() => SINGLETON_PROPS_COMP_EX_V1;
    pub fn load_full_composition_exclusion();
}
| 565 | |
// Changes_When_Casefolded: expands to `changes_when_casefolded()` (compiled data) and
// `load_changes_when_casefolded()` (custom data provider).
make_code_point_set_property! {
    property: "Changes_When_Casefolded";
    marker: ChangesWhenCasefoldedProperty;
    keyed_data_marker: ChangesWhenCasefoldedV1Marker;
    func:
    /// Characters whose normalized forms are not stable under case folding
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let changes_when_casefolded = sets::changes_when_casefolded();
    ///
    /// assert!(changes_when_casefolded.contains('ß'));  // U+00DF LATIN SMALL LETTER SHARP S
    /// assert!(!changes_when_casefolded.contains('ᜉ'));  // U+1709 TAGALOG LETTER PA
    /// ```

    pub const fn changes_when_casefolded() => SINGLETON_PROPS_CWCF_V1;
    pub fn load_changes_when_casefolded();
}
| 591 | |
// Changes_When_Casemapped: expands to `changes_when_casemapped()` (compiled data) and
// `load_changes_when_casemapped()` (custom data provider).
make_code_point_set_property! {
    property: "Changes_When_Casemapped";
    marker: ChangesWhenCasemappedProperty;
    keyed_data_marker: ChangesWhenCasemappedV1Marker;
    func:
    /// Characters which may change when they undergo case mapping
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)

    pub const fn changes_when_casemapped() => SINGLETON_PROPS_CWCM_V1;
    pub fn load_changes_when_casemapped();
}
| 602 | |
// Changes_When_NFKC_Casefolded: expands to `changes_when_nfkc_casefolded()` (compiled
// data) and `load_changes_when_nfkc_casefolded()` (custom data provider).
make_code_point_set_property! {
    property: "Changes_When_NFKC_Casefolded";
    marker: ChangesWhenNfkcCasefoldedProperty;
    keyed_data_marker: ChangesWhenNfkcCasefoldedV1Marker;
    func:
    /// Characters which are not identical to their NFKC_Casefold mapping
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let changes_when_nfkc_casefolded = sets::changes_when_nfkc_casefolded();
    ///
    /// assert!(changes_when_nfkc_casefolded.contains('🄵'));  // U+1F135 SQUARED LATIN CAPITAL LETTER F
    /// assert!(!changes_when_nfkc_casefolded.contains('f'));
    /// ```

    pub const fn changes_when_nfkc_casefolded() => SINGLETON_PROPS_CWKCF_V1;
    pub fn load_changes_when_nfkc_casefolded();
}
| 628 | |
// Changes_When_Lowercased: expands to `changes_when_lowercased()` (compiled data) and
// `load_changes_when_lowercased()` (custom data provider).
make_code_point_set_property! {
    property: "Changes_When_Lowercased";
    marker: ChangesWhenLowercasedProperty;
    keyed_data_marker: ChangesWhenLowercasedV1Marker;
    func:
    /// Characters whose normalized forms are not stable under a toLowercase mapping
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let changes_when_lowercased = sets::changes_when_lowercased();
    ///
    /// assert!(changes_when_lowercased.contains('Ⴔ'));  // U+10B4 GEORGIAN CAPITAL LETTER PHAR
    /// assert!(!changes_when_lowercased.contains('ფ'));  // U+10E4 GEORGIAN LETTER PHAR
    /// ```

    pub const fn changes_when_lowercased() => SINGLETON_PROPS_CWL_V1;
    pub fn load_changes_when_lowercased();
}
| 654 | |
// Changes_When_Titlecased: expands to `changes_when_titlecased()` (compiled data) and
// `load_changes_when_titlecased()` (custom data provider).
make_code_point_set_property! {
    property: "Changes_When_Titlecased";
    marker: ChangesWhenTitlecasedProperty;
    keyed_data_marker: ChangesWhenTitlecasedV1Marker;
    func:
    /// Characters whose normalized forms are not stable under a toTitlecase mapping
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let changes_when_titlecased = sets::changes_when_titlecased();
    ///
    /// assert!(changes_when_titlecased.contains('æ'));  // U+00E6 LATIN SMALL LETTER AE
    /// assert!(!changes_when_titlecased.contains('Æ'));  // U+00C6 LATIN CAPITAL LETTER AE
    /// ```

    pub const fn changes_when_titlecased() => SINGLETON_PROPS_CWT_V1;
    pub fn load_changes_when_titlecased();
}
| 680 | |
// Changes_When_Uppercased: expands to `changes_when_uppercased()` (compiled data) and
// `load_changes_when_uppercased()` (custom data provider).
make_code_point_set_property! {
    property: "Changes_When_Uppercased";
    marker: ChangesWhenUppercasedProperty;
    keyed_data_marker: ChangesWhenUppercasedV1Marker;
    func:
    /// Characters whose normalized forms are not stable under a toUppercase mapping
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let changes_when_uppercased = sets::changes_when_uppercased();
    ///
    /// assert!(changes_when_uppercased.contains('ւ'));  // U+0582 ARMENIAN SMALL LETTER YIWN
    /// assert!(!changes_when_uppercased.contains('Ւ'));  // U+0552 ARMENIAN CAPITAL LETTER YIWN
    /// ```

    pub const fn changes_when_uppercased() => SINGLETON_PROPS_CWU_V1;
    pub fn load_changes_when_uppercased();
}
| 706 | |
// Dash: expands to `dash()` (compiled data) and `load_dash()` (custom data provider).
make_code_point_set_property! {
    property: "Dash";
    marker: DashProperty;
    keyed_data_marker: DashV1Marker;
    func:
    /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus
    /// their compatibility equivalents
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let dash = sets::dash();
    ///
    /// assert!(dash.contains('⸺'));  // U+2E3A TWO-EM DASH
    /// assert!(dash.contains('-'));  // U+002D
    /// assert!(!dash.contains('='));  // U+003D
    /// ```

    pub const fn dash() => SINGLETON_PROPS_DASH_V1;
    pub fn load_dash();
}
| 734 | |
// Deprecated: expands to `deprecated()` (compiled data) and `load_deprecated()`
// (custom data provider).
make_code_point_set_property! {
    property: "Deprecated";
    marker: DeprecatedProperty;
    keyed_data_marker: DeprecatedV1Marker;
    func:
    /// Deprecated characters. No characters will ever be removed from the standard, but the
    /// usage of deprecated characters is strongly discouraged.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let deprecated = sets::deprecated();
    ///
    /// assert!(deprecated.contains('ឣ'));  // U+17A3 KHMER INDEPENDENT VOWEL QAQ
    /// assert!(!deprecated.contains('A'));
    /// ```

    pub const fn deprecated() => SINGLETON_PROPS_DEP_V1;
    pub fn load_deprecated();
}
| 761 | |
// Default_Ignorable_Code_Point: expands to `default_ignorable_code_point()` (compiled
// data) and `load_default_ignorable_code_point()` (custom data provider).
make_code_point_set_property! {
    property: "Default_Ignorable_Code_Point";
    marker: DefaultIgnorableCodePointProperty;
    keyed_data_marker: DefaultIgnorableCodePointV1Marker;
    func:
    /// For programmatic determination of default ignorable code points. New characters that
    /// should be ignored in rendering (unless explicitly supported) will be assigned in these
    /// ranges, permitting programs to correctly handle the default rendering of such
    /// characters when not otherwise supported.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::sets;
    ///
    /// let default_ignorable_code_point = sets::default_ignorable_code_point();
    ///
    /// assert!(default_ignorable_code_point.contains32(0x180B));  // MONGOLIAN FREE VARIATION SELECTOR ONE
    /// assert!(!default_ignorable_code_point.contains('E'));
    /// ```

    pub const fn default_ignorable_code_point() => SINGLETON_PROPS_DI_V1;
    pub fn load_default_ignorable_code_point();
}
| 790 | |
| 791 | make_code_point_set_property! { |
| 792 | property: "Diacritic" ; |
| 793 | marker: DiacriticProperty; |
| 794 | keyed_data_marker: DiacriticV1Marker; |
| 795 | func: |
| 796 | /// Characters that linguistically modify the meaning of another character to which they apply. |
| 797 | /// |
| 798 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 799 | /// |
| 800 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 801 | /// |
| 802 | /// # Example |
| 803 | /// |
| 804 | /// ``` |
| 805 | /// use icu::properties::sets; |
| 806 | /// |
| 807 | /// let diacritic = sets::diacritic(); |
| 808 | /// |
| 809 | /// assert!(diacritic.contains('\u{05B3}')); // U+05B3 HEBREW POINT HATAF QAMATS |
| 810 | /// assert!(!diacritic.contains('א')); // U+05D0 HEBREW LETTER ALEF |
| 811 | /// ``` |
| 812 | |
| 813 | pub const fn diacritic() => SINGLETON_PROPS_DIA_V1; |
| 814 | pub fn load_diacritic(); |
| 815 | } |
| 816 | |
| 817 | make_code_point_set_property! { |
| 818 | property: "Emoji_Modifier_Base" ; |
| 819 | marker: EmojiModifierBaseProperty; |
| 820 | keyed_data_marker: EmojiModifierBaseV1Marker; |
| 821 | func: |
| 822 | /// Characters that can serve as a base for emoji modifiers. |
| 823 | /// |
| 824 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 825 | /// |
| 826 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 827 | /// |
| 828 | /// # Example |
| 829 | /// |
| 830 | /// ``` |
| 831 | /// use icu::properties::sets; |
| 832 | /// |
| 833 | /// let emoji_modifier_base = sets::emoji_modifier_base(); |
| 834 | /// |
| 835 | /// assert!(emoji_modifier_base.contains('✊')); // U+270A RAISED FIST |
| 836 | /// assert!(!emoji_modifier_base.contains('⛰')); // U+26F0 MOUNTAIN |
| 837 | /// ``` |
| 838 | |
| 839 | pub const fn emoji_modifier_base() => SINGLETON_PROPS_EBASE_V1; |
| 840 | pub fn load_emoji_modifier_base(); |
| 841 | } |
| 842 | |
| 843 | make_code_point_set_property! { |
| 844 | property: "Emoji_Component" ; |
| 845 | marker: EmojiComponentProperty; |
| 846 | keyed_data_marker: EmojiComponentV1Marker; |
| 847 | func: |
| 848 | /// Characters used in emoji sequences that normally do not appear on emoji keyboards as |
| 849 | /// separate choices, such as base characters for emoji keycaps. |
| 850 | /// |
| 851 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 852 | /// |
| 853 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 854 | /// |
| 855 | /// # Example |
| 856 | /// |
| 857 | /// ``` |
| 858 | /// use icu::properties::sets; |
| 859 | /// |
| 860 | /// let emoji_component = sets::emoji_component(); |
| 861 | /// |
| 862 | /// assert!(emoji_component.contains('🇹')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T |
| 863 | /// assert!(emoji_component.contains32(0x20E3)); // U+20E3 COMBINING ENCLOSING KEYCAP |
| 864 | /// assert!(emoji_component.contains('7')); // U+0037 DIGIT SEVEN (a keycap base character) |
| 865 | /// assert!(!emoji_component.contains('T')); // U+0054 LATIN CAPITAL LETTER T |
| 866 | /// ``` |
| 867 | |
| 868 | pub const fn emoji_component() => SINGLETON_PROPS_ECOMP_V1; |
| 869 | pub fn load_emoji_component(); |
| 870 | } |
| 871 | |
| 872 | make_code_point_set_property! { |
| 873 | property: "Emoji_Modifier" ; |
| 874 | marker: EmojiModifierProperty; |
| 875 | keyed_data_marker: EmojiModifierV1Marker; |
| 876 | func: |
| 877 | /// Characters that are emoji modifiers. |
| 878 | /// |
| 879 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 880 | /// |
| 881 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 882 | /// |
| 883 | /// # Example |
| 884 | /// |
| 885 | /// ``` |
| 886 | /// use icu::properties::sets; |
| 887 | /// |
| 888 | /// let emoji_modifier = sets::emoji_modifier(); |
| 889 | /// |
| 890 | /// assert!(emoji_modifier.contains32(0x1F3FD)); // U+1F3FD EMOJI MODIFIER FITZPATRICK TYPE-4 |
| 891 | /// assert!(!emoji_modifier.contains32(0x200C)); // U+200C ZERO WIDTH NON-JOINER |
| 892 | /// ``` |
| 893 | |
| 894 | pub const fn emoji_modifier() => SINGLETON_PROPS_EMOD_V1; |
| 895 | pub fn load_emoji_modifier(); |
| 896 | } |
| 897 | |
| 898 | make_code_point_set_property! { |
| 899 | property: "Emoji" ; |
| 900 | marker: EmojiProperty; |
| 901 | keyed_data_marker: EmojiV1Marker; |
| 902 | func: |
| 903 | /// Characters that are emoji. |
| 904 | /// |
| 905 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 906 | /// |
| 907 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 908 | /// |
| 909 | /// # Example |
| 910 | /// |
| 911 | /// ``` |
| 912 | /// use icu::properties::sets; |
| 913 | /// |
| 914 | /// let emoji = sets::emoji(); |
| 915 | /// |
| 916 | /// assert!(emoji.contains('🔥')); // U+1F525 FIRE |
| 917 | /// assert!(!emoji.contains('V')); // U+0056 LATIN CAPITAL LETTER V |
| 918 | /// ``` |
| 919 | |
| 920 | pub const fn emoji() => SINGLETON_PROPS_EMOJI_V1; |
| 921 | pub fn load_emoji(); |
| 922 | } |
| 923 | |
| 924 | make_code_point_set_property! { |
| 925 | property: "Emoji_Presentation" ; |
| 926 | marker: EmojiPresentationProperty; |
| 927 | keyed_data_marker: EmojiPresentationV1Marker; |
| 928 | func: |
| 929 | /// Characters that have emoji presentation by default. |
| 930 | /// |
| 931 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 932 | /// |
| 933 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 934 | /// |
| 935 | /// # Example |
| 936 | /// |
| 937 | /// ``` |
| 938 | /// use icu::properties::sets; |
| 939 | /// |
| 940 | /// let emoji_presentation = sets::emoji_presentation(); |
| 941 | /// |
| 942 | /// assert!(emoji_presentation.contains('🦬')); // U+1F9AC BISON |
| 943 | /// assert!(!emoji_presentation.contains('♻')); // U+267B BLACK UNIVERSAL RECYCLING SYMBOL |
| 944 | /// ``` |
| 945 | |
| 946 | pub const fn emoji_presentation() => SINGLETON_PROPS_EPRES_V1; |
| 947 | pub fn load_emoji_presentation(); |
| 948 | } |
| 949 | |
| 950 | make_code_point_set_property! { |
| 951 | property: "Extender" ; |
| 952 | marker: ExtenderProperty; |
| 953 | keyed_data_marker: ExtenderV1Marker; |
| 954 | func: |
| 955 | /// Characters whose principal function is to extend the value of a preceding alphabetic |
| 956 | /// character or to extend the shape of adjacent characters, e.g. iteration and prolonged sound marks. |
| 957 | /// |
| 958 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 959 | /// |
| 960 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 961 | /// |
| 962 | /// # Example |
| 963 | /// |
| 964 | /// ``` |
| 965 | /// use icu::properties::sets; |
| 966 | /// |
| 967 | /// let extender = sets::extender(); |
| 968 | /// |
| 969 | /// assert!(extender.contains('ヾ')); // U+30FE KATAKANA VOICED ITERATION MARK |
| 970 | /// assert!(extender.contains('ー')); // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK |
| 971 | /// assert!(!extender.contains('・')); // U+30FB KATAKANA MIDDLE DOT |
| 972 | /// ``` |
| 973 | |
| 974 | pub const fn extender() => SINGLETON_PROPS_EXT_V1; |
| 975 | pub fn load_extender(); |
| 976 | } |
| 977 | |
| 978 | make_code_point_set_property! { |
| 979 | property: "Extended_Pictographic" ; |
| 980 | marker: ExtendedPictographicProperty; |
| 981 | keyed_data_marker: ExtendedPictographicV1Marker; |
| 982 | func: |
| 983 | /// Pictographic symbols, as well as reserved ranges in blocks largely associated with |
| 984 | /// emoji characters. |
| 985 | /// |
| 986 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 987 | /// |
| 988 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 989 | /// |
| 990 | /// # Example |
| 991 | /// |
| 992 | /// ``` |
| 993 | /// use icu::properties::sets; |
| 994 | /// |
| 995 | /// let extended_pictographic = sets::extended_pictographic(); |
| 996 | /// |
| 997 | /// assert!(extended_pictographic.contains('🥳')); // U+1F973 FACE WITH PARTY HORN AND PARTY HAT |
| 998 | /// assert!(!extended_pictographic.contains('🇪')); // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E |
| 999 | /// ``` |
| 1000 | |
| 1001 | pub const fn extended_pictographic() => SINGLETON_PROPS_EXTPICT_V1; |
| 1002 | pub fn load_extended_pictographic(); |
| 1003 | } |
| 1004 | |
| 1005 | make_code_point_set_property! { |
| 1006 | property: "Graph" ; |
| 1007 | marker: GraphProperty; |
| 1008 | keyed_data_marker: GraphV1Marker; |
| 1009 | func: |
| 1010 | /// Visible characters. |
| 1011 | /// This is defined for POSIX compatibility (see Annex C of [TR18](https://www.unicode.org/reports/tr18)). |
| 1012 | |
| 1013 | pub const fn graph() => SINGLETON_PROPS_GRAPH_V1; |
| 1014 | pub fn load_graph(); |
| 1015 | } |
| 1016 | |
| 1017 | make_code_point_set_property! { |
| 1018 | property: "Grapheme_Base" ; |
| 1019 | marker: GraphemeBaseProperty; |
| 1020 | keyed_data_marker: GraphemeBaseV1Marker; |
| 1021 | func: |
| 1022 | /// Property used together with the definition of Standard Korean Syllable Block to define |
| 1023 | /// "Grapheme base". See D58 in Chapter 3, "Conformance", of the Unicode Standard. |
| 1024 | /// |
| 1025 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1026 | /// |
| 1027 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1028 | /// |
| 1029 | /// # Example |
| 1030 | /// |
| 1031 | /// ``` |
| 1032 | /// use icu::properties::sets; |
| 1033 | /// |
| 1034 | /// let grapheme_base = sets::grapheme_base(); |
| 1035 | /// |
| 1036 | /// assert!(grapheme_base.contains('ക')); // U+0D15 MALAYALAM LETTER KA |
| 1037 | /// assert!(grapheme_base.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I |
| 1038 | /// assert!(!grapheme_base.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA |
| 1039 | /// ``` |
| 1040 | |
| 1041 | pub const fn grapheme_base() => SINGLETON_PROPS_GR_BASE_V1; |
| 1042 | pub fn load_grapheme_base(); |
| 1043 | } |
| 1044 | |
| 1045 | make_code_point_set_property! { |
| 1046 | property: "Grapheme_Extend" ; |
| 1047 | marker: GraphemeExtendProperty; |
| 1048 | keyed_data_marker: GraphemeExtendV1Marker; |
| 1049 | func: |
| 1050 | /// Property used to define "Grapheme extender". See D59 in Chapter 3, "Conformance", of the |
| 1051 | /// Unicode Standard. |
| 1052 | /// |
| 1053 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1054 | /// |
| 1055 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1056 | /// |
| 1057 | /// # Example |
| 1058 | /// |
| 1059 | /// ``` |
| 1060 | /// use icu::properties::sets; |
| 1061 | /// |
| 1062 | /// let grapheme_extend = sets::grapheme_extend(); |
| 1063 | /// |
| 1064 | /// assert!(!grapheme_extend.contains('ക')); // U+0D15 MALAYALAM LETTER KA |
| 1065 | /// assert!(!grapheme_extend.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I |
| 1066 | /// assert!(grapheme_extend.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA |
| 1067 | /// ``` |
| 1068 | |
| 1069 | pub const fn grapheme_extend() => SINGLETON_PROPS_GR_EXT_V1; |
| 1070 | pub fn load_grapheme_extend(); |
| 1071 | } |
| 1072 | |
| 1073 | make_code_point_set_property! { |
| 1074 | property: "Grapheme_Link" ; |
| 1075 | marker: GraphemeLinkProperty; |
| 1076 | keyed_data_marker: GraphemeLinkV1Marker; |
| 1077 | func: |
| 1078 | /// Deprecated Unicode property. Formerly proposed for programmatic determination of grapheme |
| 1079 | /// cluster boundaries. |
| 1080 | |
| 1081 | pub const fn grapheme_link() => SINGLETON_PROPS_GR_LINK_V1; |
| 1082 | pub fn load_grapheme_link(); |
| 1083 | } |
| 1084 | |
| 1085 | make_code_point_set_property! { |
| 1086 | property: "Hex_Digit" ; |
| 1087 | marker: HexDigitProperty; |
| 1088 | keyed_data_marker: HexDigitV1Marker; |
| 1089 | func: |
| 1090 | /// Characters commonly used for the representation of hexadecimal numbers, plus their |
| 1091 | /// compatibility equivalents. |
| 1092 | /// |
| 1093 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1094 | /// |
| 1095 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1096 | /// |
| 1097 | /// # Example |
| 1098 | /// |
| 1099 | /// ``` |
| 1100 | /// use icu::properties::sets; |
| 1101 | /// |
| 1102 | /// let hex_digit = sets::hex_digit(); |
| 1103 | /// |
| 1104 | /// assert!(hex_digit.contains('0')); |
| 1105 | /// assert!(!hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE |
| 1106 | /// assert!(hex_digit.contains('f')); |
| 1107 | /// assert!(hex_digit.contains('\u{FF46}')); // U+FF46 FULLWIDTH LATIN SMALL LETTER F |
| 1108 | /// assert!(hex_digit.contains('\u{FF26}')); // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F |
| 1109 | /// assert!(!hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS |
| 1110 | /// ``` |
| 1111 | |
| 1112 | pub const fn hex_digit() => SINGLETON_PROPS_HEX_V1; |
| 1113 | pub fn load_hex_digit(); |
| 1114 | } |
| 1115 | |
| 1116 | make_code_point_set_property! { |
| 1117 | property: "Hyphen" ; |
| 1118 | marker: HyphenProperty; |
| 1119 | keyed_data_marker: HyphenV1Marker; |
| 1120 | func: |
| 1121 | /// Deprecated Unicode property. Dashes which are used to mark connections between pieces of |
| 1122 | /// words, plus the Katakana middle dot. |
| 1123 | |
| 1124 | pub const fn hyphen() => SINGLETON_PROPS_HYPHEN_V1; |
| 1125 | pub fn load_hyphen(); |
| 1126 | } |
| 1127 | |
| 1128 | make_code_point_set_property! { |
| 1129 | property: "Id_Continue" ; |
| 1130 | marker: IdContinueProperty; |
| 1131 | keyed_data_marker: IdContinueV1Marker; |
| 1132 | func: |
| 1133 | /// Characters that can come after the first character in an identifier. If using NFKC to |
| 1134 | /// fold differences between characters, use [`load_xid_continue`] instead. See |
| 1135 | /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for |
| 1136 | /// more details. |
| 1137 | /// |
| 1138 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1139 | /// |
| 1140 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1141 | /// |
| 1142 | /// # Example |
| 1143 | /// |
| 1144 | /// ``` |
| 1145 | /// use icu::properties::sets; |
| 1146 | /// |
| 1147 | /// let id_continue = sets::id_continue(); |
| 1148 | /// |
| 1149 | /// assert!(id_continue.contains('x')); |
| 1150 | /// assert!(id_continue.contains('1')); // U+0031 DIGIT ONE |
| 1151 | /// assert!(id_continue.contains('_')); // U+005F LOW LINE |
| 1152 | /// assert!(id_continue.contains('ߝ')); // U+07DD NKO LETTER FA |
| 1153 | /// assert!(!id_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X |
| 1154 | /// assert!(id_continue.contains32(0xFC5E)); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM |
| 1155 | /// ``` |
| 1156 | |
| 1157 | pub const fn id_continue() => SINGLETON_PROPS_IDC_V1; |
| 1158 | pub fn load_id_continue(); |
| 1159 | } |
| 1160 | |
| 1161 | make_code_point_set_property! { |
| 1162 | property: "Ideographic" ; |
| 1163 | marker: IdeographicProperty; |
| 1164 | keyed_data_marker: IdeographicV1Marker; |
| 1165 | func: |
| 1166 | /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese) |
| 1167 | /// ideographs, or related siniform ideographs. |
| 1168 | /// |
| 1169 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1170 | /// |
| 1171 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1172 | /// |
| 1173 | /// # Example |
| 1174 | /// |
| 1175 | /// ``` |
| 1176 | /// use icu::properties::sets; |
| 1177 | /// |
| 1178 | /// let ideographic = sets::ideographic(); |
| 1179 | /// |
| 1180 | /// assert!(ideographic.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD |
| 1181 | /// assert!(!ideographic.contains('밥')); // U+BC25 HANGUL SYLLABLE BAB |
| 1182 | /// ``` |
| 1183 | |
| 1184 | pub const fn ideographic() => SINGLETON_PROPS_IDEO_V1; |
| 1185 | pub fn load_ideographic(); |
| 1186 | } |
| 1187 | |
| 1188 | make_code_point_set_property! { |
| 1189 | property: "Id_Start" ; |
| 1190 | marker: IdStartProperty; |
| 1191 | keyed_data_marker: IdStartV1Marker; |
| 1192 | func: |
| 1193 | /// Characters that can begin an identifier. If using NFKC to fold differences between |
| 1194 | /// characters, use [`load_xid_start`] instead. See [`Unicode Standard Annex |
| 1195 | /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details. |
| 1196 | /// |
| 1197 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1198 | /// |
| 1199 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1200 | /// |
| 1201 | /// # Example |
| 1202 | /// |
| 1203 | /// ``` |
| 1204 | /// use icu::properties::sets; |
| 1205 | /// |
| 1206 | /// let id_start = sets::id_start(); |
| 1207 | /// |
| 1208 | /// assert!(id_start.contains('x')); |
| 1209 | /// assert!(!id_start.contains('1')); // U+0031 DIGIT ONE |
| 1210 | /// assert!(!id_start.contains('_')); // U+005F LOW LINE |
| 1211 | /// assert!(id_start.contains('ߝ')); // U+07DD NKO LETTER FA |
| 1212 | /// assert!(!id_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X |
| 1213 | /// assert!(id_start.contains32(0xFC5E)); // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM |
| 1214 | /// ``` |
| 1215 | |
| 1216 | pub const fn id_start() => SINGLETON_PROPS_IDS_V1; |
| 1217 | pub fn load_id_start(); |
| 1218 | } |
| 1219 | |
| 1220 | make_code_point_set_property! { |
| 1221 | property: "Ids_Binary_Operator" ; |
| 1222 | marker: IdsBinaryOperatorProperty; |
| 1223 | keyed_data_marker: IdsBinaryOperatorV1Marker; |
| 1224 | func: |
| 1225 | /// Characters used in Ideographic Description Sequences. |
| 1226 | /// |
| 1227 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1228 | /// |
| 1229 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1230 | /// |
| 1231 | /// # Example |
| 1232 | /// |
| 1233 | /// ``` |
| 1234 | /// use icu::properties::sets; |
| 1235 | /// |
| 1236 | /// let ids_binary_operator = sets::ids_binary_operator(); |
| 1237 | /// |
| 1238 | /// assert!(ids_binary_operator.contains32(0x2FF5)); // U+2FF5 IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE |
| 1239 | /// assert!(!ids_binary_operator.contains32(0x3006)); // U+3006 IDEOGRAPHIC CLOSING MARK |
| 1240 | /// ``` |
| 1241 | |
| 1242 | pub const fn ids_binary_operator() => SINGLETON_PROPS_IDSB_V1; |
| 1243 | pub fn load_ids_binary_operator(); |
| 1244 | } |
| 1245 | |
| 1246 | make_code_point_set_property! { |
| 1247 | property: "Ids_Trinary_Operator" ; |
| 1248 | marker: IdsTrinaryOperatorProperty; |
| 1249 | keyed_data_marker: IdsTrinaryOperatorV1Marker; |
| 1250 | func: |
| 1251 | /// Characters used in Ideographic Description Sequences. |
| 1252 | /// |
| 1253 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1254 | /// |
| 1255 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1256 | /// |
| 1257 | /// # Example |
| 1258 | /// |
| 1259 | /// ``` |
| 1260 | /// use icu::properties::sets; |
| 1261 | /// |
| 1262 | /// let ids_trinary_operator = sets::ids_trinary_operator(); |
| 1263 | /// |
| 1264 | /// assert!(ids_trinary_operator.contains32(0x2FF2)); // IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT |
| 1265 | /// assert!(ids_trinary_operator.contains32(0x2FF3)); // IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW |
| 1266 | /// assert!(!ids_trinary_operator.contains32(0x2FF4)); // IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND |
| 1267 | /// assert!(!ids_trinary_operator.contains32(0x2FF5)); // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE |
| 1268 | /// assert!(!ids_trinary_operator.contains32(0x3006)); // IDEOGRAPHIC CLOSING MARK |
| 1269 | /// ``` |
| 1270 | |
| 1271 | pub const fn ids_trinary_operator() => SINGLETON_PROPS_IDST_V1; |
| 1272 | pub fn load_ids_trinary_operator(); |
| 1273 | } |
| 1274 | |
| 1275 | make_code_point_set_property! { |
| 1276 | property: "Join_Control" ; |
| 1277 | marker: JoinControlProperty; |
| 1278 | keyed_data_marker: JoinControlV1Marker; |
| 1279 | func: |
| 1280 | /// Format control characters which have specific functions for control of cursive joining |
| 1281 | /// and ligation. |
| 1282 | /// |
| 1283 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1284 | /// |
| 1285 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1286 | /// |
| 1287 | /// # Example |
| 1288 | /// |
| 1289 | /// ``` |
| 1290 | /// use icu::properties::sets; |
| 1291 | /// |
| 1292 | /// let join_control = sets::join_control(); |
| 1293 | /// |
| 1294 | /// assert!(join_control.contains32(0x200C)); // U+200C ZERO WIDTH NON-JOINER |
| 1295 | /// assert!(join_control.contains32(0x200D)); // U+200D ZERO WIDTH JOINER |
| 1296 | /// assert!(!join_control.contains32(0x200E)); // U+200E LEFT-TO-RIGHT MARK |
| 1297 | /// ``` |
| 1298 | |
| 1299 | pub const fn join_control() => SINGLETON_PROPS_JOIN_C_V1; |
| 1300 | pub fn load_join_control(); |
| 1301 | } |
| 1302 | |
| 1303 | make_code_point_set_property! { |
| 1304 | property: "Logical_Order_Exception" ; |
| 1305 | marker: LogicalOrderExceptionProperty; |
| 1306 | keyed_data_marker: LogicalOrderExceptionV1Marker; |
| 1307 | func: |
| 1308 | /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts such as Thai and Lao. |
| 1309 | /// |
| 1310 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1311 | /// |
| 1312 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1313 | /// |
| 1314 | /// # Example |
| 1315 | /// |
| 1316 | /// ``` |
| 1317 | /// use icu::properties::sets; |
| 1318 | /// |
| 1319 | /// let logical_order_exception = sets::logical_order_exception(); |
| 1320 | /// |
| 1321 | /// assert!(logical_order_exception.contains('ແ')); // U+0EC1 LAO VOWEL SIGN EI |
| 1322 | /// assert!(!logical_order_exception.contains('ະ')); // U+0EB0 LAO VOWEL SIGN A |
| 1323 | /// ``` |
| 1324 | |
| 1325 | pub const fn logical_order_exception() => SINGLETON_PROPS_LOE_V1; |
| 1326 | pub fn load_logical_order_exception(); |
| 1327 | } |
| 1328 | |
| 1329 | make_code_point_set_property! { |
| 1330 | property: "Lowercase" ; |
| 1331 | marker: LowercaseProperty; |
| 1332 | keyed_data_marker: LowercaseV1Marker; |
| 1333 | func: |
| 1334 | /// Lowercase characters. |
| 1335 | /// |
| 1336 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1337 | /// |
| 1338 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1339 | /// |
| 1340 | /// # Example |
| 1341 | /// |
| 1342 | /// ``` |
| 1343 | /// use icu::properties::sets; |
| 1344 | /// |
| 1345 | /// let lowercase = sets::lowercase(); |
| 1346 | /// |
| 1347 | /// assert!(lowercase.contains('a')); // U+0061 LATIN SMALL LETTER A |
| 1348 | /// assert!(!lowercase.contains('A')); // U+0041 LATIN CAPITAL LETTER A |
| 1349 | /// ``` |
| 1350 | |
| 1351 | pub const fn lowercase() => SINGLETON_PROPS_LOWER_V1; |
| 1352 | pub fn load_lowercase(); |
| 1353 | } |
| 1354 | |
| 1355 | make_code_point_set_property! { |
| 1356 | property: "Math" ; |
| 1357 | marker: MathProperty; |
| 1358 | keyed_data_marker: MathV1Marker; |
| 1359 | func: |
| 1360 | /// Characters used in mathematical notation. |
| 1361 | /// |
| 1362 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1363 | /// |
| 1364 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1365 | /// |
| 1366 | /// # Example |
| 1367 | /// |
| 1368 | /// ``` |
| 1369 | /// use icu::properties::sets; |
| 1370 | /// |
| 1371 | /// let math = sets::math(); |
| 1372 | /// |
| 1373 | /// assert!(math.contains('=')); // U+003D EQUALS SIGN |
| 1374 | /// assert!(math.contains('+')); // U+002B PLUS SIGN |
| 1375 | /// assert!(!math.contains('-')); // U+002D HYPHEN-MINUS |
| 1376 | /// assert!(math.contains('−')); // U+2212 MINUS SIGN |
| 1377 | /// assert!(!math.contains('/')); // U+002F SOLIDUS |
| 1378 | /// assert!(math.contains('∕')); // U+2215 DIVISION SLASH |
| 1379 | /// ``` |
| 1380 | |
| 1381 | pub const fn math() => SINGLETON_PROPS_MATH_V1; |
| 1382 | pub fn load_math(); |
| 1383 | } |
| 1384 | |
| 1385 | make_code_point_set_property! { |
| 1386 | property: "Noncharacter_Code_Point" ; |
| 1387 | marker: NoncharacterCodePointProperty; |
| 1388 | keyed_data_marker: NoncharacterCodePointV1Marker; |
| 1389 | func: |
| 1390 | /// Code points permanently reserved for internal use. |
| 1391 | /// |
| 1392 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1393 | /// |
| 1394 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1395 | /// |
| 1396 | /// # Example |
| 1397 | /// |
| 1398 | /// ``` |
| 1399 | /// use icu::properties::sets; |
| 1400 | /// |
| 1401 | /// let noncharacter_code_point = sets::noncharacter_code_point(); |
| 1402 | /// |
| 1403 | /// assert!(noncharacter_code_point.contains32(0xFDD0)); // start of the U+FDD0..U+FDEF noncharacter range |
| 1404 | /// assert!(noncharacter_code_point.contains32(0xFFFF)); // last code point of plane 0 |
| 1405 | /// assert!(!noncharacter_code_point.contains32(0x10000)); // U+10000 LINEAR B SYLLABLE B008 A |
| 1406 | /// ``` |
| 1407 | |
| 1408 | pub const fn noncharacter_code_point() => SINGLETON_PROPS_NCHAR_V1; |
| 1409 | pub fn load_noncharacter_code_point(); |
| 1410 | } |
| 1411 | |
| 1412 | make_code_point_set_property! { |
| 1413 | property: "NFC_Inert" ; |
| 1414 | marker: NfcInertProperty; |
| 1415 | keyed_data_marker: NfcInertV1Marker; |
| 1416 | func: |
| 1417 | /// Characters that are inert under NFC (Normalization Form C), i.e., they do not interact with adjacent characters. |
| 1418 | |
| 1419 | pub const fn nfc_inert() => SINGLETON_PROPS_NFCINERT_V1; |
| 1420 | pub fn load_nfc_inert(); |
| 1421 | } |
| 1422 | |
| 1423 | make_code_point_set_property! { |
| 1424 | property: "NFD_Inert" ; |
| 1425 | marker: NfdInertProperty; |
| 1426 | keyed_data_marker: NfdInertV1Marker; |
| 1427 | func: |
| 1428 | /// Characters that are inert under NFD (Normalization Form D), i.e., they do not interact with adjacent characters. |
| 1429 | |
| 1430 | pub const fn nfd_inert() => SINGLETON_PROPS_NFDINERT_V1; |
| 1431 | pub fn load_nfd_inert(); |
| 1432 | } |
| 1433 | |
| 1434 | make_code_point_set_property! { |
| 1435 | property: "NFKC_Inert" ; |
| 1436 | marker: NfkcInertProperty; |
| 1437 | keyed_data_marker: NfkcInertV1Marker; |
| 1438 | func: |
| 1439 | /// Characters that are inert under NFKC (Normalization Form KC), i.e., they do not interact with adjacent characters. |
| 1440 | |
| 1441 | pub const fn nfkc_inert() => SINGLETON_PROPS_NFKCINERT_V1; |
| 1442 | pub fn load_nfkc_inert(); |
| 1443 | } |
| 1444 | |
| 1445 | make_code_point_set_property! { |
| 1446 | property: "NFKD_Inert" ; |
| 1447 | marker: NfkdInertProperty; |
| 1448 | keyed_data_marker: NfkdInertV1Marker; |
| 1449 | func: |
| 1450 | /// Characters that are inert under NFKD (Normalization Form KD), i.e., they do not interact with adjacent characters. |
| 1451 | |
| 1452 | pub const fn nfkd_inert() => SINGLETON_PROPS_NFKDINERT_V1; |
| 1453 | pub fn load_nfkd_inert(); |
| 1454 | } |
| 1455 | |
| 1456 | make_code_point_set_property! { |
| 1457 | property: "Pattern_Syntax" ; |
| 1458 | marker: PatternSyntaxProperty; |
| 1459 | keyed_data_marker: PatternSyntaxV1Marker; |
| 1460 | func: |
| 1461 | /// Characters used as syntax in patterns (such as regular expressions). See [`Unicode |
| 1462 | /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more |
| 1463 | /// details. |
| 1464 | /// |
| 1465 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1466 | /// |
| 1467 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1468 | /// |
| 1469 | /// # Example |
| 1470 | /// |
| 1471 | /// ``` |
| 1472 | /// use icu::properties::sets; |
| 1473 | /// |
| 1474 | /// let pattern_syntax = sets::pattern_syntax(); |
| 1475 | /// |
| 1476 | /// assert!(pattern_syntax.contains('{')); // U+007B LEFT CURLY BRACKET |
| 1477 | /// assert!(pattern_syntax.contains('⇒')); // U+21D2 RIGHTWARDS DOUBLE ARROW |
| 1478 | /// assert!(!pattern_syntax.contains('0')); // U+0030 DIGIT ZERO |
| 1479 | /// ``` |
| 1480 | |
| 1481 | pub const fn pattern_syntax() => SINGLETON_PROPS_PAT_SYN_V1; |
| 1482 | pub fn load_pattern_syntax(); |
| 1483 | } |
| 1484 | |
| 1485 | make_code_point_set_property! { |
| 1486 | property: "Pattern_White_Space" ; |
| 1487 | marker: PatternWhiteSpaceProperty; |
| 1488 | keyed_data_marker: PatternWhiteSpaceV1Marker; |
| 1489 | func: |
| 1490 | /// Characters used as whitespace in patterns (such as regular expressions). See |
| 1491 | /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for |
| 1492 | /// more details. |
| 1493 | /// |
| 1494 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1495 | /// |
| 1496 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1497 | /// |
| 1498 | /// # Example |
| 1499 | /// |
| 1500 | /// ``` |
| 1501 | /// use icu::properties::sets; |
| 1502 | /// |
| 1503 | /// let pattern_white_space = sets::pattern_white_space(); |
| 1504 | /// |
| 1505 | /// assert!(pattern_white_space.contains(' ')); // U+0020 SPACE |
| 1506 | /// assert!(pattern_white_space.contains32(0x2029)); // U+2029 PARAGRAPH SEPARATOR |
| 1507 | /// assert!(pattern_white_space.contains32(0x000A)); // U+000A LINE FEED (new line) |
| 1508 | /// assert!(!pattern_white_space.contains32(0x00A0)); // U+00A0 NO-BREAK SPACE |
| 1509 | /// ``` |
| 1510 | |
| 1511 | pub const fn pattern_white_space() => SINGLETON_PROPS_PAT_WS_V1; |
| 1512 | pub fn load_pattern_white_space(); |
| 1513 | } |
| 1514 | |
| 1515 | make_code_point_set_property! { |
| 1516 | property: "Prepended_Concatenation_Mark" ; |
| 1517 | marker: PrependedConcatenationMarkProperty; |
| 1518 | keyed_data_marker: PrependedConcatenationMarkV1Marker; |
| 1519 | func: |
| 1520 | /// A small class of visible format controls that precede and then span a sequence of |
| 1521 | /// other characters, usually digits. |
| 1522 | |
| 1523 | pub const fn prepended_concatenation_mark() => SINGLETON_PROPS_PCM_V1; |
| 1524 | pub fn load_prepended_concatenation_mark(); |
| 1525 | } |
| 1526 | |
| 1527 | make_code_point_set_property! { |
| 1528 | property: "Print" ; |
| 1529 | marker: PrintProperty; |
| 1530 | keyed_data_marker: PrintV1Marker; |
| 1531 | func: |
| 1532 | /// Printable characters (visible characters and whitespace). |
| 1533 | /// This is defined for POSIX compatibility (see Annex C of [TR18](https://www.unicode.org/reports/tr18)). |
| 1534 | |
| 1535 | pub const fn print() => SINGLETON_PROPS_PRINT_V1; |
| 1536 | pub fn load_print(); |
| 1537 | } |
| 1538 | |
| 1539 | make_code_point_set_property! { |
| 1540 | property: "Quotation_Mark" ; |
| 1541 | marker: QuotationMarkProperty; |
| 1542 | keyed_data_marker: QuotationMarkV1Marker; |
| 1543 | func: |
| 1544 | /// Punctuation characters that function as quotation marks. |
| 1545 | /// |
| 1546 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1547 | /// |
| 1548 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1549 | /// |
| 1550 | /// # Example |
| 1551 | /// |
| 1552 | /// ``` |
| 1553 | /// use icu::properties::sets; |
| 1554 | /// |
| 1555 | /// let quotation_mark = sets::quotation_mark(); |
| 1556 | /// |
| 1557 | /// assert!(quotation_mark.contains('\'')); // U+0027 APOSTROPHE |
| 1558 | /// assert!(quotation_mark.contains('„')); // U+201E DOUBLE LOW-9 QUOTATION MARK |
| 1559 | /// assert!(!quotation_mark.contains('<')); // U+003C LESS-THAN SIGN |
| 1560 | /// ``` |
| 1561 | |
| 1562 | pub const fn quotation_mark() => SINGLETON_PROPS_QMARK_V1; |
| 1563 | pub fn load_quotation_mark(); |
| 1564 | } |
| 1565 | |
| 1566 | make_code_point_set_property! { |
| 1567 | property: "Radical" ; |
| 1568 | marker: RadicalProperty; |
| 1569 | keyed_data_marker: RadicalV1Marker; |
| 1570 | func: |
| 1571 | /// Characters used in the definition of Ideographic Description Sequences. |
| 1572 | /// |
| 1573 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1574 | /// |
| 1575 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1576 | /// |
| 1577 | /// # Example |
| 1578 | /// |
| 1579 | /// ``` |
| 1580 | /// use icu::properties::sets; |
| 1581 | /// |
| 1582 | /// let radical = sets::radical(); |
| 1583 | /// |
| 1584 | /// assert!(radical.contains('⺆')); // U+2E86 CJK RADICAL BOX |
| 1585 | /// assert!(!radical.contains('\u{F95E}')); // U+F95E CJK COMPATIBILITY IDEOGRAPH-F95E |
| 1586 | /// ``` |
| 1587 | |
| 1588 | pub const fn radical() => SINGLETON_PROPS_RADICAL_V1; |
| 1589 | pub fn load_radical(); |
| 1590 | } |
| 1591 | |
| 1592 | make_code_point_set_property! { |
| 1593 | property: "Regional_Indicator" ; |
| 1594 | marker: RegionalIndicatorProperty; |
| 1595 | keyed_data_marker: RegionalIndicatorV1Marker; |
| 1596 | func: |
| 1597 | /// Regional indicator characters, U+1F1E6..U+1F1FF. |
| 1598 | /// |
| 1599 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1600 | /// |
| 1601 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1602 | /// |
| 1603 | /// # Example |
| 1604 | /// |
| 1605 | /// ``` |
| 1606 | /// use icu::properties::sets; |
| 1607 | /// |
| 1608 | /// let regional_indicator = sets::regional_indicator(); |
| 1609 | /// |
| 1610 | /// assert!(regional_indicator.contains('🇹')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T |
| 1611 | /// assert!(!regional_indicator.contains('Ⓣ')); // U+24C9 CIRCLED LATIN CAPITAL LETTER T |
| 1612 | /// assert!(!regional_indicator.contains('T')); // U+0054 LATIN CAPITAL LETTER T |
| 1613 | /// ``` |
| 1614 | |
| 1615 | pub const fn regional_indicator() => SINGLETON_PROPS_RI_V1; |
| 1616 | pub fn load_regional_indicator(); |
| 1617 | } |
| 1618 | |
| 1619 | make_code_point_set_property! { |
| 1620 | property: "Soft_Dotted" ; |
| 1621 | marker: SoftDottedProperty; |
| 1622 | keyed_data_marker: SoftDottedV1Marker; |
| 1623 | func: |
| 1624 | /// Characters with a "soft dot", like i or j. An accent placed on these characters causes |
| 1625 | /// the dot to disappear. |
| 1626 | /// |
| 1627 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1628 | /// |
| 1629 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1630 | /// |
| 1631 | /// # Example |
| 1632 | /// |
| 1633 | /// ``` |
| 1634 | /// use icu::properties::sets; |
| 1635 | /// |
| 1636 | /// let soft_dotted = sets::soft_dotted(); |
| 1637 | /// |
| 1638 | /// assert!(soft_dotted.contains('і')); //U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I |
| 1639 | /// assert!(!soft_dotted.contains('ı')); // U+0131 LATIN SMALL LETTER DOTLESS I |
| 1640 | /// ``` |
| 1641 | |
| 1642 | pub const fn soft_dotted() => SINGLETON_PROPS_SD_V1; |
| 1643 | pub fn load_soft_dotted(); |
| 1644 | } |
| 1645 | |
| 1646 | make_code_point_set_property! { |
| 1647 | property: "Segment_Starter" ; |
| 1648 | marker: SegmentStarterProperty; |
| 1649 | keyed_data_marker: SegmentStarterV1Marker; |
| 1650 | func: |
| 1651 | /// Characters that are starters in terms of Unicode normalization and combining character |
| 1652 | /// sequences |
| 1653 | |
| 1654 | pub const fn segment_starter() => SINGLETON_PROPS_SEGSTART_V1; |
| 1655 | pub fn load_segment_starter(); |
| 1656 | } |
| 1657 | |
| 1658 | make_code_point_set_property! { |
| 1659 | property: "Case_Sensitive" ; |
| 1660 | marker: CaseSensitiveProperty; |
| 1661 | keyed_data_marker: CaseSensitiveV1Marker; |
| 1662 | func: |
| 1663 | /// Characters that are either the source of a case mapping or in the target of a case |
| 1664 | /// mapping |
| 1665 | |
| 1666 | pub const fn case_sensitive() => SINGLETON_PROPS_SENSITIVE_V1; |
| 1667 | pub fn load_case_sensitive(); |
| 1668 | } |
| 1669 | |
| 1670 | make_code_point_set_property! { |
| 1671 | property: "Sentence_Terminal" ; |
| 1672 | marker: SentenceTerminalProperty; |
| 1673 | keyed_data_marker: SentenceTerminalV1Marker; |
| 1674 | func: |
| 1675 | /// Punctuation characters that generally mark the end of sentences |
| 1676 | /// |
| 1677 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1678 | /// |
| 1679 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1680 | /// |
| 1681 | /// # Example |
| 1682 | /// |
| 1683 | /// ``` |
| 1684 | /// use icu::properties::sets; |
| 1685 | /// |
| 1686 | /// let sentence_terminal = sets::sentence_terminal(); |
| 1687 | /// |
| 1688 | /// assert!(sentence_terminal.contains('.')); |
| 1689 | /// assert!(sentence_terminal.contains('?')); |
| 1690 | /// assert!(sentence_terminal.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN |
| 1691 | /// assert!(!sentence_terminal.contains(',')); |
| 1692 | /// assert!(!sentence_terminal.contains('¿')); // U+00BF INVERTED QUESTION MARK |
| 1693 | /// ``` |
| 1694 | |
| 1695 | pub const fn sentence_terminal() => SINGLETON_PROPS_STERM_V1; |
| 1696 | pub fn load_sentence_terminal(); |
| 1697 | } |
| 1698 | |
| 1699 | make_code_point_set_property! { |
| 1700 | property: "Terminal_Punctuation" ; |
| 1701 | marker: TerminalPunctuationProperty; |
| 1702 | keyed_data_marker: TerminalPunctuationV1Marker; |
| 1703 | func: |
| 1704 | /// Punctuation characters that generally mark the end of textual units |
| 1705 | /// |
| 1706 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1707 | /// |
| 1708 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1709 | /// |
| 1710 | /// # Example |
| 1711 | /// |
| 1712 | /// ``` |
| 1713 | /// use icu::properties::sets; |
| 1714 | /// |
| 1715 | /// let terminal_punctuation = sets::terminal_punctuation(); |
| 1716 | /// |
| 1717 | /// assert!(terminal_punctuation.contains('.')); |
| 1718 | /// assert!(terminal_punctuation.contains('?')); |
| 1719 | /// assert!(terminal_punctuation.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN |
| 1720 | /// assert!(terminal_punctuation.contains(',')); |
| 1721 | /// assert!(!terminal_punctuation.contains('¿')); // U+00BF INVERTED QUESTION MARK |
| 1722 | /// ``` |
| 1723 | |
| 1724 | pub const fn terminal_punctuation() => SINGLETON_PROPS_TERM_V1; |
| 1725 | pub fn load_terminal_punctuation(); |
| 1726 | } |
| 1727 | |
| 1728 | make_code_point_set_property! { |
| 1729 | property: "Unified_Ideograph" ; |
| 1730 | marker: UnifiedIdeographProperty; |
| 1731 | keyed_data_marker: UnifiedIdeographV1Marker; |
| 1732 | func: |
| 1733 | /// A property which specifies the exact set of Unified CJK Ideographs in the standard |
| 1734 | /// |
| 1735 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1736 | /// |
| 1737 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1738 | /// |
| 1739 | /// # Example |
| 1740 | /// |
| 1741 | /// ``` |
| 1742 | /// use icu::properties::sets; |
| 1743 | /// |
| 1744 | /// let unified_ideograph = sets::unified_ideograph(); |
| 1745 | /// |
| 1746 | /// assert!(unified_ideograph.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD |
| 1747 | /// assert!(unified_ideograph.contains('木')); // U+6728 CJK UNIFIED IDEOGRAPH-6728 |
| 1748 | /// assert!(!unified_ideograph.contains('𛅸')); // U+1B178 NUSHU CHARACTER-1B178 |
| 1749 | /// ``` |
| 1750 | |
| 1751 | pub const fn unified_ideograph() => SINGLETON_PROPS_UIDEO_V1; |
| 1752 | pub fn load_unified_ideograph(); |
| 1753 | } |
| 1754 | |
| 1755 | make_code_point_set_property! { |
| 1756 | property: "Uppercase" ; |
| 1757 | marker: UppercaseProperty; |
| 1758 | keyed_data_marker: UppercaseV1Marker; |
| 1759 | func: |
| 1760 | /// Uppercase characters |
| 1761 | /// |
| 1762 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1763 | /// |
| 1764 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1765 | /// |
| 1766 | /// # Example |
| 1767 | /// |
| 1768 | /// ``` |
| 1769 | /// use icu::properties::sets; |
| 1770 | /// |
| 1771 | /// let uppercase = sets::uppercase(); |
| 1772 | /// |
| 1773 | /// assert!(uppercase.contains('U')); |
| 1774 | /// assert!(!uppercase.contains('u')); |
| 1775 | /// ``` |
| 1776 | |
| 1777 | pub const fn uppercase() => SINGLETON_PROPS_UPPER_V1; |
| 1778 | pub fn load_uppercase(); |
| 1779 | } |
| 1780 | |
| 1781 | make_code_point_set_property! { |
| 1782 | property: "Variation_Selector" ; |
| 1783 | marker: VariationSelectorProperty; |
| 1784 | keyed_data_marker: VariationSelectorV1Marker; |
| 1785 | func: |
| 1786 | /// Characters that are Variation Selectors. |
| 1787 | /// |
| 1788 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1789 | /// |
| 1790 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1791 | /// |
| 1792 | /// # Example |
| 1793 | /// |
| 1794 | /// ``` |
| 1795 | /// use icu::properties::sets; |
| 1796 | /// |
| 1797 | /// let variation_selector = sets::variation_selector(); |
| 1798 | /// |
| 1799 | /// assert!(variation_selector.contains32(0x180D)); // MONGOLIAN FREE VARIATION SELECTOR THREE |
| 1800 | /// assert!(!variation_selector.contains32(0x303E)); // IDEOGRAPHIC VARIATION INDICATOR |
| 1801 | /// assert!(variation_selector.contains32(0xFE0F)); // VARIATION SELECTOR-16 |
| 1802 | /// assert!(!variation_selector.contains32(0xFE10)); // PRESENTATION FORM FOR VERTICAL COMMA |
| 1803 | /// assert!(variation_selector.contains32(0xE01EF)); // VARIATION SELECTOR-256 |
| 1804 | /// ``` |
| 1805 | |
| 1806 | pub const fn variation_selector() => SINGLETON_PROPS_VS_V1; |
| 1807 | pub fn load_variation_selector(); |
| 1808 | } |
| 1809 | |
| 1810 | make_code_point_set_property! { |
| 1811 | property: "White_Space" ; |
| 1812 | marker: WhiteSpaceProperty; |
| 1813 | keyed_data_marker: WhiteSpaceV1Marker; |
| 1814 | func: |
| 1815 | /// Spaces, separator characters and other control characters which should be treated by |
| 1816 | /// programming languages as "white space" for the purpose of parsing elements |
| 1817 | /// |
| 1818 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1819 | /// |
| 1820 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1821 | /// |
| 1822 | /// # Example |
| 1823 | /// |
| 1824 | /// ``` |
| 1825 | /// use icu::properties::sets; |
| 1826 | /// |
| 1827 | /// let white_space = sets::white_space(); |
| 1828 | /// |
| 1829 | /// assert!(white_space.contains(' ')); |
| 1830 | /// assert!(white_space.contains32(0x000A)); // NEW LINE |
| 1831 | /// assert!(white_space.contains32(0x00A0)); // NO-BREAK SPACE |
| 1832 | /// assert!(!white_space.contains32(0x200B)); // ZERO WIDTH SPACE |
| 1833 | /// ``` |
| 1834 | |
| 1835 | pub const fn white_space() => SINGLETON_PROPS_WSPACE_V1; |
| 1836 | pub fn load_white_space(); |
| 1837 | } |
| 1838 | |
| 1839 | make_code_point_set_property! { |
| 1840 | property: "Xdigit" ; |
| 1841 | marker: XdigitProperty; |
| 1842 | keyed_data_marker: XdigitV1Marker; |
| 1843 | func: |
| 1844 | /// Hexadecimal digits |
| 1845 | /// This is defined for POSIX compatibility. |
| 1846 | |
| 1847 | pub const fn xdigit() => SINGLETON_PROPS_XDIGIT_V1; |
| 1848 | pub fn load_xdigit(); |
| 1849 | } |
| 1850 | |
| 1851 | make_code_point_set_property! { |
| 1852 | property: "XID_Continue" ; |
| 1853 | marker: XidContinueProperty; |
| 1854 | keyed_data_marker: XidContinueV1Marker; |
| 1855 | func: |
| 1856 | /// Characters that can come after the first character in an identifier. See [`Unicode Standard Annex |
| 1857 | /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details. |
| 1858 | /// |
| 1859 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1860 | /// |
| 1861 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1862 | /// |
| 1863 | /// # Example |
| 1864 | /// |
| 1865 | /// ``` |
| 1866 | /// use icu::properties::sets; |
| 1867 | /// |
| 1868 | /// let xid_continue = sets::xid_continue(); |
| 1869 | /// |
| 1870 | /// assert!(xid_continue.contains('x')); |
| 1871 | /// assert!(xid_continue.contains('1')); |
| 1872 | /// assert!(xid_continue.contains('_')); |
| 1873 | /// assert!(xid_continue.contains('ߝ')); // U+07DD NKO LETTER FA |
| 1874 | /// assert!(!xid_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X |
| 1875 | /// assert!(!xid_continue.contains32(0xFC5E)); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM |
| 1876 | /// ``` |
| 1877 | |
| 1878 | pub const fn xid_continue() => SINGLETON_PROPS_XIDC_V1; |
| 1879 | pub fn load_xid_continue(); |
| 1880 | } |
| 1881 | |
| 1882 | make_code_point_set_property! { |
| 1883 | property: "XID_Start" ; |
| 1884 | marker: XidStartProperty; |
| 1885 | keyed_data_marker: XidStartV1Marker; |
| 1886 | func: |
| 1887 | /// Characters that can begin an identifier. See [`Unicode |
| 1888 | /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more |
| 1889 | /// details. |
| 1890 | /// |
| 1891 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1892 | /// |
| 1893 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1894 | /// |
| 1895 | /// # Example |
| 1896 | /// |
| 1897 | /// ``` |
| 1898 | /// use icu::properties::sets; |
| 1899 | /// |
| 1900 | /// let xid_start = sets::xid_start(); |
| 1901 | /// |
| 1902 | /// assert!(xid_start.contains('x')); |
| 1903 | /// assert!(!xid_start.contains('1')); |
| 1904 | /// assert!(!xid_start.contains('_')); |
| 1905 | /// assert!(xid_start.contains('ߝ')); // U+07DD NKO LETTER FA |
| 1906 | /// assert!(!xid_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X |
| 1907 | /// assert!(!xid_start.contains32(0xFC5E)); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM |
| 1908 | /// ``` |
| 1909 | |
| 1910 | pub const fn xid_start() => SINGLETON_PROPS_XIDS_V1; |
| 1911 | pub fn load_xid_start(); |
| 1912 | } |
| 1913 | |
| 1914 | // |
| 1915 | // Binary property getter fns |
| 1916 | // (data as sets of strings + code points) |
| 1917 | // |
| 1918 | |
// Generates the pair of getters for one binary property whose data is a set of code points
// *and* strings:
//   * `$funcname`: loads the set from a caller-supplied `DataProvider`;
//   * `$constname`: returns the baked singleton (only with the `compiled_data` feature).
// The doc comments written in the invocation are attached to `$constname`.
macro_rules! make_unicode_set_property {
    (
        // currently unused
        property: $property:expr;
        // currently unused
        marker: $marker_name:ident;
        keyed_data_marker: $keyed_data_marker:ty;
        func:
        $(#[$doc:meta])+
        $cvis:vis const fn $constname:ident() => $singleton:ident;
        $vis:vis fn $funcname:ident();
    ) => {
        #[doc = concat!("A version of [`", stringify!($constname), "()`] that uses custom data provided by a [`DataProvider`].")]
        $vis fn $funcname(
            provider: &(impl DataProvider<$keyed_data_marker> + ?Sized)
        ) -> Result<UnicodeSetData, PropertiesError> {
            // Any `DataError` from the load or from extracting the payload is converted
            // into a `PropertiesError` by `?`.
            let payload = provider
                .load(Default::default())
                .and_then(DataResponse::take_payload)?;
            Ok(UnicodeSetData::from_data(payload))
        }
        $(#[$doc])*
        #[cfg(feature = "compiled_data")]
        $cvis const fn $constname() -> UnicodeSetDataBorrowed<'static> {
            UnicodeSetDataBorrowed {
                set: crate::provider::Baked::$singleton
            }
        }
    }
}
| 1946 | |
| 1947 | make_unicode_set_property! { |
| 1948 | property: "Basic_Emoji" ; |
| 1949 | marker: BasicEmojiProperty; |
| 1950 | keyed_data_marker: BasicEmojiV1Marker; |
| 1951 | func: |
| 1952 | /// Characters and character sequences intended for general-purpose, independent, direct input. |
| 1953 | /// See [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/) for more |
| 1954 | /// details. |
| 1955 | /// |
| 1956 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 1957 | /// |
| 1958 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1959 | /// |
| 1960 | /// # Example |
| 1961 | /// |
| 1962 | /// ``` |
| 1963 | /// use icu::properties::sets; |
| 1964 | /// |
| 1965 | /// let basic_emoji = sets::basic_emoji(); |
| 1966 | /// |
| 1967 | /// assert!(!basic_emoji.contains32(0x0020)); |
| 1968 | /// assert!(!basic_emoji.contains_char('\n')); |
| 1969 | /// assert!(basic_emoji.contains_char('🦃')); // U+1F983 TURKEY |
| 1970 | /// assert!(basic_emoji.contains("\u{1F983}")); |
| 1971 | /// assert!(basic_emoji.contains("\u{1F6E4}\u{FE0F}")); // railway track |
| 1972 | /// assert!(!basic_emoji.contains("\u{0033}\u{FE0F}\u{20E3}")); // Emoji_Keycap_Sequence, keycap 3 |
| 1973 | /// ``` |
| 1974 | pub const fn basic_emoji() => SINGLETON_PROPS_BASIC_EMOJI_V1; |
| 1975 | pub fn load_basic_emoji(); |
| 1976 | } |
| 1977 | |
| 1978 | // |
| 1979 | // Enumerated property getter fns |
| 1980 | // |
| 1981 | |
| 1982 | /// A version of [`for_general_category_group()`] that uses custom data provided by a [`DataProvider`]. |
| 1983 | /// |
| 1984 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 1985 | pub fn load_for_general_category_group( |
| 1986 | provider: &(impl DataProvider<GeneralCategoryV1Marker> + ?Sized), |
| 1987 | enum_val: GeneralCategoryGroup, |
| 1988 | ) -> Result<CodePointSetData, PropertiesError> { |
| 1989 | let gc_map_payload: CodePointMapData = maps::load_general_category(provider)?; |
| 1990 | let gc_map: CodePointMapDataBorrowed<'_, …> = gc_map_payload.as_borrowed(); |
| 1991 | let matching_gc_ranges: impl Iterator- >
= gc_mapimpl Iterator- >
|
| 1992 | .iter_ranges() |
| 1993 | .filter(|cpm_range: &CodePointMapRange| (1 << cpm_range.value as u32) & enum_val.0 != 0) |
| 1994 | .map(|cpm_range: CodePointMapRange| cpm_range.range); |
| 1995 | let set: CodePointInversionList<'_> = CodePointInversionList::from_iter(matching_gc_ranges); |
| 1996 | Ok(CodePointSetData::from_code_point_inversion_list(set)) |
| 1997 | } |
| 1998 | |
/// Return a [`CodePointSetData`] for a value or a grouping of values of the General_Category property. See [`GeneralCategoryGroup`].
///
/// The returned set contains every code point whose `General_Category` value belongs to
/// `enum_val`.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
#[cfg(feature = "compiled_data")]
pub fn for_general_category_group(enum_val: GeneralCategoryGroup) -> CodePointSetData {
    let matching_gc_ranges = maps::general_category()
        .iter_ranges()
        // A range matches when the bit for its category value is set in the group's mask.
        .filter(|cpm_range| (1 << cpm_range.value as u32) & enum_val.0 != 0)
        .map(|cpm_range| cpm_range.range);
    let set = CodePointInversionList::from_iter(matching_gc_ranges);
    CodePointSetData::from_code_point_inversion_list(set)
}
| 2013 | |
/// Returns a type capable of looking up values for a property specified as a string, as long as it is a
/// [binary property listed in ECMA-262][ecma], using strict matching on the names in the spec.
///
/// This handles every property required by ECMA-262 `/u` regular expressions, except for:
///
/// - `Script` and `General_Category`: handle these directly with [`maps::load_general_category()`] and
///    [`maps::load_script()`], using property values parsed via [`GeneralCategory::get_name_to_enum_mapper()`]
///    and [`Script::get_name_to_enum_mapper()`] if necessary.
/// - `Script_Extensions`: handle this directly using APIs from [`crate::script`], like [`script::load_script_with_extensions_unstable()`]
/// - `General_Category` mask values: Handle this alongside `General_Category` using [`GeneralCategoryGroup`],
///    using property values parsed via [`GeneralCategoryGroup::get_name_to_enum_mapper()`] if necessary
/// - `Assigned`, `Any`, and `ASCII` pseudoproperties: Handle these using their equivalent sets:
///    - `Any` can be expressed as the range `[\u{0}-\u{10FFFF}]`
///    - `Assigned` can be expressed as the inverse of the set `gc=Cn` (i.e., `\P{gc=Cn}`).
///    - `ASCII` can be expressed as the range `[\u{0}-\u{7F}]`
/// - `General_Category` property values can themselves be treated like properties using a shorthand in ECMA262,
///    simply create the corresponding `GeneralCategory` set.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
///
/// # Errors
///
/// Returns [`PropertiesError::UnexpectedPropertyName`] if `name` is not recognized, or if it
/// names a property that is not one of the binary properties handled here.
///
/// # Example
///
/// ```
/// use icu::properties::sets;
///
/// let emoji = sets::load_for_ecma262("Emoji").expect("loading data failed");
///
/// assert!(emoji.contains('🔥')); // U+1F525 FIRE
/// assert!(!emoji.contains('V'));
/// ```
///
/// [ecma]: https://tc39.es/ecma262/#table-binary-unicode-properties
#[cfg(feature = "compiled_data")]
pub fn load_for_ecma262(name: &str) -> Result<CodePointSetDataBorrowed<'static>, PropertiesError> {
    use crate::runtime::UnicodeProperty;

    let prop = UnicodeProperty::parse_ecma262_name(name)
        .ok_or(PropertiesError::UnexpectedPropertyName)?;
    Ok(match prop {
        UnicodeProperty::AsciiHexDigit => ascii_hex_digit(),
        UnicodeProperty::Alphabetic => alphabetic(),
        UnicodeProperty::BidiControl => bidi_control(),
        UnicodeProperty::BidiMirrored => bidi_mirrored(),
        UnicodeProperty::CaseIgnorable => case_ignorable(),
        UnicodeProperty::Cased => cased(),
        UnicodeProperty::ChangesWhenCasefolded => changes_when_casefolded(),
        UnicodeProperty::ChangesWhenCasemapped => changes_when_casemapped(),
        UnicodeProperty::ChangesWhenLowercased => changes_when_lowercased(),
        UnicodeProperty::ChangesWhenNfkcCasefolded => changes_when_nfkc_casefolded(),
        UnicodeProperty::ChangesWhenTitlecased => changes_when_titlecased(),
        UnicodeProperty::ChangesWhenUppercased => changes_when_uppercased(),
        UnicodeProperty::Dash => dash(),
        UnicodeProperty::DefaultIgnorableCodePoint => default_ignorable_code_point(),
        UnicodeProperty::Deprecated => deprecated(),
        UnicodeProperty::Diacritic => diacritic(),
        UnicodeProperty::Emoji => emoji(),
        UnicodeProperty::EmojiComponent => emoji_component(),
        UnicodeProperty::EmojiModifier => emoji_modifier(),
        UnicodeProperty::EmojiModifierBase => emoji_modifier_base(),
        UnicodeProperty::EmojiPresentation => emoji_presentation(),
        UnicodeProperty::ExtendedPictographic => extended_pictographic(),
        UnicodeProperty::Extender => extender(),
        UnicodeProperty::GraphemeBase => grapheme_base(),
        UnicodeProperty::GraphemeExtend => grapheme_extend(),
        UnicodeProperty::HexDigit => hex_digit(),
        UnicodeProperty::IdsBinaryOperator => ids_binary_operator(),
        UnicodeProperty::IdsTrinaryOperator => ids_trinary_operator(),
        UnicodeProperty::IdContinue => id_continue(),
        UnicodeProperty::IdStart => id_start(),
        UnicodeProperty::Ideographic => ideographic(),
        UnicodeProperty::JoinControl => join_control(),
        UnicodeProperty::LogicalOrderException => logical_order_exception(),
        UnicodeProperty::Lowercase => lowercase(),
        UnicodeProperty::Math => math(),
        UnicodeProperty::NoncharacterCodePoint => noncharacter_code_point(),
        UnicodeProperty::PatternSyntax => pattern_syntax(),
        UnicodeProperty::PatternWhiteSpace => pattern_white_space(),
        UnicodeProperty::QuotationMark => quotation_mark(),
        UnicodeProperty::Radical => radical(),
        UnicodeProperty::RegionalIndicator => regional_indicator(),
        UnicodeProperty::SentenceTerminal => sentence_terminal(),
        UnicodeProperty::SoftDotted => soft_dotted(),
        UnicodeProperty::TerminalPunctuation => terminal_punctuation(),
        UnicodeProperty::UnifiedIdeograph => unified_ideograph(),
        UnicodeProperty::Uppercase => uppercase(),
        UnicodeProperty::VariationSelector => variation_selector(),
        UnicodeProperty::WhiteSpace => white_space(),
        UnicodeProperty::XidContinue => xid_continue(),
        UnicodeProperty::XidStart => xid_start(),
        // The name parsed, but it is not one of the binary properties served by this function.
        _ => return Err(PropertiesError::UnexpectedPropertyName),
    })
}
| 2110 | |
| 2111 | icu_provider::gen_any_buffer_data_constructors!( |
| 2112 | locale: skip, |
| 2113 | name: &str, |
| 2114 | result: Result<CodePointSetData, PropertiesError>, |
| 2115 | #[cfg (skip)] |
| 2116 | functions: [ |
| 2117 | load_for_ecma262, |
| 2118 | load_for_ecma262_with_any_provider, |
| 2119 | load_for_ecma262_with_buffer_provider, |
| 2120 | load_for_ecma262_unstable, |
| 2121 | ] |
| 2122 | ); |
| 2123 | |
| 2124 | #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, load_for_ecma262)] |
| 2125 | pub fn load_for_ecma262_unstable<P>( |
| 2126 | provider: &P, |
| 2127 | name: &str, |
| 2128 | ) -> Result<CodePointSetData, PropertiesError> |
| 2129 | where |
| 2130 | P: ?Sized |
| 2131 | + DataProvider<AsciiHexDigitV1Marker> |
| 2132 | + DataProvider<AlphabeticV1Marker> |
| 2133 | + DataProvider<BidiControlV1Marker> |
| 2134 | + DataProvider<BidiMirroredV1Marker> |
| 2135 | + DataProvider<CaseIgnorableV1Marker> |
| 2136 | + DataProvider<CasedV1Marker> |
| 2137 | + DataProvider<ChangesWhenCasefoldedV1Marker> |
| 2138 | + DataProvider<ChangesWhenCasemappedV1Marker> |
| 2139 | + DataProvider<ChangesWhenLowercasedV1Marker> |
| 2140 | + DataProvider<ChangesWhenNfkcCasefoldedV1Marker> |
| 2141 | + DataProvider<ChangesWhenTitlecasedV1Marker> |
| 2142 | + DataProvider<ChangesWhenUppercasedV1Marker> |
| 2143 | + DataProvider<DashV1Marker> |
| 2144 | + DataProvider<DefaultIgnorableCodePointV1Marker> |
| 2145 | + DataProvider<DeprecatedV1Marker> |
| 2146 | + DataProvider<DiacriticV1Marker> |
| 2147 | + DataProvider<EmojiV1Marker> |
| 2148 | + DataProvider<EmojiComponentV1Marker> |
| 2149 | + DataProvider<EmojiModifierV1Marker> |
| 2150 | + DataProvider<EmojiModifierBaseV1Marker> |
| 2151 | + DataProvider<EmojiPresentationV1Marker> |
| 2152 | + DataProvider<ExtendedPictographicV1Marker> |
| 2153 | + DataProvider<ExtenderV1Marker> |
| 2154 | + DataProvider<GraphemeBaseV1Marker> |
| 2155 | + DataProvider<GraphemeExtendV1Marker> |
| 2156 | + DataProvider<HexDigitV1Marker> |
| 2157 | + DataProvider<IdsBinaryOperatorV1Marker> |
| 2158 | + DataProvider<IdsTrinaryOperatorV1Marker> |
| 2159 | + DataProvider<IdContinueV1Marker> |
| 2160 | + DataProvider<IdStartV1Marker> |
| 2161 | + DataProvider<IdeographicV1Marker> |
| 2162 | + DataProvider<JoinControlV1Marker> |
| 2163 | + DataProvider<LogicalOrderExceptionV1Marker> |
| 2164 | + DataProvider<LowercaseV1Marker> |
| 2165 | + DataProvider<MathV1Marker> |
| 2166 | + DataProvider<NoncharacterCodePointV1Marker> |
| 2167 | + DataProvider<PatternSyntaxV1Marker> |
| 2168 | + DataProvider<PatternWhiteSpaceV1Marker> |
| 2169 | + DataProvider<QuotationMarkV1Marker> |
| 2170 | + DataProvider<RadicalV1Marker> |
| 2171 | + DataProvider<RegionalIndicatorV1Marker> |
| 2172 | + DataProvider<SentenceTerminalV1Marker> |
| 2173 | + DataProvider<SoftDottedV1Marker> |
| 2174 | + DataProvider<TerminalPunctuationV1Marker> |
| 2175 | + DataProvider<UnifiedIdeographV1Marker> |
| 2176 | + DataProvider<UppercaseV1Marker> |
| 2177 | + DataProvider<VariationSelectorV1Marker> |
| 2178 | + DataProvider<WhiteSpaceV1Marker> |
| 2179 | + DataProvider<XidContinueV1Marker> |
| 2180 | + DataProvider<XidStartV1Marker>, |
| 2181 | { |
| 2182 | use crate::runtime::UnicodeProperty; |
| 2183 | |
| 2184 | let prop = if let Some(prop) = UnicodeProperty::parse_ecma262_name(name) { |
| 2185 | prop |
| 2186 | } else { |
| 2187 | return Err(PropertiesError::UnexpectedPropertyName); |
| 2188 | }; |
| 2189 | match prop { |
| 2190 | UnicodeProperty::AsciiHexDigit => load_ascii_hex_digit(provider), |
| 2191 | UnicodeProperty::Alphabetic => load_alphabetic(provider), |
| 2192 | UnicodeProperty::BidiControl => load_bidi_control(provider), |
| 2193 | UnicodeProperty::BidiMirrored => load_bidi_mirrored(provider), |
| 2194 | UnicodeProperty::CaseIgnorable => load_case_ignorable(provider), |
| 2195 | UnicodeProperty::Cased => load_cased(provider), |
| 2196 | UnicodeProperty::ChangesWhenCasefolded => load_changes_when_casefolded(provider), |
| 2197 | UnicodeProperty::ChangesWhenCasemapped => load_changes_when_casemapped(provider), |
| 2198 | UnicodeProperty::ChangesWhenLowercased => load_changes_when_lowercased(provider), |
| 2199 | UnicodeProperty::ChangesWhenNfkcCasefolded => load_changes_when_nfkc_casefolded(provider), |
| 2200 | UnicodeProperty::ChangesWhenTitlecased => load_changes_when_titlecased(provider), |
| 2201 | UnicodeProperty::ChangesWhenUppercased => load_changes_when_uppercased(provider), |
| 2202 | UnicodeProperty::Dash => load_dash(provider), |
| 2203 | UnicodeProperty::DefaultIgnorableCodePoint => load_default_ignorable_code_point(provider), |
| 2204 | UnicodeProperty::Deprecated => load_deprecated(provider), |
| 2205 | UnicodeProperty::Diacritic => load_diacritic(provider), |
| 2206 | UnicodeProperty::Emoji => load_emoji(provider), |
| 2207 | UnicodeProperty::EmojiComponent => load_emoji_component(provider), |
| 2208 | UnicodeProperty::EmojiModifier => load_emoji_modifier(provider), |
| 2209 | UnicodeProperty::EmojiModifierBase => load_emoji_modifier_base(provider), |
| 2210 | UnicodeProperty::EmojiPresentation => load_emoji_presentation(provider), |
| 2211 | UnicodeProperty::ExtendedPictographic => load_extended_pictographic(provider), |
| 2212 | UnicodeProperty::Extender => load_extender(provider), |
| 2213 | UnicodeProperty::GraphemeBase => load_grapheme_base(provider), |
| 2214 | UnicodeProperty::GraphemeExtend => load_grapheme_extend(provider), |
| 2215 | UnicodeProperty::HexDigit => load_hex_digit(provider), |
| 2216 | UnicodeProperty::IdsBinaryOperator => load_ids_binary_operator(provider), |
| 2217 | UnicodeProperty::IdsTrinaryOperator => load_ids_trinary_operator(provider), |
| 2218 | UnicodeProperty::IdContinue => load_id_continue(provider), |
| 2219 | UnicodeProperty::IdStart => load_id_start(provider), |
| 2220 | UnicodeProperty::Ideographic => load_ideographic(provider), |
| 2221 | UnicodeProperty::JoinControl => load_join_control(provider), |
| 2222 | UnicodeProperty::LogicalOrderException => load_logical_order_exception(provider), |
| 2223 | UnicodeProperty::Lowercase => load_lowercase(provider), |
| 2224 | UnicodeProperty::Math => load_math(provider), |
| 2225 | UnicodeProperty::NoncharacterCodePoint => load_noncharacter_code_point(provider), |
| 2226 | UnicodeProperty::PatternSyntax => load_pattern_syntax(provider), |
| 2227 | UnicodeProperty::PatternWhiteSpace => load_pattern_white_space(provider), |
| 2228 | UnicodeProperty::QuotationMark => load_quotation_mark(provider), |
| 2229 | UnicodeProperty::Radical => load_radical(provider), |
| 2230 | UnicodeProperty::RegionalIndicator => load_regional_indicator(provider), |
| 2231 | UnicodeProperty::SentenceTerminal => load_sentence_terminal(provider), |
| 2232 | UnicodeProperty::SoftDotted => load_soft_dotted(provider), |
| 2233 | UnicodeProperty::TerminalPunctuation => load_terminal_punctuation(provider), |
| 2234 | UnicodeProperty::UnifiedIdeograph => load_unified_ideograph(provider), |
| 2235 | UnicodeProperty::Uppercase => load_uppercase(provider), |
| 2236 | UnicodeProperty::VariationSelector => load_variation_selector(provider), |
| 2237 | UnicodeProperty::WhiteSpace => load_white_space(provider), |
| 2238 | UnicodeProperty::XidContinue => load_xid_continue(provider), |
| 2239 | UnicodeProperty::XidStart => load_xid_start(provider), |
| 2240 | _ => Err(PropertiesError::UnexpectedPropertyName), |
| 2241 | } |
| 2242 | } |
| 2243 | |
| 2244 | #[cfg (test)] |
| 2245 | mod tests { |
| 2246 | |
/// Checks that the set built for the `Number` general-category group contains decimal digits
/// from several scripts and excludes a letter.
#[test]
fn test_general_category() {
    use icu::properties::sets;
    use icu::properties::GeneralCategoryGroup;

    let digits_data = sets::for_general_category_group(GeneralCategoryGroup::Number);
    let digits = digits_data.as_borrowed();

    assert!(digits.contains('5'));
    assert!(digits.contains('\u{0665}')); // U+0665 ARABIC-INDIC DIGIT FIVE
    assert!(digits.contains('\u{096b}')); // U+096B DEVANAGARI DIGIT FIVE

    assert!(!digits.contains('A'));
}
| 2261 | |
/// Checks that the set derived from the `Script` map for `Thai` contains Thai letters and
/// digits, and excludes both a Latin letter and the baht currency sign.
#[test]
fn test_script() {
    use icu::properties::maps;
    use icu::properties::Script;

    let thai_data = maps::script().get_set_for_value(Script::Thai);
    let thai = thai_data.as_borrowed();

    assert!(thai.contains('\u{0e01}')); // U+0E01 THAI CHARACTER KO KAI
    assert!(thai.contains('\u{0e50}')); // U+0E50 THAI DIGIT ZERO

    assert!(!thai.contains('A'));
    assert!(!thai.contains('\u{0e3f}')); // U+0E3F THAI CURRENCY SYMBOL BAHT
}
| 2276 | |
| 2277 | #[test ] |
| 2278 | fn test_gc_groupings() { |
| 2279 | use icu::properties::{maps, sets}; |
| 2280 | use icu::properties::{GeneralCategory, GeneralCategoryGroup}; |
| 2281 | use icu_collections::codepointinvlist::CodePointInversionListBuilder; |
| 2282 | |
| 2283 | let test_group = |category: GeneralCategoryGroup, subcategories: &[GeneralCategory]| { |
| 2284 | let category_set = sets::for_general_category_group(category); |
| 2285 | let category_set = category_set |
| 2286 | .as_code_point_inversion_list() |
| 2287 | .expect("The data should be valid" ); |
| 2288 | |
| 2289 | let mut builder = CodePointInversionListBuilder::new(); |
| 2290 | for subcategory in subcategories { |
| 2291 | let gc_set_data = &maps::general_category().get_set_for_value(*subcategory); |
| 2292 | let gc_set = gc_set_data.as_borrowed(); |
| 2293 | for range in gc_set.iter_ranges() { |
| 2294 | builder.add_range32(&range); |
| 2295 | } |
| 2296 | } |
| 2297 | let combined_set = builder.build(); |
| 2298 | println!("{category:?} {subcategories:?}" ); |
| 2299 | assert_eq!( |
| 2300 | category_set.get_inversion_list_vec(), |
| 2301 | combined_set.get_inversion_list_vec() |
| 2302 | ); |
| 2303 | }; |
| 2304 | |
| 2305 | test_group( |
| 2306 | GeneralCategoryGroup::Letter, |
| 2307 | &[ |
| 2308 | GeneralCategory::UppercaseLetter, |
| 2309 | GeneralCategory::LowercaseLetter, |
| 2310 | GeneralCategory::TitlecaseLetter, |
| 2311 | GeneralCategory::ModifierLetter, |
| 2312 | GeneralCategory::OtherLetter, |
| 2313 | ], |
| 2314 | ); |
| 2315 | test_group( |
| 2316 | GeneralCategoryGroup::Other, |
| 2317 | &[ |
| 2318 | GeneralCategory::Control, |
| 2319 | GeneralCategory::Format, |
| 2320 | GeneralCategory::Unassigned, |
| 2321 | GeneralCategory::PrivateUse, |
| 2322 | GeneralCategory::Surrogate, |
| 2323 | ], |
| 2324 | ); |
| 2325 | test_group( |
| 2326 | GeneralCategoryGroup::Mark, |
| 2327 | &[ |
| 2328 | GeneralCategory::SpacingMark, |
| 2329 | GeneralCategory::EnclosingMark, |
| 2330 | GeneralCategory::NonspacingMark, |
| 2331 | ], |
| 2332 | ); |
| 2333 | test_group( |
| 2334 | GeneralCategoryGroup::Number, |
| 2335 | &[ |
| 2336 | GeneralCategory::DecimalNumber, |
| 2337 | GeneralCategory::LetterNumber, |
| 2338 | GeneralCategory::OtherNumber, |
| 2339 | ], |
| 2340 | ); |
| 2341 | test_group( |
| 2342 | GeneralCategoryGroup::Punctuation, |
| 2343 | &[ |
| 2344 | GeneralCategory::ConnectorPunctuation, |
| 2345 | GeneralCategory::DashPunctuation, |
| 2346 | GeneralCategory::ClosePunctuation, |
| 2347 | GeneralCategory::FinalPunctuation, |
| 2348 | GeneralCategory::InitialPunctuation, |
| 2349 | GeneralCategory::OtherPunctuation, |
| 2350 | GeneralCategory::OpenPunctuation, |
| 2351 | ], |
| 2352 | ); |
| 2353 | test_group( |
| 2354 | GeneralCategoryGroup::Symbol, |
| 2355 | &[ |
| 2356 | GeneralCategory::CurrencySymbol, |
| 2357 | GeneralCategory::ModifierSymbol, |
| 2358 | GeneralCategory::MathSymbol, |
| 2359 | GeneralCategory::OtherSymbol, |
| 2360 | ], |
| 2361 | ); |
| 2362 | test_group( |
| 2363 | GeneralCategoryGroup::Separator, |
| 2364 | &[ |
| 2365 | GeneralCategory::LineSeparator, |
| 2366 | GeneralCategory::ParagraphSeparator, |
| 2367 | GeneralCategory::SpaceSeparator, |
| 2368 | ], |
| 2369 | ); |
| 2370 | } |
| 2371 | |
| 2372 | #[test ] |
| 2373 | fn test_gc_surrogate() { |
| 2374 | use icu::properties::maps; |
| 2375 | use icu::properties::GeneralCategory; |
| 2376 | |
| 2377 | let surrogates_data = |
| 2378 | maps::general_category().get_set_for_value(GeneralCategory::Surrogate); |
| 2379 | let surrogates = surrogates_data.as_borrowed(); |
| 2380 | |
| 2381 | assert!(surrogates.contains32(0xd800)); |
| 2382 | assert!(surrogates.contains32(0xd900)); |
| 2383 | assert!(surrogates.contains32(0xdfff)); |
| 2384 | |
| 2385 | assert!(!surrogates.contains('A' )); |
| 2386 | } |
| 2387 | } |
| 2388 | |