// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! Access to the Unicode properties or property-based operations that
//! are required for NFC and NFD.
//!
//! Applications should generally use the full normalizers that are
//! provided at the top level of this crate. However, the APIs in this
//! module are provided for callers such as HarfBuzz that specifically
//! want access to the raw canonical composition operation e.g. for use in a
//! glyph-availability-guided custom normalizer.
| 14 | use crate::char_from_u16; |
| 15 | use crate::error::NormalizerError; |
| 16 | use crate::in_inclusive_range; |
| 17 | use crate::provider::CanonicalCompositionsV1Marker; |
| 18 | use crate::provider::CanonicalDecompositionDataV1Marker; |
| 19 | use crate::provider::CanonicalDecompositionTablesV1Marker; |
| 20 | use crate::provider::NonRecursiveDecompositionSupplementV1Marker; |
| 21 | use crate::trie_value_has_ccc; |
| 22 | use crate::trie_value_indicates_special_non_starter_decomposition; |
| 23 | use crate::BACKWARD_COMBINING_STARTER_MARKER; |
| 24 | use crate::FDFA_MARKER; |
| 25 | use crate::HANGUL_L_BASE; |
| 26 | use crate::HANGUL_N_COUNT; |
| 27 | use crate::HANGUL_S_BASE; |
| 28 | use crate::HANGUL_S_COUNT; |
| 29 | use crate::HANGUL_T_BASE; |
| 30 | use crate::HANGUL_T_COUNT; |
| 31 | use crate::HANGUL_V_BASE; |
| 32 | use crate::NON_ROUND_TRIP_MARKER; |
| 33 | use crate::SPECIAL_NON_STARTER_DECOMPOSITION_MARKER_U16; |
// (Stray fragment of the module documentation above; kept as a regular
// comment so it does not read as item documentation for the `use` below.)
// want access to the underlying properties e.g. for use in a
// glyph-availability-guided custom normalizer.
| 36 | use icu_properties::CanonicalCombiningClass; |
| 37 | use icu_provider::prelude::*; |
| 38 | |
/// The raw canonical composition operation.
///
/// Callers should generally use `ComposingNormalizer` instead of this API.
/// However, this API is provided for callers such as HarfBuzz that specifically
/// want access to the raw canonical composition operation e.g. for use in a
/// glyph-availability-guided custom normalizer.
#[derive(Debug)]
pub struct CanonicalComposition {
    /// Payload holding the canonical composition pair table that
    /// `compose` searches.
    canonical_compositions: DataPayload<CanonicalCompositionsV1Marker>,
}
| 49 | |
#[cfg(feature = "compiled_data")]
impl Default for CanonicalComposition {
    /// Constructs from compiled data; equivalent to [`CanonicalComposition::new`].
    fn default() -> Self {
        CanonicalComposition::new()
    }
}
| 56 | |
impl CanonicalComposition {
    /// Performs canonical composition (including Hangul) on a pair of
    /// characters or returns `None` if these characters don't compose.
    /// Composition exclusions are taken into account.
    ///
    /// # Examples
    ///
    /// ```
    /// let comp = icu::normalizer::properties::CanonicalComposition::new();
    ///
    /// assert_eq!(comp.compose('a', 'b'), None); // Just two non-composing starters
    /// assert_eq!(comp.compose('a', '\u{0308}'), Some('ä'));
    /// assert_eq!(comp.compose('ẹ', '\u{0302}'), Some('ệ'));
    /// assert_eq!(comp.compose('𝅗', '𝅥'), None); // Composition exclusion
    /// assert_eq!(comp.compose('ে', 'া'), Some('ো')); // Second is starter
    /// assert_eq!(comp.compose('ᄀ', 'ᅡ'), Some('가')); // Hangul LV
    /// assert_eq!(comp.compose('가', 'ᆨ'), Some('각')); // Hangul LVT
    /// ```
    #[inline(always)]
    pub fn compose(&self, starter: char, second: char) -> Option<char> {
        // Delegates to the crate-level composition search over the
        // composition pair table in the payload.
        crate::compose(
            self.canonical_compositions
                .get()
                .canonical_compositions
                .iter(),
            starter,
            second,
        )
    }

    /// Constructs a new `CanonicalComposition` using compiled data.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    #[cfg(feature = "compiled_data")]
    pub const fn new() -> Self {
        Self {
            canonical_compositions: DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_NORMALIZER_COMP_V1,
            ),
        }
    }

    // Generates `try_new_with_any_provider` and `try_new_with_buffer_provider`
    // from `try_new_unstable` below.
    icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: NormalizerError,
        #[cfg(skip)]
        functions: [
            new,
            try_new_with_any_provider,
            try_new_with_buffer_provider,
            try_new_unstable,
            Self,
        ]
    );

    #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
    pub fn try_new_unstable<D>(provider: &D) -> Result<Self, NormalizerError>
    where
        D: DataProvider<CanonicalCompositionsV1Marker> + ?Sized,
    {
        // Loads the singleton composition table from the provider.
        let canonical_compositions: DataPayload<CanonicalCompositionsV1Marker> =
            provider.load(Default::default())?.take_payload()?;
        Ok(CanonicalComposition {
            canonical_compositions,
        })
    }
}
| 124 | |
/// The outcome of non-recursive canonical decomposition of a character.
///
/// Note: decomposition here is a single step; the results are not
/// themselves decomposed further.
#[allow(clippy::exhaustive_enums)]
#[derive(Debug, PartialEq, Eq)]
pub enum Decomposed {
    /// The character is its own canonical decomposition.
    Default,
    /// The character decomposes to a single different character.
    Singleton(char),
    /// The character decomposes to two characters.
    Expansion(char, char),
}
| 136 | |
/// The raw (non-recursive) canonical decomposition operation.
///
/// Callers should generally use `DecomposingNormalizer` instead of this API.
/// However, this API is provided for callers such as HarfBuzz that specifically
/// want access to non-recursive canonical decomposition e.g. for use in a
/// glyph-availability-guided custom normalizer.
#[derive(Debug)]
pub struct CanonicalDecomposition {
    /// Per-character trie mapping to decomposition trie values.
    decompositions: DataPayload<CanonicalDecompositionDataV1Marker>,
    /// Storage for complex (multi-character / supplementary) decompositions.
    tables: DataPayload<CanonicalDecompositionTablesV1Marker>,
    /// Supplementary data for decompositions that the recursive data
    /// cannot represent non-recursively (e.g. oxia singletons).
    non_recursive: DataPayload<NonRecursiveDecompositionSupplementV1Marker>,
}
| 149 | |
#[cfg(feature = "compiled_data")]
impl Default for CanonicalDecomposition {
    /// Constructs from compiled data; equivalent to [`CanonicalDecomposition::new`].
    fn default() -> Self {
        CanonicalDecomposition::new()
    }
}
| 156 | |
impl CanonicalDecomposition {
    /// Performs non-recursive canonical decomposition (including for Hangul).
    ///
    /// ```
    /// use icu::normalizer::properties::Decomposed;
    /// let decomp = icu::normalizer::properties::CanonicalDecomposition::new();
    ///
    /// assert_eq!(decomp.decompose('e'), Decomposed::Default);
    /// assert_eq!(
    ///     decomp.decompose('ệ'),
    ///     Decomposed::Expansion('ẹ', '\u{0302}')
    /// );
    /// assert_eq!(decomp.decompose('각'), Decomposed::Expansion('가', 'ᆨ'));
    /// assert_eq!(decomp.decompose('\u{212B}'), Decomposed::Singleton('Å')); // ANGSTROM SIGN
    /// assert_eq!(decomp.decompose('\u{2126}'), Decomposed::Singleton('Ω')); // OHM SIGN
    /// assert_eq!(decomp.decompose('\u{1F71}'), Decomposed::Singleton('ά')); // oxia
    /// ```
    #[inline]
    pub fn decompose(&self, c: char) -> Decomposed {
        // Offset of `c` within the precomposed Hangul syllable block;
        // wraps around for characters below the block, which the range
        // check below rejects.
        let lvt = u32::from(c).wrapping_sub(HANGUL_S_BASE);
        if lvt >= HANGUL_S_COUNT {
            // Not a precomposed Hangul syllable; use the data-driven path.
            return self.decompose_non_hangul(c);
        }
        let t = lvt % HANGUL_T_COUNT;
        if t == 0 {
            // LV syllable: decompose arithmetically into lead consonant
            // and vowel jamo.
            let l = lvt / HANGUL_N_COUNT;
            let v = (lvt % HANGUL_N_COUNT) / HANGUL_T_COUNT;
            // Safe because values known to be in range
            return Decomposed::Expansion(
                unsafe { char::from_u32_unchecked(HANGUL_L_BASE + l) },
                unsafe { char::from_u32_unchecked(HANGUL_V_BASE + v) },
            );
        }
        // LVT syllable: one non-recursive step yields the LV syllable
        // plus the trailing consonant jamo.
        let lv = lvt - t;
        // Safe because values known to be in range
        Decomposed::Expansion(
            unsafe { char::from_u32_unchecked(HANGUL_S_BASE + lv) },
            unsafe { char::from_u32_unchecked(HANGUL_T_BASE + t) },
        )
    }

    /// Performs non-recursive canonical decomposition except Hangul syllables
    /// are reported as `Decomposed::Default`.
    #[inline(always)]
    fn decompose_non_hangul(&self, c: char) -> Decomposed {
        let decomposition = self.decompositions.get().trie.get(c);
        // Trie values at or below this marker mean the character
        // decomposes to itself.
        if decomposition <= BACKWARD_COMBINING_STARTER_MARKER {
            return Decomposed::Default;
        }
        // The loop is only broken out of as goto forward
        #[allow(clippy::never_loop)]
        loop {
            // High half: either a BMP trail character or complex-
            // decomposition metadata; low half: BMP lead character
            // (or a marker value).
            let trail_or_complex = (decomposition >> 16) as u16;
            let lead = decomposition as u16;
            if lead > NON_ROUND_TRIP_MARKER && trail_or_complex != 0 {
                // Decomposition into two BMP characters: starter and non-starter
                if in_inclusive_range(c, '\u{1F71}', '\u{1FFB}') {
                    // Look in the other trie due to oxia singleton
                    // mappings to corresponding character with tonos.
                    break;
                }
                return Decomposed::Expansion(char_from_u16(lead), char_from_u16(trail_or_complex));
            }
            if lead > NON_ROUND_TRIP_MARKER {
                // Decomposition into one BMP character or non-starter
                debug_assert_ne!(
                    lead, FDFA_MARKER,
                    "How come we got the U+FDFA NFKD marker here?"
                );
                if lead == SPECIAL_NON_STARTER_DECOMPOSITION_MARKER_U16 {
                    // Non-starter with a special (hard-coded) decomposition.
                    if !in_inclusive_range(c, '\u{0340}', '\u{0F81}') {
                        return Decomposed::Default;
                    }
                    return match c {
                        '\u{0340}' => {
                            // COMBINING GRAVE TONE MARK
                            Decomposed::Singleton('\u{0300}')
                        }
                        '\u{0341}' => {
                            // COMBINING ACUTE TONE MARK
                            Decomposed::Singleton('\u{0301}')
                        }
                        '\u{0343}' => {
                            // COMBINING GREEK KORONIS
                            Decomposed::Singleton('\u{0313}')
                        }
                        '\u{0344}' => {
                            // COMBINING GREEK DIALYTIKA TONOS
                            Decomposed::Expansion('\u{0308}', '\u{0301}')
                        }
                        '\u{0F73}' => {
                            // TIBETAN VOWEL SIGN II
                            Decomposed::Expansion('\u{0F71}', '\u{0F72}')
                        }
                        '\u{0F75}' => {
                            // TIBETAN VOWEL SIGN UU
                            Decomposed::Expansion('\u{0F71}', '\u{0F74}')
                        }
                        '\u{0F81}' => {
                            // TIBETAN VOWEL SIGN REVERSED II
                            Decomposed::Expansion('\u{0F71}', '\u{0F80}')
                        }
                        _ => Decomposed::Default,
                    };
                }
                return Decomposed::Singleton(char_from_u16(lead));
            }
            // The recursive decomposition of ANGSTROM SIGN is in the complex
            // decomposition structure to avoid a branch in `potential_passthrough`
            // for the BMP case.
            if c == '\u{212B}' {
                // ANGSTROM SIGN
                return Decomposed::Singleton('\u{00C5}');
            }
            // Complex decomposition
            // Format for 16-bit value:
            // 15..13: length minus two for 16-bit case and length minus one for
            //         the 32-bit case. Length 8 needs to fit in three bits in
            //         the 16-bit case, and this way the value is future-proofed
            //         up to 9 in the 16-bit case. Zero is unused and length one
            //         in the 16-bit case goes directly into the trie.
            //     12: 1 if all trailing characters are guaranteed non-starters,
            //         0 if no guarantees about non-starterness.
            //         Note: The bit choice is this way around to allow for
            //         dynamically falling back to not having this but instead
            //         having one more bit for length by merely choosing
            //         different masks.
            //  11..0: Start offset in storage. The offset is to the logical
            //         sequence of scalars16, scalars32, supplementary_scalars16,
            //         supplementary_scalars32.
            let offset = usize::from(trail_or_complex & 0xFFF);
            let tables = self.tables.get();
            if offset < tables.scalars16.len() {
                if usize::from(trail_or_complex >> 13) != 0 {
                    // i.e. logical len isn't 2
                    break;
                }
                if let Some(first) = tables.scalars16.get(offset) {
                    if let Some(second) = tables.scalars16.get(offset + 1) {
                        // Two BMP starters
                        return Decomposed::Expansion(char_from_u16(first), char_from_u16(second));
                    }
                }
                // GIGO case
                debug_assert!(false);
                return Decomposed::Default;
            }
            let len = usize::from(trail_or_complex >> 13) + 1;
            if len > 2 {
                // Longer than two characters cannot be reported through
                // `Decomposed`; fall through to the supplement trie.
                break;
            }
            let offset24 = offset - tables.scalars16.len();
            if let Some(first_c) = tables.scalars24.get(offset24) {
                if len == 1 {
                    if c != first_c {
                        return Decomposed::Singleton(first_c);
                    } else {
                        // Singleton representation used to avoid
                        // NFC passthrough of characters that combine
                        // with starters that can occur as the first
                        // character of an expansion decomposition.
                        // See section 5 of
                        // https://www.unicode.org/L2/L2024/24009-utc178-properties-recs.pdf
                        return Decomposed::Default;
                    }
                }
                if let Some(second_c) = tables.scalars24.get(offset24 + 1) {
                    return Decomposed::Expansion(first_c, second_c);
                }
            }
            // GIGO case
            debug_assert!(false);
            return Decomposed::Default;
        }
        // Fallback: consult the non-recursive decomposition supplement
        // (reached via the `break`s above).
        let non_recursive = self.non_recursive.get();
        let non_recursive_decomposition = non_recursive.trie.get(c);
        if non_recursive_decomposition == 0 {
            // GIGO case
            debug_assert!(false);
            return Decomposed::Default;
        }
        let trail_or_complex = (non_recursive_decomposition >> 16) as u16;
        let lead = non_recursive_decomposition as u16;
        if lead != 0 && trail_or_complex != 0 {
            // Decomposition into two BMP characters
            return Decomposed::Expansion(char_from_u16(lead), char_from_u16(trail_or_complex));
        }
        if lead != 0 {
            // Decomposition into one BMP character
            return Decomposed::Singleton(char_from_u16(lead));
        }
        // Decomposition into two non-BMP characters
        // Low is offset into a table plus one to keep it non-zero.
        let offset = usize::from(trail_or_complex - 1);
        if let Some(first) = non_recursive.scalars24.get(offset) {
            if let Some(second) = non_recursive.scalars24.get(offset + 1) {
                return Decomposed::Expansion(first, second);
            }
        }
        // GIGO case
        debug_assert!(false);
        Decomposed::Default
    }

    /// Construct from compiled data.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    #[cfg(feature = "compiled_data")]
    pub const fn new() -> Self {
        // Compile-time analogue of the runtime 0xFFF check in
        // `try_new_unstable`: the 12-bit offset field must be able to
        // address all complex decompositions.
        const _: () = assert!(
            crate::provider::Baked::SINGLETON_NORMALIZER_NFDEX_V1
                .scalars16
                .const_len()
                + crate::provider::Baked::SINGLETON_NORMALIZER_NFDEX_V1
                    .scalars24
                    .const_len()
                <= 0xFFF,
            "NormalizerError::FutureExtension"
        );

        Self {
            decompositions: DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_NORMALIZER_NFD_V1,
            ),
            tables: DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_NORMALIZER_NFDEX_V1,
            ),
            non_recursive: DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_NORMALIZER_DECOMP_V1,
            ),
        }
    }

    // Generates `try_new_with_any_provider` and `try_new_with_buffer_provider`
    // from `try_new_unstable` below.
    icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: NormalizerError,
        #[cfg(skip)]
        functions: [
            new,
            try_new_with_any_provider,
            try_new_with_buffer_provider,
            try_new_unstable,
            Self,
        ]
    );

    #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
    pub fn try_new_unstable<D>(provider: &D) -> Result<Self, NormalizerError>
    where
        D: DataProvider<CanonicalDecompositionDataV1Marker>
            + DataProvider<CanonicalDecompositionTablesV1Marker>
            + DataProvider<NonRecursiveDecompositionSupplementV1Marker>
            + ?Sized,
    {
        let decompositions: DataPayload<CanonicalDecompositionDataV1Marker> =
            provider.load(Default::default())?.take_payload()?;
        let tables: DataPayload<CanonicalDecompositionTablesV1Marker> =
            provider.load(Default::default())?.take_payload()?;

        if tables.get().scalars16.len() + tables.get().scalars24.len() > 0xFFF {
            // The data is from a future where there exists a normalization flavor whose
            // complex decompositions take more than 0xFFF but fewer than 0x1FFF code points
            // of space. If a good use case from such a decomposition flavor arises, we can
            // dynamically change the bit masks so that the length mask becomes 0x1FFF instead
            // of 0xFFF and the all-non-starters mask becomes 0 instead of 0x1000. However,
            // since for now the masks are hard-coded, error out.
            return Err(NormalizerError::FutureExtension);
        }

        let non_recursive: DataPayload<NonRecursiveDecompositionSupplementV1Marker> =
            provider.load(Default::default())?.take_payload()?;

        Ok(CanonicalDecomposition {
            decompositions,
            tables,
            non_recursive,
        })
    }
}
| 437 | |
/// Lookup of the Canonical_Combining_Class Unicode property.
///
/// # Example
///
/// ```
/// use icu::properties::CanonicalCombiningClass;
/// use icu::normalizer::properties::CanonicalCombiningClassMap;
///
/// let map = CanonicalCombiningClassMap::new();
/// assert_eq!(map.get('a'), CanonicalCombiningClass::NotReordered); // U+0061: LATIN SMALL LETTER A
/// assert_eq!(map.get32(0x0301), CanonicalCombiningClass::Above); // U+0301: COMBINING ACUTE ACCENT
/// ```
#[derive(Debug)]
pub struct CanonicalCombiningClassMap {
    /// The data trie (shared with canonical decomposition data; the CCC
    /// is carried in the trie value).
    decompositions: DataPayload<CanonicalDecompositionDataV1Marker>,
}
| 455 | |
#[cfg(feature = "compiled_data")]
impl Default for CanonicalCombiningClassMap {
    /// Constructs from compiled data; equivalent to [`CanonicalCombiningClassMap::new`].
    fn default() -> Self {
        CanonicalCombiningClassMap::new()
    }
}
| 462 | |
impl CanonicalCombiningClassMap {
    /// Look up the canonical combining class for a scalar value
    #[inline(always)]
    pub fn get(&self, c: char) -> CanonicalCombiningClass {
        self.get32(u32::from(c))
    }

    /// Look up the canonical combining class for a scalar value
    /// represented as `u32`. If the argument is outside the scalar
    /// value range, `CanonicalCombiningClass::NotReordered` is returned.
    pub fn get32(&self, c: u32) -> CanonicalCombiningClass {
        let trie_value = self.decompositions.get().trie.get32(c);
        if trie_value_has_ccc(trie_value) {
            // The CCC is stored in the low byte of the trie value.
            CanonicalCombiningClass(trie_value as u8)
        } else if trie_value_indicates_special_non_starter_decomposition(trie_value) {
            // Characters with special non-starter decompositions don't
            // carry their CCC in the trie value; the tone marks /
            // dialytika-tonos cases are ccc=230 (Above).
            match c {
                0x0340 | 0x0341 | 0x0343 | 0x0344 => CanonicalCombiningClass::Above,
                _ => CanonicalCombiningClass::NotReordered,
            }
        } else {
            CanonicalCombiningClass::NotReordered
        }
    }

    /// Construct from compiled data.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    #[cfg(feature = "compiled_data")]
    pub const fn new() -> Self {
        CanonicalCombiningClassMap {
            decompositions: DataPayload::from_static_ref(
                crate::provider::Baked::SINGLETON_NORMALIZER_NFD_V1,
            ),
        }
    }

    // Generates `try_new_with_any_provider` and `try_new_with_buffer_provider`
    // from `try_new_unstable` below.
    icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: NormalizerError,
        #[cfg(skip)]
        functions: [
            new,
            try_new_with_any_provider,
            try_new_with_buffer_provider,
            try_new_unstable,
            Self,
        ]);

    #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)]
    pub fn try_new_unstable<D>(provider: &D) -> Result<Self, NormalizerError>
    where
        D: DataProvider<CanonicalDecompositionDataV1Marker> + ?Sized,
    {
        let decompositions: DataPayload<CanonicalDecompositionDataV1Marker> =
            provider.load(Default::default())?.take_payload()?;
        Ok(CanonicalCombiningClassMap { decompositions })
    }
}
| 521 | |