1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | //! The functions in this module return a [`CodePointMapData`] representing, for |
6 | //! each code point in the entire range of code points, the property values |
7 | //! for a particular Unicode property. |
8 | //! |
9 | //! The descriptions of most properties are taken from [`TR44`], the documentation for the |
10 | //! Unicode Character Database. |
11 | //! |
12 | //! [`TR44`]: https://www.unicode.org/reports/tr44 |
13 | |
14 | #[cfg (doc)] |
15 | use super::*; |
16 | use crate::error::PropertiesError; |
17 | use crate::provider::*; |
18 | use crate::sets::CodePointSetData; |
19 | use core::marker::PhantomData; |
20 | use core::ops::RangeInclusive; |
21 | use icu_collections::codepointtrie::{CodePointMapRange, CodePointTrie, TrieValue}; |
22 | use icu_provider::prelude::*; |
23 | use zerovec::ZeroVecError; |
24 | |
25 | /// A wrapper around code point map data. It is returned by APIs that return Unicode |
26 | /// property data in a map-like form, ex: enumerated property value data keyed |
27 | /// by code point. Access its data via the borrowed version, |
28 | /// [`CodePointMapDataBorrowed`]. |
29 | #[derive (Debug, Clone)] |
30 | pub struct CodePointMapData<T: TrieValue> { |
31 | data: DataPayload<ErasedMaplikeMarker<T>>, |
32 | } |
33 | |
34 | /// Private marker type for CodePointMapData |
35 | /// to work for all same-value map properties at once |
36 | #[derive (Clone, Copy, PartialEq, Eq, Hash, Debug)] |
37 | struct ErasedMaplikeMarker<T>(PhantomData<T>); |
38 | impl<T: TrieValue> DataMarker for ErasedMaplikeMarker<T> { |
39 | type Yokeable = PropertyCodePointMapV1<'static, T>; |
40 | } |
41 | |
42 | impl<T: TrieValue> CodePointMapData<T> { |
43 | /// Construct a borrowed version of this type that can be queried. |
44 | /// |
45 | /// This avoids a potential small underlying cost per API call (like `get()`) by consolidating it |
46 | /// up front. |
47 | /// |
48 | /// This owned version if returned by functions that use a runtime data provider. |
49 | #[inline ] |
50 | pub fn as_borrowed(&self) -> CodePointMapDataBorrowed<'_, T> { |
51 | CodePointMapDataBorrowed { |
52 | map: self.data.get(), |
53 | } |
54 | } |
55 | |
56 | /// Convert this map to a map around another type |
57 | /// |
58 | /// Typically useful for type-erasing maps into maps around integers. |
59 | /// |
60 | /// # Panics |
61 | /// Will panic if T and P are different sizes |
62 | /// |
63 | /// # Example |
64 | /// |
65 | /// ``` |
66 | /// use icu::properties::{maps, GeneralCategory}; |
67 | /// |
68 | /// let data = maps::general_category().static_to_owned(); |
69 | /// |
70 | /// let gc = data.try_into_converted::<u8>().unwrap(); |
71 | /// let gc = gc.as_borrowed(); |
72 | /// |
73 | /// assert_eq!(gc.get('ζ¨' ), GeneralCategory::OtherLetter as u8); // U+6728 |
74 | /// assert_eq!(gc.get('π' ), GeneralCategory::OtherSymbol as u8); // U+1F383 JACK-O-LANTERN |
75 | /// ``` |
76 | pub fn try_into_converted<P>(self) -> Result<CodePointMapData<P>, ZeroVecError> |
77 | where |
78 | P: TrieValue, |
79 | { |
80 | self.data |
81 | .try_map_project::<ErasedMaplikeMarker<P>, _, _>(move |data, _| { |
82 | data.try_into_converted() |
83 | }) |
84 | .map(CodePointMapData::from_data) |
85 | } |
86 | |
87 | /// Construct a new one from loaded data |
88 | /// |
89 | /// Typically it is preferable to use getters like [`load_general_category()`] instead |
90 | pub fn from_data<M>(data: DataPayload<M>) -> Self |
91 | where |
92 | M: DataMarker<Yokeable = PropertyCodePointMapV1<'static, T>>, |
93 | { |
94 | Self { data: data.cast() } |
95 | } |
96 | |
97 | /// Construct a new one an owned [`CodePointTrie`] |
98 | pub fn from_code_point_trie(trie: CodePointTrie<'static, T>) -> Self { |
99 | let set = PropertyCodePointMapV1::from_code_point_trie(trie); |
100 | CodePointMapData::from_data(DataPayload::<ErasedMaplikeMarker<T>>::from_owned(set)) |
101 | } |
102 | |
103 | /// Convert this type to a [`CodePointTrie`] as a borrowed value. |
104 | /// |
105 | /// The data backing this is extensible and supports multiple implementations. |
106 | /// Currently it is always [`CodePointTrie`]; however in the future more backends may be |
107 | /// added, and users may select which at data generation time. |
108 | /// |
109 | /// This method returns an `Option` in order to return `None` when the backing data provider |
110 | /// cannot return a [`CodePointTrie`], or cannot do so within the expected constant time |
111 | /// constraint. |
112 | pub fn as_code_point_trie(&self) -> Option<&CodePointTrie<'_, T>> { |
113 | self.data.get().as_code_point_trie() |
114 | } |
115 | |
116 | /// Convert this type to a [`CodePointTrie`], borrowing if possible, |
117 | /// otherwise allocating a new [`CodePointTrie`]. |
118 | /// |
119 | /// The data backing this is extensible and supports multiple implementations. |
120 | /// Currently it is always [`CodePointTrie`]; however in the future more backends may be |
121 | /// added, and users may select which at data generation time. |
122 | /// |
123 | /// The performance of the conversion to this specific return type will vary |
124 | /// depending on the data structure that is backing `self`. |
125 | pub fn to_code_point_trie(&self) -> CodePointTrie<'_, T> { |
126 | self.data.get().to_code_point_trie() |
127 | } |
128 | } |
129 | |
130 | /// A borrowed wrapper around code point set data, returned by |
131 | /// [`CodePointSetData::as_borrowed()`]. More efficient to query. |
132 | #[derive (Clone, Copy, Debug)] |
133 | pub struct CodePointMapDataBorrowed<'a, T: TrieValue> { |
134 | map: &'a PropertyCodePointMapV1<'a, T>, |
135 | } |
136 | |
137 | impl<'a, T: TrieValue> CodePointMapDataBorrowed<'a, T> { |
138 | /// Get the value this map has associated with code point `ch` |
139 | /// |
140 | /// # Example |
141 | /// |
142 | /// ``` |
143 | /// use icu::properties::{maps, GeneralCategory}; |
144 | /// |
145 | /// let gc = maps::general_category(); |
146 | /// |
147 | /// assert_eq!(gc.get('ζ¨' ), GeneralCategory::OtherLetter); // U+6728 |
148 | /// assert_eq!(gc.get('π' ), GeneralCategory::OtherSymbol); // U+1F383 JACK-O-LANTERN |
149 | /// ``` |
150 | pub fn get(self, ch: char) -> T { |
151 | self.map.get32(ch as u32) |
152 | } |
153 | |
154 | /// Get the value this map has associated with code point `ch` |
155 | /// |
156 | /// # Example |
157 | /// |
158 | /// ``` |
159 | /// use icu::properties::{maps, GeneralCategory}; |
160 | /// |
161 | /// let gc = maps::general_category(); |
162 | /// |
163 | /// assert_eq!(gc.get32(0x6728), GeneralCategory::OtherLetter); // U+6728 (ζ¨) |
164 | /// assert_eq!(gc.get32(0x1F383), GeneralCategory::OtherSymbol); // U+1F383 JACK-O-LANTERN |
165 | /// ``` |
166 | pub fn get32(self, ch: u32) -> T { |
167 | self.map.get32(ch) |
168 | } |
169 | |
170 | /// Get a [`CodePointSetData`] for all elements corresponding to a particular value |
171 | /// |
172 | /// # Example |
173 | /// |
174 | /// ``` |
175 | /// use icu::properties::{maps, GeneralCategory}; |
176 | /// |
177 | /// let gc = maps::general_category(); |
178 | /// |
179 | /// let other_letter_set_data = |
180 | /// gc.get_set_for_value(GeneralCategory::OtherLetter); |
181 | /// let other_letter_set = other_letter_set_data.as_borrowed(); |
182 | /// |
183 | /// assert!(other_letter_set.contains('ζ¨' )); // U+6728 |
184 | /// assert!(!other_letter_set.contains('π' )); // U+1F383 JACK-O-LANTERN |
185 | /// ``` |
186 | pub fn get_set_for_value(self, value: T) -> CodePointSetData { |
187 | let set = self.map.get_set_for_value(value); |
188 | CodePointSetData::from_code_point_inversion_list(set) |
189 | } |
190 | |
191 | /// Yields an [`Iterator`] returning ranges of consecutive code points that |
192 | /// share the same value in the [`CodePointMapData`]. |
193 | /// |
194 | /// # Examples |
195 | /// |
196 | /// ``` |
197 | /// use icu::properties::maps; |
198 | /// use icu::properties::GeneralCategory; |
199 | /// |
200 | /// let gc = maps::general_category(); |
201 | /// let mut ranges = gc.iter_ranges(); |
202 | /// let next = ranges.next().unwrap(); |
203 | /// assert_eq!(next.range, 0..=31); |
204 | /// assert_eq!(next.value, GeneralCategory::Control); |
205 | /// let next = ranges.next().unwrap(); |
206 | /// assert_eq!(next.range, 32..=32); |
207 | /// assert_eq!(next.value, GeneralCategory::SpaceSeparator); |
208 | /// ``` |
209 | pub fn iter_ranges(self) -> impl Iterator<Item = CodePointMapRange<T>> + 'a { |
210 | self.map.iter_ranges() |
211 | } |
212 | |
213 | /// Yields an [`Iterator`] returning ranges of consecutive code points that |
214 | /// share the same value `v` in the [`CodePointMapData`]. |
215 | /// |
216 | /// # Examples |
217 | /// |
218 | /// |
219 | /// ``` |
220 | /// use icu::properties::maps; |
221 | /// use icu::properties::GeneralCategory; |
222 | /// |
223 | /// let gc = maps::general_category(); |
224 | /// let mut ranges = gc.iter_ranges_for_value(GeneralCategory::UppercaseLetter); |
225 | /// assert_eq!(ranges.next().unwrap(), 'A' as u32..='Z' as u32); |
226 | /// assert_eq!(ranges.next().unwrap(), 'Γ' as u32..='Γ' as u32); |
227 | /// assert_eq!(ranges.next().unwrap(), 'Γ' as u32..='Γ' as u32); |
228 | /// ``` |
229 | pub fn iter_ranges_for_value(self, val: T) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { |
230 | self.map |
231 | .iter_ranges() |
232 | .filter(move |r| r.value == val) |
233 | .map(|r| r.range) |
234 | } |
235 | |
236 | /// Yields an [`Iterator`] returning ranges of consecutive code points that |
237 | /// do *not* have the value `v` in the [`CodePointMapData`]. |
238 | pub fn iter_ranges_for_value_complemented( |
239 | self, |
240 | val: T, |
241 | ) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { |
242 | self.map |
243 | .iter_ranges_mapped(move |value| value != val) |
244 | .filter(|v| v.value) |
245 | .map(|v| v.range) |
246 | } |
247 | |
248 | /// Exposed for FFI needs, could be exposed in general in the future but we should |
249 | /// have a use case first. |
250 | /// |
251 | /// FFI needs this since it operates on erased maps and can't use `iter_ranges_for_group()` |
252 | #[doc (hidden)] |
253 | pub fn iter_ranges_mapped<U: Eq + 'a>( |
254 | self, |
255 | predicate: impl FnMut(T) -> U + Copy + 'a, |
256 | ) -> impl Iterator<Item = CodePointMapRange<U>> + 'a { |
257 | self.map.iter_ranges_mapped(predicate) |
258 | } |
259 | } |
260 | |
261 | impl<T: TrieValue> CodePointMapDataBorrowed<'static, T> { |
262 | /// Cheaply converts a [`CodePointMapDataBorrowed<'static>`] into a [`CodePointMapData`]. |
263 | /// |
264 | /// Note: Due to branching and indirection, using [`CodePointMapData`] might inhibit some |
265 | /// compile-time optimizations that are possible with [`CodePointMapDataBorrowed`]. |
266 | pub const fn static_to_owned(self) -> CodePointMapData<T> { |
267 | CodePointMapData { |
268 | data: DataPayload::from_static_ref(self.map), |
269 | } |
270 | } |
271 | } |
272 | |
273 | impl<'a> CodePointMapDataBorrowed<'a, crate::GeneralCategory> { |
274 | /// Yields an [`Iterator`] returning ranges of consecutive code points that |
275 | /// have a `General_Category` value belonging to the specified [`GeneralCategoryGroup`] |
276 | /// |
277 | /// # Examples |
278 | /// |
279 | /// |
280 | /// ``` |
281 | /// use core::ops::RangeInclusive; |
282 | /// use icu::properties::maps::{self, CodePointMapData}; |
283 | /// use icu::properties::GeneralCategoryGroup; |
284 | /// |
285 | /// let gc = maps::general_category(); |
286 | /// let mut ranges = gc.iter_ranges_for_group(GeneralCategoryGroup::Letter); |
287 | /// assert_eq!(ranges.next().unwrap(), 'A' as u32..='Z' as u32); |
288 | /// assert_eq!(ranges.next().unwrap(), 'a' as u32..='z' as u32); |
289 | /// assert_eq!(ranges.next().unwrap(), 'Βͺ' as u32..='Βͺ' as u32); |
290 | /// assert_eq!(ranges.next().unwrap(), 'Β΅' as u32..='Β΅' as u32); |
291 | /// assert_eq!(ranges.next().unwrap(), 'ΒΊ' as u32..='ΒΊ' as u32); |
292 | /// assert_eq!(ranges.next().unwrap(), 'Γ' as u32..='Γ' as u32); |
293 | /// assert_eq!(ranges.next().unwrap(), 'Γ' as u32..='ΓΆ' as u32); |
294 | /// ``` |
295 | pub fn iter_ranges_for_group( |
296 | self, |
297 | group: crate::GeneralCategoryGroup, |
298 | ) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { |
299 | self.map |
300 | .iter_ranges_mapped(move |value| group.contains(value)) |
301 | .filter(|v| v.value) |
302 | .map(|v| v.range) |
303 | } |
304 | } |
305 | |
// Generates, for one map property, a pair of constructors:
//
// * `$name`: loads the data from a caller-supplied `DataProvider` and returns
//   an owned `CodePointMapData`.
// * `$constname`: a `const fn`, gated on the `compiled_data` feature, that
//   returns a `CodePointMapDataBorrowed` over the baked singleton `$singleton`.
//
// The doc attributes written in the invocation are attached to `$constname`.
//
// NOTE(review): `$vis2` is captured but not used; the generated `$constname`
// function is always `pub`.
macro_rules! make_map_property {
    (
        // currently unused
        property: $prop_name:expr;
        // currently unused
        marker: $marker_name:ident;
        value: $value_ty:path;
        keyed_data_marker: $keyed_data_marker:ty;
        func:
        $(#[$doc:meta])*
        $vis2:vis const $constname:ident => $singleton:ident;
        $vis:vis fn $name:ident();
    ) => {
        #[doc = concat!("A version of [`", stringify!($constname), "()`] that uses custom data provided by a [`DataProvider`].")]
        ///
        /// Note that this will return an owned version of the data. Functionality is available on
        /// the borrowed version, accessible through [`CodePointMapData::as_borrowed`].
        ///
        /// [📚 Help choosing a constructor](icu_provider::constructors)
        $vis fn $name(
            provider: &(impl DataProvider<$keyed_data_marker> + ?Sized)
        ) -> Result<CodePointMapData<$value_ty>, PropertiesError> {
            // `?` converts the provider's error into `PropertiesError`.
            let payload = provider
                .load(Default::default())
                .and_then(DataResponse::take_payload)?;
            Ok(CodePointMapData::from_data(payload))
        }
        $(#[$doc])*
        #[cfg(feature = "compiled_data")]
        pub const fn $constname() -> CodePointMapDataBorrowed<'static, $value_ty> {
            CodePointMapDataBorrowed {
                map: crate::provider::Baked::$singleton
            }
        }
    };
}
339 | |
340 | make_map_property! { |
341 | property: "General_Category" ; |
342 | marker: GeneralCategoryProperty; |
343 | value: crate::GeneralCategory; |
344 | keyed_data_marker: GeneralCategoryV1Marker; |
345 | func: |
346 | /// Return a [`CodePointMapDataBorrowed`] for the General_Category Unicode enumerated property. See [`GeneralCategory`]. |
347 | /// |
348 | /// β¨ *Enabled with the `compiled_data` Cargo feature.* |
349 | /// |
350 | /// [π Help choosing a constructor](icu_provider::constructors) |
351 | /// |
352 | /// # Example |
353 | /// |
354 | /// ``` |
355 | /// use icu::properties::{maps, GeneralCategory}; |
356 | /// |
357 | /// assert_eq!(maps::general_category().get('ζ¨'), GeneralCategory::OtherLetter); // U+6728 |
358 | /// assert_eq!(maps::general_category().get('π'), GeneralCategory::OtherSymbol); // U+1F383 JACK-O-LANTERN |
359 | /// ``` |
360 | pub const general_category => SINGLETON_PROPS_GC_V1; |
361 | pub fn load_general_category(); |
362 | } |
363 | |
364 | make_map_property! { |
365 | property: "Bidi_Class" ; |
366 | marker: BidiClassProperty; |
367 | value: crate::BidiClass; |
368 | keyed_data_marker: BidiClassV1Marker; |
369 | func: |
370 | /// Return a [`CodePointMapDataBorrowed`] for the Bidi_Class Unicode enumerated property. See [`BidiClass`]. |
371 | /// |
372 | /// β¨ *Enabled with the `compiled_data` Cargo feature.* |
373 | /// |
374 | /// [π Help choosing a constructor](icu_provider::constructors) |
375 | /// |
376 | /// # Example |
377 | /// |
378 | /// ``` |
379 | /// use icu::properties::{maps, BidiClass}; |
380 | /// |
381 | /// assert_eq!(maps::bidi_class().get('y'), BidiClass::LeftToRight); // U+0079 |
382 | /// assert_eq!(maps::bidi_class().get('ΨΉ'), BidiClass::ArabicLetter); // U+0639 |
383 | /// ``` |
384 | pub const bidi_class => SINGLETON_PROPS_BC_V1; |
385 | pub fn load_bidi_class(); |
386 | } |
387 | |
388 | make_map_property! { |
389 | property: "Script" ; |
390 | marker: ScriptProperty; |
391 | value: crate::Script; |
392 | keyed_data_marker: ScriptV1Marker; |
393 | func: |
394 | /// Return a [`CodePointMapDataBorrowed`] for the Script Unicode enumerated property. See [`Script`]. |
395 | /// |
396 | /// **Note:** Some code points are associated with multiple scripts. If you are trying to |
397 | /// determine whether a code point belongs to a certain script, you should use |
398 | /// [`load_script_with_extensions_unstable`] and [`ScriptWithExtensionsBorrowed::has_script`] |
399 | /// instead of this function. |
400 | /// |
401 | /// β¨ *Enabled with the `compiled_data` Cargo feature.* |
402 | /// |
403 | /// [π Help choosing a constructor](icu_provider::constructors) |
404 | /// |
405 | /// # Example |
406 | /// |
407 | /// ``` |
408 | /// use icu::properties::{maps, Script}; |
409 | /// |
410 | /// assert_eq!(maps::script().get('ζ¨'), Script::Han); // U+6728 |
411 | /// assert_eq!(maps::script().get('π'), Script::Common); // U+1F383 JACK-O-LANTERN |
412 | /// ``` |
413 | /// [`load_script_with_extensions_unstable`]: crate::script::load_script_with_extensions_unstable |
414 | /// [`ScriptWithExtensionsBorrowed::has_script`]: crate::script::ScriptWithExtensionsBorrowed::has_script |
415 | pub const script => SINGLETON_PROPS_SC_V1; |
416 | pub fn load_script(); |
417 | } |
418 | |
419 | make_map_property! { |
420 | property: "Hangul_Syllable_Type" ; |
421 | marker: HangulSyllableTypeProperty; |
422 | value: crate::HangulSyllableType; |
423 | keyed_data_marker: HangulSyllableTypeV1Marker; |
424 | func: |
425 | /// Returns a [`CodePointMapDataBorrowed`] for the Hangul_Syllable_Type |
426 | /// Unicode enumerated property. See [`HangulSyllableType`]. |
427 | /// |
428 | /// β¨ *Enabled with the `compiled_data` Cargo feature.* |
429 | /// |
430 | /// [π Help choosing a constructor](icu_provider::constructors) |
431 | /// |
432 | /// # Example |
433 | /// |
434 | /// ``` |
435 | /// use icu::properties::{maps, HangulSyllableType}; |
436 | /// |
437 | /// assert_eq!(maps::hangul_syllable_type().get('α'), HangulSyllableType::LeadingJamo); // U+1100 |
438 | /// assert_eq!(maps::hangul_syllable_type().get('κ°'), HangulSyllableType::LeadingVowelSyllable); // U+AC00 |
439 | /// ``` |
440 | |
441 | pub const hangul_syllable_type => SINGLETON_PROPS_HST_V1; |
442 | pub fn load_hangul_syllable_type(); |
443 | } |
444 | |
445 | make_map_property! { |
446 | property: "East_Asian_Width" ; |
447 | marker: EastAsianWidthProperty; |
448 | value: crate::EastAsianWidth; |
449 | keyed_data_marker: EastAsianWidthV1Marker; |
450 | func: |
451 | /// Return a [`CodePointMapDataBorrowed`] for the East_Asian_Width Unicode enumerated |
452 | /// property. See [`EastAsianWidth`]. |
453 | /// |
454 | /// β¨ *Enabled with the `compiled_data` Cargo feature.* |
455 | /// |
456 | /// [π Help choosing a constructor](icu_provider::constructors) |
457 | /// |
458 | /// # Example |
459 | /// |
460 | /// ``` |
461 | /// use icu::properties::{maps, EastAsianWidth}; |
462 | /// |
463 | /// assert_eq!(maps::east_asian_width().get('ο½±'), EastAsianWidth::Halfwidth); // U+FF71: Halfwidth Katakana Letter A |
464 | /// assert_eq!(maps::east_asian_width().get('γ’'), EastAsianWidth::Wide); //U+30A2: Katakana Letter A |
465 | /// ``` |
466 | pub const east_asian_width => SINGLETON_PROPS_EA_V1; |
467 | pub fn load_east_asian_width(); |
468 | } |
469 | |
470 | make_map_property! { |
471 | property: "Line_Break" ; |
472 | marker: LineBreakProperty; |
473 | value: crate::LineBreak; |
474 | keyed_data_marker: LineBreakV1Marker; |
475 | func: |
476 | /// Return a [`CodePointMapDataBorrowed`] for the Line_Break Unicode enumerated |
477 | /// property. See [`LineBreak`]. |
478 | /// |
479 | /// β¨ *Enabled with the `compiled_data` Cargo feature.* |
480 | /// |
481 | /// [π Help choosing a constructor](icu_provider::constructors) |
482 | /// |
483 | /// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation. |
484 | /// |
485 | /// # Example |
486 | /// |
487 | /// ``` |
488 | /// use icu::properties::{maps, LineBreak}; |
489 | /// |
490 | /// assert_eq!(maps::line_break().get(')'), LineBreak::CloseParenthesis); // U+0029: Right Parenthesis |
491 | /// assert_eq!(maps::line_break().get('γ'), LineBreak::ConditionalJapaneseStarter); //U+3041: Hiragana Letter Small A |
492 | /// ``` |
493 | pub const line_break => SINGLETON_PROPS_LB_V1; |
494 | pub fn load_line_break(); |
495 | } |
496 | |
497 | make_map_property! { |
498 | property: "Grapheme_Cluster_Break" ; |
499 | marker: GraphemeClusterBreakProperty; |
500 | value: crate::GraphemeClusterBreak; |
501 | keyed_data_marker: GraphemeClusterBreakV1Marker; |
502 | func: |
503 | /// Return a [`CodePointMapDataBorrowed`] for the Grapheme_Cluster_Break Unicode enumerated |
504 | /// property. See [`GraphemeClusterBreak`]. |
505 | /// |
506 | /// β¨ *Enabled with the `compiled_data` Cargo feature.* |
507 | /// |
508 | /// [π Help choosing a constructor](icu_provider::constructors) |
509 | /// |
510 | /// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation. |
511 | /// |
512 | /// # Example |
513 | /// |
514 | /// ``` |
515 | /// use icu::properties::{maps, GraphemeClusterBreak}; |
516 | /// |
517 | /// assert_eq!(maps::grapheme_cluster_break().get('π¦'), GraphemeClusterBreak::RegionalIndicator); // U+1F1E6: Regional Indicator Symbol Letter A |
518 | /// assert_eq!(maps::grapheme_cluster_break().get('ΰΈ³'), GraphemeClusterBreak::SpacingMark); //U+0E33: Thai Character Sara Am |
519 | /// ``` |
520 | pub const grapheme_cluster_break => SINGLETON_PROPS_GCB_V1; |
521 | pub fn load_grapheme_cluster_break(); |
522 | } |
523 | |
524 | make_map_property! { |
525 | property: "Word_Break" ; |
526 | marker: WordBreakProperty; |
527 | value: crate::WordBreak; |
528 | keyed_data_marker: WordBreakV1Marker; |
529 | func: |
530 | /// Return a [`CodePointMapDataBorrowed`] for the Word_Break Unicode enumerated |
531 | /// property. See [`WordBreak`]. |
532 | /// |
533 | /// β¨ *Enabled with the `compiled_data` Cargo feature.* |
534 | /// |
535 | /// [π Help choosing a constructor](icu_provider::constructors) |
536 | /// |
537 | /// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation. |
538 | /// |
539 | /// # Example |
540 | /// |
541 | /// ``` |
542 | /// use icu::properties::{maps, WordBreak}; |
543 | /// |
544 | /// assert_eq!(maps::word_break().get('.'), WordBreak::MidNumLet); // U+002E: Full Stop |
545 | /// assert_eq!(maps::word_break().get('οΌ'), WordBreak::MidNum); // U+FF0C: Fullwidth Comma |
546 | /// ``` |
547 | pub const word_break => SINGLETON_PROPS_WB_V1; |
548 | pub fn load_word_break(); |
549 | } |
550 | |
551 | make_map_property! { |
552 | property: "Sentence_Break" ; |
553 | marker: SentenceBreakProperty; |
554 | value: crate::SentenceBreak; |
555 | keyed_data_marker: SentenceBreakV1Marker; |
556 | func: |
557 | /// Return a [`CodePointMapDataBorrowed`] for the Sentence_Break Unicode enumerated |
558 | /// property. See [`SentenceBreak`]. |
559 | /// |
560 | /// β¨ *Enabled with the `compiled_data` Cargo feature.* |
561 | /// |
562 | /// [π Help choosing a constructor](icu_provider::constructors) |
563 | /// |
564 | /// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation. |
565 | /// |
566 | /// # Example |
567 | /// |
568 | /// ``` |
569 | /// use icu::properties::{maps, SentenceBreak}; |
570 | /// |
571 | /// assert_eq!(maps::sentence_break().get('οΌ'), SentenceBreak::Numeric); // U+FF19: Fullwidth Digit Nine |
572 | /// assert_eq!(maps::sentence_break().get(','), SentenceBreak::SContinue); // U+002C: Comma |
573 | /// ``` |
574 | pub const sentence_break => SINGLETON_PROPS_SB_V1; |
575 | pub fn load_sentence_break(); |
576 | } |
577 | |
578 | make_map_property! { |
579 | property: "Canonical_Combining_Class" ; |
580 | marker: CanonicalCombiningClassProperty; |
581 | value: crate::CanonicalCombiningClass; |
582 | keyed_data_marker: CanonicalCombiningClassV1Marker; |
583 | func: |
584 | /// Return a [`CodePointMapData`] for the Canonical_Combining_Class Unicode property. See |
585 | /// [`CanonicalCombiningClass`]. |
586 | /// |
587 | /// β¨ *Enabled with the `compiled_data` Cargo feature.* |
588 | /// |
589 | /// [π Help choosing a constructor](icu_provider::constructors) |
590 | /// |
591 | /// **Note:** See `icu::normalizer::CanonicalCombiningClassMap` for the preferred API |
592 | /// to look up the Canonical_Combining_Class property by scalar value. |
593 | /// |
594 | /// # Example |
595 | /// |
596 | /// ``` |
597 | /// use icu::properties::{maps, CanonicalCombiningClass}; |
598 | /// |
599 | /// assert_eq!(maps::canonical_combining_class().get('a'), CanonicalCombiningClass::NotReordered); // U+0061: LATIN SMALL LETTER A |
600 | /// assert_eq!(maps::canonical_combining_class().get32(0x0301), CanonicalCombiningClass::Above); // U+0301: COMBINING ACUTE ACCENT |
601 | /// ``` |
602 | pub const canonical_combining_class => SINGLETON_PROPS_CCC_V1; |
603 | pub fn load_canonical_combining_class(); |
604 | } |
605 | |
606 | make_map_property! { |
607 | property: "Indic_Syllabic_Category" ; |
608 | marker: IndicSyllabicCategoryProperty; |
609 | value: crate::IndicSyllabicCategory; |
610 | keyed_data_marker: IndicSyllabicCategoryV1Marker; |
611 | func: |
612 | /// Return a [`CodePointMapData`] for the Indic_Syllabic_Category Unicode property. See |
613 | /// [`IndicSyllabicCategory`]. |
614 | /// |
615 | /// β¨ *Enabled with the `compiled_data` Cargo feature.* |
616 | /// |
617 | /// [π Help choosing a constructor](icu_provider::constructors) |
618 | /// |
619 | /// # Example |
620 | /// |
621 | /// ``` |
622 | /// use icu::properties::{maps, IndicSyllabicCategory}; |
623 | /// |
624 | /// assert_eq!(maps::indic_syllabic_category().get('a'), IndicSyllabicCategory::Other); |
625 | /// assert_eq!(maps::indic_syllabic_category().get32(0x0900), IndicSyllabicCategory::Bindu); // U+0900: DEVANAGARI SIGN INVERTED CANDRABINDU |
626 | /// ``` |
627 | pub const indic_syllabic_category => SINGLETON_PROPS_INSC_V1; |
628 | pub fn load_indic_syllabic_category(); |
629 | } |
630 | |
631 | make_map_property! { |
632 | property: "Joining_Type" ; |
633 | marker: JoiningTypeProperty; |
634 | value: crate::JoiningType; |
635 | keyed_data_marker: JoiningTypeV1Marker; |
636 | func: |
637 | /// Return a [`CodePointMapDataBorrowed`] for the Joining_Type Unicode enumerated |
638 | /// property. See [`JoiningType`]. |
639 | /// |
640 | /// β¨ *Enabled with the `compiled_data` Cargo feature.* |
641 | /// |
642 | /// [π Help choosing a constructor](icu_provider::constructors) |
643 | /// |
644 | /// # Example |
645 | /// |
646 | /// ``` |
647 | /// use icu::properties::{maps, JoiningType}; |
648 | /// |
649 | /// assert_eq!(maps::joining_type().get('Ψ '), JoiningType::DualJoining); // U+0620: Arabic Letter Kashmiri Yeh |
650 | /// assert_eq!(maps::joining_type().get('π«'), JoiningType::LeftJoining); // U+10ACD: Manichaean Letter Heth |
651 | /// ``` |
652 | pub const joining_type => SINGLETON_PROPS_JT_V1; |
653 | pub fn load_joining_type(); |
654 | } |
655 | |