1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | // Provider structs must be stable |
6 | #![allow (clippy::exhaustive_structs, clippy::exhaustive_enums)] |
7 | |
8 | //! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component. |
9 | //! |
10 | //! <div class="stab unstable"> |
11 | //! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, |
12 | //! including in SemVer minor releases. While the serde representation of data structs is guaranteed |
13 | //! to be stable, their Rust representation might not be. Use with caution. |
14 | //! </div> |
15 | //! |
16 | //! Read more about data providers: [`icu_provider`] |
17 | |
18 | pub mod names; |
19 | |
20 | use crate::script::ScriptWithExt; |
21 | use crate::Script; |
22 | |
23 | use core::ops::RangeInclusive; |
24 | use core::str; |
25 | use icu_collections::codepointinvlist::CodePointInversionList; |
26 | use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList; |
27 | use icu_collections::codepointtrie::{CodePointMapRange, CodePointTrie, TrieValue}; |
28 | use icu_provider::prelude::*; |
29 | use icu_provider::{DataKeyMetadata, FallbackPriority}; |
30 | use zerofrom::ZeroFrom; |
31 | |
32 | use zerovec::{VarZeroVec, ZeroSlice, ZeroVecError}; |
33 | |
/// Unit struct that this crate's baked (compiled-in) data macros are instantiated for.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. In particular, the `DataProvider` implementations are only
/// guaranteed to match with this version's `*_unstable` providers. Use with caution.
/// </div>
#[cfg(feature = "compiled_data")]
#[derive(Debug)]
pub struct Baked;
44 | |
// Instantiates the `icu_properties_data` macros for `Baked`. Only compiled when the
// `compiled_data` feature is enabled.
//
// Everything is wrapped in an anonymous `const _` so that the helper `icu` module below and
// the items the macros expand to are scoped to this block.
#[cfg(feature = "compiled_data")]
const _: () = {
    // Local `icu` namespace re-exporting this crate and two dependencies under short names.
    // NOTE(review): presumably the paths emitted by the `icu_properties_data` macros resolve
    // through `icu::properties`, `icu::collections`, ... — confirm against the macro definitions.
    pub mod icu {
        pub use crate as properties;
        pub use icu_collections as collections;
        #[allow(unused_imports)] // baked data may or may not need this
        pub use icu_locid_transform as locid_transform;
    }
    icu_properties_data::make_provider!(Baked);
    // `impl_propnames_from_*`: presumably one per `propnames/from/...@1` key declared by `expand!`.
    icu_properties_data::impl_propnames_from_gcb_v1!(Baked);
    icu_properties_data::impl_propnames_from_bc_v1!(Baked);
    icu_properties_data::impl_propnames_from_ccc_v1!(Baked);
    icu_properties_data::impl_propnames_from_ea_v1!(Baked);
    icu_properties_data::impl_propnames_from_gc_v1!(Baked);
    icu_properties_data::impl_propnames_from_gcm_v1!(Baked);
    icu_properties_data::impl_propnames_from_hst_v1!(Baked);
    icu_properties_data::impl_propnames_from_insc_v1!(Baked);
    icu_properties_data::impl_propnames_from_jt_v1!(Baked);
    icu_properties_data::impl_propnames_from_lb_v1!(Baked);
    icu_properties_data::impl_propnames_from_sb_v1!(Baked);
    icu_properties_data::impl_propnames_from_sc_v1!(Baked);
    icu_properties_data::impl_propnames_from_wb_v1!(Baked);
    // `impl_propnames_to_long_*`: long value names (linear variants, then the sparse `ccc` one).
    // Note that `sc` uses the plain `linear` macro here, while its short names use `linear4`.
    icu_properties_data::impl_propnames_to_long_linear_bc_v1!(Baked);
    icu_properties_data::impl_propnames_to_long_linear_ea_v1!(Baked);
    icu_properties_data::impl_propnames_to_long_linear_gc_v1!(Baked);
    icu_properties_data::impl_propnames_to_long_linear_gcb_v1!(Baked);
    icu_properties_data::impl_propnames_to_long_linear_hst_v1!(Baked);
    icu_properties_data::impl_propnames_to_long_linear_insc_v1!(Baked);
    icu_properties_data::impl_propnames_to_long_linear_jt_v1!(Baked);
    icu_properties_data::impl_propnames_to_long_linear_lb_v1!(Baked);
    icu_properties_data::impl_propnames_to_long_linear_sb_v1!(Baked);
    icu_properties_data::impl_propnames_to_long_linear_sc_v1!(Baked);
    icu_properties_data::impl_propnames_to_long_linear_wb_v1!(Baked);
    icu_properties_data::impl_propnames_to_long_sparse_ccc_v1!(Baked);
    // `impl_propnames_to_short_*`: short value names (linear, `linear4` for `sc`, sparse for `ccc`).
    icu_properties_data::impl_propnames_to_short_linear_bc_v1!(Baked);
    icu_properties_data::impl_propnames_to_short_linear_ea_v1!(Baked);
    icu_properties_data::impl_propnames_to_short_linear_gc_v1!(Baked);
    icu_properties_data::impl_propnames_to_short_linear_gcb_v1!(Baked);
    icu_properties_data::impl_propnames_to_short_linear_hst_v1!(Baked);
    icu_properties_data::impl_propnames_to_short_linear_insc_v1!(Baked);
    icu_properties_data::impl_propnames_to_short_linear_jt_v1!(Baked);
    icu_properties_data::impl_propnames_to_short_linear_lb_v1!(Baked);
    icu_properties_data::impl_propnames_to_short_linear_sb_v1!(Baked);
    icu_properties_data::impl_propnames_to_short_linear_wb_v1!(Baked);
    icu_properties_data::impl_propnames_to_short_linear4_sc_v1!(Baked);
    icu_properties_data::impl_propnames_to_short_sparse_ccc_v1!(Baked);
    // `impl_props_*`: presumably one per `props/...@1` key (sets, maps, exemplar characters,
    // bidi auxiliary properties and script extensions).
    icu_properties_data::impl_props_ahex_v1!(Baked);
    icu_properties_data::impl_props_alnum_v1!(Baked);
    icu_properties_data::impl_props_alpha_v1!(Baked);
    icu_properties_data::impl_props_basic_emoji_v1!(Baked);
    icu_properties_data::impl_props_bc_v1!(Baked);
    icu_properties_data::impl_props_bidi_c_v1!(Baked);
    icu_properties_data::impl_props_bidi_m_v1!(Baked);
    icu_properties_data::impl_props_bidiauxiliaryprops_v1!(Baked);
    icu_properties_data::impl_props_blank_v1!(Baked);
    icu_properties_data::impl_props_cased_v1!(Baked);
    icu_properties_data::impl_props_ccc_v1!(Baked);
    icu_properties_data::impl_props_ci_v1!(Baked);
    icu_properties_data::impl_props_comp_ex_v1!(Baked);
    icu_properties_data::impl_props_cwcf_v1!(Baked);
    icu_properties_data::impl_props_cwcm_v1!(Baked);
    icu_properties_data::impl_props_cwkcf_v1!(Baked);
    icu_properties_data::impl_props_cwl_v1!(Baked);
    icu_properties_data::impl_props_cwt_v1!(Baked);
    icu_properties_data::impl_props_cwu_v1!(Baked);
    icu_properties_data::impl_props_dash_v1!(Baked);
    icu_properties_data::impl_props_dep_v1!(Baked);
    icu_properties_data::impl_props_di_v1!(Baked);
    icu_properties_data::impl_props_dia_v1!(Baked);
    icu_properties_data::impl_props_ea_v1!(Baked);
    icu_properties_data::impl_props_ebase_v1!(Baked);
    icu_properties_data::impl_props_ecomp_v1!(Baked);
    icu_properties_data::impl_props_emod_v1!(Baked);
    icu_properties_data::impl_props_emoji_v1!(Baked);
    icu_properties_data::impl_props_epres_v1!(Baked);
    icu_properties_data::impl_props_exemplarchars_auxiliary_v1!(Baked);
    icu_properties_data::impl_props_exemplarchars_index_v1!(Baked);
    icu_properties_data::impl_props_exemplarchars_main_v1!(Baked);
    icu_properties_data::impl_props_exemplarchars_numbers_v1!(Baked);
    icu_properties_data::impl_props_exemplarchars_punctuation_v1!(Baked);
    icu_properties_data::impl_props_ext_v1!(Baked);
    icu_properties_data::impl_props_extpict_v1!(Baked);
    icu_properties_data::impl_props_gc_v1!(Baked);
    icu_properties_data::impl_props_gcb_v1!(Baked);
    icu_properties_data::impl_props_gr_base_v1!(Baked);
    icu_properties_data::impl_props_gr_ext_v1!(Baked);
    icu_properties_data::impl_props_gr_link_v1!(Baked);
    icu_properties_data::impl_props_graph_v1!(Baked);
    icu_properties_data::impl_props_hex_v1!(Baked);
    icu_properties_data::impl_props_hst_v1!(Baked);
    icu_properties_data::impl_props_hyphen_v1!(Baked);
    icu_properties_data::impl_props_idc_v1!(Baked);
    icu_properties_data::impl_props_ideo_v1!(Baked);
    icu_properties_data::impl_props_ids_v1!(Baked);
    icu_properties_data::impl_props_idsb_v1!(Baked);
    icu_properties_data::impl_props_idst_v1!(Baked);
    icu_properties_data::impl_props_insc_v1!(Baked);
    icu_properties_data::impl_props_join_c_v1!(Baked);
    icu_properties_data::impl_props_jt_v1!(Baked);
    icu_properties_data::impl_props_lb_v1!(Baked);
    icu_properties_data::impl_props_loe_v1!(Baked);
    icu_properties_data::impl_props_lower_v1!(Baked);
    icu_properties_data::impl_props_math_v1!(Baked);
    icu_properties_data::impl_props_nchar_v1!(Baked);
    icu_properties_data::impl_props_nfcinert_v1!(Baked);
    icu_properties_data::impl_props_nfdinert_v1!(Baked);
    icu_properties_data::impl_props_nfkcinert_v1!(Baked);
    icu_properties_data::impl_props_nfkdinert_v1!(Baked);
    icu_properties_data::impl_props_pat_syn_v1!(Baked);
    icu_properties_data::impl_props_pat_ws_v1!(Baked);
    icu_properties_data::impl_props_pcm_v1!(Baked);
    icu_properties_data::impl_props_print_v1!(Baked);
    icu_properties_data::impl_props_qmark_v1!(Baked);
    icu_properties_data::impl_props_radical_v1!(Baked);
    icu_properties_data::impl_props_ri_v1!(Baked);
    icu_properties_data::impl_props_sb_v1!(Baked);
    icu_properties_data::impl_props_sc_v1!(Baked);
    icu_properties_data::impl_props_scx_v1!(Baked);
    icu_properties_data::impl_props_sd_v1!(Baked);
    icu_properties_data::impl_props_segstart_v1!(Baked);
    icu_properties_data::impl_props_sensitive_v1!(Baked);
    icu_properties_data::impl_props_sterm_v1!(Baked);
    icu_properties_data::impl_props_term_v1!(Baked);
    icu_properties_data::impl_props_uideo_v1!(Baked);
    icu_properties_data::impl_props_upper_v1!(Baked);
    icu_properties_data::impl_props_vs_v1!(Baked);
    icu_properties_data::impl_props_wb_v1!(Baked);
    icu_properties_data::impl_props_wspace_v1!(Baked);
    icu_properties_data::impl_props_xdigit_v1!(Baked);
    icu_properties_data::impl_props_xidc_v1!(Baked);
    icu_properties_data::impl_props_xids_v1!(Baked);
};
177 | |
178 | // include the specialized structs for the compact representation of Bidi property data |
179 | pub mod bidi_data; |
180 | |
/// A set of characters which share a particular property value.
///
/// This data enum is extensible, more backends may be added in the future.
/// Old data can be used with newer code but not vice versa.
///
/// `serde::Serialize` and `databake::Bake` are derived only when the `datagen` feature is
/// enabled, and `serde::Deserialize` only when the `serde` feature is enabled.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(
    feature = "datagen",
    derive(serde::Serialize, databake::Bake),
    databake(path = icu_properties::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[non_exhaustive]
pub enum PropertyCodePointSetV1<'data> {
    /// The set of characters, represented as an inversion list.
    ///
    /// With the `serde` feature the payload is marked `serde(borrow)`, so it is allowed to
    /// borrow from the deserializer input for `'data`.
    InversionList(#[cfg_attr(feature = "serde", serde(borrow))] CodePointInversionList<'data>),
    // new variants should go BELOW existing ones
    // Serde serializes based on variant name and index in the enum
    // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant
}
206 | |
/// A map efficiently storing data about individual characters.
///
/// The value type `T` must implement [`TrieValue`].
///
/// This data enum is extensible, more backends may be added in the future.
/// Old data can be used with newer code but not vice versa.
///
/// `serde::Serialize` and `databake::Bake` are derived only when the `datagen` feature is
/// enabled, and `serde::Deserialize` only when the `serde` feature is enabled.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Clone, Debug, Eq, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(
    feature = "datagen",
    derive(serde::Serialize, databake::Bake),
    databake(path = icu_properties::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[non_exhaustive]
pub enum PropertyCodePointMapV1<'data, T: TrieValue> {
    /// A codepoint trie storing the data.
    ///
    /// With the `serde` feature the payload is marked `serde(borrow)`, so it is allowed to
    /// borrow from the deserializer input for `'data`.
    CodePointTrie(#[cfg_attr(feature = "serde", serde(borrow))] CodePointTrie<'data, T>),
    // new variants should go BELOW existing ones
    // Serde serializes based on variant name and index in the enum
    // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant
}
232 | |
/// A set of characters and strings which share a particular property value.
///
/// `serde::Serialize` and `databake::Bake` are derived only when the `datagen` feature is
/// enabled, and `serde::Deserialize` only when the `serde` feature is enabled. The enum is
/// `#[non_exhaustive]`, so further variants may be added.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(
    feature = "datagen",
    derive(serde::Serialize, databake::Bake),
    databake(path = icu_properties::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[non_exhaustive]
pub enum PropertyUnicodeSetV1<'data> {
    /// A set representing characters in an inversion list, and the strings in a list.
    ///
    /// With the `serde` feature the payload is marked `serde(borrow)`, so it is allowed to
    /// borrow from the deserializer input for `'data`.
    CPInversionListStrList(
        #[cfg_attr(feature = "serde", serde(borrow))] CodePointInversionListAndStringList<'data>,
    ),
    // new variants should go BELOW existing ones
    // Serde serializes based on variant name and index in the enum
    // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant
}
257 | |
258 | impl<'data> PropertyUnicodeSetV1<'data> { |
259 | #[inline ] |
260 | pub(crate) fn contains(&self, s: &str) -> bool { |
261 | match *self { |
262 | Self::CPInversionListStrList(ref l) => l.contains(s), |
263 | } |
264 | } |
265 | |
266 | #[inline ] |
267 | pub(crate) fn contains32(&self, cp: u32) -> bool { |
268 | match *self { |
269 | Self::CPInversionListStrList(ref l) => l.contains32(cp), |
270 | } |
271 | } |
272 | |
273 | #[inline ] |
274 | pub(crate) fn contains_char(&self, ch: char) -> bool { |
275 | match *self { |
276 | Self::CPInversionListStrList(ref l) => l.contains_char(ch), |
277 | } |
278 | } |
279 | |
280 | #[inline ] |
281 | pub(crate) fn from_code_point_inversion_list_string_list( |
282 | l: CodePointInversionListAndStringList<'static>, |
283 | ) -> Self { |
284 | Self::CPInversionListStrList(l) |
285 | } |
286 | |
287 | #[inline ] |
288 | pub(crate) fn as_code_point_inversion_list_string_list( |
289 | &'_ self, |
290 | ) -> Option<&'_ CodePointInversionListAndStringList<'data>> { |
291 | match *self { |
292 | Self::CPInversionListStrList(ref l) => Some(l), |
293 | // any other backing data structure that cannot return a CPInversionListStrList in O(1) time should return None |
294 | } |
295 | } |
296 | |
297 | #[inline ] |
298 | pub(crate) fn to_code_point_inversion_list_string_list( |
299 | &self, |
300 | ) -> CodePointInversionListAndStringList<'_> { |
301 | match *self { |
302 | Self::CPInversionListStrList(ref t) => ZeroFrom::zero_from(t), |
303 | } |
304 | } |
305 | } |
306 | |
/// A struct that efficiently stores `Script` and `Script_Extensions` property data.
///
/// The data is split across two fields: a code point trie of packed `ScriptWithExt` values
/// (`trie`) and a companion list of `Script` sub-arrays (`extensions`) that the trie values
/// may index into.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[icu_provider::data_struct(marker(
    ScriptWithExtensionsPropertyV1Marker,
    "props/scx@1",
    singleton
))]
#[derive(Debug, Eq, PartialEq, Clone)]
#[cfg_attr(
    feature = "datagen",
    derive(serde::Serialize, databake::Bake),
    databake(path = icu_properties::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct ScriptWithExtensionsPropertyV1<'data> {
    /// Code point trie whose values are packed `ScriptWithExt` entries.
    ///
    /// Note: The `ScriptWithExt` values in this array will assume a 12-bit layout. The 2
    /// higher order bits 11..10 will indicate how to deduce the Script value and
    /// Script_Extensions value, nearly matching the representation
    /// [in ICU](https://github.com/unicode-org/icu/blob/main/icu4c/source/common/uprops.h):
    ///
    /// | High order 2 bits value | Script                                                 | Script_Extensions                                              |
    /// |-------------------------|--------------------------------------------------------|----------------------------------------------------------------|
    /// | 3                       | First value in sub-array, index given by lower 10 bits | Sub-array excluding first value, index given by lower 10 bits  |
    /// | 2                       | Script=Inherited                                       | Entire sub-array, index given by lower 10 bits                 |
    /// | 1                       | Script=Common                                          | Entire sub-array, index given by lower 10 bits                 |
    /// | 0                       | Value in lower 10 bits                                 | `[ Script value ]` single-element array                        |
    ///
    /// When the lower 10 bits of the value are used as an index, that index is
    /// used for the outer-level vector of the nested `extensions` structure.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub trie: CodePointTrie<'data, ScriptWithExt>,

    /// Companion list of `Script` sub-arrays referenced by index from `trie` values.
    ///
    /// This companion structure stores Script_Extensions values, which are
    /// themselves arrays / vectors. This structure only stores the values for
    /// cases in which `scx(cp) != [ sc(cp) ]`. Each sub-vector is distinct. The
    /// sub-vector represents the Script_Extensions array value for a code point,
    /// and may also indicate Script value, as described for the `trie` field.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub extensions: VarZeroVec<'data, ZeroSlice<Script>>,
}
352 | |
353 | impl<'data> ScriptWithExtensionsPropertyV1<'data> { |
354 | // This method is intended to be used by constructors of deserialized data |
355 | // in a data provider. |
356 | #[doc (hidden)] |
357 | pub fn new( |
358 | trie: CodePointTrie<'data, ScriptWithExt>, |
359 | extensions: VarZeroVec<'data, ZeroSlice<Script>>, |
360 | ) -> ScriptWithExtensionsPropertyV1<'data> { |
361 | ScriptWithExtensionsPropertyV1 { trie, extensions } |
362 | } |
363 | } |
364 | |
365 | // See CodePointSetData for documentation of these functions |
366 | impl<'data> PropertyCodePointSetV1<'data> { |
367 | #[inline ] |
368 | pub(crate) fn contains(&self, ch: char) -> bool { |
369 | match *self { |
370 | Self::InversionList(ref l) => l.contains(ch), |
371 | } |
372 | } |
373 | |
374 | #[inline ] |
375 | pub(crate) fn contains32(&self, ch: u32) -> bool { |
376 | match *self { |
377 | Self::InversionList(ref l) => l.contains32(ch), |
378 | } |
379 | } |
380 | |
381 | #[inline ] |
382 | pub(crate) fn iter_ranges(&self) -> impl Iterator<Item = RangeInclusive<u32>> + '_ { |
383 | match *self { |
384 | Self::InversionList(ref l) => l.iter_ranges(), |
385 | } |
386 | } |
387 | |
388 | #[inline ] |
389 | pub(crate) fn iter_ranges_complemented( |
390 | &self, |
391 | ) -> impl Iterator<Item = RangeInclusive<u32>> + '_ { |
392 | match *self { |
393 | Self::InversionList(ref l) => l.iter_ranges_complemented(), |
394 | } |
395 | } |
396 | |
397 | #[inline ] |
398 | pub(crate) fn from_code_point_inversion_list(l: CodePointInversionList<'static>) -> Self { |
399 | Self::InversionList(l) |
400 | } |
401 | |
402 | #[inline ] |
403 | pub(crate) fn as_code_point_inversion_list( |
404 | &'_ self, |
405 | ) -> Option<&'_ CodePointInversionList<'data>> { |
406 | match *self { |
407 | Self::InversionList(ref l) => Some(l), |
408 | // any other backing data structure that cannot return a CPInvList in O(1) time should return None |
409 | } |
410 | } |
411 | |
412 | #[inline ] |
413 | pub(crate) fn to_code_point_inversion_list(&self) -> CodePointInversionList<'_> { |
414 | match *self { |
415 | Self::InversionList(ref t) => ZeroFrom::zero_from(t), |
416 | } |
417 | } |
418 | } |
419 | |
420 | // See CodePointMapData for documentation of these functions |
421 | impl<'data, T: TrieValue> PropertyCodePointMapV1<'data, T> { |
422 | #[inline ] |
423 | pub(crate) fn get32(&self, ch: u32) -> T { |
424 | match *self { |
425 | Self::CodePointTrie(ref t) => t.get32(ch), |
426 | } |
427 | } |
428 | |
429 | #[inline ] |
430 | pub(crate) fn try_into_converted<P>( |
431 | self, |
432 | ) -> Result<PropertyCodePointMapV1<'data, P>, ZeroVecError> |
433 | where |
434 | P: TrieValue, |
435 | { |
436 | match self { |
437 | Self::CodePointTrie(t) => t |
438 | .try_into_converted() |
439 | .map(PropertyCodePointMapV1::CodePointTrie), |
440 | } |
441 | } |
442 | |
443 | #[inline ] |
444 | pub(crate) fn get_set_for_value(&self, value: T) -> CodePointInversionList<'static> { |
445 | match *self { |
446 | Self::CodePointTrie(ref t) => t.get_set_for_value(value), |
447 | } |
448 | } |
449 | |
450 | #[inline ] |
451 | pub(crate) fn iter_ranges(&self) -> impl Iterator<Item = CodePointMapRange<T>> + '_ { |
452 | match *self { |
453 | Self::CodePointTrie(ref t) => t.iter_ranges(), |
454 | } |
455 | } |
456 | #[inline ] |
457 | pub(crate) fn iter_ranges_mapped<'a, U: Eq + 'a>( |
458 | &'a self, |
459 | map: impl FnMut(T) -> U + Copy + 'a, |
460 | ) -> impl Iterator<Item = CodePointMapRange<U>> + 'a { |
461 | match *self { |
462 | Self::CodePointTrie(ref t) => t.iter_ranges_mapped(map), |
463 | } |
464 | } |
465 | |
466 | #[inline ] |
467 | pub(crate) fn from_code_point_trie(trie: CodePointTrie<'static, T>) -> Self { |
468 | Self::CodePointTrie(trie) |
469 | } |
470 | |
471 | #[inline ] |
472 | pub(crate) fn as_code_point_trie(&self) -> Option<&CodePointTrie<'data, T>> { |
473 | match *self { |
474 | Self::CodePointTrie(ref t) => Some(t), |
475 | // any other backing data structure that cannot return a CPT in O(1) time should return None |
476 | } |
477 | } |
478 | |
479 | #[inline ] |
480 | pub(crate) fn to_code_point_trie(&self) -> CodePointTrie<'_, T> { |
481 | match *self { |
482 | Self::CodePointTrie(ref t) => ZeroFrom::zero_from(t), |
483 | } |
484 | } |
485 | } |
486 | |
// Generates the data marker types for this module and the `KEYS` list.
//
// The macro takes three parenthesized groups, each a comma-terminated list of tuples:
//
// 1. `(Marker, "name")`: a marker whose `Yokeable` is `PropertyCodePointSetV1` and whose
//    key is `props/<name>@1`.
// 2. `(Marker, "name", singleton)`: a marker whose `Yokeable` is `PropertyUnicodeSetV1`;
//    the literal is passed as the last argument of `DataKeyMetadata::construct_internal`.
// 3. `(MapMarker, NameToEnumMarker, [(sparse|linear|linear4: ShortMarker, LongMarker),]
//    "name", ValueTy)`: a `PropertyCodePointMapV1<crate::ValueTy>` marker (`props/<name>@1`),
//    a name-to-enum marker (`propnames/from/<name>@1`), and optionally a pair of
//    enum-to-name markers for whichever of the `sparse` / `linear` / `linear4` forms is given.
//
// Every generated marker is a unit struct deriving `Debug` and `Default` (plus
// `databake::Bake` under the `datagen` feature).
macro_rules! expand {
    (
        ($(($code_point_set_marker:ident, $bin_cp_s:literal),)+),
        ($(($unicode_set_marker:ident, $bin_us_s:literal, $us_singleton:literal),)+),
        ($(($code_point_map_marker:ident,
            $name_value_marker:ident,

            $((sparse: $value_short_name_marker_sparse:ident, $value_long_name_marker_sparse:ident),)?
            $((linear: $value_short_name_marker_linear:ident, $value_long_name_marker_linear:ident),)?
            $((linear4: $value_short_name_marker_linear4:ident, $value_long_name_marker_linear4:ident),)?
            $enum_s:literal, $value_ty:ident),)+)
    ) => {

            // Data keys that return code point sets (represented as CodePointSetData).
            // For now, synonymous with binary properties of code points only.
            $(
                #[doc = core::concat!("Data marker for the '", $bin_cp_s, "' Unicode property")]
                #[derive(Debug, Default)]
                #[cfg_attr(
                    feature = "datagen",
                    derive(databake::Bake),
                    databake(path = icu_properties::provider),
                )]
                pub struct $code_point_set_marker;

                impl DataMarker for $code_point_set_marker {
                    type Yokeable = PropertyCodePointSetV1<'static>;
                }
                impl KeyedDataMarker for $code_point_set_marker {
                    const KEY: DataKey = data_key!(concat!("props/", $bin_cp_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
                }

            )+

            // Data keys that return sets of strings + code points (represented as UnicodeSetData).
            // Includes:
            //   - binary properties of strings + code points
            //   - exemplar characters
            $(
                #[doc = core::concat!("Data marker for the '", $bin_us_s, "' Unicode property")]
                #[derive(Debug, Default)]
                #[cfg_attr(
                    feature = "datagen",
                    derive(databake::Bake),
                    databake(path = icu_properties::provider),
                )]
                pub struct $unicode_set_marker;

                impl DataMarker for $unicode_set_marker {
                    type Yokeable = PropertyUnicodeSetV1<'static>;
                }
                impl KeyedDataMarker for $unicode_set_marker {
                    // Unlike the other groups, the final metadata argument comes from the caller.
                    const KEY: DataKey = data_key!(concat!("props/", $bin_us_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, $us_singleton));
                }
            )+

            // Data keys that return code point map (represented as CodePointMapData).
            // For now, synonymous with enumerated properties [of code points only].
            $(
                #[doc = core::concat!("Data marker for the '", $enum_s, "' Unicode property")]
                #[derive(Debug, Default)]
                #[cfg_attr(
                    feature = "datagen",
                    derive(databake::Bake),
                    databake(path = icu_properties::provider),
                )]
                pub struct $code_point_map_marker;

                impl DataMarker for $code_point_map_marker {
                    type Yokeable = PropertyCodePointMapV1<'static, crate::$value_ty>;
                }

                impl KeyedDataMarker for $code_point_map_marker {
                    const KEY: DataKey = data_key!(concat!("props/", $enum_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
                }


                #[doc = core::concat!("Data marker for parsing the names of the values of the '", $enum_s, "' Unicode property")]
                #[derive(Debug, Default)]
                #[cfg_attr(
                    feature = "datagen",
                    derive(databake::Bake),
                    databake(path = icu_properties::provider),
                )]
                pub struct $name_value_marker;

                impl DataMarker for $name_value_marker {
                    type Yokeable = names::PropertyValueNameToEnumMapV1<'static>;
                }

                impl KeyedDataMarker for $name_value_marker {
                    const KEY: DataKey = data_key!(concat!("propnames/from/", $enum_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
                }

                // Optional `sparse:` pair: short and long name markers backed by the sparse map.
                $(
                    #[doc = core::concat!("Data marker for producing short names of the values of the '", $enum_s, "' Unicode property")]
                    #[derive(Debug, Default)]
                    #[cfg_attr(
                        feature = "datagen",
                        derive(databake::Bake),
                        databake(path = icu_properties::provider),
                    )]
                    pub struct $value_short_name_marker_sparse;

                    impl DataMarker for $value_short_name_marker_sparse {
                        type Yokeable = names::PropertyEnumToValueNameSparseMapV1<'static>;
                    }

                    impl KeyedDataMarker for $value_short_name_marker_sparse {
                        const KEY: DataKey = data_key!(concat!("propnames/to/short/sparse/", $enum_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
                    }

                    #[doc = core::concat!("Data marker for producing long names of the values of the '", $enum_s, "' Unicode property")]
                    #[derive(Debug, Default)]
                    #[cfg_attr(
                        feature = "datagen",
                        derive(databake::Bake),
                        databake(path = icu_properties::provider),
                    )]
                    pub struct $value_long_name_marker_sparse;

                    impl DataMarker for $value_long_name_marker_sparse {
                        type Yokeable = names::PropertyEnumToValueNameSparseMapV1<'static>;
                    }

                    impl KeyedDataMarker for $value_long_name_marker_sparse {
                        const KEY: DataKey = data_key!(concat!("propnames/to/long/sparse/", $enum_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
                    }
                )?

                // Optional `linear:` pair: short and long name markers backed by the linear map.
                $(
                    #[doc = core::concat!("Data marker for producing short names of the values of the '", $enum_s, "' Unicode property")]
                    #[derive(Debug, Default)]
                    #[cfg_attr(
                        feature = "datagen",
                        derive(databake::Bake),
                        databake(path = icu_properties::provider),
                    )]
                    pub struct $value_short_name_marker_linear;

                    impl DataMarker for $value_short_name_marker_linear {
                        type Yokeable = names::PropertyEnumToValueNameLinearMapV1<'static>;
                    }

                    impl KeyedDataMarker for $value_short_name_marker_linear {
                        const KEY: DataKey = data_key!(concat!("propnames/to/short/linear/", $enum_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
                    }

                    #[doc = core::concat!("Data marker for producing long names of the values of the '", $enum_s, "' Unicode property")]
                    #[derive(Debug, Default)]
                    #[cfg_attr(
                        feature = "datagen",
                        derive(databake::Bake),
                        databake(path = icu_properties::provider),
                    )]
                    pub struct $value_long_name_marker_linear;

                    impl DataMarker for $value_long_name_marker_linear {
                        type Yokeable = names::PropertyEnumToValueNameLinearMapV1<'static>;
                    }

                    impl KeyedDataMarker for $value_long_name_marker_linear {
                        const KEY: DataKey = data_key!(concat!("propnames/to/long/linear/", $enum_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
                    }
                )?

                // Optional `linear4:` pair: only the short names use the Tiny4 map and the
                // `linear4` key path; the long names fall back to the regular linear map.
                $(
                    #[doc = core::concat!("Data marker for producing short names of the values of the '", $enum_s, "' Unicode property")]
                    #[derive(Debug, Default)]
                    #[cfg_attr(
                        feature = "datagen",
                        derive(databake::Bake),
                        databake(path = icu_properties::provider),
                    )]
                    pub struct $value_short_name_marker_linear4;

                    impl DataMarker for $value_short_name_marker_linear4 {
                        type Yokeable = names::PropertyEnumToValueNameLinearTiny4MapV1<'static>;
                    }

                    impl KeyedDataMarker for $value_short_name_marker_linear4 {
                        const KEY: DataKey = data_key!(concat!("propnames/to/short/linear4/", $enum_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
                    }

                    #[doc = core::concat!("Data marker for producing long names of the values of the '", $enum_s, "' Unicode property")]
                    #[derive(Debug, Default)]
                    #[cfg_attr(
                        feature = "datagen",
                        derive(databake::Bake),
                        databake(path = icu_properties::provider),
                    )]
                    pub struct $value_long_name_marker_linear4;

                    impl DataMarker for $value_long_name_marker_linear4 {
                        // Tiny4 is only for short names
                        type Yokeable = names::PropertyEnumToValueNameLinearMapV1<'static>;
                    }

                    impl KeyedDataMarker for $value_long_name_marker_linear4 {
                        // The key path is `.../long/linear/` (not `linear4`), matching the
                        // `Yokeable` above.
                        const KEY: DataKey = data_key!(concat!("propnames/to/long/linear/", $enum_s, "@1"), DataKeyMetadata::construct_internal(FallbackPriority::Language, None, None, true));
                    }
                )?
            )+

            /// All data keys in this module.
            pub const KEYS: &[DataKey] = &[
                $($code_point_set_marker::KEY,)+
                $($unicode_set_marker::KEY,)+
                $(
                    $code_point_map_marker::KEY,
                    $name_value_marker::KEY,
                    $($value_short_name_marker_sparse::KEY, $value_long_name_marker_sparse::KEY,)?
                    $($value_short_name_marker_linear::KEY, $value_long_name_marker_linear::KEY,)?
                    $($value_short_name_marker_linear4::KEY, $value_long_name_marker_linear4::KEY,)?
                )+
                // Markers that are declared outside of this macro.
                bidi_data::BidiAuxiliaryPropertiesV1Marker::KEY,
                GeneralCategoryMaskNameToValueV1Marker::KEY,
                ScriptWithExtensionsPropertyV1Marker::KEY,
            ];
    };
}
708 | |
709 | pub use self::names::GeneralCategoryMaskNameToValueV1Marker; |
710 | |
// Invokes `expand!` with three parenthesized groups, in this order: code point
// sets, UnicodeSets (code points + strings), and code point maps.
// The tail of the macro builds `KEYS` from `<marker>::KEY` for the set markers,
// the UnicodeSet markers, and, per map, the map marker, the name-to-value marker
// and the optional short/long value-name markers. Every marker named below is
// therefore presumably listed in `KEYS` (the macro's matcher is defined earlier
// in the file and should be checked before reordering or adding entries).
// Keep annotations inside the invocation as plain `//` comments: `///` doc
// comments are converted to `#[doc]` attribute tokens and would become part of
// the macro input.
711 | expand!( |
712 | ( |
713 | // code point sets |
// Each entry is (marker type, string literal). The string appears to be the
// property's short name or alias; how the macro uses it is defined outside this
// view -- TODO confirm.
714 | (AsciiHexDigitV1Marker, "AHex" ), |
715 | (AlnumV1Marker, "alnum" ), |
716 | (AlphabeticV1Marker, "Alpha" ), |
717 | (BidiControlV1Marker, "Bidi_C" ), |
718 | (BidiMirroredV1Marker, "Bidi_M" ), |
719 | (BlankV1Marker, "blank" ), |
720 | (CasedV1Marker, "Cased" ), |
721 | (CaseIgnorableV1Marker, "CI" ), |
722 | (FullCompositionExclusionV1Marker, "Comp_Ex" ), |
723 | (ChangesWhenCasefoldedV1Marker, "CWCF" ), |
724 | (ChangesWhenCasemappedV1Marker, "CWCM" ), |
725 | (ChangesWhenNfkcCasefoldedV1Marker, "CWKCF" ), |
726 | (ChangesWhenLowercasedV1Marker, "CWL" ), |
727 | (ChangesWhenTitlecasedV1Marker, "CWT" ), |
728 | (ChangesWhenUppercasedV1Marker, "CWU" ), |
729 | (DashV1Marker, "Dash" ), |
730 | (DeprecatedV1Marker, "Dep" ), |
731 | (DefaultIgnorableCodePointV1Marker, "DI" ), |
732 | (DiacriticV1Marker, "Dia" ), |
733 | (EmojiModifierBaseV1Marker, "EBase" ), |
734 | (EmojiComponentV1Marker, "EComp" ), |
735 | (EmojiModifierV1Marker, "EMod" ), |
736 | (EmojiV1Marker, "Emoji" ), |
737 | (EmojiPresentationV1Marker, "EPres" ), |
738 | (ExtenderV1Marker, "Ext" ), |
739 | (ExtendedPictographicV1Marker, "ExtPict" ), |
740 | (GraphV1Marker, "graph" ), |
741 | (GraphemeBaseV1Marker, "Gr_Base" ), |
742 | (GraphemeExtendV1Marker, "Gr_Ext" ), |
743 | (GraphemeLinkV1Marker, "Gr_Link" ), |
744 | (HexDigitV1Marker, "Hex" ), |
745 | (HyphenV1Marker, "Hyphen" ), |
746 | (IdContinueV1Marker, "IDC" ), |
747 | (IdeographicV1Marker, "Ideo" ), |
748 | (IdStartV1Marker, "IDS" ), |
749 | (IdsBinaryOperatorV1Marker, "IDSB" ), |
750 | (IdsTrinaryOperatorV1Marker, "IDST" ), |
751 | (JoinControlV1Marker, "Join_C" ), |
752 | (LogicalOrderExceptionV1Marker, "LOE" ), |
753 | (LowercaseV1Marker, "Lower" ), |
754 | (MathV1Marker, "Math" ), |
755 | (NoncharacterCodePointV1Marker, "NChar" ), |
756 | (NfcInertV1Marker, "nfcinert" ), |
757 | (NfdInertV1Marker, "nfdinert" ), |
758 | (NfkcInertV1Marker, "nfkcinert" ), |
759 | (NfkdInertV1Marker, "nfkdinert" ), |
760 | (PatternSyntaxV1Marker, "Pat_Syn" ), |
761 | (PatternWhiteSpaceV1Marker, "Pat_WS" ), |
762 | (PrependedConcatenationMarkV1Marker, "PCM" ), |
763 | (PrintV1Marker, "print" ), |
764 | (QuotationMarkV1Marker, "QMark" ), |
765 | (RadicalV1Marker, "Radical" ), |
766 | (RegionalIndicatorV1Marker, "RI" ), |
767 | (SoftDottedV1Marker, "SD" ), |
768 | (SegmentStarterV1Marker, "segstart" ), |
769 | (CaseSensitiveV1Marker, "Sensitive" ), |
770 | (SentenceTerminalV1Marker, "STerm" ), |
771 | (TerminalPunctuationV1Marker, "Term" ), |
772 | (UnifiedIdeographV1Marker, "UIdeo" ), |
773 | (UppercaseV1Marker, "Upper" ), |
774 | (VariationSelectorV1Marker, "VS" ), |
775 | (WhiteSpaceV1Marker, "WSpace" ), |
776 | (XdigitV1Marker, "xdigit" ), |
777 | (XidContinueV1Marker, "XIDC" ), |
778 | (XidStartV1Marker, "XIDS" ), |
779 | ), |
780 | ( |
781 | // UnicodeSets (code points + strings) |
// Each entry is (marker type, string literal, bool). Only the `Basic_Emoji`
// entry passes `true`; the meaning of the trailing bool is set by the macro arm,
// which is not visible here -- TODO confirm before adding entries.
782 | (BasicEmojiV1Marker, "Basic_Emoji" , true), |
783 | (ExemplarCharactersMainV1Marker, "exemplarchars/main" , false), |
784 | ( |
785 | ExemplarCharactersAuxiliaryV1Marker, |
786 | "exemplarchars/auxiliary" , |
787 | false |
788 | ), |
789 | ( |
790 | ExemplarCharactersPunctuationV1Marker, |
791 | "exemplarchars/punctuation" , |
792 | false |
793 | ), |
794 | ( |
795 | ExemplarCharactersNumbersV1Marker, |
796 | "exemplarchars/numbers" , |
797 | false |
798 | ), |
799 | ( |
800 | ExemplarCharactersIndexV1Marker, |
801 | "exemplarchars/index" , |
802 | false |
803 | ), |
804 | ), |
805 | ( |
806 | // code point maps |
// Each entry is:
//   (code point map marker,
//    name-to-value marker,
//    (<kind>: value-to-short-name marker, value-to-long-name marker),
//    string literal (presumably the macro's `$enum_s`, which the macro splices
//    into the value-name data key paths and doc strings),
//    value type identifier)
// <kind> is one of `sparse`, `linear` or `linear4`; it presumably selects which
// of the macro's optional value-name marker arms gets expanded.
807 | ( |
808 | CanonicalCombiningClassV1Marker, |
809 | CanonicalCombiningClassNameToValueV1Marker, |
810 | ( |
// The only entry in this list that uses the `sparse` kind.
811 | sparse: CanonicalCombiningClassValueToShortNameV1Marker, |
812 | CanonicalCombiningClassValueToLongNameV1Marker |
813 | ), |
814 | "ccc" , |
815 | CanonicalCombiningClass |
816 | ), |
817 | ( |
818 | GeneralCategoryV1Marker, |
819 | GeneralCategoryNameToValueV1Marker, |
820 | ( |
821 | linear: GeneralCategoryValueToShortNameV1Marker, |
822 | GeneralCategoryValueToLongNameV1Marker |
823 | ), |
824 | "gc" , |
825 | GeneralCategory |
826 | ), |
827 | ( |
828 | BidiClassV1Marker, |
829 | BidiClassNameToValueV1Marker, |
830 | ( |
831 | linear: BidiClassValueToShortNameV1Marker, |
832 | BidiClassValueToLongNameV1Marker |
833 | ), |
834 | "bc" , |
835 | BidiClass |
836 | ), |
837 | ( |
838 | ScriptV1Marker, |
839 | ScriptNameToValueV1Marker, |
840 | ( |
// The only entry in this list that uses the `linear4` kind. In the macro's
// linear4 arm the short-name marker yokes
// `names::PropertyEnumToValueNameLinearTiny4MapV1`, while the long-name marker
// yokes `names::PropertyEnumToValueNameLinearMapV1` (Tiny4 is only for short
// names).
841 | linear4: ScriptValueToShortNameV1Marker, |
842 | ScriptValueToLongNameV1Marker |
843 | ), |
844 | "sc" , |
845 | Script |
846 | ), |
847 | ( |
848 | HangulSyllableTypeV1Marker, |
849 | HangulSyllableTypeNameToValueV1Marker, |
850 | ( |
851 | linear: HangulSyllableTypeValueToShortNameV1Marker, |
852 | HangulSyllableTypeValueToLongNameV1Marker |
853 | ), |
854 | "hst" , |
855 | HangulSyllableType |
856 | ), |
857 | ( |
858 | EastAsianWidthV1Marker, |
859 | EastAsianWidthNameToValueV1Marker, |
860 | ( |
861 | linear: EastAsianWidthValueToShortNameV1Marker, |
862 | EastAsianWidthValueToLongNameV1Marker |
863 | ), |
864 | "ea" , |
865 | EastAsianWidth |
866 | ), |
867 | ( |
868 | LineBreakV1Marker, |
869 | LineBreakNameToValueV1Marker, |
870 | ( |
871 | linear: LineBreakValueToShortNameV1Marker, |
872 | LineBreakValueToLongNameV1Marker |
873 | ), |
874 | "lb" , |
875 | LineBreak |
876 | ), |
877 | ( |
878 | GraphemeClusterBreakV1Marker, |
879 | GraphemeClusterBreakNameToValueV1Marker, |
880 | ( |
881 | linear: GraphemeClusterBreakValueToShortNameV1Marker, |
882 | GraphemeClusterBreakValueToLongNameV1Marker |
883 | ), |
884 | "GCB" , |
885 | GraphemeClusterBreak |
886 | ), |
887 | ( |
888 | WordBreakV1Marker, |
889 | WordBreakNameToValueV1Marker, |
890 | ( |
891 | linear: WordBreakValueToShortNameV1Marker, |
892 | WordBreakValueToLongNameV1Marker |
893 | ), |
894 | "WB" , |
895 | WordBreak |
896 | ), |
897 | ( |
898 | SentenceBreakV1Marker, |
899 | SentenceBreakNameToValueV1Marker, |
900 | ( |
901 | linear: SentenceBreakValueToShortNameV1Marker, |
902 | SentenceBreakValueToLongNameV1Marker |
903 | ), |
904 | "SB" , |
905 | SentenceBreak |
906 | ), |
907 | ( |
908 | IndicSyllabicCategoryV1Marker, |
909 | IndicSyllabicCategoryNameToValueV1Marker, |
910 | ( |
911 | linear: IndicSyllabicCategoryValueToShortNameV1Marker, |
912 | IndicSyllabicCategoryValueToLongNameV1Marker |
913 | ), |
914 | "InSC" , |
915 | IndicSyllabicCategory |
916 | ), |
917 | ( |
918 | JoiningTypeV1Marker, |
919 | JoiningTypeNameToValueV1Marker, |
920 | ( |
921 | linear: JoiningTypeValueToShortNameV1Marker, |
922 | JoiningTypeValueToLongNameV1Marker |
923 | ), |
924 | "jt" , |
925 | JoiningType |
926 | ), |
927 | // note: the names key for the GCM mask is handled above |
928 | ) |
929 | ); |
930 | |