1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | use crate::provider::bidi_data::{ |
6 | CheckedBidiPairedBracketType, MirroredPairedBracketData, MirroredPairedBracketDataTryFromError, |
7 | }; |
8 | use crate::script::ScriptWithExt; |
9 | use crate::{ |
10 | BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup, |
11 | GraphemeClusterBreak, HangulSyllableType, IndicSyllabicCategory, JoiningType, LineBreak, |
12 | Script, SentenceBreak, WordBreak, |
13 | }; |
14 | use core::convert::TryInto; |
15 | use core::num::TryFromIntError; |
16 | use zerovec::ule::{AsULE, RawBytesULE}; |
17 | |
18 | use icu_collections::codepointtrie::TrieValue; |
19 | |
20 | use core::convert::TryFrom; |
21 | |
22 | impl TrieValue for CanonicalCombiningClass { |
23 | type TryFromU32Error = TryFromIntError; |
24 | |
25 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
26 | u8::try_from(i).map(Self) |
27 | } |
28 | |
29 | fn to_u32(self) -> u32 { |
30 | u32::from(self.0) |
31 | } |
32 | } |
33 | |
34 | impl TrieValue for BidiClass { |
35 | type TryFromU32Error = TryFromIntError; |
36 | |
37 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
38 | u8::try_from(i).map(Self) |
39 | } |
40 | |
41 | fn to_u32(self) -> u32 { |
42 | u32::from(self.0) |
43 | } |
44 | } |
45 | |
46 | impl TrieValue for GeneralCategory { |
47 | type TryFromU32Error = &'static str; |
48 | |
49 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
50 | // If the u32 is out of range, fall back to u8::MAX, which is out of range of the GeneralCategory enum. |
51 | GeneralCategory::new_from_u8(i.try_into().unwrap_or(default:u8::MAX)) |
52 | .ok_or("Cannot parse GeneralCategory from integer" ) |
53 | } |
54 | |
55 | fn to_u32(self) -> u32 { |
56 | u32::from(self as u8) |
57 | } |
58 | } |
59 | |
60 | impl TrieValue for Script { |
61 | type TryFromU32Error = TryFromIntError; |
62 | |
63 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
64 | u16::try_from(i).map(op:Script) |
65 | } |
66 | |
67 | fn to_u32(self) -> u32 { |
68 | u32::from(self.0) |
69 | } |
70 | } |
71 | |
72 | impl TrieValue for HangulSyllableType { |
73 | type TryFromU32Error = TryFromIntError; |
74 | |
75 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
76 | u8::try_from(i).map(Self) |
77 | } |
78 | |
79 | fn to_u32(self) -> u32 { |
80 | u32::from(self.0) |
81 | } |
82 | } |
83 | |
84 | impl TrieValue for ScriptWithExt { |
85 | type TryFromU32Error = TryFromIntError; |
86 | |
87 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
88 | u16::try_from(i).map(Self) |
89 | } |
90 | |
91 | fn to_u32(self) -> u32 { |
92 | u32::from(self.0) |
93 | } |
94 | } |
95 | |
96 | impl TrieValue for EastAsianWidth { |
97 | type TryFromU32Error = TryFromIntError; |
98 | |
99 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
100 | u8::try_from(i).map(Self) |
101 | } |
102 | |
103 | fn to_u32(self) -> u32 { |
104 | u32::from(self.0) |
105 | } |
106 | } |
107 | |
108 | impl TrieValue for LineBreak { |
109 | type TryFromU32Error = TryFromIntError; |
110 | |
111 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
112 | u8::try_from(i).map(Self) |
113 | } |
114 | |
115 | fn to_u32(self) -> u32 { |
116 | u32::from(self.0) |
117 | } |
118 | } |
119 | |
120 | impl TrieValue for GraphemeClusterBreak { |
121 | type TryFromU32Error = TryFromIntError; |
122 | |
123 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
124 | u8::try_from(i).map(Self) |
125 | } |
126 | |
127 | fn to_u32(self) -> u32 { |
128 | u32::from(self.0) |
129 | } |
130 | } |
131 | |
132 | impl TrieValue for WordBreak { |
133 | type TryFromU32Error = TryFromIntError; |
134 | |
135 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
136 | u8::try_from(i).map(Self) |
137 | } |
138 | |
139 | fn to_u32(self) -> u32 { |
140 | u32::from(self.0) |
141 | } |
142 | } |
143 | |
144 | impl TrieValue for SentenceBreak { |
145 | type TryFromU32Error = TryFromIntError; |
146 | |
147 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
148 | u8::try_from(i).map(Self) |
149 | } |
150 | |
151 | fn to_u32(self) -> u32 { |
152 | u32::from(self.0) |
153 | } |
154 | } |
155 | |
156 | impl TrieValue for CheckedBidiPairedBracketType { |
157 | type TryFromU32Error = TryFromIntError; |
158 | |
159 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
160 | Ok(match i { |
161 | 1 => CheckedBidiPairedBracketType::Open, |
162 | 2 => CheckedBidiPairedBracketType::Close, |
163 | _ => CheckedBidiPairedBracketType::None, |
164 | }) |
165 | } |
166 | } |
167 | |
168 | impl TrieValue for IndicSyllabicCategory { |
169 | type TryFromU32Error = TryFromIntError; |
170 | |
171 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
172 | u8::try_from(i).map(Self) |
173 | } |
174 | |
175 | fn to_u32(self) -> u32 { |
176 | u32::from(self.0) |
177 | } |
178 | } |
179 | |
180 | // GCG is not used inside tries, but it is used in the name lookup type, and we want |
181 | // to squeeze it into a u16 for storage. Its named mask values are specced so we can |
182 | // do this in code. |
183 | // |
184 | // This is done by: |
185 | // - Single-value masks are translated to their corresponding GeneralCategory values |
186 | // - we know all of the multi-value masks and we give them special values |
187 | // - Anything else goes to 0xFF00, though this code path shouldn't be hit unless working with malformed icuexportdata |
188 | // |
189 | // In the reverse direction, unknown values go to the empty mask, but this codepath should not be hit except |
190 | // with malformed ICU4X generated data. |
191 | impl AsULE for GeneralCategoryGroup { |
192 | type ULE = RawBytesULE<2>; |
193 | fn to_unaligned(self) -> Self::ULE { |
194 | let value: u16 = gcg_to_packed_u16(self); |
195 | value.to_unaligned() |
196 | } |
197 | fn from_unaligned(ule: Self::ULE) -> Self { |
198 | let value: u16 = ule.as_unsigned_int(); |
199 | packed_u16_to_gcg(value) |
200 | } |
201 | } |
202 | |
203 | fn packed_u16_to_gcg(value: u16) -> GeneralCategoryGroup { |
204 | match value { |
205 | 0xFFFF => GeneralCategoryGroup::CasedLetter, |
206 | 0xFFFE => GeneralCategoryGroup::Letter, |
207 | 0xFFFD => GeneralCategoryGroup::Mark, |
208 | 0xFFFC => GeneralCategoryGroup::Number, |
209 | 0xFFFB => GeneralCategoryGroup::Separator, |
210 | 0xFFFA => GeneralCategoryGroup::Other, |
211 | 0xFFF9 => GeneralCategoryGroup::Punctuation, |
212 | 0xFFF8 => GeneralCategoryGroup::Symbol, |
213 | v: u16 if v < 32 => GeneralCategoryMap<{unknown}, impl FnMut(…) -> …>::new_from_u8(v as u8) |
214 | .map(|gc| gc.into()) |
215 | .unwrap_or(GeneralCategoryGroup(0)), |
216 | // unknown values produce an empty mask |
217 | _ => GeneralCategoryGroup(0), |
218 | } |
219 | } |
220 | |
221 | fn gcg_to_packed_u16(gcg: GeneralCategoryGroup) -> u16 { |
222 | // if it's a single property, translate to that property |
223 | if gcg.0.count_ones() == 1 { |
224 | // inverse operation of a bitshift |
225 | gcg.0.trailing_zeros() as u16 |
226 | } else { |
227 | match gcg { |
228 | GeneralCategoryGroup::CasedLetter => 0xFFFF, |
229 | GeneralCategoryGroup::Letter => 0xFFFE, |
230 | GeneralCategoryGroup::Mark => 0xFFFD, |
231 | GeneralCategoryGroup::Number => 0xFFFC, |
232 | GeneralCategoryGroup::Separator => 0xFFFB, |
233 | GeneralCategoryGroup::Other => 0xFFFA, |
234 | GeneralCategoryGroup::Punctuation => 0xFFF9, |
235 | GeneralCategoryGroup::Symbol => 0xFFF8, |
236 | _ => 0xFF00, // random sentinel value |
237 | } |
238 | } |
239 | } |
240 | |
241 | impl TrieValue for GeneralCategoryGroup { |
242 | type TryFromU32Error = TryFromIntError; |
243 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
244 | // Even though we're dealing with u32s here, TrieValue is about converting |
245 | // trie storage types to the actual type. This type will always be a packed u16 |
246 | // in our case since the names map upcasts from u16 |
247 | u16::try_from(i).map(op:packed_u16_to_gcg) |
248 | } |
249 | |
250 | fn to_u32(self) -> u32 { |
251 | u32::from(gcg_to_packed_u16(self)) |
252 | } |
253 | } |
254 | |
255 | impl TrieValue for MirroredPairedBracketData { |
256 | type TryFromU32Error = MirroredPairedBracketDataTryFromError; |
257 | |
258 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
259 | Self::try_from(i) |
260 | } |
261 | } |
262 | |
263 | impl TrieValue for JoiningType { |
264 | type TryFromU32Error = TryFromIntError; |
265 | |
266 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
267 | u8::try_from(i).map(Self) |
268 | } |
269 | |
270 | fn to_u32(self) -> u32 { |
271 | u32::from(self.0) |
272 | } |
273 | } |
274 | |