1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::provider::bidi_data::{
6 CheckedBidiPairedBracketType, MirroredPairedBracketData, MirroredPairedBracketDataTryFromError,
7};
8use crate::script::ScriptWithExt;
9use crate::{
10 BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup,
11 GraphemeClusterBreak, HangulSyllableType, IndicSyllabicCategory, JoiningType, LineBreak,
12 Script, SentenceBreak, WordBreak,
13};
14use core::convert::TryInto;
15use core::num::TryFromIntError;
16use zerovec::ule::{AsULE, RawBytesULE};
17
18use icu_collections::codepointtrie::TrieValue;
19
20use core::convert::TryFrom;
21
22impl TrieValue for CanonicalCombiningClass {
23 type TryFromU32Error = TryFromIntError;
24
25 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
26 u8::try_from(i).map(Self)
27 }
28
29 fn to_u32(self) -> u32 {
30 u32::from(self.0)
31 }
32}
33
34impl TrieValue for BidiClass {
35 type TryFromU32Error = TryFromIntError;
36
37 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
38 u8::try_from(i).map(Self)
39 }
40
41 fn to_u32(self) -> u32 {
42 u32::from(self.0)
43 }
44}
45
46impl TrieValue for GeneralCategory {
47 type TryFromU32Error = &'static str;
48
49 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
50 // If the u32 is out of range, fall back to u8::MAX, which is out of range of the GeneralCategory enum.
51 GeneralCategory::new_from_u8(i.try_into().unwrap_or(default:u8::MAX))
52 .ok_or("Cannot parse GeneralCategory from integer")
53 }
54
55 fn to_u32(self) -> u32 {
56 u32::from(self as u8)
57 }
58}
59
60impl TrieValue for Script {
61 type TryFromU32Error = TryFromIntError;
62
63 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
64 u16::try_from(i).map(op:Script)
65 }
66
67 fn to_u32(self) -> u32 {
68 u32::from(self.0)
69 }
70}
71
72impl TrieValue for HangulSyllableType {
73 type TryFromU32Error = TryFromIntError;
74
75 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
76 u8::try_from(i).map(Self)
77 }
78
79 fn to_u32(self) -> u32 {
80 u32::from(self.0)
81 }
82}
83
84impl TrieValue for ScriptWithExt {
85 type TryFromU32Error = TryFromIntError;
86
87 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
88 u16::try_from(i).map(Self)
89 }
90
91 fn to_u32(self) -> u32 {
92 u32::from(self.0)
93 }
94}
95
96impl TrieValue for EastAsianWidth {
97 type TryFromU32Error = TryFromIntError;
98
99 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
100 u8::try_from(i).map(Self)
101 }
102
103 fn to_u32(self) -> u32 {
104 u32::from(self.0)
105 }
106}
107
108impl TrieValue for LineBreak {
109 type TryFromU32Error = TryFromIntError;
110
111 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
112 u8::try_from(i).map(Self)
113 }
114
115 fn to_u32(self) -> u32 {
116 u32::from(self.0)
117 }
118}
119
120impl TrieValue for GraphemeClusterBreak {
121 type TryFromU32Error = TryFromIntError;
122
123 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
124 u8::try_from(i).map(Self)
125 }
126
127 fn to_u32(self) -> u32 {
128 u32::from(self.0)
129 }
130}
131
132impl TrieValue for WordBreak {
133 type TryFromU32Error = TryFromIntError;
134
135 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
136 u8::try_from(i).map(Self)
137 }
138
139 fn to_u32(self) -> u32 {
140 u32::from(self.0)
141 }
142}
143
144impl TrieValue for SentenceBreak {
145 type TryFromU32Error = TryFromIntError;
146
147 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
148 u8::try_from(i).map(Self)
149 }
150
151 fn to_u32(self) -> u32 {
152 u32::from(self.0)
153 }
154}
155
156impl TrieValue for CheckedBidiPairedBracketType {
157 type TryFromU32Error = TryFromIntError;
158
159 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
160 Ok(match i {
161 1 => CheckedBidiPairedBracketType::Open,
162 2 => CheckedBidiPairedBracketType::Close,
163 _ => CheckedBidiPairedBracketType::None,
164 })
165 }
166}
167
168impl TrieValue for IndicSyllabicCategory {
169 type TryFromU32Error = TryFromIntError;
170
171 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
172 u8::try_from(i).map(Self)
173 }
174
175 fn to_u32(self) -> u32 {
176 u32::from(self.0)
177 }
178}
179
// GeneralCategoryGroup (GCG) is not used inside tries, but it is used in the name lookup
// type, and we want to squeeze it into a u16 for storage. Its named mask values are
// specced, so we can do this in code.
//
// This is done as follows:
// - Single-value masks are translated to their corresponding GeneralCategory values.
// - We know all of the multi-value masks, and we give them special values.
// - Anything else goes to 0xFF00, though this code path shouldn't be hit unless working
//   with malformed icuexportdata.
//
// In the reverse direction, unknown values go to the empty mask, but this code path should
// not be hit except with malformed ICU4X generated data.
191impl AsULE for GeneralCategoryGroup {
192 type ULE = RawBytesULE<2>;
193 fn to_unaligned(self) -> Self::ULE {
194 let value: u16 = gcg_to_packed_u16(self);
195 value.to_unaligned()
196 }
197 fn from_unaligned(ule: Self::ULE) -> Self {
198 let value: u16 = ule.as_unsigned_int();
199 packed_u16_to_gcg(value)
200 }
201}
202
203fn packed_u16_to_gcg(value: u16) -> GeneralCategoryGroup {
204 match value {
205 0xFFFF => GeneralCategoryGroup::CasedLetter,
206 0xFFFE => GeneralCategoryGroup::Letter,
207 0xFFFD => GeneralCategoryGroup::Mark,
208 0xFFFC => GeneralCategoryGroup::Number,
209 0xFFFB => GeneralCategoryGroup::Separator,
210 0xFFFA => GeneralCategoryGroup::Other,
211 0xFFF9 => GeneralCategoryGroup::Punctuation,
212 0xFFF8 => GeneralCategoryGroup::Symbol,
213 v: u16 if v < 32 => GeneralCategoryMap<{unknown}, impl FnMut(…) -> …>::new_from_u8(v as u8)
214 .map(|gc| gc.into())
215 .unwrap_or(GeneralCategoryGroup(0)),
216 // unknown values produce an empty mask
217 _ => GeneralCategoryGroup(0),
218 }
219}
220
221fn gcg_to_packed_u16(gcg: GeneralCategoryGroup) -> u16 {
222 // if it's a single property, translate to that property
223 if gcg.0.count_ones() == 1 {
224 // inverse operation of a bitshift
225 gcg.0.trailing_zeros() as u16
226 } else {
227 match gcg {
228 GeneralCategoryGroup::CasedLetter => 0xFFFF,
229 GeneralCategoryGroup::Letter => 0xFFFE,
230 GeneralCategoryGroup::Mark => 0xFFFD,
231 GeneralCategoryGroup::Number => 0xFFFC,
232 GeneralCategoryGroup::Separator => 0xFFFB,
233 GeneralCategoryGroup::Other => 0xFFFA,
234 GeneralCategoryGroup::Punctuation => 0xFFF9,
235 GeneralCategoryGroup::Symbol => 0xFFF8,
236 _ => 0xFF00, // random sentinel value
237 }
238 }
239}
240
241impl TrieValue for GeneralCategoryGroup {
242 type TryFromU32Error = TryFromIntError;
243 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
244 // Even though we're dealing with u32s here, TrieValue is about converting
245 // trie storage types to the actual type. This type will always be a packed u16
246 // in our case since the names map upcasts from u16
247 u16::try_from(i).map(op:packed_u16_to_gcg)
248 }
249
250 fn to_u32(self) -> u32 {
251 u32::from(gcg_to_packed_u16(self))
252 }
253}
254
255impl TrieValue for MirroredPairedBracketData {
256 type TryFromU32Error = MirroredPairedBracketDataTryFromError;
257
258 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
259 Self::try_from(i)
260 }
261}
262
263impl TrieValue for JoiningType {
264 type TryFromU32Error = TryFromIntError;
265
266 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
267 u8::try_from(i).map(Self)
268 }
269
270 fn to_u32(self) -> u32 {
271 u32::from(self.0)
272 }
273}
274