| 1 | // This file is part of ICU4X. For terms of use, please see the file |
| 2 | // called LICENSE at the top level of the ICU4X source tree |
| 3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
| 4 | |
| 5 | use crate::provider::bidi_data::{ |
| 6 | CheckedBidiPairedBracketType, MirroredPairedBracketData, MirroredPairedBracketDataTryFromError, |
| 7 | }; |
| 8 | use crate::script::ScriptWithExt; |
| 9 | use crate::{ |
| 10 | BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup, |
| 11 | GraphemeClusterBreak, HangulSyllableType, IndicSyllabicCategory, JoiningType, LineBreak, |
| 12 | Script, SentenceBreak, WordBreak, |
| 13 | }; |
| 14 | use core::convert::TryInto; |
| 15 | use core::num::TryFromIntError; |
| 16 | use zerovec::ule::{AsULE, RawBytesULE}; |
| 17 | |
| 18 | use icu_collections::codepointtrie::TrieValue; |
| 19 | |
| 20 | use core::convert::TryFrom; |
| 21 | |
| 22 | impl TrieValue for CanonicalCombiningClass { |
| 23 | type TryFromU32Error = TryFromIntError; |
| 24 | |
| 25 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
| 26 | u8::try_from(i).map(Self) |
| 27 | } |
| 28 | |
| 29 | fn to_u32(self) -> u32 { |
| 30 | u32::from(self.0) |
| 31 | } |
| 32 | } |
| 33 | |
| 34 | impl TrieValue for BidiClass { |
| 35 | type TryFromU32Error = TryFromIntError; |
| 36 | |
| 37 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
| 38 | u8::try_from(i).map(Self) |
| 39 | } |
| 40 | |
| 41 | fn to_u32(self) -> u32 { |
| 42 | u32::from(self.0) |
| 43 | } |
| 44 | } |
| 45 | |
| 46 | impl TrieValue for GeneralCategory { |
| 47 | type TryFromU32Error = &'static str; |
| 48 | |
| 49 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
| 50 | // If the u32 is out of range, fall back to u8::MAX, which is out of range of the GeneralCategory enum. |
| 51 | GeneralCategory::new_from_u8(i.try_into().unwrap_or(default:u8::MAX)) |
| 52 | .ok_or("Cannot parse GeneralCategory from integer" ) |
| 53 | } |
| 54 | |
| 55 | fn to_u32(self) -> u32 { |
| 56 | u32::from(self as u8) |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | impl TrieValue for Script { |
| 61 | type TryFromU32Error = TryFromIntError; |
| 62 | |
| 63 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
| 64 | u16::try_from(i).map(op:Script) |
| 65 | } |
| 66 | |
| 67 | fn to_u32(self) -> u32 { |
| 68 | u32::from(self.0) |
| 69 | } |
| 70 | } |
| 71 | |
| 72 | impl TrieValue for HangulSyllableType { |
| 73 | type TryFromU32Error = TryFromIntError; |
| 74 | |
| 75 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
| 76 | u8::try_from(i).map(Self) |
| 77 | } |
| 78 | |
| 79 | fn to_u32(self) -> u32 { |
| 80 | u32::from(self.0) |
| 81 | } |
| 82 | } |
| 83 | |
| 84 | impl TrieValue for ScriptWithExt { |
| 85 | type TryFromU32Error = TryFromIntError; |
| 86 | |
| 87 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
| 88 | u16::try_from(i).map(Self) |
| 89 | } |
| 90 | |
| 91 | fn to_u32(self) -> u32 { |
| 92 | u32::from(self.0) |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | impl TrieValue for EastAsianWidth { |
| 97 | type TryFromU32Error = TryFromIntError; |
| 98 | |
| 99 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
| 100 | u8::try_from(i).map(Self) |
| 101 | } |
| 102 | |
| 103 | fn to_u32(self) -> u32 { |
| 104 | u32::from(self.0) |
| 105 | } |
| 106 | } |
| 107 | |
| 108 | impl TrieValue for LineBreak { |
| 109 | type TryFromU32Error = TryFromIntError; |
| 110 | |
| 111 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
| 112 | u8::try_from(i).map(Self) |
| 113 | } |
| 114 | |
| 115 | fn to_u32(self) -> u32 { |
| 116 | u32::from(self.0) |
| 117 | } |
| 118 | } |
| 119 | |
| 120 | impl TrieValue for GraphemeClusterBreak { |
| 121 | type TryFromU32Error = TryFromIntError; |
| 122 | |
| 123 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
| 124 | u8::try_from(i).map(Self) |
| 125 | } |
| 126 | |
| 127 | fn to_u32(self) -> u32 { |
| 128 | u32::from(self.0) |
| 129 | } |
| 130 | } |
| 131 | |
| 132 | impl TrieValue for WordBreak { |
| 133 | type TryFromU32Error = TryFromIntError; |
| 134 | |
| 135 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
| 136 | u8::try_from(i).map(Self) |
| 137 | } |
| 138 | |
| 139 | fn to_u32(self) -> u32 { |
| 140 | u32::from(self.0) |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | impl TrieValue for SentenceBreak { |
| 145 | type TryFromU32Error = TryFromIntError; |
| 146 | |
| 147 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
| 148 | u8::try_from(i).map(Self) |
| 149 | } |
| 150 | |
| 151 | fn to_u32(self) -> u32 { |
| 152 | u32::from(self.0) |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | impl TrieValue for CheckedBidiPairedBracketType { |
| 157 | type TryFromU32Error = TryFromIntError; |
| 158 | |
| 159 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
| 160 | Ok(match i { |
| 161 | 1 => CheckedBidiPairedBracketType::Open, |
| 162 | 2 => CheckedBidiPairedBracketType::Close, |
| 163 | _ => CheckedBidiPairedBracketType::None, |
| 164 | }) |
| 165 | } |
| 166 | } |
| 167 | |
| 168 | impl TrieValue for IndicSyllabicCategory { |
| 169 | type TryFromU32Error = TryFromIntError; |
| 170 | |
| 171 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
| 172 | u8::try_from(i).map(Self) |
| 173 | } |
| 174 | |
| 175 | fn to_u32(self) -> u32 { |
| 176 | u32::from(self.0) |
| 177 | } |
| 178 | } |
| 179 | |
| 180 | // GCG is not used inside tries, but it is used in the name lookup type, and we want |
| 181 | // to squeeze it into a u16 for storage. Its named mask values are specced so we can |
| 182 | // do this in code. |
| 183 | // |
| 184 | // This is done by: |
| 185 | // - Single-value masks are translated to their corresponding GeneralCategory values |
| 186 | // - we know all of the multi-value masks and we give them special values |
| 187 | // - Anything else goes to 0xFF00, though this code path shouldn't be hit unless working with malformed icuexportdata |
| 188 | // |
| 189 | // In the reverse direction, unknown values go to the empty mask, but this codepath should not be hit except |
| 190 | // with malformed ICU4X generated data. |
| 191 | impl AsULE for GeneralCategoryGroup { |
| 192 | type ULE = RawBytesULE<2>; |
| 193 | fn to_unaligned(self) -> Self::ULE { |
| 194 | let value: u16 = gcg_to_packed_u16(self); |
| 195 | value.to_unaligned() |
| 196 | } |
| 197 | fn from_unaligned(ule: Self::ULE) -> Self { |
| 198 | let value: u16 = ule.as_unsigned_int(); |
| 199 | packed_u16_to_gcg(value) |
| 200 | } |
| 201 | } |
| 202 | |
| 203 | fn packed_u16_to_gcg(value: u16) -> GeneralCategoryGroup { |
| 204 | match value { |
| 205 | 0xFFFF => GeneralCategoryGroup::CasedLetter, |
| 206 | 0xFFFE => GeneralCategoryGroup::Letter, |
| 207 | 0xFFFD => GeneralCategoryGroup::Mark, |
| 208 | 0xFFFC => GeneralCategoryGroup::Number, |
| 209 | 0xFFFB => GeneralCategoryGroup::Separator, |
| 210 | 0xFFFA => GeneralCategoryGroup::Other, |
| 211 | 0xFFF9 => GeneralCategoryGroup::Punctuation, |
| 212 | 0xFFF8 => GeneralCategoryGroup::Symbol, |
| 213 | v: u16 if v < 32 => GeneralCategoryMap<{unknown}, impl FnMut(…) -> …>::new_from_u8(v as u8) |
| 214 | .map(|gc| gc.into()) |
| 215 | .unwrap_or(GeneralCategoryGroup(0)), |
| 216 | // unknown values produce an empty mask |
| 217 | _ => GeneralCategoryGroup(0), |
| 218 | } |
| 219 | } |
| 220 | |
| 221 | fn gcg_to_packed_u16(gcg: GeneralCategoryGroup) -> u16 { |
| 222 | // if it's a single property, translate to that property |
| 223 | if gcg.0.count_ones() == 1 { |
| 224 | // inverse operation of a bitshift |
| 225 | gcg.0.trailing_zeros() as u16 |
| 226 | } else { |
| 227 | match gcg { |
| 228 | GeneralCategoryGroup::CasedLetter => 0xFFFF, |
| 229 | GeneralCategoryGroup::Letter => 0xFFFE, |
| 230 | GeneralCategoryGroup::Mark => 0xFFFD, |
| 231 | GeneralCategoryGroup::Number => 0xFFFC, |
| 232 | GeneralCategoryGroup::Separator => 0xFFFB, |
| 233 | GeneralCategoryGroup::Other => 0xFFFA, |
| 234 | GeneralCategoryGroup::Punctuation => 0xFFF9, |
| 235 | GeneralCategoryGroup::Symbol => 0xFFF8, |
| 236 | _ => 0xFF00, // random sentinel value |
| 237 | } |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | impl TrieValue for GeneralCategoryGroup { |
| 242 | type TryFromU32Error = TryFromIntError; |
| 243 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
| 244 | // Even though we're dealing with u32s here, TrieValue is about converting |
| 245 | // trie storage types to the actual type. This type will always be a packed u16 |
| 246 | // in our case since the names map upcasts from u16 |
| 247 | u16::try_from(i).map(op:packed_u16_to_gcg) |
| 248 | } |
| 249 | |
| 250 | fn to_u32(self) -> u32 { |
| 251 | u32::from(gcg_to_packed_u16(self)) |
| 252 | } |
| 253 | } |
| 254 | |
| 255 | impl TrieValue for MirroredPairedBracketData { |
| 256 | type TryFromU32Error = MirroredPairedBracketDataTryFromError; |
| 257 | |
| 258 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
| 259 | Self::try_from(i) |
| 260 | } |
| 261 | } |
| 262 | |
| 263 | impl TrieValue for JoiningType { |
| 264 | type TryFromU32Error = TryFromIntError; |
| 265 | |
| 266 | fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { |
| 267 | u8::try_from(i).map(Self) |
| 268 | } |
| 269 | |
| 270 | fn to_u32(self) -> u32 { |
| 271 | u32::from(self.0) |
| 272 | } |
| 273 | } |
| 274 | |