| 1 | //! This crate exposes the Unicode `Script` and `Script_Extension` | 
| 2 | //! properties from [UAX #24](http://www.unicode.org/reports/tr24/) | 
|---|
| 3 |  | 
|---|
| 4 | #![ cfg_attr(not(test), no_std)] | 
|---|
| 5 | #![ cfg_attr(feature = "bench", feature(test))] | 
|---|
| 6 |  | 
|---|
| 7 | mod tables; | 
|---|
| 8 |  | 
|---|
| 9 | use core::convert::TryFrom; | 
|---|
| 10 | use core::fmt; | 
|---|
| 11 | use core::u64; | 
|---|
| 12 | pub use tables::script_extensions; | 
|---|
| 13 | use tables::{get_script, get_script_extension, NEXT_SCRIPT}; | 
|---|
| 14 | pub use tables::{Script, UNICODE_VERSION}; | 
|---|
| 15 |  | 
|---|
| 16 | impl Script { | 
|---|
| 17 | /// Get the full name of a script. | 
|---|
| 18 | pub fn full_name(self) -> &'static str { | 
|---|
| 19 | self.inner_full_name() | 
|---|
| 20 | } | 
|---|
| 21 |  | 
|---|
| 22 | /// Attempts to parse script name from the provided string. | 
|---|
| 23 | /// Returns `None` if the provided string does not represent a valid | 
|---|
| 24 | /// script full name. | 
|---|
| 25 | pub fn from_full_name(input: &str) -> Option<Self> { | 
|---|
| 26 | Self::inner_from_full_name(input) | 
|---|
| 27 | } | 
|---|
| 28 |  | 
|---|
| 29 | /// Get the four-character short name of a script. | 
|---|
| 30 | pub fn short_name(self) -> &'static str { | 
|---|
| 31 | self.inner_short_name() | 
|---|
| 32 | } | 
|---|
| 33 |  | 
|---|
| 34 | /// Attempts to parse script name from the provided string. | 
|---|
| 35 | /// Returns `None` if the provided string does not represent a valid | 
|---|
| 36 | /// script four-character short name. | 
|---|
| 37 | pub fn from_short_name(input: &str) -> Option<Self> { | 
|---|
| 38 | Self::inner_from_short_name(input) | 
|---|
| 39 | } | 
|---|
| 40 |  | 
|---|
| 41 | /// Is this script "Recommended" according to | 
|---|
| 42 | /// [UAX #31](www.unicode.org/reports/tr31/#Table_Recommended_Scripts)? | 
|---|
| 43 | pub fn is_recommended(self) -> bool { | 
|---|
| 44 | use Script::*; | 
|---|
| 45 | match self { | 
|---|
| 46 | Common | Inherited | Arabic | Armenian | Bengali | Bopomofo | Cyrillic | Devanagari | 
|---|
| 47 | | Ethiopic | Georgian | Greek | Gujarati | Gurmukhi | Han | Hangul | Hebrew | 
|---|
| 48 | | Hiragana | Kannada | Katakana | Khmer | Lao | Latin | Malayalam | Myanmar | Oriya | 
|---|
| 49 | | Sinhala | Tamil | Telugu | Thaana | Thai | Tibetan => true, | 
|---|
| 50 | _ => false, | 
|---|
| 51 | } | 
|---|
| 52 | } | 
|---|
| 53 | } | 
|---|
| 54 |  | 
|---|
| 55 | impl From<Script> for ScriptExtension { | 
|---|
| 56 | fn from(script: Script) -> Self { | 
|---|
| 57 | if script == Script::Common { | 
|---|
| 58 | ScriptExtension::new_common() | 
|---|
| 59 | } else if script == Script::Inherited { | 
|---|
| 60 | ScriptExtension::new_inherited() | 
|---|
| 61 | } else if script == Script::Unknown { | 
|---|
| 62 | ScriptExtension::new_unknown() | 
|---|
| 63 | } else { | 
|---|
| 64 | let mut first = 0; | 
|---|
| 65 | let mut second = 0; | 
|---|
| 66 | let mut third = 0; | 
|---|
| 67 | let bit = script as u8; | 
|---|
| 68 | // Find out which field it's in, and set the appropriate bit there | 
|---|
| 69 | if bit < 64 { | 
|---|
| 70 | first = 1 << bit as u64; | 
|---|
| 71 | } else if bit < 128 { | 
|---|
| 72 | // offset by 64 since `bit` is an absolute number, | 
|---|
| 73 | // not relative to the chunk | 
|---|
| 74 | second = 1 << (bit - 64) as u64; | 
|---|
| 75 | } else { | 
|---|
| 76 | third = 1 << (bit - 128) as u32; | 
|---|
| 77 | } | 
|---|
| 78 | ScriptExtension::new(first, second, third) | 
|---|
| 79 | } | 
|---|
| 80 | } | 
|---|
| 81 | } | 
|---|
| 82 |  | 
|---|
| 83 | impl TryFrom<ScriptExtension> for Script { | 
|---|
| 84 | type Error = (); | 
|---|
| 85 | fn try_from(ext: ScriptExtension) -> Result<Self, ()> { | 
|---|
| 86 | if ext.is_common_or_inherited() { | 
|---|
| 87 | if ext.common { | 
|---|
| 88 | Ok(Script::Common) | 
|---|
| 89 | } else { | 
|---|
| 90 | Ok(Script::Inherited) | 
|---|
| 91 | } | 
|---|
| 92 | } else if ext.is_empty() { | 
|---|
| 93 | Ok(Script::Unknown) | 
|---|
| 94 | } else { | 
|---|
| 95 | // filled elements will have set ones | 
|---|
| 96 | let fo = ext.first.count_ones(); | 
|---|
| 97 | let so = ext.second.count_ones(); | 
|---|
| 98 | let to = ext.third.count_ones(); | 
|---|
| 99 | // only one bit set, in the first chunk | 
|---|
| 100 | if fo == 1 && so == 0 && to == 0 { | 
|---|
| 101 | // use trailing_zeroes() to figure out which bit it is | 
|---|
| 102 | Ok(Script::for_integer(ext.first.trailing_zeros() as u8)) | 
|---|
| 103 | // only one bit set, in the second chunk | 
|---|
| 104 | } else if fo == 0 && so == 1 && to == 0 { | 
|---|
| 105 | Ok(Script::for_integer(64 + ext.second.trailing_zeros() as u8)) | 
|---|
| 106 | // only one bit set, in the third chunk | 
|---|
| 107 | } else if fo == 0 && so == 0 && to == 1 { | 
|---|
| 108 | Ok(Script::for_integer(128 + ext.third.trailing_zeros() as u8)) | 
|---|
| 109 | } else { | 
|---|
| 110 | Err(()) | 
|---|
| 111 | } | 
|---|
| 112 | } | 
|---|
| 113 | } | 
|---|
| 114 | } | 
|---|
| 115 |  | 
|---|
| 116 | impl Default for Script { | 
|---|
| 117 | fn default() -> Self { | 
|---|
| 118 | Script::Common | 
|---|
| 119 | } | 
|---|
| 120 | } | 
|---|
| 121 |  | 
|---|
| 122 | impl From<char> for Script { | 
|---|
| 123 | fn from(o: char) -> Self { | 
|---|
| 124 | o.script() | 
|---|
| 125 | } | 
|---|
| 126 | } | 
|---|
| 127 |  | 
|---|
| 128 | impl fmt::Display for Script { | 
|---|
| 129 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | 
|---|
| 130 | write!(f, "{} ", self.full_name()) | 
|---|
| 131 | } | 
|---|
| 132 | } | 
|---|
| 133 |  | 
|---|
| 134 | #[ derive(Clone, Copy, PartialEq, Eq, Hash)] | 
|---|
| 135 | #[ non_exhaustive] | 
|---|
| 136 | /// A value for the `Script_Extension` property | 
|---|
| 137 | /// | 
|---|
| 138 | /// [`ScriptExtension`] is one or more [`Script`] | 
|---|
| 139 | /// | 
|---|
| 140 | /// This is essentially an optimized version of `Vec<Script>` that uses bitfields | 
|---|
| 141 | pub struct ScriptExtension { | 
|---|
| 142 | // A bitset for the first 64 scripts | 
|---|
| 143 | first: u64, | 
|---|
| 144 | // A bitset for the scripts 65-128 | 
|---|
| 145 | second: u64, | 
|---|
| 146 | // A bitset for scripts after 128 | 
|---|
| 147 | third: u64, | 
|---|
| 148 | // Both Common and Inherited are represented by all used bits being set, | 
|---|
| 149 | // this flag lets us distinguish the two. | 
|---|
| 150 | common: bool, | 
|---|
| 151 | } | 
|---|
| 152 |  | 
|---|
| 153 | impl ScriptExtension { | 
|---|
| 154 | // We don't use the complete u64 of `third`, so the "all" value is not just u32::MAX | 
|---|
| 155 | // Instead, we take the number of the next (unused) script bit, subtract 128 to bring | 
|---|
| 156 | // it in the range of `third`, create a u64 with just that bit set, and subtract 1 | 
|---|
| 157 | // to create one with all the lower bits set. | 
|---|
| 158 | const THIRD_MAX: u64 = ((1 << (NEXT_SCRIPT - 128)) - 1); | 
|---|
| 159 |  | 
|---|
| 160 | pub(crate) const fn new(first: u64, second: u64, third: u64) -> Self { | 
|---|
| 161 | ScriptExtension { | 
|---|
| 162 | first, | 
|---|
| 163 | second, | 
|---|
| 164 | third, | 
|---|
| 165 | common: false, | 
|---|
| 166 | } | 
|---|
| 167 | } | 
|---|
| 168 |  | 
|---|
| 169 | pub(crate) const fn new_common() -> Self { | 
|---|
| 170 | ScriptExtension { | 
|---|
| 171 | first: u64::MAX, | 
|---|
| 172 | second: u64::MAX, | 
|---|
| 173 | third: Self::THIRD_MAX, | 
|---|
| 174 | common: true, | 
|---|
| 175 | } | 
|---|
| 176 | } | 
|---|
| 177 |  | 
|---|
| 178 | pub(crate) const fn new_inherited() -> Self { | 
|---|
| 179 | ScriptExtension { | 
|---|
| 180 | first: u64::MAX, | 
|---|
| 181 | second: u64::MAX, | 
|---|
| 182 | third: Self::THIRD_MAX, | 
|---|
| 183 | common: false, | 
|---|
| 184 | } | 
|---|
| 185 | } | 
|---|
| 186 |  | 
|---|
| 187 | pub(crate) const fn new_unknown() -> Self { | 
|---|
| 188 | ScriptExtension { | 
|---|
| 189 | first: 0, | 
|---|
| 190 | second: 0, | 
|---|
| 191 | third: 0, | 
|---|
| 192 | common: false, | 
|---|
| 193 | } | 
|---|
| 194 | } | 
|---|
| 195 |  | 
|---|
| 196 | const fn is_common_or_inherited(self) -> bool { | 
|---|
| 197 | (self.first == u64::MAX) & (self.second == u64::MAX) & (self.third == Self::THIRD_MAX) | 
|---|
| 198 | } | 
|---|
| 199 |  | 
|---|
| 200 | /// Checks if the script extension is Common | 
|---|
| 201 | pub const fn is_common(self) -> bool { | 
|---|
| 202 | self.is_common_or_inherited() & self.common | 
|---|
| 203 | } | 
|---|
| 204 |  | 
|---|
| 205 | /// Checks if the script extension is Inherited | 
|---|
| 206 | pub const fn is_inherited(self) -> bool { | 
|---|
| 207 | self.is_common_or_inherited() & !self.common | 
|---|
| 208 | } | 
|---|
| 209 |  | 
|---|
| 210 | /// Checks if the script extension is empty (unknown) | 
|---|
| 211 | pub const fn is_empty(self) -> bool { | 
|---|
| 212 | (self.first == 0) & (self.second == 0) & (self.third == 0) | 
|---|
| 213 | } | 
|---|
| 214 |  | 
|---|
| 215 | /// Returns the number of scripts in the script extension | 
|---|
| 216 | pub fn len(self) -> usize { | 
|---|
| 217 | if self.is_common_or_inherited() { | 
|---|
| 218 | 1 | 
|---|
| 219 | } else { | 
|---|
| 220 | (self.first.count_ones() + self.second.count_ones() + self.third.count_ones()) as usize | 
|---|
| 221 | } | 
|---|
| 222 | } | 
|---|
| 223 |  | 
|---|
| 224 | /// Intersect this `ScriptExtension` with another `ScriptExtension`. Produces `Unknown` if things | 
|---|
| 225 | /// do not intersect. This is equivalent to [`ScriptExtension::intersection`] but it stores the result | 
|---|
| 226 | /// in `self` | 
|---|
| 227 | /// | 
|---|
| 228 | /// "Common" (`Zyyy`) and "Inherited" (`Zinh`) are considered as intersecting | 
|---|
| 229 | /// everything, the intersection of `Common` and `Inherited` is `Inherited` | 
|---|
| 230 | pub fn intersect_with(&mut self, other: Self) { | 
|---|
| 231 | *self = self.intersection(other) | 
|---|
| 232 | } | 
|---|
| 233 |  | 
|---|
| 234 | /// Find the intersection between two ScriptExtensions. Returns Unknown if things | 
|---|
| 235 | /// do not intersect. | 
|---|
| 236 | /// | 
|---|
| 237 | /// "Common" (`Zyyy`) and "Inherited" (`Zinh`) are considered as intersecting | 
|---|
| 238 | /// everything, the intersection of `Common` and `Inherited` is `Inherited` | 
|---|
| 239 | pub const fn intersection(self, other: Self) -> Self { | 
|---|
| 240 | let first = self.first & other.first; | 
|---|
| 241 | let second = self.second & other.second; | 
|---|
| 242 | let third = self.third & other.third; | 
|---|
| 243 | let common = self.common & other.common; | 
|---|
| 244 | ScriptExtension { | 
|---|
| 245 | first, | 
|---|
| 246 | second, | 
|---|
| 247 | third, | 
|---|
| 248 | common, | 
|---|
| 249 | } | 
|---|
| 250 | } | 
|---|
| 251 |  | 
|---|
| 252 | /// Find the union between two ScriptExtensions. | 
|---|
| 253 | /// | 
|---|
| 254 | /// "Common" (`Zyyy`) and "Inherited" (`Zinh`) are considered as intersecting | 
|---|
| 255 | /// everything, the union of `Common` and `Inherited` is `Common` | 
|---|
| 256 | pub const fn union(self, other: Self) -> Self { | 
|---|
| 257 | let first = self.first | other.first; | 
|---|
| 258 | let second = self.second | other.second; | 
|---|
| 259 | let third = self.third | other.third; | 
|---|
| 260 | let common = self.common | other.common; | 
|---|
| 261 | ScriptExtension { | 
|---|
| 262 | first, | 
|---|
| 263 | second, | 
|---|
| 264 | third, | 
|---|
| 265 | common, | 
|---|
| 266 | } | 
|---|
| 267 | } | 
|---|
| 268 |  | 
|---|
| 269 | /// Check if this ScriptExtension contains the given script | 
|---|
| 270 | /// | 
|---|
| 271 | /// Should be used with specific scripts only, this will | 
|---|
| 272 | /// return `true` if `self` is not `Unknown` and `script` is | 
|---|
| 273 | /// `Common` or `Inherited` | 
|---|
| 274 | pub fn contains_script(self, script: Script) -> bool { | 
|---|
| 275 | !self.intersection(script.into()).is_empty() | 
|---|
| 276 | } | 
|---|
| 277 |  | 
|---|
| 278 | /// Get the intersection of script extensions of all characters | 
|---|
| 279 | /// in a string. | 
|---|
| 280 | pub fn for_str(x: &str) -> Self { | 
|---|
| 281 | let mut ext = ScriptExtension::default(); | 
|---|
| 282 | for ch in x.chars() { | 
|---|
| 283 | ext.intersect_with(ch.into()); | 
|---|
| 284 | } | 
|---|
| 285 | ext | 
|---|
| 286 | } | 
|---|
| 287 |  | 
|---|
| 288 | /// Iterate over the scripts in this script extension | 
|---|
| 289 | /// | 
|---|
| 290 | /// Will never yield Script::Unknown | 
|---|
| 291 | pub fn iter(self) -> ScriptIterator { | 
|---|
| 292 | ScriptIterator { ext: self } | 
|---|
| 293 | } | 
|---|
| 294 | } | 
|---|
| 295 |  | 
|---|
| 296 | impl Default for ScriptExtension { | 
|---|
| 297 | fn default() -> Self { | 
|---|
| 298 | ScriptExtension::new_common() | 
|---|
| 299 | } | 
|---|
| 300 | } | 
|---|
| 301 |  | 
|---|
| 302 | impl From<char> for ScriptExtension { | 
|---|
| 303 | fn from(o: char) -> Self { | 
|---|
| 304 | o.script_extension() | 
|---|
| 305 | } | 
|---|
| 306 | } | 
|---|
| 307 |  | 
|---|
| 308 | impl From<&'_ str> for ScriptExtension { | 
|---|
| 309 | fn from(o: &'_ str) -> Self { | 
|---|
| 310 | Self::for_str(o) | 
|---|
| 311 | } | 
|---|
| 312 | } | 
|---|
| 313 |  | 
|---|
| 314 | impl fmt::Debug for ScriptExtension { | 
|---|
| 315 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | 
|---|
| 316 | write!(f, "ScriptExtension(")?; | 
|---|
| 317 | fmt::Display::fmt(self, f)?; | 
|---|
| 318 | write!(f, ")") | 
|---|
| 319 | } | 
|---|
| 320 | } | 
|---|
| 321 |  | 
|---|
| 322 | impl fmt::Display for ScriptExtension { | 
|---|
| 323 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | 
|---|
| 324 | if self.is_common() { | 
|---|
| 325 | write!(f, "Common")?; | 
|---|
| 326 | } else if self.is_inherited() { | 
|---|
| 327 | write!(f, "Inherited")?; | 
|---|
| 328 | } else if self.is_empty() { | 
|---|
| 329 | write!(f, "Unknown")?; | 
|---|
| 330 | } else { | 
|---|
| 331 | let mut first: bool = true; | 
|---|
| 332 | for script: Script in self.iter() { | 
|---|
| 333 | if !first { | 
|---|
| 334 | write!(f, " + ")?; | 
|---|
| 335 | first = false; | 
|---|
| 336 | } | 
|---|
| 337 | script.full_name().fmt(f)?; | 
|---|
| 338 | } | 
|---|
| 339 | } | 
|---|
| 340 | Ok(()) | 
|---|
| 341 | } | 
|---|
| 342 | } | 
|---|
| 343 |  | 
|---|
/// Extension trait on `char` for calculating script properties
pub trait UnicodeScript {
    /// Get the [`Script`] for a given character
    fn script(&self) -> Script;
    /// Get the `Script_Extension` value (a [`ScriptExtension`]) for a given character
    fn script_extension(&self) -> ScriptExtension;
}
|---|
| 351 |  | 
|---|
| 352 | impl UnicodeScript for char { | 
|---|
| 353 | fn script(&self) -> Script { | 
|---|
| 354 | get_script(*self).unwrap_or(default:Script::Unknown) | 
|---|
| 355 | } | 
|---|
| 356 |  | 
|---|
| 357 | fn script_extension(&self) -> ScriptExtension { | 
|---|
| 358 | get_script_extension(*self).unwrap_or_else(|| self.script().into()) | 
|---|
| 359 | } | 
|---|
| 360 | } | 
|---|
| 361 |  | 
|---|
/// Iterator over scripts in a [ScriptExtension].
///
/// Can be obtained via [ScriptExtension::iter()]
pub struct ScriptIterator {
    // The scripts not yet yielded; bits are cleared as iteration proceeds.
    ext: ScriptExtension,
}
|---|
| 368 |  | 
|---|
| 369 | impl Iterator for ScriptIterator { | 
|---|
| 370 | type Item = Script; | 
|---|
| 371 |  | 
|---|
| 372 | fn next(&mut self) -> Option<Script> { | 
|---|
| 373 | if self.ext.is_common_or_inherited() { | 
|---|
| 374 | let common = self.ext.common; | 
|---|
| 375 | self.ext = ScriptExtension::new_unknown(); | 
|---|
| 376 | if common { | 
|---|
| 377 | Some(Script::Common) | 
|---|
| 378 | } else { | 
|---|
| 379 | Some(Script::Inherited) | 
|---|
| 380 | } | 
|---|
| 381 | // Are there bits left in the first chunk? | 
|---|
| 382 | } else if self.ext.first != 0 { | 
|---|
| 383 | // Find the next bit | 
|---|
| 384 | let bit = self.ext.first.trailing_zeros(); | 
|---|
| 385 | // unset just that bit | 
|---|
| 386 | self.ext.first &= !(1 << bit); | 
|---|
| 387 | Some(Script::for_integer(bit as u8)) | 
|---|
| 388 | // Are there bits left in the second chunk? | 
|---|
| 389 | } else if self.ext.second != 0 { | 
|---|
| 390 | let bit = self.ext.second.trailing_zeros(); | 
|---|
| 391 | self.ext.second &= !(1 << bit); | 
|---|
| 392 | Some(Script::for_integer(64 + bit as u8)) | 
|---|
| 393 | // Are there bits left in the third chunk? | 
|---|
| 394 | } else if self.ext.third != 0 { | 
|---|
| 395 | let bit = self.ext.third.trailing_zeros(); | 
|---|
| 396 | self.ext.third &= !(1 << bit); | 
|---|
| 397 | Some(Script::for_integer(128 + bit as u8)) | 
|---|
| 398 | } else { | 
|---|
| 399 | // Script::Unknown | 
|---|
| 400 | None | 
|---|
| 401 | } | 
|---|
| 402 | } | 
|---|
| 403 | } | 
|---|
| 404 |  | 
|---|
| 405 | #[ cfg(test)] | 
|---|
| 406 | mod tests { | 
|---|
| 407 | use crate::*; | 
|---|
| 408 | use std::collections::HashSet; | 
|---|
| 409 | use std::convert::TryInto; | 
|---|
| 410 |  | 
|---|
| 411 | #[ cfg(feature = "bench")] | 
|---|
| 412 | use test::bench::Bencher; | 
|---|
| 413 | #[ cfg(feature = "bench")] | 
|---|
| 414 | extern crate test; | 
|---|
| 415 |  | 
|---|
| 416 | #[ test] | 
|---|
| 417 | fn test_conversion() { | 
|---|
| 418 | let mut seen_scripts = HashSet::new(); | 
|---|
| 419 | let mut seen_exts = HashSet::new(); | 
|---|
| 420 | for bit in 0..NEXT_SCRIPT { | 
|---|
| 421 | let script = Script::for_integer(bit); | 
|---|
| 422 | let ext = script.into(); | 
|---|
| 423 | if seen_scripts.contains(&script) { | 
|---|
| 424 | panic!( "Found script {:?} twice!", script) | 
|---|
| 425 | } | 
|---|
| 426 | if seen_exts.contains(&ext) { | 
|---|
| 427 | panic!( "Found extension {:?} twice!", ext) | 
|---|
| 428 | } | 
|---|
| 429 | seen_scripts.insert(script); | 
|---|
| 430 | seen_exts.insert(ext); | 
|---|
| 431 | assert_eq!(script as u8, bit); | 
|---|
| 432 | assert!(!ScriptExtension::new_common().intersection(ext).is_empty()); | 
|---|
| 433 | assert!(!ScriptExtension::new_inherited() | 
|---|
| 434 | .intersection(ext) | 
|---|
| 435 | .is_empty()); | 
|---|
| 436 | assert!(ScriptExtension::new_unknown().intersection(ext).is_empty()); | 
|---|
| 437 | assert_eq!(ext.iter().collect::<Vec<_>>(), vec![script]); | 
|---|
| 438 | assert_eq!(Ok(script), ext.try_into()); | 
|---|
| 439 | } | 
|---|
| 440 | } | 
|---|
| 441 |  | 
|---|
| 442 | #[ test] | 
|---|
| 443 | fn test_specific() { | 
|---|
| 444 | let s = "सवव मानवी व्यद्क् जन्मतःच स्वतींत्र आहेत व त्ाींना समान प्रवतष्ठा व समान अविकार आहेत. त्ाींना ववचारशद्क् व सवविे कबुद्द्धलाभलेली आहे. व त्ाींनी एकमेकाींशी बींिुत्वाचाभावनेने आचरण करावे."; | 
|---|
| 445 | let ext = ScriptExtension::for_str(s); | 
|---|
| 446 | assert_eq!(ext, script_extensions::DEVA); | 
|---|
| 447 | println!( | 
|---|
| 448 | "{:?}", | 
|---|
| 449 | script_extensions::DEVA_DOGR_GUJR_GURU_KHOJ_KTHI_MAHJ_MODI_SIND_TAKR_TIRH | 
|---|
| 450 | ); | 
|---|
| 451 | println!( | 
|---|
| 452 | "{:?}", | 
|---|
| 453 | ext.intersection( | 
|---|
| 454 | script_extensions::DEVA_DOGR_GUJR_GURU_KHOJ_KTHI_MAHJ_MODI_SIND_TAKR_TIRH | 
|---|
| 455 | ) | 
|---|
| 456 | ); | 
|---|
| 457 | assert!(!ext | 
|---|
| 458 | .intersection(script_extensions::DEVA_DOGR_GUJR_GURU_KHOJ_KTHI_MAHJ_MODI_SIND_TAKR_TIRH) | 
|---|
| 459 | .is_empty()); | 
|---|
| 460 |  | 
|---|
| 461 | let u = ext.union(Script::Dogra.into()); | 
|---|
| 462 | assert_eq!( | 
|---|
| 463 | u.intersection( | 
|---|
| 464 | script_extensions::DEVA_DOGR_GUJR_GURU_KHOJ_KTHI_MAHJ_MODI_SIND_TAKR_TIRH | 
|---|
| 465 | ), | 
|---|
| 466 | u | 
|---|
| 467 | ); | 
|---|
| 468 | } | 
|---|
| 469 |  | 
|---|
| 470 | #[ test] | 
|---|
| 471 | fn test_specific_ext() { | 
|---|
| 472 | let ext = script_extensions::DEVA_DOGR_GUJR_GURU_KHOJ_KTHI_MAHJ_MODI_SIND_TAKR_TIRH; | 
|---|
| 473 |  | 
|---|
| 474 | let all: HashSet<_> = ext.iter().collect(); | 
|---|
| 475 |  | 
|---|
| 476 | for bit in 0..NEXT_SCRIPT { | 
|---|
| 477 | let script = Script::for_integer(bit); | 
|---|
| 478 |  | 
|---|
| 479 | if all.contains(&script) { | 
|---|
| 480 | assert!(ext.contains_script(script)) | 
|---|
| 481 | } else { | 
|---|
| 482 | assert!(!ext.contains_script(script)) | 
|---|
| 483 | } | 
|---|
| 484 | } | 
|---|
| 485 |  | 
|---|
| 486 | assert!(ext.contains_script(Script::Devanagari)); | 
|---|
| 487 | assert!(ext.contains_script(Script::Dogra)); | 
|---|
| 488 | assert!(ext.contains_script(Script::Gujarati)); | 
|---|
| 489 | assert!(ext.contains_script(Script::Gurmukhi)); | 
|---|
| 490 | assert!(ext.contains_script(Script::Khojki)); | 
|---|
| 491 | assert!(ext.contains_script(Script::Kaithi)); | 
|---|
| 492 | assert!(ext.contains_script(Script::Mahajani)); | 
|---|
| 493 | assert!(ext.contains_script(Script::Modi)); | 
|---|
| 494 | assert!(ext.contains_script(Script::Khudawadi)); | 
|---|
| 495 | assert!(ext.contains_script(Script::Takri)); | 
|---|
| 496 | assert!(ext.contains_script(Script::Tirhuta)); | 
|---|
| 497 |  | 
|---|
| 498 | let scr: Result<Script, _> = ext.try_into(); | 
|---|
| 499 | assert!(scr.is_err()); | 
|---|
| 500 | } | 
|---|
| 501 |  | 
|---|
| 502 | #[ cfg(feature = "bench")] | 
|---|
| 503 | #[ bench] | 
|---|
| 504 | fn bench_script_intersection(b: &mut Bencher) { | 
|---|
| 505 | b.iter(|| { | 
|---|
| 506 | let script = test::black_box(Script::Devanagari); | 
|---|
| 507 | let ext = test::black_box(script_extensions::BENG_DEVA_DOGR_GONG_GONM_GRAN_GUJR_GURU_KNDA_MAHJ_MLYM_NAND_ONAO_ORYA_SIND_SINH_SYLO_TAKR_TAML_TELU_TIRH); | 
|---|
| 508 | test::black_box(ext.intersection(script.into())); | 
|---|
| 509 | }) | 
|---|
| 510 | } | 
|---|
| 511 |  | 
|---|
| 512 | #[ cfg(feature = "bench")] | 
|---|
| 513 | #[ bench] | 
|---|
| 514 | fn bench_ext_to_script(b: &mut Bencher) { | 
|---|
| 515 | let ext: ScriptExtension = Script::Devanagari.into(); | 
|---|
| 516 | b.iter(|| { | 
|---|
| 517 | let ext = test::black_box(ext); | 
|---|
| 518 | let script: Result<Script, _> = ext.try_into(); | 
|---|
| 519 | let _ = test::black_box(script); | 
|---|
| 520 | }) | 
|---|
| 521 | } | 
|---|
| 522 |  | 
|---|
| 523 | #[ cfg(feature = "bench")] | 
|---|
| 524 | #[ bench] | 
|---|
| 525 | fn bench_script_to_ext(b: &mut Bencher) { | 
|---|
| 526 | b.iter(|| { | 
|---|
| 527 | let script = test::black_box(Script::Devanagari); | 
|---|
| 528 | let ext: ScriptExtension = script.into(); | 
|---|
| 529 | test::black_box(ext); | 
|---|
| 530 | }) | 
|---|
| 531 | } | 
|---|
| 532 |  | 
|---|
| 533 | #[ cfg(feature = "bench")] | 
|---|
| 534 | #[ bench] | 
|---|
| 535 | fn bench_ext_intersection(b: &mut Bencher) { | 
|---|
| 536 | b.iter(|| { | 
|---|
| 537 | let e1 = test::black_box(script_extensions::ARAB_GARA_NKOO_ROHG_SYRC_THAA_YEZI); | 
|---|
| 538 | let e2 = test::black_box(script_extensions::BENG_DEVA_DOGR_GONG_GONM_GRAN_GUJR_GURU_KNDA_MAHJ_MLYM_NAND_ONAO_ORYA_SIND_SINH_SYLO_TAKR_TAML_TELU_TIRH); | 
|---|
| 539 | test::black_box(e2.intersection(e1)); | 
|---|
| 540 | }) | 
|---|
| 541 | } | 
|---|
| 542 |  | 
|---|
| 543 | #[ cfg(feature = "bench")] | 
|---|
| 544 | #[ bench] | 
|---|
| 545 | fn bench_to_vec(b: &mut Bencher) { | 
|---|
| 546 | b.iter(|| { | 
|---|
| 547 | let ext = test::black_box(script_extensions::BENG_DEVA_DOGR_GONG_GONM_GRAN_GUJR_GURU_KNDA_MAHJ_MLYM_NAND_ONAO_ORYA_SIND_SINH_SYLO_TAKR_TAML_TELU_TIRH); | 
|---|
| 548 | test::black_box(ext.iter().collect::<Vec<_>>()); | 
|---|
| 549 | }) | 
|---|
| 550 | } | 
|---|
| 551 |  | 
|---|
| 552 | #[ cfg(feature = "bench")] | 
|---|
| 553 | #[ bench] | 
|---|
| 554 | fn bench_string_ext(b: &mut Bencher) { | 
|---|
| 555 | b.iter(|| { | 
|---|
| 556 | let s = test::black_box( "सवव मानवी व्यद्क् जन्मतःच स्वतींत्र आहेत व त्ाींना समान प्रवतष्ठा व समान अविकार आहेत. त्ाींना ववचारशद्क् व सवविे कबुद्द्धलाभलेली आहे. व त्ाींनी एकमेकाींशी बींिुत्वाचाभावनेने आचरण करावे."); | 
|---|
| 557 | test::black_box(ScriptExtension::for_str(s)); | 
|---|
| 558 | }) | 
|---|
| 559 | } | 
|---|
| 560 | } | 
|---|
| 561 |  | 
|---|