| 1 | use alloc::{ |
| 2 | string::{String, ToString}, |
| 3 | vec::Vec, |
| 4 | }; |
| 5 | |
| 6 | use crate::hir; |
| 7 | |
/// A slice of inclusive `(start, end)` codepoint ranges taken from a generated
/// file (hence the static lifetime).
type Range = &'static [(char, char)];
| 11 | |
/// An error that occurs when dealing with Unicode.
///
/// We don't impl the Error trait here because these always get converted
/// into other public errors. (This error type isn't exported.)
#[derive(Debug)]
pub enum Error {
    /// The requested property (or a class named as if it were a property)
    /// is unknown, unsupported, or its data is not available.
    PropertyNotFound,
    /// The requested property value is unknown, or the data needed to
    /// resolve it is not available.
    PropertyValueNotFound,
    /// The data for a Perl character class (`\w`, `\s` or `\d`) is not
    /// available.
    // Not used when unicode-perl is enabled.
    #[allow(dead_code)]
    PerlClassNotFound,
}
| 24 | |
/// The error reported when Unicode-aware simple case folding cannot be done.
///
/// Simple case folding needs the Unicode case mapping tables. Those tables
/// are only missing when the `unicode-case` feature is disabled (it is
/// enabled by default), and that is the only situation in which this error
/// occurs.
#[derive(Debug)]
pub struct CaseFoldError(());

#[cfg(feature = "std")]
impl std::error::Error for CaseFoldError {}

impl core::fmt::Display for CaseFoldError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // The message has no placeholders, so it is written out verbatim.
        f.write_str(
            "Unicode-aware case folding is not available \
             (probably because the unicode-case feature is not enabled)",
        )
    }
}
| 45 | |
/// The error reported when the Unicode-aware `\w` class cannot be provided.
///
/// The Unicode-aware Perl class `\w` needs its data tables. Those tables are
/// only missing when the `unicode-perl` feature is disabled (it is enabled by
/// default), and that is the only situation in which this error occurs.
#[derive(Debug)]
pub struct UnicodeWordError(());

#[cfg(feature = "std")]
impl std::error::Error for UnicodeWordError {}

impl core::fmt::Display for UnicodeWordError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // The message has no placeholders, so it is written out verbatim.
        f.write_str(
            "Unicode-aware \\w class is not available \
             (probably because the unicode-perl feature is not enabled)",
        )
    }
}
| 66 | |
| 67 | /// A state oriented traverser of the simple case folding table. |
| 68 | /// |
| 69 | /// A case folder can be constructed via `SimpleCaseFolder::new()`, which will |
| 70 | /// return an error if the underlying case folding table is unavailable. |
| 71 | /// |
| 72 | /// After construction, it is expected that callers will use |
| 73 | /// `SimpleCaseFolder::mapping` by calling it with codepoints in strictly |
| 74 | /// increasing order. For example, calling it on `b` and then on `a` is illegal |
| 75 | /// and will result in a panic. |
| 76 | /// |
| 77 | /// The main idea of this type is that it tries hard to make mapping lookups |
| 78 | /// fast by exploiting the structure of the underlying table, and the ordering |
| 79 | /// assumption enables this. |
| 80 | #[derive (Debug)] |
| 81 | pub struct SimpleCaseFolder { |
| 82 | /// The simple case fold table. It's a sorted association list, where the |
| 83 | /// keys are Unicode scalar values and the values are the corresponding |
| 84 | /// equivalence class (not including the key) of the "simple" case folded |
| 85 | /// Unicode scalar values. |
| 86 | table: &'static [(char, &'static [char])], |
| 87 | /// The last codepoint that was used for a lookup. |
| 88 | last: Option<char>, |
| 89 | /// The index to the entry in `table` corresponding to the smallest key `k` |
| 90 | /// such that `k > k0`, where `k0` is the most recent key lookup. Note that |
| 91 | /// in particular, `k0` may not be in the table! |
| 92 | next: usize, |
| 93 | } |
| 94 | |
| 95 | impl SimpleCaseFolder { |
| 96 | /// Create a new simple case folder, returning an error if the underlying |
| 97 | /// case folding table is unavailable. |
| 98 | pub fn new() -> Result<SimpleCaseFolder, CaseFoldError> { |
| 99 | #[cfg (not(feature = "unicode-case" ))] |
| 100 | { |
| 101 | Err(CaseFoldError(())) |
| 102 | } |
| 103 | #[cfg (feature = "unicode-case" )] |
| 104 | { |
| 105 | Ok(SimpleCaseFolder { |
| 106 | table: crate::unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE, |
| 107 | last: None, |
| 108 | next: 0, |
| 109 | }) |
| 110 | } |
| 111 | } |
| 112 | |
| 113 | /// Return the equivalence class of case folded codepoints for the given |
| 114 | /// codepoint. The equivalence class returned never includes the codepoint |
| 115 | /// given. If the given codepoint has no case folded codepoints (i.e., |
| 116 | /// no entry in the underlying case folding table), then this returns an |
| 117 | /// empty slice. |
| 118 | /// |
| 119 | /// # Panics |
| 120 | /// |
| 121 | /// This panics when called with a `c` that is less than or equal to the |
| 122 | /// previous call. In other words, callers need to use this method with |
| 123 | /// strictly increasing values of `c`. |
| 124 | pub fn mapping(&mut self, c: char) -> &'static [char] { |
| 125 | if let Some(last) = self.last { |
| 126 | assert!( |
| 127 | last < c, |
| 128 | "got codepoint U+ {:X} which occurs before \ |
| 129 | last codepoint U+ {:X}" , |
| 130 | u32::from(c), |
| 131 | u32::from(last), |
| 132 | ); |
| 133 | } |
| 134 | self.last = Some(c); |
| 135 | if self.next >= self.table.len() { |
| 136 | return &[]; |
| 137 | } |
| 138 | let (k, v) = self.table[self.next]; |
| 139 | if k == c { |
| 140 | self.next += 1; |
| 141 | return v; |
| 142 | } |
| 143 | match self.get(c) { |
| 144 | Err(i) => { |
| 145 | self.next = i; |
| 146 | &[] |
| 147 | } |
| 148 | Ok(i) => { |
| 149 | // Since we require lookups to proceed |
| 150 | // in order, anything we find should be |
| 151 | // after whatever we thought might be |
| 152 | // next. Otherwise, the caller is either |
| 153 | // going out of order or we would have |
| 154 | // found our next key at 'self.next'. |
| 155 | assert!(i > self.next); |
| 156 | self.next = i + 1; |
| 157 | self.table[i].1 |
| 158 | } |
| 159 | } |
| 160 | } |
| 161 | |
| 162 | /// Returns true if and only if the given range overlaps with any region |
| 163 | /// of the underlying case folding table. That is, when true, there exists |
| 164 | /// at least one codepoint in the inclusive range `[start, end]` that has |
| 165 | /// a non-trivial equivalence class of case folded codepoints. Conversely, |
| 166 | /// when this returns false, all codepoints in the range `[start, end]` |
| 167 | /// correspond to the trivial equivalence class of case folded codepoints, |
| 168 | /// i.e., itself. |
| 169 | /// |
| 170 | /// This is useful to call before iterating over the codepoints in the |
| 171 | /// range and looking up the mapping for each. If you know none of the |
| 172 | /// mappings will return anything, then you might be able to skip doing it |
| 173 | /// altogether. |
| 174 | /// |
| 175 | /// # Panics |
| 176 | /// |
| 177 | /// This panics when `end < start`. |
| 178 | pub fn overlaps(&self, start: char, end: char) -> bool { |
| 179 | use core::cmp::Ordering; |
| 180 | |
| 181 | assert!(start <= end); |
| 182 | self.table |
| 183 | .binary_search_by(|&(c, _)| { |
| 184 | if start <= c && c <= end { |
| 185 | Ordering::Equal |
| 186 | } else if c > end { |
| 187 | Ordering::Greater |
| 188 | } else { |
| 189 | Ordering::Less |
| 190 | } |
| 191 | }) |
| 192 | .is_ok() |
| 193 | } |
| 194 | |
| 195 | /// Returns the index at which `c` occurs in the simple case fold table. If |
| 196 | /// `c` does not occur, then this returns an `i` such that `table[i-1].0 < |
| 197 | /// c` and `table[i].0 > c`. |
| 198 | fn get(&self, c: char) -> Result<usize, usize> { |
| 199 | self.table.binary_search_by_key(&c, |&(c1, _)| c1) |
| 200 | } |
| 201 | } |
| 202 | |
| 203 | /// A query for finding a character class defined by Unicode. This supports |
| 204 | /// either use of a property name directly, or lookup by property value. The |
| 205 | /// former generally refers to Binary properties (see UTS#44, Table 8), but |
| 206 | /// as a special exception (see UTS#18, Section 1.2) both general categories |
| 207 | /// (an enumeration) and scripts (a catalog) are supported as if each of their |
| 208 | /// possible values were a binary property. |
| 209 | /// |
| 210 | /// In all circumstances, property names and values are normalized and |
| 211 | /// canonicalized. That is, `GC == gc == GeneralCategory == general_category`. |
| 212 | /// |
| 213 | /// The lifetime `'a` refers to the shorter of the lifetimes of property name |
| 214 | /// and property value. |
| 215 | #[derive (Debug)] |
| 216 | pub enum ClassQuery<'a> { |
| 217 | /// Return a class corresponding to a Unicode binary property, named by |
| 218 | /// a single letter. |
| 219 | OneLetter(char), |
| 220 | /// Return a class corresponding to a Unicode binary property. |
| 221 | /// |
| 222 | /// Note that, by special exception (see UTS#18, Section 1.2), both |
| 223 | /// general category values and script values are permitted here as if |
| 224 | /// they were a binary property. |
| 225 | Binary(&'a str), |
| 226 | /// Return a class corresponding to all codepoints whose property |
| 227 | /// (identified by `property_name`) corresponds to the given value |
| 228 | /// (identified by `property_value`). |
| 229 | ByValue { |
| 230 | /// A property name. |
| 231 | property_name: &'a str, |
| 232 | /// A property value. |
| 233 | property_value: &'a str, |
| 234 | }, |
| 235 | } |
| 236 | |
| 237 | impl<'a> ClassQuery<'a> { |
| 238 | fn canonicalize(&self) -> Result<CanonicalClassQuery, Error> { |
| 239 | match *self { |
| 240 | ClassQuery::OneLetter(c) => self.canonical_binary(&c.to_string()), |
| 241 | ClassQuery::Binary(name) => self.canonical_binary(name), |
| 242 | ClassQuery::ByValue { property_name, property_value } => { |
| 243 | let property_name = symbolic_name_normalize(property_name); |
| 244 | let property_value = symbolic_name_normalize(property_value); |
| 245 | |
| 246 | let canon_name = match canonical_prop(&property_name)? { |
| 247 | None => return Err(Error::PropertyNotFound), |
| 248 | Some(canon_name) => canon_name, |
| 249 | }; |
| 250 | Ok(match canon_name { |
| 251 | "General_Category" => { |
| 252 | let canon = match canonical_gencat(&property_value)? { |
| 253 | None => return Err(Error::PropertyValueNotFound), |
| 254 | Some(canon) => canon, |
| 255 | }; |
| 256 | CanonicalClassQuery::GeneralCategory(canon) |
| 257 | } |
| 258 | "Script" => { |
| 259 | let canon = match canonical_script(&property_value)? { |
| 260 | None => return Err(Error::PropertyValueNotFound), |
| 261 | Some(canon) => canon, |
| 262 | }; |
| 263 | CanonicalClassQuery::Script(canon) |
| 264 | } |
| 265 | _ => { |
| 266 | let vals = match property_values(canon_name)? { |
| 267 | None => return Err(Error::PropertyValueNotFound), |
| 268 | Some(vals) => vals, |
| 269 | }; |
| 270 | let canon_val = |
| 271 | match canonical_value(vals, &property_value) { |
| 272 | None => { |
| 273 | return Err(Error::PropertyValueNotFound) |
| 274 | } |
| 275 | Some(canon_val) => canon_val, |
| 276 | }; |
| 277 | CanonicalClassQuery::ByValue { |
| 278 | property_name: canon_name, |
| 279 | property_value: canon_val, |
| 280 | } |
| 281 | } |
| 282 | }) |
| 283 | } |
| 284 | } |
| 285 | } |
| 286 | |
| 287 | fn canonical_binary( |
| 288 | &self, |
| 289 | name: &str, |
| 290 | ) -> Result<CanonicalClassQuery, Error> { |
| 291 | let norm = symbolic_name_normalize(name); |
| 292 | |
| 293 | // This is a special case where 'cf' refers to the 'Format' general |
| 294 | // category, but where the 'cf' abbreviation is also an abbreviation |
| 295 | // for the 'Case_Folding' property. But we want to treat it as |
| 296 | // a general category. (Currently, we don't even support the |
| 297 | // 'Case_Folding' property. But if we do in the future, users will be |
| 298 | // required to spell it out.) |
| 299 | // |
| 300 | // Also 'sc' refers to the 'Currency_Symbol' general category, but is |
| 301 | // also the abbreviation for the 'Script' property. So we avoid calling |
| 302 | // 'canonical_prop' for it too, which would erroneously normalize it |
| 303 | // to 'Script'. |
| 304 | // |
| 305 | // Another case: 'lc' is an abbreviation for the 'Cased_Letter' |
| 306 | // general category, but is also an abbreviation for the 'Lowercase_Mapping' |
| 307 | // property. We don't currently support the latter, so as with 'cf' |
| 308 | // above, we treat 'lc' as 'Cased_Letter'. |
| 309 | if norm != "cf" && norm != "sc" && norm != "lc" { |
| 310 | if let Some(canon) = canonical_prop(&norm)? { |
| 311 | return Ok(CanonicalClassQuery::Binary(canon)); |
| 312 | } |
| 313 | } |
| 314 | if let Some(canon) = canonical_gencat(&norm)? { |
| 315 | return Ok(CanonicalClassQuery::GeneralCategory(canon)); |
| 316 | } |
| 317 | if let Some(canon) = canonical_script(&norm)? { |
| 318 | return Ok(CanonicalClassQuery::Script(canon)); |
| 319 | } |
| 320 | Err(Error::PropertyNotFound) |
| 321 | } |
| 322 | } |
| 323 | |
/// Like ClassQuery, but its parameters have been canonicalized. This also
/// differentiates binary properties from flattened general categories and
/// scripts.
///
/// Values of this type are produced by `ClassQuery::canonicalize`.
#[derive(Debug, Eq, PartialEq)]
enum CanonicalClassQuery {
    /// The canonical binary property name.
    Binary(&'static str),
    /// The canonical general category name.
    GeneralCategory(&'static str),
    /// The canonical script name.
    Script(&'static str),
    /// An arbitrary association between property and value, both of which
    /// have been canonicalized.
    ///
    /// Note that by construction, the property name of ByValue will never
    /// be General_Category or Script. Those two cases are subsumed by the
    /// eponymous variants.
    ByValue {
        /// The canonical property name.
        property_name: &'static str,
        /// The canonical property value.
        property_value: &'static str,
    },
}
| 348 | |
| 349 | /// Looks up a Unicode class given a query. If one doesn't exist, then |
| 350 | /// `None` is returned. |
| 351 | pub fn class(query: ClassQuery<'_>) -> Result<hir::ClassUnicode, Error> { |
| 352 | use self::CanonicalClassQuery::*; |
| 353 | |
| 354 | match query.canonicalize()? { |
| 355 | Binary(name) => bool_property(name), |
| 356 | GeneralCategory(name) => gencat(name), |
| 357 | Script(name) => script(name), |
| 358 | ByValue { property_name: "Age" , property_value } => { |
| 359 | let mut class = hir::ClassUnicode::empty(); |
| 360 | for set in ages(property_value)? { |
| 361 | class.union(&hir_class(set)); |
| 362 | } |
| 363 | Ok(class) |
| 364 | } |
| 365 | ByValue { property_name: "Script_Extensions" , property_value } => { |
| 366 | script_extension(property_value) |
| 367 | } |
| 368 | ByValue { |
| 369 | property_name: "Grapheme_Cluster_Break" , |
| 370 | property_value, |
| 371 | } => gcb(property_value), |
| 372 | ByValue { property_name: "Sentence_Break" , property_value } => { |
| 373 | sb(property_value) |
| 374 | } |
| 375 | ByValue { property_name: "Word_Break" , property_value } => { |
| 376 | wb(property_value) |
| 377 | } |
| 378 | _ => { |
| 379 | // What else should we support? |
| 380 | Err(Error::PropertyNotFound) |
| 381 | } |
| 382 | } |
| 383 | } |
| 384 | |
| 385 | /// Returns a Unicode aware class for \w. |
| 386 | /// |
| 387 | /// This returns an error if the data is not available for \w. |
| 388 | pub fn perl_word() -> Result<hir::ClassUnicode, Error> { |
| 389 | #[cfg (not(feature = "unicode-perl" ))] |
| 390 | fn imp() -> Result<hir::ClassUnicode, Error> { |
| 391 | Err(Error::PerlClassNotFound) |
| 392 | } |
| 393 | |
| 394 | #[cfg (feature = "unicode-perl" )] |
| 395 | fn imp() -> Result<hir::ClassUnicode, Error> { |
| 396 | use crate::unicode_tables::perl_word::PERL_WORD; |
| 397 | Ok(hir_class(PERL_WORD)) |
| 398 | } |
| 399 | |
| 400 | imp() |
| 401 | } |
| 402 | |
| 403 | /// Returns a Unicode aware class for \s. |
| 404 | /// |
| 405 | /// This returns an error if the data is not available for \s. |
| 406 | pub fn perl_space() -> Result<hir::ClassUnicode, Error> { |
| 407 | #[cfg (not(any(feature = "unicode-perl" , feature = "unicode-bool" )))] |
| 408 | fn imp() -> Result<hir::ClassUnicode, Error> { |
| 409 | Err(Error::PerlClassNotFound) |
| 410 | } |
| 411 | |
| 412 | #[cfg (all(feature = "unicode-perl" , not(feature = "unicode-bool" )))] |
| 413 | fn imp() -> Result<hir::ClassUnicode, Error> { |
| 414 | use crate::unicode_tables::perl_space::WHITE_SPACE; |
| 415 | Ok(hir_class(WHITE_SPACE)) |
| 416 | } |
| 417 | |
| 418 | #[cfg (feature = "unicode-bool" )] |
| 419 | fn imp() -> Result<hir::ClassUnicode, Error> { |
| 420 | use crate::unicode_tables::property_bool::WHITE_SPACE; |
| 421 | Ok(hir_class(WHITE_SPACE)) |
| 422 | } |
| 423 | |
| 424 | imp() |
| 425 | } |
| 426 | |
| 427 | /// Returns a Unicode aware class for \d. |
| 428 | /// |
| 429 | /// This returns an error if the data is not available for \d. |
| 430 | pub fn perl_digit() -> Result<hir::ClassUnicode, Error> { |
| 431 | #[cfg (not(any(feature = "unicode-perl" , feature = "unicode-gencat" )))] |
| 432 | fn imp() -> Result<hir::ClassUnicode, Error> { |
| 433 | Err(Error::PerlClassNotFound) |
| 434 | } |
| 435 | |
| 436 | #[cfg (all(feature = "unicode-perl" , not(feature = "unicode-gencat" )))] |
| 437 | fn imp() -> Result<hir::ClassUnicode, Error> { |
| 438 | use crate::unicode_tables::perl_decimal::DECIMAL_NUMBER; |
| 439 | Ok(hir_class(DECIMAL_NUMBER)) |
| 440 | } |
| 441 | |
| 442 | #[cfg (feature = "unicode-gencat" )] |
| 443 | fn imp() -> Result<hir::ClassUnicode, Error> { |
| 444 | use crate::unicode_tables::general_category::DECIMAL_NUMBER; |
| 445 | Ok(hir_class(DECIMAL_NUMBER)) |
| 446 | } |
| 447 | |
| 448 | imp() |
| 449 | } |
| 450 | |
| 451 | /// Build a Unicode HIR class from a sequence of Unicode scalar value ranges. |
| 452 | pub fn hir_class(ranges: &[(char, char)]) -> hir::ClassUnicode { |
| 453 | let hir_ranges: Vec<hir::ClassUnicodeRange> = rangesimpl Iterator |
| 454 | .iter() |
| 455 | .map(|&(s: char, e: char)| hir::ClassUnicodeRange::new(start:s, end:e)) |
| 456 | .collect(); |
| 457 | hir::ClassUnicode::new(hir_ranges) |
| 458 | } |
| 459 | |
| 460 | /// Returns true only if the given codepoint is in the `\w` character class. |
| 461 | /// |
| 462 | /// If the `unicode-perl` feature is not enabled, then this returns an error. |
| 463 | pub fn is_word_character(c: char) -> Result<bool, UnicodeWordError> { |
| 464 | #[cfg (not(feature = "unicode-perl" ))] |
| 465 | fn imp(_: char) -> Result<bool, UnicodeWordError> { |
| 466 | Err(UnicodeWordError(())) |
| 467 | } |
| 468 | |
| 469 | #[cfg (feature = "unicode-perl" )] |
| 470 | fn imp(c: char) -> Result<bool, UnicodeWordError> { |
| 471 | use crate::{is_word_byte, unicode_tables::perl_word::PERL_WORD}; |
| 472 | |
| 473 | if u8::try_from(c).map_or(false, is_word_byte) { |
| 474 | return Ok(true); |
| 475 | } |
| 476 | Ok(PERL_WORD |
| 477 | .binary_search_by(|&(start, end)| { |
| 478 | use core::cmp::Ordering; |
| 479 | |
| 480 | if start <= c && c <= end { |
| 481 | Ordering::Equal |
| 482 | } else if start > c { |
| 483 | Ordering::Greater |
| 484 | } else { |
| 485 | Ordering::Less |
| 486 | } |
| 487 | }) |
| 488 | .is_ok()) |
| 489 | } |
| 490 | |
| 491 | imp(c) |
| 492 | } |
| 493 | |
/// A mapping of property values for a specific property.
///
/// The first element of each tuple is a normalized property value while the
/// second element of each tuple is the corresponding canonical property
/// value.
///
/// `canonical_value` binary searches this table on the first element, so the
/// entries are expected to be sorted by it.
type PropertyValues = &'static [(&'static str, &'static str)];
| 500 | |
| 501 | fn canonical_gencat( |
| 502 | normalized_value: &str, |
| 503 | ) -> Result<Option<&'static str>, Error> { |
| 504 | Ok(match normalized_value { |
| 505 | "any" => Some("Any" ), |
| 506 | "assigned" => Some("Assigned" ), |
| 507 | "ascii" => Some("ASCII" ), |
| 508 | _ => { |
| 509 | let gencats: &'static [(&str, &str)] = property_values(canonical_property_name:"General_Category" )?.unwrap(); |
| 510 | canonical_value(vals:gencats, normalized_value) |
| 511 | } |
| 512 | }) |
| 513 | } |
| 514 | |
| 515 | fn canonical_script( |
| 516 | normalized_value: &str, |
| 517 | ) -> Result<Option<&'static str>, Error> { |
| 518 | let scripts: &'static [(&str, &str)] = property_values(canonical_property_name:"Script" )?.unwrap(); |
| 519 | Ok(canonical_value(vals:scripts, normalized_value)) |
| 520 | } |
| 521 | |
| 522 | /// Find the canonical property name for the given normalized property name. |
| 523 | /// |
| 524 | /// If no such property exists, then `None` is returned. |
| 525 | /// |
| 526 | /// The normalized property name must have been normalized according to |
| 527 | /// UAX44 LM3, which can be done using `symbolic_name_normalize`. |
| 528 | /// |
| 529 | /// If the property names data is not available, then an error is returned. |
| 530 | fn canonical_prop( |
| 531 | normalized_name: &str, |
| 532 | ) -> Result<Option<&'static str>, Error> { |
| 533 | #[cfg (not(any( |
| 534 | feature = "unicode-age" , |
| 535 | feature = "unicode-bool" , |
| 536 | feature = "unicode-gencat" , |
| 537 | feature = "unicode-perl" , |
| 538 | feature = "unicode-script" , |
| 539 | feature = "unicode-segment" , |
| 540 | )))] |
| 541 | fn imp(_: &str) -> Result<Option<&'static str>, Error> { |
| 542 | Err(Error::PropertyNotFound) |
| 543 | } |
| 544 | |
| 545 | #[cfg (any( |
| 546 | feature = "unicode-age" , |
| 547 | feature = "unicode-bool" , |
| 548 | feature = "unicode-gencat" , |
| 549 | feature = "unicode-perl" , |
| 550 | feature = "unicode-script" , |
| 551 | feature = "unicode-segment" , |
| 552 | ))] |
| 553 | fn imp(name: &str) -> Result<Option<&'static str>, Error> { |
| 554 | use crate::unicode_tables::property_names::PROPERTY_NAMES; |
| 555 | |
| 556 | Ok(PROPERTY_NAMES |
| 557 | .binary_search_by_key(&name, |&(n, _)| n) |
| 558 | .ok() |
| 559 | .map(|i| PROPERTY_NAMES[i].1)) |
| 560 | } |
| 561 | |
| 562 | imp(normalized_name) |
| 563 | } |
| 564 | |
| 565 | /// Find the canonical property value for the given normalized property |
| 566 | /// value. |
| 567 | /// |
| 568 | /// The given property values should correspond to the values for the property |
| 569 | /// under question, which can be found using `property_values`. |
| 570 | /// |
| 571 | /// If no such property value exists, then `None` is returned. |
| 572 | /// |
| 573 | /// The normalized property value must have been normalized according to |
| 574 | /// UAX44 LM3, which can be done using `symbolic_name_normalize`. |
| 575 | fn canonical_value( |
| 576 | vals: PropertyValues, |
| 577 | normalized_value: &str, |
| 578 | ) -> Option<&'static str> { |
| 579 | valsOption.binary_search_by_key(&normalized_value, |&(n: &str, _)| n) |
| 580 | .ok() |
| 581 | .map(|i: usize| vals[i].1) |
| 582 | } |
| 583 | |
| 584 | /// Return the table of property values for the given property name. |
| 585 | /// |
| 586 | /// If the property values data is not available, then an error is returned. |
| 587 | fn property_values( |
| 588 | canonical_property_name: &'static str, |
| 589 | ) -> Result<Option<PropertyValues>, Error> { |
| 590 | #[cfg (not(any( |
| 591 | feature = "unicode-age" , |
| 592 | feature = "unicode-bool" , |
| 593 | feature = "unicode-gencat" , |
| 594 | feature = "unicode-perl" , |
| 595 | feature = "unicode-script" , |
| 596 | feature = "unicode-segment" , |
| 597 | )))] |
| 598 | fn imp(_: &'static str) -> Result<Option<PropertyValues>, Error> { |
| 599 | Err(Error::PropertyValueNotFound) |
| 600 | } |
| 601 | |
| 602 | #[cfg (any( |
| 603 | feature = "unicode-age" , |
| 604 | feature = "unicode-bool" , |
| 605 | feature = "unicode-gencat" , |
| 606 | feature = "unicode-perl" , |
| 607 | feature = "unicode-script" , |
| 608 | feature = "unicode-segment" , |
| 609 | ))] |
| 610 | fn imp(name: &'static str) -> Result<Option<PropertyValues>, Error> { |
| 611 | use crate::unicode_tables::property_values::PROPERTY_VALUES; |
| 612 | |
| 613 | Ok(PROPERTY_VALUES |
| 614 | .binary_search_by_key(&name, |&(n, _)| n) |
| 615 | .ok() |
| 616 | .map(|i| PROPERTY_VALUES[i].1)) |
| 617 | } |
| 618 | |
| 619 | imp(canonical_property_name) |
| 620 | } |
| 621 | |
| 622 | // This is only used in some cases, but small enough to just let it be dead |
| 623 | // instead of figuring out (and maintaining) the right set of features. |
| 624 | #[allow (dead_code)] |
| 625 | fn property_set( |
| 626 | name_map: &'static [(&'static str, Range)], |
| 627 | canonical: &'static str, |
| 628 | ) -> Option<Range> { |
| 629 | name_mapOption |
| 630 | .binary_search_by_key(&canonical, |x: &(&str, &[(char, char)])| x.0) |
| 631 | .ok() |
| 632 | .map(|i: usize| name_map[i].1) |
| 633 | } |
| 634 | |
| 635 | /// Returns an iterator over Unicode Age sets. Each item corresponds to a set |
| 636 | /// of codepoints that were added in a particular revision of Unicode. The |
| 637 | /// iterator yields items in chronological order. |
| 638 | /// |
| 639 | /// If the given age value isn't valid or if the data isn't available, then an |
| 640 | /// error is returned instead. |
| 641 | fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>, Error> { |
| 642 | #[cfg (not(feature = "unicode-age" ))] |
| 643 | fn imp(_: &str) -> Result<impl Iterator<Item = Range>, Error> { |
| 644 | use core::option::IntoIter; |
| 645 | Err::<IntoIter<Range>, _>(Error::PropertyNotFound) |
| 646 | } |
| 647 | |
| 648 | #[cfg (feature = "unicode-age" )] |
| 649 | fn imp(canonical_age: &str) -> Result<impl Iterator<Item = Range>, Error> { |
| 650 | use crate::unicode_tables::age; |
| 651 | |
| 652 | const AGES: &[(&str, Range)] = &[ |
| 653 | ("V1_1" , age::V1_1), |
| 654 | ("V2_0" , age::V2_0), |
| 655 | ("V2_1" , age::V2_1), |
| 656 | ("V3_0" , age::V3_0), |
| 657 | ("V3_1" , age::V3_1), |
| 658 | ("V3_2" , age::V3_2), |
| 659 | ("V4_0" , age::V4_0), |
| 660 | ("V4_1" , age::V4_1), |
| 661 | ("V5_0" , age::V5_0), |
| 662 | ("V5_1" , age::V5_1), |
| 663 | ("V5_2" , age::V5_2), |
| 664 | ("V6_0" , age::V6_0), |
| 665 | ("V6_1" , age::V6_1), |
| 666 | ("V6_2" , age::V6_2), |
| 667 | ("V6_3" , age::V6_3), |
| 668 | ("V7_0" , age::V7_0), |
| 669 | ("V8_0" , age::V8_0), |
| 670 | ("V9_0" , age::V9_0), |
| 671 | ("V10_0" , age::V10_0), |
| 672 | ("V11_0" , age::V11_0), |
| 673 | ("V12_0" , age::V12_0), |
| 674 | ("V12_1" , age::V12_1), |
| 675 | ("V13_0" , age::V13_0), |
| 676 | ("V14_0" , age::V14_0), |
| 677 | ("V15_0" , age::V15_0), |
| 678 | ("V15_1" , age::V15_1), |
| 679 | ("V16_0" , age::V16_0), |
| 680 | ]; |
| 681 | assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync" ); |
| 682 | |
| 683 | let pos = AGES.iter().position(|&(age, _)| canonical_age == age); |
| 684 | match pos { |
| 685 | None => Err(Error::PropertyValueNotFound), |
| 686 | Some(i) => Ok(AGES[..=i].iter().map(|&(_, classes)| classes)), |
| 687 | } |
| 688 | } |
| 689 | |
| 690 | imp(canonical_age) |
| 691 | } |
| 692 | |
| 693 | /// Returns the Unicode HIR class corresponding to the given general category. |
| 694 | /// |
| 695 | /// Name canonicalization is assumed to be performed by the caller. |
| 696 | /// |
| 697 | /// If the given general category could not be found, or if the general |
| 698 | /// category data is not available, then an error is returned. |
| 699 | fn gencat(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 700 | #[cfg (not(feature = "unicode-gencat" ))] |
| 701 | fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 702 | Err(Error::PropertyNotFound) |
| 703 | } |
| 704 | |
| 705 | #[cfg (feature = "unicode-gencat" )] |
| 706 | fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 707 | use crate::unicode_tables::general_category::BY_NAME; |
| 708 | match name { |
| 709 | "ASCII" => Ok(hir_class(&[(' \0' , ' \x7F' )])), |
| 710 | "Any" => Ok(hir_class(&[(' \0' , ' \u{10FFFF}' )])), |
| 711 | "Assigned" => { |
| 712 | let mut cls = gencat("Unassigned" )?; |
| 713 | cls.negate(); |
| 714 | Ok(cls) |
| 715 | } |
| 716 | name => property_set(BY_NAME, name) |
| 717 | .map(hir_class) |
| 718 | .ok_or(Error::PropertyValueNotFound), |
| 719 | } |
| 720 | } |
| 721 | |
| 722 | match canonical_name { |
| 723 | "Decimal_Number" => perl_digit(), |
| 724 | name => imp(name), |
| 725 | } |
| 726 | } |
| 727 | |
| 728 | /// Returns the Unicode HIR class corresponding to the given script. |
| 729 | /// |
| 730 | /// Name canonicalization is assumed to be performed by the caller. |
| 731 | /// |
| 732 | /// If the given script could not be found, or if the script data is not |
| 733 | /// available, then an error is returned. |
| 734 | fn script(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 735 | #[cfg (not(feature = "unicode-script" ))] |
| 736 | fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 737 | Err(Error::PropertyNotFound) |
| 738 | } |
| 739 | |
| 740 | #[cfg (feature = "unicode-script" )] |
| 741 | fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 742 | use crate::unicode_tables::script::BY_NAME; |
| 743 | property_set(BY_NAME, name) |
| 744 | .map(hir_class) |
| 745 | .ok_or(err:Error::PropertyValueNotFound) |
| 746 | } |
| 747 | |
| 748 | imp(canonical_name) |
| 749 | } |
| 750 | |
| 751 | /// Returns the Unicode HIR class corresponding to the given script extension. |
| 752 | /// |
| 753 | /// Name canonicalization is assumed to be performed by the caller. |
| 754 | /// |
| 755 | /// If the given script extension could not be found, or if the script data is |
| 756 | /// not available, then an error is returned. |
| 757 | fn script_extension( |
| 758 | canonical_name: &'static str, |
| 759 | ) -> Result<hir::ClassUnicode, Error> { |
| 760 | #[cfg (not(feature = "unicode-script" ))] |
| 761 | fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 762 | Err(Error::PropertyNotFound) |
| 763 | } |
| 764 | |
| 765 | #[cfg (feature = "unicode-script" )] |
| 766 | fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 767 | use crate::unicode_tables::script_extension::BY_NAME; |
| 768 | property_set(BY_NAME, name) |
| 769 | .map(hir_class) |
| 770 | .ok_or(err:Error::PropertyValueNotFound) |
| 771 | } |
| 772 | |
| 773 | imp(canonical_name) |
| 774 | } |
| 775 | |
| 776 | /// Returns the Unicode HIR class corresponding to the given Unicode boolean |
| 777 | /// property. |
| 778 | /// |
| 779 | /// Name canonicalization is assumed to be performed by the caller. |
| 780 | /// |
| 781 | /// If the given boolean property could not be found, or if the boolean |
| 782 | /// property data is not available, then an error is returned. |
| 783 | fn bool_property( |
| 784 | canonical_name: &'static str, |
| 785 | ) -> Result<hir::ClassUnicode, Error> { |
| 786 | #[cfg (not(feature = "unicode-bool" ))] |
| 787 | fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 788 | Err(Error::PropertyNotFound) |
| 789 | } |
| 790 | |
| 791 | #[cfg (feature = "unicode-bool" )] |
| 792 | fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 793 | use crate::unicode_tables::property_bool::BY_NAME; |
| 794 | property_set(BY_NAME, name) |
| 795 | .map(hir_class) |
| 796 | .ok_or(err:Error::PropertyNotFound) |
| 797 | } |
| 798 | |
| 799 | match canonical_name { |
| 800 | "Decimal_Number" => perl_digit(), |
| 801 | "White_Space" => perl_space(), |
| 802 | name: &'static str => imp(name), |
| 803 | } |
| 804 | } |
| 805 | |
| 806 | /// Returns the Unicode HIR class corresponding to the given grapheme cluster |
| 807 | /// break property. |
| 808 | /// |
| 809 | /// Name canonicalization is assumed to be performed by the caller. |
| 810 | /// |
| 811 | /// If the given property could not be found, or if the corresponding data is |
| 812 | /// not available, then an error is returned. |
| 813 | fn gcb(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 814 | #[cfg (not(feature = "unicode-segment" ))] |
| 815 | fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 816 | Err(Error::PropertyNotFound) |
| 817 | } |
| 818 | |
| 819 | #[cfg (feature = "unicode-segment" )] |
| 820 | fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 821 | use crate::unicode_tables::grapheme_cluster_break::BY_NAME; |
| 822 | property_set(BY_NAME, name) |
| 823 | .map(hir_class) |
| 824 | .ok_or(err:Error::PropertyValueNotFound) |
| 825 | } |
| 826 | |
| 827 | imp(canonical_name) |
| 828 | } |
| 829 | |
| 830 | /// Returns the Unicode HIR class corresponding to the given word break |
| 831 | /// property. |
| 832 | /// |
| 833 | /// Name canonicalization is assumed to be performed by the caller. |
| 834 | /// |
| 835 | /// If the given property could not be found, or if the corresponding data is |
| 836 | /// not available, then an error is returned. |
| 837 | fn wb(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 838 | #[cfg (not(feature = "unicode-segment" ))] |
| 839 | fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 840 | Err(Error::PropertyNotFound) |
| 841 | } |
| 842 | |
| 843 | #[cfg (feature = "unicode-segment" )] |
| 844 | fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 845 | use crate::unicode_tables::word_break::BY_NAME; |
| 846 | property_set(BY_NAME, name) |
| 847 | .map(hir_class) |
| 848 | .ok_or(err:Error::PropertyValueNotFound) |
| 849 | } |
| 850 | |
| 851 | imp(canonical_name) |
| 852 | } |
| 853 | |
| 854 | /// Returns the Unicode HIR class corresponding to the given sentence |
| 855 | /// break property. |
| 856 | /// |
| 857 | /// Name canonicalization is assumed to be performed by the caller. |
| 858 | /// |
| 859 | /// If the given property could not be found, or if the corresponding data is |
| 860 | /// not available, then an error is returned. |
| 861 | fn sb(canonical_name: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 862 | #[cfg (not(feature = "unicode-segment" ))] |
| 863 | fn imp(_: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 864 | Err(Error::PropertyNotFound) |
| 865 | } |
| 866 | |
| 867 | #[cfg (feature = "unicode-segment" )] |
| 868 | fn imp(name: &'static str) -> Result<hir::ClassUnicode, Error> { |
| 869 | use crate::unicode_tables::sentence_break::BY_NAME; |
| 870 | property_set(BY_NAME, name) |
| 871 | .map(hir_class) |
| 872 | .ok_or(err:Error::PropertyValueNotFound) |
| 873 | } |
| 874 | |
| 875 | imp(canonical_name) |
| 876 | } |
| 877 | |
| 878 | /// Like symbolic_name_normalize_bytes, but operates on a string. |
| 879 | fn symbolic_name_normalize(x: &str) -> String { |
| 880 | let mut tmp: Vec = x.as_bytes().to_vec(); |
| 881 | let len: usize = symbolic_name_normalize_bytes(&mut tmp).len(); |
| 882 | tmp.truncate(len); |
| 883 | // This should always succeed because `symbolic_name_normalize_bytes` |
| 884 | // guarantees that `&tmp[..len]` is always valid UTF-8. |
| 885 | // |
| 886 | // N.B. We could avoid the additional UTF-8 check here, but it's unlikely |
| 887 | // to be worth skipping the additional safety check. A benchmark must |
| 888 | // justify it first. |
| 889 | String::from_utf8(vec:tmp).unwrap() |
| 890 | } |
| 891 | |
/// Normalize the given symbolic name in place according to UAX44-LM3.
///
/// Symbolic names are things like property names and property value
/// aliases. This routine must not be applied to property string values.
///
/// The normalized name is written to the front of `slice` and the returned
/// sub-slice covers exactly the bytes that were written. The returned slice
/// is valid UTF-8 for every possible input, because only ASCII bytes are
/// ever kept.
///
/// See: https://unicode.org/reports/tr44/#UAX44-LM3
fn symbolic_name_normalize_bytes(slice: &mut [u8]) -> &mut [u8] {
    // The standard does not appear to pin down the structure of property
    // names/aliases (unlike character names), so they are assumed to be
    // ASCII only and anything that isn't ASCII is dropped.

    // A leading "is", in any casing, is ignored.
    let has_is_prefix =
        slice.len() >= 2 && slice[..2].eq_ignore_ascii_case(b"is");
    let first = if has_is_prefix { 2 } else { 0 };

    let mut written = 0;
    for read in first..slice.len() {
        let byte = slice[read];
        // VALIDITY ARGUMENT: separators and every non-ASCII byte are
        // skipped, so the output consists solely of ASCII bytes and is
        // therefore valid UTF-8.
        let is_separator = matches!(byte, b' ' | b'_' | b'-');
        if is_separator || !byte.is_ascii() {
            continue;
        }
        // `written <= read` always holds, so this never clobbers a byte
        // that has not been read yet.
        slice[written] = byte.to_ascii_lowercase();
        written += 1;
    }

    // Special case: 'isc' is the abbreviation of ISO_Comment. Stripping the
    // "is" prefix would leave just 'c', which is an alias for the 'Other'
    // general category, so the full 'isc' is restored here. The slice is
    // known to hold at least three bytes at this point: two for the prefix
    // plus the one that produced 'c'.
    if has_is_prefix && written == 1 && slice[0] == b'c' {
        slice[..3].copy_from_slice(b"isc");
        written = 3;
    }
    &mut slice[..written]
}
| 946 | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the simple case folding of `c`, panicking if the case folding
    /// tables are unavailable.
    #[cfg(feature = "unicode-case")]
    fn simple_fold_ok(c: char) -> impl Iterator<Item = char> {
        SimpleCaseFolder::new().unwrap().mapping(c).iter().copied()
    }

    /// Reports whether the case folder's `overlaps` check succeeds for the
    /// inclusive range `start..=end`.
    #[cfg(feature = "unicode-case")]
    fn contains_case_map(start: char, end: char) -> bool {
        SimpleCaseFolder::new().unwrap().overlaps(start, end)
    }

    #[test]
    #[cfg(feature = "unicode-case")]
    fn simple_fold_k() {
        // 'k', 'K' and U+212A KELVIN SIGN fold to one another. The Kelvin
        // sign is written as an escape because it is visually
        // indistinguishable from ASCII 'K'.
        let xs: Vec<char> = simple_fold_ok('k').collect();
        assert_eq!(xs, alloc::vec!['K', '\u{212A}']);

        let xs: Vec<char> = simple_fold_ok('K').collect();
        assert_eq!(xs, alloc::vec!['k', '\u{212A}']);

        let xs: Vec<char> = simple_fold_ok('\u{212A}').collect();
        assert_eq!(xs, alloc::vec!['K', 'k']);
    }

    #[test]
    #[cfg(feature = "unicode-case")]
    fn simple_fold_a() {
        let xs: Vec<char> = simple_fold_ok('a').collect();
        assert_eq!(xs, alloc::vec!['A']);

        let xs: Vec<char> = simple_fold_ok('A').collect();
        assert_eq!(xs, alloc::vec!['a']);
    }

    #[test]
    #[cfg(not(feature = "unicode-case"))]
    fn simple_fold_disabled() {
        assert!(SimpleCaseFolder::new().is_err());
    }

    #[test]
    #[cfg(feature = "unicode-case")]
    fn range_contains() {
        assert!(contains_case_map('A', 'A'));
        assert!(contains_case_map('Z', 'Z'));
        assert!(contains_case_map('A', 'Z'));
        assert!(contains_case_map('@', 'A'));
        assert!(contains_case_map('Z', '['));
        assert!(contains_case_map('☃', 'Ⰰ'));

        assert!(!contains_case_map('[', '['));
        assert!(!contains_case_map('[', '`'));

        assert!(!contains_case_map('☃', '☃'));
    }

    #[test]
    #[cfg(feature = "unicode-gencat")]
    fn regression_466() {
        use super::{CanonicalClassQuery, ClassQuery};

        let q = ClassQuery::OneLetter('C');
        assert_eq!(
            q.canonicalize().unwrap(),
            CanonicalClassQuery::GeneralCategory("Other")
        );
    }

    #[test]
    fn sym_normalize() {
        let sym_norm = symbolic_name_normalize;

        assert_eq!(sym_norm("Line_Break"), "linebreak");
        assert_eq!(sym_norm("Line-break"), "linebreak");
        assert_eq!(sym_norm("linebreak"), "linebreak");
        assert_eq!(sym_norm("BA"), "ba");
        assert_eq!(sym_norm("ba"), "ba");
        assert_eq!(sym_norm("Greek"), "greek");
        assert_eq!(sym_norm("isGreek"), "greek");
        assert_eq!(sym_norm("IS_Greek"), "greek");
        assert_eq!(sym_norm("isc"), "isc");
        assert_eq!(sym_norm("is c"), "isc");
        assert_eq!(sym_norm("is_c"), "isc");
    }

    #[test]
    fn valid_utf8_symbolic() {
        // The stray 0xFF byte is not ASCII and must be dropped so that the
        // result stays valid UTF-8.
        let mut x = b"abc\xFFxyz".to_vec();
        let y = symbolic_name_normalize_bytes(&mut x);
        assert_eq!(y, b"abcxyz");
    }
}
| 1042 | |