| 1 | //! For detecting the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection) |
| 2 | //! a string conforms to |
| 3 | |
| 4 | use crate::mixed_script::AugmentedScriptSet; |
| 5 | use crate::GeneralSecurityProfile; |
| 6 | use unicode_script::Script; |
| 7 | |
/// The [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
/// a string conforms to.
///
/// The derived ordering follows declaration order, so `ASCIIOnly` compares
/// smallest and `Unrestricted` compares largest. Do not reorder the variants:
/// `check_restriction_level` compares levels with `<=`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum RestrictionLevel {
    /// <https://www.unicode.org/reports/tr39/#ascii_only>
    ASCIIOnly,
    /// <https://www.unicode.org/reports/tr39/#single_script>
    SingleScript,
    /// <https://www.unicode.org/reports/tr39/#highly_restrictive>
    HighlyRestrictive,
    /// <https://www.unicode.org/reports/tr39/#moderately_restrictive>
    ModeratelyRestrictive,
    /// <https://www.unicode.org/reports/tr39/#minimally_restrictive>
    MinimallyRestrictive,
    /// <https://www.unicode.org/reports/tr39/#unrestricted>
    Unrestricted,
}
| 25 | |
| 26 | /// Utilities for determining which [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection) |
| 27 | /// a string satisfies |
| 28 | pub trait RestrictionLevelDetection: Sized { |
| 29 | /// Detect the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection) |
| 30 | /// |
| 31 | /// This will _not_ check identifier well-formedness, as different applications may have different notions of well-formedness |
| 32 | fn detect_restriction_level(self) -> RestrictionLevel; |
| 33 | |
| 34 | /// Check if a string satisfies the supplied [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection) |
| 35 | /// |
| 36 | /// This will _not_ check identifier well-formedness, as different applications may have different notions of well-formedness |
| 37 | fn check_restriction_level(self, level: RestrictionLevel) -> bool { |
| 38 | self.detect_restriction_level() <= level |
| 39 | } |
| 40 | } |
| 41 | |
| 42 | impl RestrictionLevelDetection for &'_ str { |
| 43 | fn detect_restriction_level(self) -> RestrictionLevel { |
| 44 | let mut ascii_only = true; |
| 45 | let mut set = AugmentedScriptSet::default(); |
| 46 | let mut exclude_latin_set = AugmentedScriptSet::default(); |
| 47 | for ch in self.chars() { |
| 48 | if !GeneralSecurityProfile::identifier_allowed(ch) { |
| 49 | return RestrictionLevel::Unrestricted; |
| 50 | } |
| 51 | if !ch.is_ascii() { |
| 52 | ascii_only = false; |
| 53 | } |
| 54 | let ch_set = ch.into(); |
| 55 | set.intersect_with(ch_set); |
| 56 | if !ch_set.base.contains_script(Script::Latin) { |
| 57 | exclude_latin_set.intersect_with(ch_set); |
| 58 | } |
| 59 | } |
| 60 | |
| 61 | if ascii_only { |
| 62 | return RestrictionLevel::ASCIIOnly; |
| 63 | } else if !set.is_empty() { |
| 64 | return RestrictionLevel::SingleScript; |
| 65 | } else if exclude_latin_set.kore || exclude_latin_set.hanb || exclude_latin_set.jpan { |
| 66 | return RestrictionLevel::HighlyRestrictive; |
| 67 | } else if exclude_latin_set.base.len() == 1 { |
| 68 | let script = exclude_latin_set.base.iter().next().unwrap(); |
| 69 | if script.is_recommended() && script != Script::Cyrillic && script != Script::Greek { |
| 70 | return RestrictionLevel::ModeratelyRestrictive; |
| 71 | } |
| 72 | } |
| 73 | return RestrictionLevel::MinimallyRestrictive; |
| 74 | } |
| 75 | } |
| 76 | |