1 | //! For detecting the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection) |
2 | //! a string conforms to |
3 | |
4 | use crate::mixed_script::AugmentedScriptSet; |
5 | use crate::GeneralSecurityProfile; |
6 | use unicode_script::Script; |
7 | |
/// The [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
/// that a string conforms to.
///
/// The variants are declared from the most restrictive level to the least
/// restrictive one, so the derived ordering ranks a stricter level below a
/// more permissive one.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum RestrictionLevel {
    /// See <https://www.unicode.org/reports/tr39/#ascii_only>.
    ASCIIOnly,
    /// See <https://www.unicode.org/reports/tr39/#single_script>.
    SingleScript,
    /// See <https://www.unicode.org/reports/tr39/#highly_restrictive>.
    HighlyRestrictive,
    /// See <https://www.unicode.org/reports/tr39/#moderately_restrictive>.
    ModeratelyRestrictive,
    /// See <https://www.unicode.org/reports/tr39/#minimally_restrictive>.
    MinimallyRestrictive,
    /// See <https://www.unicode.org/reports/tr39/#unrestricted>.
    Unrestricted,
}
25 | |
26 | /// Utilities for determining which [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection) |
27 | /// a string satisfies |
28 | pub trait RestrictionLevelDetection: Sized { |
29 | /// Detect the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection) |
30 | /// |
31 | /// This will _not_ check identifier well-formedness, as different applications may have different notions of well-formedness |
32 | fn detect_restriction_level(self) -> RestrictionLevel; |
33 | |
34 | /// Check if a string satisfies the supplied [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection) |
35 | /// |
36 | /// This will _not_ check identifier well-formedness, as different applications may have different notions of well-formedness |
37 | fn check_restriction_level(self, level: RestrictionLevel) -> bool { |
38 | self.detect_restriction_level() <= level |
39 | } |
40 | } |
41 | |
42 | impl RestrictionLevelDetection for &'_ str { |
43 | fn detect_restriction_level(self) -> RestrictionLevel { |
44 | let mut ascii_only = true; |
45 | let mut set = AugmentedScriptSet::default(); |
46 | let mut exclude_latin_set = AugmentedScriptSet::default(); |
47 | for ch in self.chars() { |
48 | if !GeneralSecurityProfile::identifier_allowed(ch) { |
49 | return RestrictionLevel::Unrestricted; |
50 | } |
51 | if !ch.is_ascii() { |
52 | ascii_only = false; |
53 | } |
54 | let ch_set = ch.into(); |
55 | set.intersect_with(ch_set); |
56 | if !ch_set.base.contains_script(Script::Latin) { |
57 | exclude_latin_set.intersect_with(ch_set); |
58 | } |
59 | } |
60 | |
61 | if ascii_only { |
62 | return RestrictionLevel::ASCIIOnly; |
63 | } else if !set.is_empty() { |
64 | return RestrictionLevel::SingleScript; |
65 | } else if exclude_latin_set.kore || exclude_latin_set.hanb || exclude_latin_set.jpan { |
66 | return RestrictionLevel::HighlyRestrictive; |
67 | } else if exclude_latin_set.base.len() == 1 { |
68 | let script = exclude_latin_set.base.iter().next().unwrap(); |
69 | if script.is_recommended() && script != Script::Cyrillic && script != Script::Greek { |
70 | return RestrictionLevel::ModeratelyRestrictive; |
71 | } |
72 | } |
73 | return RestrictionLevel::MinimallyRestrictive; |
74 | } |
75 | } |
76 | |