//! Detection of the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
//! that a string conforms to.
3
4use crate::mixed_script::AugmentedScriptSet;
5use crate::GeneralSecurityProfile;
6use unicode_script::Script;
7
/// The [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
/// a string conforms to.
///
/// The derived ordering follows declaration order, so a variant listed
/// earlier compares as less than every variant listed after it. Do not
/// reorder the variants: level comparisons rely on this ordering.
#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Debug, Hash)]
pub enum RestrictionLevel {
    /// See <https://www.unicode.org/reports/tr39/#ascii_only>.
    ASCIIOnly,
    /// See <https://www.unicode.org/reports/tr39/#single_script>.
    SingleScript,
    /// See <https://www.unicode.org/reports/tr39/#highly_restrictive>.
    HighlyRestrictive,
    /// See <https://www.unicode.org/reports/tr39/#moderately_restrictive>.
    ModeratelyRestrictive,
    /// See <https://www.unicode.org/reports/tr39/#minimally_restrictive>.
    MinimallyRestrictive,
    /// See <https://www.unicode.org/reports/tr39/#unrestricted>.
    Unrestricted,
}
25
26/// Utilities for determining which [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
27/// a string satisfies
28pub trait RestrictionLevelDetection: Sized {
29 /// Detect the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
30 ///
31 /// This will _not_ check identifier well-formedness, as different applications may have different notions of well-formedness
32 fn detect_restriction_level(self) -> RestrictionLevel;
33
34 /// Check if a string satisfies the supplied [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
35 ///
36 /// This will _not_ check identifier well-formedness, as different applications may have different notions of well-formedness
37 fn check_restriction_level(self, level: RestrictionLevel) -> bool {
38 self.detect_restriction_level() <= level
39 }
40}
41
42impl RestrictionLevelDetection for &'_ str {
43 fn detect_restriction_level(self) -> RestrictionLevel {
44 let mut ascii_only = true;
45 let mut set = AugmentedScriptSet::default();
46 let mut exclude_latin_set = AugmentedScriptSet::default();
47 for ch in self.chars() {
48 if !GeneralSecurityProfile::identifier_allowed(ch) {
49 return RestrictionLevel::Unrestricted;
50 }
51 if !ch.is_ascii() {
52 ascii_only = false;
53 }
54 let ch_set = ch.into();
55 set.intersect_with(ch_set);
56 if !ch_set.base.contains_script(Script::Latin) {
57 exclude_latin_set.intersect_with(ch_set);
58 }
59 }
60
61 if ascii_only {
62 return RestrictionLevel::ASCIIOnly;
63 } else if !set.is_empty() {
64 return RestrictionLevel::SingleScript;
65 } else if exclude_latin_set.kore || exclude_latin_set.hanb || exclude_latin_set.jpan {
66 return RestrictionLevel::HighlyRestrictive;
67 } else if exclude_latin_set.base.len() == 1 {
68 let script = exclude_latin_set.base.iter().next().unwrap();
69 if script.is_recommended() && script != Script::Cyrillic && script != Script::Greek {
70 return RestrictionLevel::ModeratelyRestrictive;
71 }
72 }
73 return RestrictionLevel::MinimallyRestrictive;
74 }
75}
76