1//! [Mixed-script detection](https://www.unicode.org/reports/tr39/#Mixed_Script_Detection)
2
3use core::fmt::{self, Debug};
4use unicode_script::{Script, ScriptExtension};
5
6/// An Augmented script set, as defined by UTS 39
7///
8/// https://www.unicode.org/reports/tr39/#def-augmented-script-set
9#[derive(Copy, Clone, PartialEq, Hash, Eq)]
10pub struct AugmentedScriptSet {
11 /// The base ScriptExtension value
12 pub base: ScriptExtension,
13 /// Han With Bopomofo
14 pub hanb: bool,
15 /// Japanese
16 pub jpan: bool,
17 /// Korean
18 pub kore: bool,
19}
20
21impl From<ScriptExtension> for AugmentedScriptSet {
22 fn from(ext: ScriptExtension) -> Self {
23 let mut hanb = false;
24 let mut jpan = false;
25 let mut kore = false;
26
27 if ext.is_common() || ext.is_inherited() || ext.contains_script(Script::Han) {
28 hanb = true;
29 jpan = true;
30 kore = true;
31 } else {
32 if ext.contains_script(Script::Hiragana) || ext.contains_script(Script::Katakana) {
33 jpan = true;
34 }
35
36 if ext.contains_script(Script::Hangul) {
37 kore = true;
38 }
39
40 if ext.contains_script(Script::Bopomofo) {
41 hanb = true;
42 }
43 }
44 Self {
45 base: ext,
46 hanb,
47 jpan,
48 kore,
49 }
50 }
51}
52
53impl From<char> for AugmentedScriptSet {
54 fn from(c: char) -> Self {
55 AugmentedScriptSet::for_char(c)
56 }
57}
58
59impl From<&'_ str> for AugmentedScriptSet {
60 fn from(s: &'_ str) -> Self {
61 AugmentedScriptSet::for_str(s)
62 }
63}
64
65impl Default for AugmentedScriptSet {
66 fn default() -> Self {
67 AugmentedScriptSet {
68 base: Script::Common.into(),
69 hanb: true,
70 jpan: true,
71 kore: true,
72 }
73 }
74}
75
76impl Debug for AugmentedScriptSet {
77 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78 if self.is_empty() {
79 write!(f, "AugmentedScriptSet {{}}")?;
80 } else if self.is_all() {
81 write!(f, "AugmentedScriptSet {{ALL}}")?;
82 } else {
83 write!(f, "AugmentedScriptSet {{")?;
84 let mut first_entry = true;
85 let hanb = if self.hanb { Some("Hanb") } else { None };
86 let jpan = if self.jpan { Some("Jpan") } else { None };
87 let kore = if self.kore { Some("Kore") } else { None };
88 for writing_system in None
89 .into_iter()
90 .chain(hanb)
91 .chain(jpan)
92 .chain(kore)
93 .chain(self.base.iter().map(Script::short_name))
94 {
95 if !first_entry {
96 write!(f, ", ")?;
97 } else {
98 first_entry = false;
99 }
100 write!(f, "{}", writing_system)?;
101 }
102 write!(f, "}}")?;
103 }
104 Ok(())
105 }
106}
107
108impl fmt::Display for AugmentedScriptSet {
109 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
110 if self.is_empty() {
111 write!(f, "Empty")?;
112 } else if self.is_all() {
113 write!(f, "All")?;
114 } else {
115 let mut first_entry = true;
116 let hanb = if self.hanb {
117 Some("Han with Bopomofo")
118 } else {
119 None
120 };
121 let jpan = if self.jpan { Some("Japanese") } else { None };
122 let kore = if self.kore { Some("Korean") } else { None };
123 for writing_system in None
124 .into_iter()
125 .chain(hanb)
126 .chain(jpan)
127 .chain(kore)
128 .chain(self.base.iter().map(Script::full_name))
129 {
130 if !first_entry {
131 write!(f, ", ")?;
132 } else {
133 first_entry = false;
134 }
135 write!(f, "{}", writing_system)?;
136 }
137 }
138 Ok(())
139 }
140}
141
142impl AugmentedScriptSet {
143 /// Intersect this set with another
144 pub fn intersect_with(&mut self, other: Self) {
145 self.base.intersect_with(other.base);
146 self.hanb = self.hanb && other.hanb;
147 self.jpan = self.jpan && other.jpan;
148 self.kore = self.kore && other.kore;
149 }
150
151 /// Check if the set is empty
152 pub fn is_empty(&self) -> bool {
153 self.base.is_empty() && !self.hanb && !self.jpan && !self.kore
154 }
155
156 /// Check if the set is "All" (Common or Inherited)
157 pub fn is_all(&self) -> bool {
158 self.base.is_common() || self.base.is_inherited()
159 }
160
161 /// Construct an AugmentedScriptSet for a given character
162 pub fn for_char(c: char) -> Self {
163 ScriptExtension::from(c).into()
164 }
165
166 /// Find the [resolved script set](https://www.unicode.org/reports/tr39/#def-resolved-script-set) of a given string
167 pub fn for_str(s: &str) -> Self {
168 let mut set = AugmentedScriptSet::default();
169 for ch in s.chars() {
170 set.intersect_with(ch.into())
171 }
172 set
173 }
174}
175
/// Extension trait for [mixed-script detection](https://www.unicode.org/reports/tr39/#Mixed_Script_Detection)
///
/// Both methods take `self` by value. This module implements the trait for
/// `&str`.
pub trait MixedScript {
    /// Check if a string is [single-script](https://www.unicode.org/reports/tr39/#def-single-script)
    ///
    /// Note that a single-script string may still contain multiple Script properties!
    ///
    /// The `&str` implementation in this module returns `true` exactly when
    /// the string's resolved script set is not empty.
    fn is_single_script(self) -> bool;

    /// Find the [resolved script set](https://www.unicode.org/reports/tr39/#def-resolved-script-set) of a given string
    ///
    /// The result is returned as an [`AugmentedScriptSet`].
    fn resolve_script_set(self) -> AugmentedScriptSet;
}
186
187impl MixedScript for &'_ str {
188 fn is_single_script(self) -> bool {
189 !AugmentedScriptSet::for_str(self).is_empty()
190 }
191
192 fn resolve_script_set(self) -> AugmentedScriptSet {
193 self.into()
194 }
195}
196
197/// Check if a character is considered potential mixed script confusable.
198///
199/// If the specified character is not restricted from use for identifiers,
200/// this function returns whether it is considered mixed script confusable
201/// with another character that is not restricted from use for identifiers.
202///
203/// If the specified character is restricted from use for identifiers,
204/// the return value is unspecified.
205pub fn is_potential_mixed_script_confusable_char(c: char) -> bool {
206 use crate::tables::potential_mixed_script_confusable::potential_mixed_script_confusable;
207
208 potential_mixed_script_confusable(c)
209}
210