//! Character inclusion in binary-property, General_Category value, or Script Unicode sets.
2 | //! |
3 | //! We rely on dead code elimination to remove the tables that aren't needed. |
4 | |
5 | #![allow (bad_style)] |
6 | #![allow (clippy::all)] |
7 | |
8 | use alloc::boxed::Box; |
9 | |
// Expands one generated table module into its public surface:
//   * declares the module `$tables` (out of line),
//   * emits one `pub fn NAME(c: char) -> bool` per listed set, forwarding to
//     `contains_char` on the item of the same name inside `$tables`,
//   * emits `pub static $names`, the listed set names as string slices, in the
//     order they were given.
macro_rules! property_functions {
    ($tables:ident, $names:ident, [$($set:ident,)*]) => {
        #[allow(unused)]
        mod $tables;

        // Call shape of each generated function: unicode::ALPHABETIC('a')
        $(
            pub fn $set(c: char) -> bool {
                self::$tables::$set.contains_char(c)
            }
        )*

        pub static $names: &[&str] = &[$(stringify!($set),)*];
    };
}
26 | |
// Front end for `property_functions!`: accepts any number of
// `mod <module>; static <NAMES> = [...];` groups and forwards each group.
macro_rules! char_property_functions {
    // Form 1: a hand-written list of bare property identifiers.
    ($(
        mod $tables:ident;
        static $names:ident = [$($set:ident,)*];
    )*) => {
        $(
            property_functions!($tables, $names, [$($set,)*]);
        )*
    };
    // Form 2: a list pasted from the `BY_NAME` table that `ucd-generate`
    // emits. Each entry is a `(name, IDENT)` pair; only `IDENT` is used and
    // the first element is discarded.
    ($(
        mod $tables:ident;
        static $names:ident = [$(($_label:tt, $set:ident),)*];
    )*) => {
        $(
            property_functions!($tables, $names, [$($set,)*]);
        )*
    };
}
51 | |
52 | char_property_functions! { |
53 | mod binary; |
54 | static BINARY_PROPERTY_NAMES = [ |
55 | // ASCII_HEX_DIGIT, // let this one be stripped out -- the full trie is wasteful for ASCII |
56 | ALPHABETIC, BIDI_CONTROL, CASE_IGNORABLE, CASED, CHANGES_WHEN_CASEFOLDED, |
57 | CHANGES_WHEN_CASEMAPPED, CHANGES_WHEN_LOWERCASED, CHANGES_WHEN_TITLECASED, |
58 | CHANGES_WHEN_UPPERCASED, DASH, DEFAULT_IGNORABLE_CODE_POINT, DEPRECATED, DIACRITIC, |
59 | EMOJI, EMOJI_COMPONENT, EMOJI_MODIFIER, EMOJI_MODIFIER_BASE, EMOJI_PRESENTATION, EXTENDED_PICTOGRAPHIC, |
60 | EXTENDER, GRAPHEME_BASE, GRAPHEME_EXTEND, GRAPHEME_LINK, HEX_DIGIT, HYPHEN, |
61 | IDS_BINARY_OPERATOR, IDS_TRINARY_OPERATOR, ID_CONTINUE, ID_START, IDEOGRAPHIC, JOIN_CONTROL, |
62 | LOGICAL_ORDER_EXCEPTION, LOWERCASE, MATH, NONCHARACTER_CODE_POINT, OTHER_ALPHABETIC, |
63 | OTHER_DEFAULT_IGNORABLE_CODE_POINT, OTHER_GRAPHEME_EXTEND, OTHER_ID_CONTINUE, |
64 | OTHER_ID_START, OTHER_LOWERCASE, OTHER_MATH, OTHER_UPPERCASE, PATTERN_SYNTAX, |
65 | PATTERN_WHITE_SPACE, PREPENDED_CONCATENATION_MARK, QUOTATION_MARK, RADICAL, |
66 | REGIONAL_INDICATOR, SENTENCE_TERMINAL, SOFT_DOTTED, TERMINAL_PUNCTUATION, UNIFIED_IDEOGRAPH, |
67 | UPPERCASE, VARIATION_SELECTOR, WHITE_SPACE, XID_CONTINUE, XID_START, |
68 | ]; |
69 | } |
70 | |
71 | char_property_functions! { |
72 | mod category; |
73 | // Copy from category::BY_NAME |
74 | static CATEGORY_PROPERTY_NAMES = [ |
75 | ("Cased_Letter" , CASED_LETTER), ("Close_Punctuation" , CLOSE_PUNCTUATION), |
76 | ("Connector_Punctuation" , CONNECTOR_PUNCTUATION), ("Control" , CONTROL), |
77 | ("Currency_Symbol" , CURRENCY_SYMBOL), |
78 | ("Dash_Punctuation" , DASH_PUNCTUATION), ("Decimal_Number" , DECIMAL_NUMBER), |
79 | ("Enclosing_Mark" , ENCLOSING_MARK), |
80 | ("Final_Punctuation" , FINAL_PUNCTUATION), ("Format" , FORMAT), |
81 | ("Initial_Punctuation" , INITIAL_PUNCTUATION), ("Letter" , LETTER), |
82 | ("Letter_Number" , LETTER_NUMBER), ("Line_Separator" , LINE_SEPARATOR), |
83 | ("Lowercase_Letter" , LOWERCASE_LETTER), ("Mark" , MARK), |
84 | ("Math_Symbol" , MATH_SYMBOL), ("Modifier_Letter" , MODIFIER_LETTER), |
85 | ("Modifier_Symbol" , MODIFIER_SYMBOL), ("Nonspacing_Mark" , NONSPACING_MARK), |
86 | ("Number" , NUMBER), ("Open_Punctuation" , OPEN_PUNCTUATION), |
87 | ("Other" , OTHER), ("Other_Letter" , OTHER_LETTER), |
88 | ("Other_Number" , OTHER_NUMBER), ("Other_Punctuation" , OTHER_PUNCTUATION), |
89 | ("Other_Symbol" , OTHER_SYMBOL), |
90 | ("Paragraph_Separator" , PARAGRAPH_SEPARATOR), ("Private_Use" , PRIVATE_USE), |
91 | ("Punctuation" , PUNCTUATION), ("Separator" , SEPARATOR), |
92 | ("Space_Separator" , SPACE_SEPARATOR), ("Spacing_Mark" , SPACING_MARK), |
93 | ("Surrogate" , SURROGATE), ("Symbol" , SYMBOL), |
94 | ("Titlecase_Letter" , TITLECASE_LETTER), ("Unassigned" , UNASSIGNED), |
95 | ("Uppercase_Letter" , UPPERCASE_LETTER), |
96 | ]; |
97 | |
98 | mod script; |
99 | // Copy from script::BY_NAME |
100 | static SCRIPT_PROPERTY_NAMES = [ |
101 | ("Adlam" , ADLAM), |
102 | ("Ahom" , AHOM), |
103 | ("Anatolian_Hieroglyphs" , ANATOLIAN_HIEROGLYPHS), |
104 | ("Arabic" , ARABIC), |
105 | ("Armenian" , ARMENIAN), |
106 | ("Avestan" , AVESTAN), |
107 | ("Balinese" , BALINESE), |
108 | ("Bamum" , BAMUM), |
109 | ("Bassa_Vah" , BASSA_VAH), |
110 | ("Batak" , BATAK), |
111 | ("Bengali" , BENGALI), |
112 | ("Bhaiksuki" , BHAIKSUKI), |
113 | ("Bopomofo" , BOPOMOFO), |
114 | ("Brahmi" , BRAHMI), |
115 | ("Braille" , BRAILLE), |
116 | ("Buginese" , BUGINESE), |
117 | ("Buhid" , BUHID), |
118 | ("Canadian_Aboriginal" , CANADIAN_ABORIGINAL), |
119 | ("Carian" , CARIAN), |
120 | ("Caucasian_Albanian" , CAUCASIAN_ALBANIAN), |
121 | ("Chakma" , CHAKMA), |
122 | ("Cham" , CHAM), |
123 | ("Cherokee" , CHEROKEE), |
124 | ("Chorasmian" , CHORASMIAN), |
125 | ("Common" , COMMON), |
126 | ("Coptic" , COPTIC), |
127 | ("Cuneiform" , CUNEIFORM), |
128 | ("Cypriot" , CYPRIOT), |
129 | ("Cypro_Minoan" , CYPRO_MINOAN), |
130 | ("Cyrillic" , CYRILLIC), |
131 | ("Deseret" , DESERET), |
132 | ("Devanagari" , DEVANAGARI), |
133 | ("Dives_Akuru" , DIVES_AKURU), |
134 | ("Dogra" , DOGRA), |
135 | ("Duployan" , DUPLOYAN), |
136 | ("Egyptian_Hieroglyphs" , EGYPTIAN_HIEROGLYPHS), |
137 | ("Elbasan" , ELBASAN), |
138 | ("Elymaic" , ELYMAIC), |
139 | ("Ethiopic" , ETHIOPIC), |
140 | ("Georgian" , GEORGIAN), |
141 | ("Glagolitic" , GLAGOLITIC), |
142 | ("Gothic" , GOTHIC), |
143 | ("Grantha" , GRANTHA), |
144 | ("Greek" , GREEK), |
145 | ("Gujarati" , GUJARATI), |
146 | ("Gunjala_Gondi" , GUNJALA_GONDI), |
147 | ("Gurmukhi" , GURMUKHI), |
148 | ("Han" , HAN), |
149 | ("Hangul" , HANGUL), |
150 | ("Hanifi_Rohingya" , HANIFI_ROHINGYA), |
151 | ("Hanunoo" , HANUNOO), |
152 | ("Hatran" , HATRAN), |
153 | ("Hebrew" , HEBREW), |
154 | ("Hiragana" , HIRAGANA), |
155 | ("Imperial_Aramaic" , IMPERIAL_ARAMAIC), |
156 | ("Inherited" , INHERITED), |
157 | ("Inscriptional_Pahlavi" , INSCRIPTIONAL_PAHLAVI), |
158 | ("Inscriptional_Parthian" , INSCRIPTIONAL_PARTHIAN), |
159 | ("Javanese" , JAVANESE), |
160 | ("Kaithi" , KAITHI), |
161 | ("Kannada" , KANNADA), |
162 | ("Katakana" , KATAKANA), |
163 | ("Kawi" , KAWI), |
164 | ("Kayah_Li" , KAYAH_LI), |
165 | ("Kharoshthi" , KHAROSHTHI), |
166 | ("Khitan_Small_Script" , KHITAN_SMALL_SCRIPT), |
167 | ("Khmer" , KHMER), |
168 | ("Khojki" , KHOJKI), |
169 | ("Khudawadi" , KHUDAWADI), |
170 | ("Lao" , LAO), |
171 | ("Latin" , LATIN), |
172 | ("Lepcha" , LEPCHA), |
173 | ("Limbu" , LIMBU), |
174 | ("Linear_A" , LINEAR_A), |
175 | ("Linear_B" , LINEAR_B), |
176 | ("Lisu" , LISU), |
177 | ("Lycian" , LYCIAN), |
178 | ("Lydian" , LYDIAN), |
179 | ("Mahajani" , MAHAJANI), |
180 | ("Makasar" , MAKASAR), |
181 | ("Malayalam" , MALAYALAM), |
182 | ("Mandaic" , MANDAIC), |
183 | ("Manichaean" , MANICHAEAN), |
184 | ("Marchen" , MARCHEN), |
185 | ("Masaram_Gondi" , MASARAM_GONDI), |
186 | ("Medefaidrin" , MEDEFAIDRIN), |
187 | ("Meetei_Mayek" , MEETEI_MAYEK), |
188 | ("Mende_Kikakui" , MENDE_KIKAKUI), |
189 | ("Meroitic_Cursive" , MEROITIC_CURSIVE), |
190 | ("Meroitic_Hieroglyphs" , MEROITIC_HIEROGLYPHS), |
191 | ("Miao" , MIAO), |
192 | ("Modi" , MODI), |
193 | ("Mongolian" , MONGOLIAN), |
194 | ("Mro" , MRO), |
195 | ("Multani" , MULTANI), |
196 | ("Myanmar" , MYANMAR), |
197 | ("Nabataean" , NABATAEAN), |
198 | ("Nag_Mundari" , NAG_MUNDARI), |
199 | ("Nandinagari" , NANDINAGARI), |
200 | ("New_Tai_Lue" , NEW_TAI_LUE), |
201 | ("Newa" , NEWA), |
202 | ("Nko" , NKO), |
203 | ("Nushu" , NUSHU), |
204 | ("Nyiakeng_Puachue_Hmong" , NYIAKENG_PUACHUE_HMONG), |
205 | ("Ogham" , OGHAM), |
206 | ("Ol_Chiki" , OL_CHIKI), |
207 | ("Old_Hungarian" , OLD_HUNGARIAN), |
208 | ("Old_Italic" , OLD_ITALIC), |
209 | ("Old_North_Arabian" , OLD_NORTH_ARABIAN), |
210 | ("Old_Permic" , OLD_PERMIC), |
211 | ("Old_Persian" , OLD_PERSIAN), |
212 | ("Old_Sogdian" , OLD_SOGDIAN), |
213 | ("Old_South_Arabian" , OLD_SOUTH_ARABIAN), |
214 | ("Old_Turkic" , OLD_TURKIC), |
215 | ("Old_Uyghur" , OLD_UYGHUR), |
216 | ("Oriya" , ORIYA), |
217 | ("Osage" , OSAGE), |
218 | ("Osmanya" , OSMANYA), |
219 | ("Pahawh_Hmong" , PAHAWH_HMONG), |
220 | ("Palmyrene" , PALMYRENE), |
221 | ("Pau_Cin_Hau" , PAU_CIN_HAU), |
222 | ("Phags_Pa" , PHAGS_PA), |
223 | ("Phoenician" , PHOENICIAN), |
224 | ("Psalter_Pahlavi" , PSALTER_PAHLAVI), |
225 | ("Rejang" , REJANG), |
226 | ("Runic" , RUNIC), |
227 | ("Samaritan" , SAMARITAN), |
228 | ("Saurashtra" , SAURASHTRA), |
229 | ("Sharada" , SHARADA), |
230 | ("Shavian" , SHAVIAN), |
231 | ("Siddham" , SIDDHAM), |
232 | ("SignWriting" , SIGNWRITING), |
233 | ("Sinhala" , SINHALA), |
234 | ("Sogdian" , SOGDIAN), |
235 | ("Sora_Sompeng" , SORA_SOMPENG), |
236 | ("Soyombo" , SOYOMBO), |
237 | ("Sundanese" , SUNDANESE), |
238 | ("Syloti_Nagri" , SYLOTI_NAGRI), |
239 | ("Syriac" , SYRIAC), |
240 | ("Tagalog" , TAGALOG), |
241 | ("Tagbanwa" , TAGBANWA), |
242 | ("Tai_Le" , TAI_LE), |
243 | ("Tai_Tham" , TAI_THAM), |
244 | ("Tai_Viet" , TAI_VIET), |
245 | ("Takri" , TAKRI), |
246 | ("Tamil" , TAMIL), |
247 | ("Tangsa" , TANGSA), |
248 | ("Tangut" , TANGUT), |
249 | ("Telugu" , TELUGU), |
250 | ("Thaana" , THAANA), |
251 | ("Thai" , THAI), |
252 | ("Tibetan" , TIBETAN), |
253 | ("Tifinagh" , TIFINAGH), |
254 | ("Tirhuta" , TIRHUTA), |
255 | ("Toto" , TOTO), |
256 | ("Ugaritic" , UGARITIC), |
257 | ("Vai" , VAI), |
258 | ("Vithkuqi" , VITHKUQI), |
259 | ("Wancho" , WANCHO), |
260 | ("Warang_Citi" , WARANG_CITI), |
261 | ("Yezidi" , YEZIDI), |
262 | ("Yi" , YI), |
263 | ("Zanabazar_Square" , ZANABAZAR_SQUARE), |
264 | ]; |
265 | } |
266 | |
267 | /// Return all available unicode property names |
268 | pub fn unicode_property_names() -> Box<dyn Iterator<Item = &'static str>> { |
269 | Box::new( |
270 | BINARY_PROPERTY_NAMESimpl Iterator |
271 | .iter() |
272 | .map(|name: &&str| *name) |
273 | .chain(CATEGORY_PROPERTY_NAMES.iter().map(|name: &&str| *name)) |
274 | .chain(SCRIPT_PROPERTY_NAMES.iter().map(|name: &&str| *name)), |
275 | ) |
276 | } |
277 | |
278 | pub fn by_name(name: &str) -> Option<Box<dyn Fn(char) -> bool>> { |
279 | for property: &(&str, &TrieSetSlice<'_>) in binary::BY_NAME { |
280 | if name == property.0.to_uppercase() { |
281 | return Some(Box::new(move |c: char| property.1.contains_char(c))); |
282 | } |
283 | } |
284 | |
285 | for property: &(&str, &TrieSetSlice<'_>) in category::BY_NAME { |
286 | if name == property.0.to_uppercase() { |
287 | return Some(Box::new(move |c: char| property.1.contains_char(c))); |
288 | } |
289 | } |
290 | |
291 | for property: &(&str, &TrieSetSlice<'_>) in script::BY_NAME { |
292 | if name == property.0.to_uppercase() { |
293 | return Some(Box::new(move |c: char| property.1.contains_char(c))); |
294 | } |
295 | } |
296 | |
297 | None |
298 | } |
299 | |