1 | /*! |
2 | |
3 | This library implements |
4 | [Unicode Canonical Combining Class](https://unicode.org/reports/tr44/#Canonical_Combining_Class_Values) detection. |
5 | |
6 | ```rust |
7 | use unicode_ccc::*; |
8 | |
9 | assert_eq!(get_canonical_combining_class('A' ), CanonicalCombiningClass::NotReordered); |
10 | assert_eq!(get_canonical_combining_class(' \u{0A3C}' ), CanonicalCombiningClass::Nukta); |
11 | assert_eq!(get_canonical_combining_class(' \u{18A9}' ), CanonicalCombiningClass::AboveLeft); |
12 | ``` |
13 | |
14 | */ |
15 | |
16 | #![no_std ] |
17 | |
18 | #![forbid (unsafe_code)] |
19 | |
20 | /// The Unicode version. |
21 | pub const UNICODE_VERSION: (u8, u8, u8) = (14, 0, 0); |
22 | |
23 | /// Character Canonical Combining Class. |
24 | #[derive (Clone, Copy, PartialEq, Debug)] |
25 | pub enum CanonicalCombiningClass { |
26 | NotReordered = 0, |
27 | Overlay = 1, |
28 | HanReading = 6, |
29 | Nukta = 7, |
30 | KanaVoicing = 8, |
31 | Virama = 9, |
32 | // Hebrew |
33 | CCC10 = 10, |
34 | CCC11 = 11, |
35 | CCC12 = 12, |
36 | CCC13 = 13, |
37 | CCC14 = 14, |
38 | CCC15 = 15, |
39 | CCC16 = 16, |
40 | CCC17 = 17, |
41 | CCC18 = 18, |
42 | CCC19 = 19, |
43 | CCC20 = 20, |
44 | CCC21 = 21, |
45 | CCC22 = 22, |
46 | CCC23 = 23, |
47 | CCC24 = 24, |
48 | CCC25 = 25, |
49 | CCC26 = 26, |
50 | // Arabic |
51 | CCC27 = 27, |
52 | CCC28 = 28, |
53 | CCC29 = 29, |
54 | CCC30 = 30, |
55 | CCC31 = 31, |
56 | CCC32 = 32, |
57 | CCC33 = 33, |
58 | CCC34 = 34, |
59 | CCC35 = 35, |
60 | // Syriac |
61 | CCC36 = 36, |
62 | // Telugu |
63 | CCC84 = 84, |
64 | CCC91 = 91, |
65 | // Thai |
66 | CCC103 = 103, |
67 | CCC107 = 107, |
68 | // Lao |
69 | CCC118 = 118, |
70 | CCC122 = 122, |
71 | // Tibetan |
72 | CCC129 = 129, |
73 | CCC130 = 130, |
74 | CCC132 = 132, |
75 | AttachedBelowLeft = 200, |
76 | AttachedBelow = 202, |
77 | AttachedAbove = 214, |
78 | AttachedAboveRight = 216, |
79 | BelowLeft = 218, |
80 | Below = 220, |
81 | BelowRight = 222, |
82 | Left = 224, |
83 | Right = 226, |
84 | AboveLeft = 228, |
85 | Above = 230, |
86 | AboveRight = 232, |
87 | DoubleBelow = 233, |
88 | DoubleAbove = 234, |
89 | IotaSubscript = 240, |
90 | } |
91 | |
92 | /// Returns a Canonical Combining Class of a character. |
93 | /// |
94 | /// Based on <https://www.unicode.org/Public/14.0.0/ucd/extracted/DerivedCombiningClass.txt>. |
95 | pub fn get_canonical_combining_class(c: char) -> CanonicalCombiningClass { |
96 | use CanonicalCombiningClass::*; |
97 | |
98 | match c as u32 { |
99 | 0x0334..=0x0338 => Overlay, |
100 | 0x1CD4 => Overlay, |
101 | 0x1CE2..=0x1CE8 => Overlay, |
102 | 0x20D2..=0x20D3 => Overlay, |
103 | 0x20D8..=0x20DA => Overlay, |
104 | 0x20E5..=0x20E6 => Overlay, |
105 | 0x20EA..=0x20EB => Overlay, |
106 | 0x10A39 => Overlay, |
107 | 0x16AF0..=0x16AF4 => Overlay, |
108 | 0x1BC9E => Overlay, |
109 | 0x1D167..=0x1D169 => Overlay, |
110 | 0x16FF0..=0x16FF1 => HanReading, |
111 | 0x093C => Nukta, |
112 | 0x09BC => Nukta, |
113 | 0x0A3C => Nukta, |
114 | 0x0ABC => Nukta, |
115 | 0x0B3C => Nukta, |
116 | 0x0C3C => Nukta, |
117 | 0x0CBC => Nukta, |
118 | 0x1037 => Nukta, |
119 | 0x1B34 => Nukta, |
120 | 0x1BE6 => Nukta, |
121 | 0x1C37 => Nukta, |
122 | 0xA9B3 => Nukta, |
123 | 0x110BA => Nukta, |
124 | 0x11173 => Nukta, |
125 | 0x111CA => Nukta, |
126 | 0x11236 => Nukta, |
127 | 0x112E9 => Nukta, |
128 | 0x1133B..=0x1133C => Nukta, |
129 | 0x11446 => Nukta, |
130 | 0x114C3 => Nukta, |
131 | 0x115C0 => Nukta, |
132 | 0x116B7 => Nukta, |
133 | 0x1183A => Nukta, |
134 | 0x11943 => Nukta, |
135 | 0x11D42 => Nukta, |
136 | 0x1E94A => Nukta, |
137 | 0x3099..=0x309A => KanaVoicing, |
138 | 0x094D => Virama, |
139 | 0x09CD => Virama, |
140 | 0x0A4D => Virama, |
141 | 0x0ACD => Virama, |
142 | 0x0B4D => Virama, |
143 | 0x0BCD => Virama, |
144 | 0x0C4D => Virama, |
145 | 0x0CCD => Virama, |
146 | 0x0D3B..=0x0D3C => Virama, |
147 | 0x0D4D => Virama, |
148 | 0x0DCA => Virama, |
149 | 0x0E3A => Virama, |
150 | 0x0EBA => Virama, |
151 | 0x0F84 => Virama, |
152 | 0x1039..=0x103A => Virama, |
153 | 0x1714 => Virama, |
154 | 0x1715 => Virama, |
155 | 0x1734 => Virama, |
156 | 0x17D2 => Virama, |
157 | 0x1A60 => Virama, |
158 | 0x1B44 => Virama, |
159 | 0x1BAA => Virama, |
160 | 0x1BAB => Virama, |
161 | 0x1BF2..=0x1BF3 => Virama, |
162 | 0x2D7F => Virama, |
163 | 0xA806 => Virama, |
164 | 0xA82C => Virama, |
165 | 0xA8C4 => Virama, |
166 | 0xA953 => Virama, |
167 | 0xA9C0 => Virama, |
168 | 0xAAF6 => Virama, |
169 | 0xABED => Virama, |
170 | 0x10A3F => Virama, |
171 | 0x11046 => Virama, |
172 | 0x11070 => Virama, |
173 | 0x1107F => Virama, |
174 | 0x110B9 => Virama, |
175 | 0x11133..=0x11134 => Virama, |
176 | 0x111C0 => Virama, |
177 | 0x11235 => Virama, |
178 | 0x112EA => Virama, |
179 | 0x1134D => Virama, |
180 | 0x11442 => Virama, |
181 | 0x114C2 => Virama, |
182 | 0x115BF => Virama, |
183 | 0x1163F => Virama, |
184 | 0x116B6 => Virama, |
185 | 0x1172B => Virama, |
186 | 0x11839 => Virama, |
187 | 0x1193D => Virama, |
188 | 0x1193E => Virama, |
189 | 0x119E0 => Virama, |
190 | 0x11A34 => Virama, |
191 | 0x11A47 => Virama, |
192 | 0x11A99 => Virama, |
193 | 0x11C3F => Virama, |
194 | 0x11D44..=0x11D45 => Virama, |
195 | 0x11D97 => Virama, |
196 | 0x05B0 => CCC10, |
197 | 0x05B1 => CCC11, |
198 | 0x05B2 => CCC12, |
199 | 0x05B3 => CCC13, |
200 | 0x05B4 => CCC14, |
201 | 0x05B5 => CCC15, |
202 | 0x05B6 => CCC16, |
203 | 0x05B7 => CCC17, |
204 | 0x05B8 => CCC18, |
205 | 0x05C7 => CCC18, |
206 | 0x05B9..=0x05BA => CCC19, |
207 | 0x05BB => CCC20, |
208 | 0x05BC => CCC21, |
209 | 0x05BD => CCC22, |
210 | 0x05BF => CCC23, |
211 | 0x05C1 => CCC24, |
212 | 0x05C2 => CCC25, |
213 | 0xFB1E => CCC26, |
214 | 0x064B => CCC27, |
215 | 0x08F0 => CCC27, |
216 | 0x064C => CCC28, |
217 | 0x08F1 => CCC28, |
218 | 0x064D => CCC29, |
219 | 0x08F2 => CCC29, |
220 | 0x0618 => CCC30, |
221 | 0x064E => CCC30, |
222 | 0x0619 => CCC31, |
223 | 0x064F => CCC31, |
224 | 0x061A => CCC32, |
225 | 0x0650 => CCC32, |
226 | 0x0651 => CCC33, |
227 | 0x0652 => CCC34, |
228 | 0x0670 => CCC35, |
229 | 0x0711 => CCC36, |
230 | 0x0C55 => CCC84, |
231 | 0x0C56 => CCC91, |
232 | 0x0E38..=0x0E39 => CCC103, |
233 | 0x0E48..=0x0E4B => CCC107, |
234 | 0x0EB8..=0x0EB9 => CCC118, |
235 | 0x0EC8..=0x0ECB => CCC122, |
236 | 0x0F71 => CCC129, |
237 | 0x0F72 => CCC130, |
238 | 0x0F7A..=0x0F7D => CCC130, |
239 | 0x0F80 => CCC130, |
240 | 0x0F74 => CCC132, |
241 | 0x0321..=0x0322 => AttachedBelow, |
242 | 0x0327..=0x0328 => AttachedBelow, |
243 | 0x1DD0 => AttachedBelow, |
244 | 0x1DCE => AttachedAbove, |
245 | 0x031B => AttachedAboveRight, |
246 | 0x0F39 => AttachedAboveRight, |
247 | 0x1D165..=0x1D166 => AttachedAboveRight, |
248 | 0x1D16E..=0x1D172 => AttachedAboveRight, |
249 | 0x1DFA => BelowLeft, |
250 | 0x302A => BelowLeft, |
251 | 0x0316..=0x0319 => Below, |
252 | 0x031C..=0x0320 => Below, |
253 | 0x0323..=0x0326 => Below, |
254 | 0x0329..=0x0333 => Below, |
255 | 0x0339..=0x033C => Below, |
256 | 0x0347..=0x0349 => Below, |
257 | 0x034D..=0x034E => Below, |
258 | 0x0353..=0x0356 => Below, |
259 | 0x0359..=0x035A => Below, |
260 | 0x0591 => Below, |
261 | 0x0596 => Below, |
262 | 0x059B => Below, |
263 | 0x05A2..=0x05A7 => Below, |
264 | 0x05AA => Below, |
265 | 0x05C5 => Below, |
266 | 0x0655..=0x0656 => Below, |
267 | 0x065C => Below, |
268 | 0x065F => Below, |
269 | 0x06E3 => Below, |
270 | 0x06EA => Below, |
271 | 0x06ED => Below, |
272 | 0x0731 => Below, |
273 | 0x0734 => Below, |
274 | 0x0737..=0x0739 => Below, |
275 | 0x073B..=0x073C => Below, |
276 | 0x073E => Below, |
277 | 0x0742 => Below, |
278 | 0x0744 => Below, |
279 | 0x0746 => Below, |
280 | 0x0748 => Below, |
281 | 0x07F2 => Below, |
282 | 0x07FD => Below, |
283 | 0x0859..=0x085B => Below, |
284 | 0x0899..=0x089B => Below, |
285 | 0x08CF..=0x08D3 => Below, |
286 | 0x08E3 => Below, |
287 | 0x08E6 => Below, |
288 | 0x08E9 => Below, |
289 | 0x08ED..=0x08EF => Below, |
290 | 0x08F6 => Below, |
291 | 0x08F9..=0x08FA => Below, |
292 | 0x0952 => Below, |
293 | 0x0F18..=0x0F19 => Below, |
294 | 0x0F35 => Below, |
295 | 0x0F37 => Below, |
296 | 0x0FC6 => Below, |
297 | 0x108D => Below, |
298 | 0x193B => Below, |
299 | 0x1A18 => Below, |
300 | 0x1A7F => Below, |
301 | 0x1AB5..=0x1ABA => Below, |
302 | 0x1ABD => Below, |
303 | 0x1ABF..=0x1AC0 => Below, |
304 | 0x1AC3..=0x1AC4 => Below, |
305 | 0x1ACA => Below, |
306 | 0x1B6C => Below, |
307 | 0x1CD5..=0x1CD9 => Below, |
308 | 0x1CDC..=0x1CDF => Below, |
309 | 0x1CED => Below, |
310 | 0x1DC2 => Below, |
311 | 0x1DCA => Below, |
312 | 0x1DCF => Below, |
313 | 0x1DF9 => Below, |
314 | 0x1DFD => Below, |
315 | 0x1DFF => Below, |
316 | 0x20E8 => Below, |
317 | 0x20EC..=0x20EF => Below, |
318 | 0xA92B..=0xA92D => Below, |
319 | 0xAAB4 => Below, |
320 | 0xFE27..=0xFE2D => Below, |
321 | 0x101FD => Below, |
322 | 0x102E0 => Below, |
323 | 0x10A0D => Below, |
324 | 0x10A3A => Below, |
325 | 0x10AE6 => Below, |
326 | 0x10F46..=0x10F47 => Below, |
327 | 0x10F4B => Below, |
328 | 0x10F4D..=0x10F50 => Below, |
329 | 0x10F83 => Below, |
330 | 0x10F85 => Below, |
331 | 0x1D17B..=0x1D182 => Below, |
332 | 0x1D18A..=0x1D18B => Below, |
333 | 0x1E8D0..=0x1E8D6 => Below, |
334 | 0x059A => BelowRight, |
335 | 0x05AD => BelowRight, |
336 | 0x1939 => BelowRight, |
337 | 0x302D => BelowRight, |
338 | 0x302E..=0x302F => Left, |
339 | 0x1D16D => Right, |
340 | 0x05AE => AboveLeft, |
341 | 0x18A9 => AboveLeft, |
342 | 0x1DF7..=0x1DF8 => AboveLeft, |
343 | 0x302B => AboveLeft, |
344 | 0x0300..=0x0314 => Above, |
345 | 0x033D..=0x0344 => Above, |
346 | 0x0346 => Above, |
347 | 0x034A..=0x034C => Above, |
348 | 0x0350..=0x0352 => Above, |
349 | 0x0357 => Above, |
350 | 0x035B => Above, |
351 | 0x0363..=0x036F => Above, |
352 | 0x0483..=0x0487 => Above, |
353 | 0x0592..=0x0595 => Above, |
354 | 0x0597..=0x0599 => Above, |
355 | 0x059C..=0x05A1 => Above, |
356 | 0x05A8..=0x05A9 => Above, |
357 | 0x05AB..=0x05AC => Above, |
358 | 0x05AF => Above, |
359 | 0x05C4 => Above, |
360 | 0x0610..=0x0617 => Above, |
361 | 0x0653..=0x0654 => Above, |
362 | 0x0657..=0x065B => Above, |
363 | 0x065D..=0x065E => Above, |
364 | 0x06D6..=0x06DC => Above, |
365 | 0x06DF..=0x06E2 => Above, |
366 | 0x06E4 => Above, |
367 | 0x06E7..=0x06E8 => Above, |
368 | 0x06EB..=0x06EC => Above, |
369 | 0x0730 => Above, |
370 | 0x0732..=0x0733 => Above, |
371 | 0x0735..=0x0736 => Above, |
372 | 0x073A => Above, |
373 | 0x073D => Above, |
374 | 0x073F..=0x0741 => Above, |
375 | 0x0743 => Above, |
376 | 0x0745 => Above, |
377 | 0x0747 => Above, |
378 | 0x0749..=0x074A => Above, |
379 | 0x07EB..=0x07F1 => Above, |
380 | 0x07F3 => Above, |
381 | 0x0816..=0x0819 => Above, |
382 | 0x081B..=0x0823 => Above, |
383 | 0x0825..=0x0827 => Above, |
384 | 0x0829..=0x082D => Above, |
385 | 0x0898 => Above, |
386 | 0x089C..=0x089F => Above, |
387 | 0x08CA..=0x08CE => Above, |
388 | 0x08D4..=0x08E1 => Above, |
389 | 0x08E4..=0x08E5 => Above, |
390 | 0x08E7..=0x08E8 => Above, |
391 | 0x08EA..=0x08EC => Above, |
392 | 0x08F3..=0x08F5 => Above, |
393 | 0x08F7..=0x08F8 => Above, |
394 | 0x08FB..=0x08FF => Above, |
395 | 0x0951 => Above, |
396 | 0x0953..=0x0954 => Above, |
397 | 0x09FE => Above, |
398 | 0x0F82..=0x0F83 => Above, |
399 | 0x0F86..=0x0F87 => Above, |
400 | 0x135D..=0x135F => Above, |
401 | 0x17DD => Above, |
402 | 0x193A => Above, |
403 | 0x1A17 => Above, |
404 | 0x1A75..=0x1A7C => Above, |
405 | 0x1AB0..=0x1AB4 => Above, |
406 | 0x1ABB..=0x1ABC => Above, |
407 | 0x1AC1..=0x1AC2 => Above, |
408 | 0x1AC5..=0x1AC9 => Above, |
409 | 0x1ACB..=0x1ACE => Above, |
410 | 0x1B6B => Above, |
411 | 0x1B6D..=0x1B73 => Above, |
412 | 0x1CD0..=0x1CD2 => Above, |
413 | 0x1CDA..=0x1CDB => Above, |
414 | 0x1CE0 => Above, |
415 | 0x1CF4 => Above, |
416 | 0x1CF8..=0x1CF9 => Above, |
417 | 0x1DC0..=0x1DC1 => Above, |
418 | 0x1DC3..=0x1DC9 => Above, |
419 | 0x1DCB..=0x1DCC => Above, |
420 | 0x1DD1..=0x1DF5 => Above, |
421 | 0x1DFB => Above, |
422 | 0x1DFE => Above, |
423 | 0x20D0..=0x20D1 => Above, |
424 | 0x20D4..=0x20D7 => Above, |
425 | 0x20DB..=0x20DC => Above, |
426 | 0x20E1 => Above, |
427 | 0x20E7 => Above, |
428 | 0x20E9 => Above, |
429 | 0x20F0 => Above, |
430 | 0x2CEF..=0x2CF1 => Above, |
431 | 0x2DE0..=0x2DFF => Above, |
432 | 0xA66F => Above, |
433 | 0xA674..=0xA67D => Above, |
434 | 0xA69E..=0xA69F => Above, |
435 | 0xA6F0..=0xA6F1 => Above, |
436 | 0xA8E0..=0xA8F1 => Above, |
437 | 0xAAB0 => Above, |
438 | 0xAAB2..=0xAAB3 => Above, |
439 | 0xAAB7..=0xAAB8 => Above, |
440 | 0xAABE..=0xAABF => Above, |
441 | 0xAAC1 => Above, |
442 | 0xFE20..=0xFE26 => Above, |
443 | 0xFE2E..=0xFE2F => Above, |
444 | 0x10376..=0x1037A => Above, |
445 | 0x10A0F => Above, |
446 | 0x10A38 => Above, |
447 | 0x10AE5 => Above, |
448 | 0x10D24..=0x10D27 => Above, |
449 | 0x10EAB..=0x10EAC => Above, |
450 | 0x10F48..=0x10F4A => Above, |
451 | 0x10F4C => Above, |
452 | 0x10F82 => Above, |
453 | 0x10F84 => Above, |
454 | 0x11100..=0x11102 => Above, |
455 | 0x11366..=0x1136C => Above, |
456 | 0x11370..=0x11374 => Above, |
457 | 0x1145E => Above, |
458 | 0x16B30..=0x16B36 => Above, |
459 | 0x1D185..=0x1D189 => Above, |
460 | 0x1D1AA..=0x1D1AD => Above, |
461 | 0x1D242..=0x1D244 => Above, |
462 | 0x1E000..=0x1E006 => Above, |
463 | 0x1E008..=0x1E018 => Above, |
464 | 0x1E01B..=0x1E021 => Above, |
465 | 0x1E023..=0x1E024 => Above, |
466 | 0x1E026..=0x1E02A => Above, |
467 | 0x1E130..=0x1E136 => Above, |
468 | 0x1E2AE => Above, |
469 | 0x1E2EC..=0x1E2EF => Above, |
470 | 0x1E944..=0x1E949 => Above, |
471 | 0x0315 => AboveRight, |
472 | 0x031A => AboveRight, |
473 | 0x0358 => AboveRight, |
474 | 0x1DF6 => AboveRight, |
475 | 0x302C => AboveRight, |
476 | 0x035C => DoubleBelow, |
477 | 0x035F => DoubleBelow, |
478 | 0x0362 => DoubleBelow, |
479 | 0x1DFC => DoubleBelow, |
480 | 0x035D..=0x035E => DoubleAbove, |
481 | 0x0360..=0x0361 => DoubleAbove, |
482 | 0x1DCD => DoubleAbove, |
483 | 0x0345 => IotaSubscript, |
484 | _ => NotReordered, |
485 | } |
486 | } |
487 | |