/*!

This library implements
[Unicode Canonical Combining Class](https://unicode.org/reports/tr44/#Canonical_Combining_Class_Values) detection:
given a `char`, it returns the combining class that the Unicode Character Database assigns to it.

```rust
use unicode_ccc::*;

assert_eq!(get_canonical_combining_class('A'), CanonicalCombiningClass::NotReordered);
assert_eq!(get_canonical_combining_class('\u{0A3C}'), CanonicalCombiningClass::Nukta);
assert_eq!(get_canonical_combining_class('\u{18A9}'), CanonicalCombiningClass::AboveLeft);
```

*/
15
16#![no_std]
17
18#![forbid(unsafe_code)]
19
/// The Unicode version that this crate's combining-class data corresponds to,
/// written as `(major, minor, patch)`.
pub const UNICODE_VERSION: (u8, u8, u8) = (14, 0, 0);
22
/// Character Canonical Combining Class.
///
/// Each variant's discriminant is the numeric `ccc` value of the class, so
/// `class as u8` yields that number. Variants are declared in ascending `ccc`
/// order, which makes the derived ordering (`<`, `sort`, ...) agree with the
/// numeric ordering of the classes.
///
/// Named variants follow the property value names listed in
/// [UAX #44](https://unicode.org/reports/tr44/#Canonical_Combining_Class_Values);
/// `CCC<n>` variants are fixed-position classes that have no descriptive name.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub enum CanonicalCombiningClass {
    /// `ccc=0`: spacing and enclosing marks; also many vowel and consonant
    /// signs, even if nonspacing.
    NotReordered = 0,
    /// `ccc=1`: marks which overlay a base letter or symbol.
    Overlay = 1,
    /// `ccc=6`: diacritic reading marks for CJK unified ideographs.
    HanReading = 6,
    /// `ccc=7`: diacritic nukta marks in Brahmi-derived scripts.
    Nukta = 7,
    /// `ccc=8`: Hiragana/Katakana voicing marks.
    KanaVoicing = 8,
    /// `ccc=9`: viramas.
    Virama = 9,

    // Fixed-position classes: Hebrew.
    CCC10 = 10,
    CCC11 = 11,
    CCC12 = 12,
    CCC13 = 13,
    CCC14 = 14,
    CCC15 = 15,
    CCC16 = 16,
    CCC17 = 17,
    CCC18 = 18,
    CCC19 = 19,
    CCC20 = 20,
    CCC21 = 21,
    CCC22 = 22,
    CCC23 = 23,
    CCC24 = 24,
    CCC25 = 25,
    CCC26 = 26,

    // Fixed-position classes: Arabic.
    CCC27 = 27,
    CCC28 = 28,
    CCC29 = 29,
    CCC30 = 30,
    CCC31 = 31,
    CCC32 = 32,
    CCC33 = 33,
    CCC34 = 34,
    CCC35 = 35,

    // Fixed-position classes: Syriac.
    CCC36 = 36,

    // Fixed-position classes: Telugu.
    CCC84 = 84,
    CCC91 = 91,

    // Fixed-position classes: Thai.
    CCC103 = 103,
    CCC107 = 107,

    // Fixed-position classes: Lao.
    CCC118 = 118,
    CCC122 = 122,

    // Fixed-position classes: Tibetan.
    CCC129 = 129,
    CCC130 = 130,
    CCC132 = 132,

    /// `ccc=200`: marks attached at the bottom left.
    AttachedBelowLeft = 200,
    /// `ccc=202`: marks attached directly below.
    AttachedBelow = 202,
    /// `ccc=214`: marks attached directly above.
    AttachedAbove = 214,
    /// `ccc=216`: marks attached at the top right.
    AttachedAboveRight = 216,
    /// `ccc=218`: distinct marks at the bottom left.
    BelowLeft = 218,
    /// `ccc=220`: distinct marks directly below.
    Below = 220,
    /// `ccc=222`: distinct marks at the bottom right.
    BelowRight = 222,
    /// `ccc=224`: distinct marks to the left.
    Left = 224,
    /// `ccc=226`: distinct marks to the right.
    Right = 226,
    /// `ccc=228`: distinct marks at the top left.
    AboveLeft = 228,
    /// `ccc=230`: distinct marks directly above.
    Above = 230,
    /// `ccc=232`: distinct marks at the top right.
    AboveRight = 232,
    /// `ccc=233`: distinct marks subtending two bases.
    DoubleBelow = 233,
    /// `ccc=234`: distinct marks extending above two bases.
    DoubleAbove = 234,
    /// `ccc=240`: Greek iota subscript only.
    IotaSubscript = 240,
}
91
92/// Returns a Canonical Combining Class of a character.
93///
94/// Based on <https://www.unicode.org/Public/14.0.0/ucd/extracted/DerivedCombiningClass.txt>.
95pub fn get_canonical_combining_class(c: char) -> CanonicalCombiningClass {
96 use CanonicalCombiningClass::*;
97
98 match c as u32 {
99 0x0334..=0x0338 => Overlay,
100 0x1CD4 => Overlay,
101 0x1CE2..=0x1CE8 => Overlay,
102 0x20D2..=0x20D3 => Overlay,
103 0x20D8..=0x20DA => Overlay,
104 0x20E5..=0x20E6 => Overlay,
105 0x20EA..=0x20EB => Overlay,
106 0x10A39 => Overlay,
107 0x16AF0..=0x16AF4 => Overlay,
108 0x1BC9E => Overlay,
109 0x1D167..=0x1D169 => Overlay,
110 0x16FF0..=0x16FF1 => HanReading,
111 0x093C => Nukta,
112 0x09BC => Nukta,
113 0x0A3C => Nukta,
114 0x0ABC => Nukta,
115 0x0B3C => Nukta,
116 0x0C3C => Nukta,
117 0x0CBC => Nukta,
118 0x1037 => Nukta,
119 0x1B34 => Nukta,
120 0x1BE6 => Nukta,
121 0x1C37 => Nukta,
122 0xA9B3 => Nukta,
123 0x110BA => Nukta,
124 0x11173 => Nukta,
125 0x111CA => Nukta,
126 0x11236 => Nukta,
127 0x112E9 => Nukta,
128 0x1133B..=0x1133C => Nukta,
129 0x11446 => Nukta,
130 0x114C3 => Nukta,
131 0x115C0 => Nukta,
132 0x116B7 => Nukta,
133 0x1183A => Nukta,
134 0x11943 => Nukta,
135 0x11D42 => Nukta,
136 0x1E94A => Nukta,
137 0x3099..=0x309A => KanaVoicing,
138 0x094D => Virama,
139 0x09CD => Virama,
140 0x0A4D => Virama,
141 0x0ACD => Virama,
142 0x0B4D => Virama,
143 0x0BCD => Virama,
144 0x0C4D => Virama,
145 0x0CCD => Virama,
146 0x0D3B..=0x0D3C => Virama,
147 0x0D4D => Virama,
148 0x0DCA => Virama,
149 0x0E3A => Virama,
150 0x0EBA => Virama,
151 0x0F84 => Virama,
152 0x1039..=0x103A => Virama,
153 0x1714 => Virama,
154 0x1715 => Virama,
155 0x1734 => Virama,
156 0x17D2 => Virama,
157 0x1A60 => Virama,
158 0x1B44 => Virama,
159 0x1BAA => Virama,
160 0x1BAB => Virama,
161 0x1BF2..=0x1BF3 => Virama,
162 0x2D7F => Virama,
163 0xA806 => Virama,
164 0xA82C => Virama,
165 0xA8C4 => Virama,
166 0xA953 => Virama,
167 0xA9C0 => Virama,
168 0xAAF6 => Virama,
169 0xABED => Virama,
170 0x10A3F => Virama,
171 0x11046 => Virama,
172 0x11070 => Virama,
173 0x1107F => Virama,
174 0x110B9 => Virama,
175 0x11133..=0x11134 => Virama,
176 0x111C0 => Virama,
177 0x11235 => Virama,
178 0x112EA => Virama,
179 0x1134D => Virama,
180 0x11442 => Virama,
181 0x114C2 => Virama,
182 0x115BF => Virama,
183 0x1163F => Virama,
184 0x116B6 => Virama,
185 0x1172B => Virama,
186 0x11839 => Virama,
187 0x1193D => Virama,
188 0x1193E => Virama,
189 0x119E0 => Virama,
190 0x11A34 => Virama,
191 0x11A47 => Virama,
192 0x11A99 => Virama,
193 0x11C3F => Virama,
194 0x11D44..=0x11D45 => Virama,
195 0x11D97 => Virama,
196 0x05B0 => CCC10,
197 0x05B1 => CCC11,
198 0x05B2 => CCC12,
199 0x05B3 => CCC13,
200 0x05B4 => CCC14,
201 0x05B5 => CCC15,
202 0x05B6 => CCC16,
203 0x05B7 => CCC17,
204 0x05B8 => CCC18,
205 0x05C7 => CCC18,
206 0x05B9..=0x05BA => CCC19,
207 0x05BB => CCC20,
208 0x05BC => CCC21,
209 0x05BD => CCC22,
210 0x05BF => CCC23,
211 0x05C1 => CCC24,
212 0x05C2 => CCC25,
213 0xFB1E => CCC26,
214 0x064B => CCC27,
215 0x08F0 => CCC27,
216 0x064C => CCC28,
217 0x08F1 => CCC28,
218 0x064D => CCC29,
219 0x08F2 => CCC29,
220 0x0618 => CCC30,
221 0x064E => CCC30,
222 0x0619 => CCC31,
223 0x064F => CCC31,
224 0x061A => CCC32,
225 0x0650 => CCC32,
226 0x0651 => CCC33,
227 0x0652 => CCC34,
228 0x0670 => CCC35,
229 0x0711 => CCC36,
230 0x0C55 => CCC84,
231 0x0C56 => CCC91,
232 0x0E38..=0x0E39 => CCC103,
233 0x0E48..=0x0E4B => CCC107,
234 0x0EB8..=0x0EB9 => CCC118,
235 0x0EC8..=0x0ECB => CCC122,
236 0x0F71 => CCC129,
237 0x0F72 => CCC130,
238 0x0F7A..=0x0F7D => CCC130,
239 0x0F80 => CCC130,
240 0x0F74 => CCC132,
241 0x0321..=0x0322 => AttachedBelow,
242 0x0327..=0x0328 => AttachedBelow,
243 0x1DD0 => AttachedBelow,
244 0x1DCE => AttachedAbove,
245 0x031B => AttachedAboveRight,
246 0x0F39 => AttachedAboveRight,
247 0x1D165..=0x1D166 => AttachedAboveRight,
248 0x1D16E..=0x1D172 => AttachedAboveRight,
249 0x1DFA => BelowLeft,
250 0x302A => BelowLeft,
251 0x0316..=0x0319 => Below,
252 0x031C..=0x0320 => Below,
253 0x0323..=0x0326 => Below,
254 0x0329..=0x0333 => Below,
255 0x0339..=0x033C => Below,
256 0x0347..=0x0349 => Below,
257 0x034D..=0x034E => Below,
258 0x0353..=0x0356 => Below,
259 0x0359..=0x035A => Below,
260 0x0591 => Below,
261 0x0596 => Below,
262 0x059B => Below,
263 0x05A2..=0x05A7 => Below,
264 0x05AA => Below,
265 0x05C5 => Below,
266 0x0655..=0x0656 => Below,
267 0x065C => Below,
268 0x065F => Below,
269 0x06E3 => Below,
270 0x06EA => Below,
271 0x06ED => Below,
272 0x0731 => Below,
273 0x0734 => Below,
274 0x0737..=0x0739 => Below,
275 0x073B..=0x073C => Below,
276 0x073E => Below,
277 0x0742 => Below,
278 0x0744 => Below,
279 0x0746 => Below,
280 0x0748 => Below,
281 0x07F2 => Below,
282 0x07FD => Below,
283 0x0859..=0x085B => Below,
284 0x0899..=0x089B => Below,
285 0x08CF..=0x08D3 => Below,
286 0x08E3 => Below,
287 0x08E6 => Below,
288 0x08E9 => Below,
289 0x08ED..=0x08EF => Below,
290 0x08F6 => Below,
291 0x08F9..=0x08FA => Below,
292 0x0952 => Below,
293 0x0F18..=0x0F19 => Below,
294 0x0F35 => Below,
295 0x0F37 => Below,
296 0x0FC6 => Below,
297 0x108D => Below,
298 0x193B => Below,
299 0x1A18 => Below,
300 0x1A7F => Below,
301 0x1AB5..=0x1ABA => Below,
302 0x1ABD => Below,
303 0x1ABF..=0x1AC0 => Below,
304 0x1AC3..=0x1AC4 => Below,
305 0x1ACA => Below,
306 0x1B6C => Below,
307 0x1CD5..=0x1CD9 => Below,
308 0x1CDC..=0x1CDF => Below,
309 0x1CED => Below,
310 0x1DC2 => Below,
311 0x1DCA => Below,
312 0x1DCF => Below,
313 0x1DF9 => Below,
314 0x1DFD => Below,
315 0x1DFF => Below,
316 0x20E8 => Below,
317 0x20EC..=0x20EF => Below,
318 0xA92B..=0xA92D => Below,
319 0xAAB4 => Below,
320 0xFE27..=0xFE2D => Below,
321 0x101FD => Below,
322 0x102E0 => Below,
323 0x10A0D => Below,
324 0x10A3A => Below,
325 0x10AE6 => Below,
326 0x10F46..=0x10F47 => Below,
327 0x10F4B => Below,
328 0x10F4D..=0x10F50 => Below,
329 0x10F83 => Below,
330 0x10F85 => Below,
331 0x1D17B..=0x1D182 => Below,
332 0x1D18A..=0x1D18B => Below,
333 0x1E8D0..=0x1E8D6 => Below,
334 0x059A => BelowRight,
335 0x05AD => BelowRight,
336 0x1939 => BelowRight,
337 0x302D => BelowRight,
338 0x302E..=0x302F => Left,
339 0x1D16D => Right,
340 0x05AE => AboveLeft,
341 0x18A9 => AboveLeft,
342 0x1DF7..=0x1DF8 => AboveLeft,
343 0x302B => AboveLeft,
344 0x0300..=0x0314 => Above,
345 0x033D..=0x0344 => Above,
346 0x0346 => Above,
347 0x034A..=0x034C => Above,
348 0x0350..=0x0352 => Above,
349 0x0357 => Above,
350 0x035B => Above,
351 0x0363..=0x036F => Above,
352 0x0483..=0x0487 => Above,
353 0x0592..=0x0595 => Above,
354 0x0597..=0x0599 => Above,
355 0x059C..=0x05A1 => Above,
356 0x05A8..=0x05A9 => Above,
357 0x05AB..=0x05AC => Above,
358 0x05AF => Above,
359 0x05C4 => Above,
360 0x0610..=0x0617 => Above,
361 0x0653..=0x0654 => Above,
362 0x0657..=0x065B => Above,
363 0x065D..=0x065E => Above,
364 0x06D6..=0x06DC => Above,
365 0x06DF..=0x06E2 => Above,
366 0x06E4 => Above,
367 0x06E7..=0x06E8 => Above,
368 0x06EB..=0x06EC => Above,
369 0x0730 => Above,
370 0x0732..=0x0733 => Above,
371 0x0735..=0x0736 => Above,
372 0x073A => Above,
373 0x073D => Above,
374 0x073F..=0x0741 => Above,
375 0x0743 => Above,
376 0x0745 => Above,
377 0x0747 => Above,
378 0x0749..=0x074A => Above,
379 0x07EB..=0x07F1 => Above,
380 0x07F3 => Above,
381 0x0816..=0x0819 => Above,
382 0x081B..=0x0823 => Above,
383 0x0825..=0x0827 => Above,
384 0x0829..=0x082D => Above,
385 0x0898 => Above,
386 0x089C..=0x089F => Above,
387 0x08CA..=0x08CE => Above,
388 0x08D4..=0x08E1 => Above,
389 0x08E4..=0x08E5 => Above,
390 0x08E7..=0x08E8 => Above,
391 0x08EA..=0x08EC => Above,
392 0x08F3..=0x08F5 => Above,
393 0x08F7..=0x08F8 => Above,
394 0x08FB..=0x08FF => Above,
395 0x0951 => Above,
396 0x0953..=0x0954 => Above,
397 0x09FE => Above,
398 0x0F82..=0x0F83 => Above,
399 0x0F86..=0x0F87 => Above,
400 0x135D..=0x135F => Above,
401 0x17DD => Above,
402 0x193A => Above,
403 0x1A17 => Above,
404 0x1A75..=0x1A7C => Above,
405 0x1AB0..=0x1AB4 => Above,
406 0x1ABB..=0x1ABC => Above,
407 0x1AC1..=0x1AC2 => Above,
408 0x1AC5..=0x1AC9 => Above,
409 0x1ACB..=0x1ACE => Above,
410 0x1B6B => Above,
411 0x1B6D..=0x1B73 => Above,
412 0x1CD0..=0x1CD2 => Above,
413 0x1CDA..=0x1CDB => Above,
414 0x1CE0 => Above,
415 0x1CF4 => Above,
416 0x1CF8..=0x1CF9 => Above,
417 0x1DC0..=0x1DC1 => Above,
418 0x1DC3..=0x1DC9 => Above,
419 0x1DCB..=0x1DCC => Above,
420 0x1DD1..=0x1DF5 => Above,
421 0x1DFB => Above,
422 0x1DFE => Above,
423 0x20D0..=0x20D1 => Above,
424 0x20D4..=0x20D7 => Above,
425 0x20DB..=0x20DC => Above,
426 0x20E1 => Above,
427 0x20E7 => Above,
428 0x20E9 => Above,
429 0x20F0 => Above,
430 0x2CEF..=0x2CF1 => Above,
431 0x2DE0..=0x2DFF => Above,
432 0xA66F => Above,
433 0xA674..=0xA67D => Above,
434 0xA69E..=0xA69F => Above,
435 0xA6F0..=0xA6F1 => Above,
436 0xA8E0..=0xA8F1 => Above,
437 0xAAB0 => Above,
438 0xAAB2..=0xAAB3 => Above,
439 0xAAB7..=0xAAB8 => Above,
440 0xAABE..=0xAABF => Above,
441 0xAAC1 => Above,
442 0xFE20..=0xFE26 => Above,
443 0xFE2E..=0xFE2F => Above,
444 0x10376..=0x1037A => Above,
445 0x10A0F => Above,
446 0x10A38 => Above,
447 0x10AE5 => Above,
448 0x10D24..=0x10D27 => Above,
449 0x10EAB..=0x10EAC => Above,
450 0x10F48..=0x10F4A => Above,
451 0x10F4C => Above,
452 0x10F82 => Above,
453 0x10F84 => Above,
454 0x11100..=0x11102 => Above,
455 0x11366..=0x1136C => Above,
456 0x11370..=0x11374 => Above,
457 0x1145E => Above,
458 0x16B30..=0x16B36 => Above,
459 0x1D185..=0x1D189 => Above,
460 0x1D1AA..=0x1D1AD => Above,
461 0x1D242..=0x1D244 => Above,
462 0x1E000..=0x1E006 => Above,
463 0x1E008..=0x1E018 => Above,
464 0x1E01B..=0x1E021 => Above,
465 0x1E023..=0x1E024 => Above,
466 0x1E026..=0x1E02A => Above,
467 0x1E130..=0x1E136 => Above,
468 0x1E2AE => Above,
469 0x1E2EC..=0x1E2EF => Above,
470 0x1E944..=0x1E949 => Above,
471 0x0315 => AboveRight,
472 0x031A => AboveRight,
473 0x0358 => AboveRight,
474 0x1DF6 => AboveRight,
475 0x302C => AboveRight,
476 0x035C => DoubleBelow,
477 0x035F => DoubleBelow,
478 0x0362 => DoubleBelow,
479 0x1DFC => DoubleBelow,
480 0x035D..=0x035E => DoubleAbove,
481 0x0360..=0x0361 => DoubleAbove,
482 0x1DCD => DoubleAbove,
483 0x0345 => IotaSubscript,
484 _ => NotReordered,
485 }
486}
487