| 1 | /*! |
| 2 | |
| 3 | This library implements |
| 4 | [Unicode Canonical Combining Class](https://unicode.org/reports/tr44/#Canonical_Combining_Class_Values) detection. |
| 5 | |
| 6 | ```rust |
| 7 | use unicode_ccc::*; |
| 8 | |
| 9 | assert_eq!(get_canonical_combining_class('A'), CanonicalCombiningClass::NotReordered); |
| 10 | assert_eq!(get_canonical_combining_class('\u{0A3C}'), CanonicalCombiningClass::Nukta); |
| 11 | assert_eq!(get_canonical_combining_class('\u{18A9}'), CanonicalCombiningClass::AboveLeft); |
| 12 | ``` |
| 13 | |
| 14 | */ |
| 15 | |
| 16 | #![no_std ] |
| 17 | |
| 18 | #![forbid (unsafe_code)] |
| 19 | |
/// The Unicode version, expressed as a `(major, minor, update)` triple.
pub const UNICODE_VERSION: (u8, u8, u8) = (
    16, // major
    0,  // minor
    0,  // update
);
| 22 | |
/// Character Canonical Combining Class.
///
/// Each variant's discriminant is the numeric `Canonical_Combining_Class`
/// value, so `class as u8` yields the value used by the Unicode Standard.
///
/// Variants are declared in ascending numeric order, which makes the derived
/// `PartialOrd`/`Ord` ordering identical to ordering by class value.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum CanonicalCombiningClass {
    /// Spacing and enclosing marks; also many vowel and consonant signs,
    /// even if nonspacing.
    NotReordered = 0,
    /// Marks which overlay a base letter or symbol.
    Overlay = 1,
    /// Diacritic reading marks for CJK unified ideographs.
    HanReading = 6,
    /// Diacritic nukta marks in Brahmi-derived scripts.
    Nukta = 7,
    /// Hiragana/Katakana voicing marks.
    KanaVoicing = 8,
    /// Viramas.
    Virama = 9,

    // Fixed position classes follow (`CCC<n>` has the class value `n`).

    // Hebrew
    CCC10 = 10,
    CCC11 = 11,
    CCC12 = 12,
    CCC13 = 13,
    CCC14 = 14,
    CCC15 = 15,
    CCC16 = 16,
    CCC17 = 17,
    CCC18 = 18,
    CCC19 = 19,
    CCC20 = 20,
    CCC21 = 21,
    CCC22 = 22,
    CCC23 = 23,
    CCC24 = 24,
    CCC25 = 25,
    CCC26 = 26,
    // Arabic
    CCC27 = 27,
    CCC28 = 28,
    CCC29 = 29,
    CCC30 = 30,
    CCC31 = 31,
    CCC32 = 32,
    CCC33 = 33,
    CCC34 = 34,
    CCC35 = 35,
    // Syriac
    CCC36 = 36,
    // Telugu
    CCC84 = 84,
    CCC91 = 91,
    // Thai
    CCC103 = 103,
    CCC107 = 107,
    // Lao
    CCC118 = 118,
    CCC122 = 122,
    // Tibetan
    CCC129 = 129,
    CCC130 = 130,
    CCC132 = 132,

    /// Marks attached at the bottom left.
    AttachedBelowLeft = 200,
    /// Marks attached directly below.
    AttachedBelow = 202,
    /// Marks attached directly above.
    AttachedAbove = 214,
    /// Marks attached at the top right.
    AttachedAboveRight = 216,
    /// Distinct marks at the bottom left.
    BelowLeft = 218,
    /// Distinct marks directly below.
    Below = 220,
    /// Distinct marks at the bottom right.
    BelowRight = 222,
    /// Distinct marks to the left.
    Left = 224,
    /// Distinct marks to the right.
    Right = 226,
    /// Distinct marks at the top left.
    AboveLeft = 228,
    /// Distinct marks directly above.
    Above = 230,
    /// Distinct marks at the top right.
    AboveRight = 232,
    /// Distinct marks subtending two bases.
    DoubleBelow = 233,
    /// Distinct marks extending above two bases.
    DoubleAbove = 234,
    /// Greek iota subscript only.
    IotaSubscript = 240,
}
| 91 | |
| 92 | /// Returns a Canonical Combining Class of a character. |
| 93 | /// |
| 94 | /// Based on <https://www.unicode.org/Public/14.0.0/ucd/extracted/DerivedCombiningClass.txt>. |
| 95 | pub fn get_canonical_combining_class(c: char) -> CanonicalCombiningClass { |
| 96 | use CanonicalCombiningClass::*; |
| 97 | |
| 98 | match c as u32 { |
| 99 | 0x0334..=0x0338 => Overlay, |
| 100 | 0x1CD4 => Overlay, |
| 101 | 0x1CE2..=0x1CE8 => Overlay, |
| 102 | 0x20D2..=0x20D3 => Overlay, |
| 103 | 0x20D8..=0x20DA => Overlay, |
| 104 | 0x20E5..=0x20E6 => Overlay, |
| 105 | 0x20EA..=0x20EB => Overlay, |
| 106 | 0x10A39 => Overlay, |
| 107 | 0x16AF0..=0x16AF4 => Overlay, |
| 108 | 0x1BC9E => Overlay, |
| 109 | 0x1D167..=0x1D169 => Overlay, |
| 110 | 0x16FF0..=0x16FF1 => HanReading, |
| 111 | 0x093C => Nukta, |
| 112 | 0x09BC => Nukta, |
| 113 | 0x0A3C => Nukta, |
| 114 | 0x0ABC => Nukta, |
| 115 | 0x0B3C => Nukta, |
| 116 | 0x0C3C => Nukta, |
| 117 | 0x0CBC => Nukta, |
| 118 | 0x1037 => Nukta, |
| 119 | 0x1B34 => Nukta, |
| 120 | 0x1BE6 => Nukta, |
| 121 | 0x1C37 => Nukta, |
| 122 | 0xA9B3 => Nukta, |
| 123 | 0x110BA => Nukta, |
| 124 | 0x11173 => Nukta, |
| 125 | 0x111CA => Nukta, |
| 126 | 0x11236 => Nukta, |
| 127 | 0x112E9 => Nukta, |
| 128 | 0x1133B..=0x1133C => Nukta, |
| 129 | 0x11446 => Nukta, |
| 130 | 0x114C3 => Nukta, |
| 131 | 0x115C0 => Nukta, |
| 132 | 0x116B7 => Nukta, |
| 133 | 0x1183A => Nukta, |
| 134 | 0x11943 => Nukta, |
| 135 | 0x11D42 => Nukta, |
| 136 | 0x1E94A => Nukta, |
| 137 | 0x3099..=0x309A => KanaVoicing, |
| 138 | 0x094D => Virama, |
| 139 | 0x09CD => Virama, |
| 140 | 0x0A4D => Virama, |
| 141 | 0x0ACD => Virama, |
| 142 | 0x0B4D => Virama, |
| 143 | 0x0BCD => Virama, |
| 144 | 0x0C4D => Virama, |
| 145 | 0x0CCD => Virama, |
| 146 | 0x0D3B..=0x0D3C => Virama, |
| 147 | 0x0D4D => Virama, |
| 148 | 0x0DCA => Virama, |
| 149 | 0x0E3A => Virama, |
| 150 | 0x0EBA => Virama, |
| 151 | 0x0F84 => Virama, |
| 152 | 0x1039..=0x103A => Virama, |
| 153 | 0x1714 => Virama, |
| 154 | 0x1715 => Virama, |
| 155 | 0x1734 => Virama, |
| 156 | 0x17D2 => Virama, |
| 157 | 0x1A60 => Virama, |
| 158 | 0x1B44 => Virama, |
| 159 | 0x1BAA => Virama, |
| 160 | 0x1BAB => Virama, |
| 161 | 0x1BF2..=0x1BF3 => Virama, |
| 162 | 0x2D7F => Virama, |
| 163 | 0xA806 => Virama, |
| 164 | 0xA82C => Virama, |
| 165 | 0xA8C4 => Virama, |
| 166 | 0xA953 => Virama, |
| 167 | 0xA9C0 => Virama, |
| 168 | 0xAAF6 => Virama, |
| 169 | 0xABED => Virama, |
| 170 | 0x10A3F => Virama, |
| 171 | 0x11046 => Virama, |
| 172 | 0x11070 => Virama, |
| 173 | 0x1107F => Virama, |
| 174 | 0x110B9 => Virama, |
| 175 | 0x11133..=0x11134 => Virama, |
| 176 | 0x111C0 => Virama, |
| 177 | 0x11235 => Virama, |
| 178 | 0x112EA => Virama, |
| 179 | 0x1134D => Virama, |
| 180 | 0x113CE => Virama, |
| 181 | 0x113CF => Virama, |
| 182 | 0x113D0 => Virama, |
| 183 | 0x11442 => Virama, |
| 184 | 0x114C2 => Virama, |
| 185 | 0x115BF => Virama, |
| 186 | 0x1163F => Virama, |
| 187 | 0x116B6 => Virama, |
| 188 | 0x1172B => Virama, |
| 189 | 0x11839 => Virama, |
| 190 | 0x1193D => Virama, |
| 191 | 0x1193E => Virama, |
| 192 | 0x119E0 => Virama, |
| 193 | 0x11A34 => Virama, |
| 194 | 0x11A47 => Virama, |
| 195 | 0x11A99 => Virama, |
| 196 | 0x11C3F => Virama, |
| 197 | 0x11D44..=0x11D45 => Virama, |
| 198 | 0x11D97 => Virama, |
| 199 | 0x11F41 => Virama, |
| 200 | 0x11F42 => Virama, |
| 201 | 0x1612F => Virama, |
| 202 | 0x05B0 => CCC10, |
| 203 | 0x05B1 => CCC11, |
| 204 | 0x05B2 => CCC12, |
| 205 | 0x05B3 => CCC13, |
| 206 | 0x05B4 => CCC14, |
| 207 | 0x05B5 => CCC15, |
| 208 | 0x05B6 => CCC16, |
| 209 | 0x05B7 => CCC17, |
| 210 | 0x05B8 => CCC18, |
| 211 | 0x05C7 => CCC18, |
| 212 | 0x05B9..=0x05BA => CCC19, |
| 213 | 0x05BB => CCC20, |
| 214 | 0x05BC => CCC21, |
| 215 | 0x05BD => CCC22, |
| 216 | 0x05BF => CCC23, |
| 217 | 0x05C1 => CCC24, |
| 218 | 0x05C2 => CCC25, |
| 219 | 0xFB1E => CCC26, |
| 220 | 0x064B => CCC27, |
| 221 | 0x08F0 => CCC27, |
| 222 | 0x064C => CCC28, |
| 223 | 0x08F1 => CCC28, |
| 224 | 0x064D => CCC29, |
| 225 | 0x08F2 => CCC29, |
| 226 | 0x0618 => CCC30, |
| 227 | 0x064E => CCC30, |
| 228 | 0x0619 => CCC31, |
| 229 | 0x064F => CCC31, |
| 230 | 0x061A => CCC32, |
| 231 | 0x0650 => CCC32, |
| 232 | 0x0651 => CCC33, |
| 233 | 0x0652 => CCC34, |
| 234 | 0x0670 => CCC35, |
| 235 | 0x0711 => CCC36, |
| 236 | 0x0C55 => CCC84, |
| 237 | 0x0C56 => CCC91, |
| 238 | 0x0E38..=0x0E39 => CCC103, |
| 239 | 0x0E48..=0x0E4B => CCC107, |
| 240 | 0x0EB8..=0x0EB9 => CCC118, |
| 241 | 0x0EC8..=0x0ECB => CCC122, |
| 242 | 0x0F71 => CCC129, |
| 243 | 0x0F72 => CCC130, |
| 244 | 0x0F7A..=0x0F7D => CCC130, |
| 245 | 0x0F80 => CCC130, |
| 246 | 0x0F74 => CCC132, |
| 247 | 0x0321..=0x0322 => AttachedBelow, |
| 248 | 0x0327..=0x0328 => AttachedBelow, |
| 249 | 0x1DD0 => AttachedBelow, |
| 250 | 0x1DCE => AttachedAbove, |
| 251 | 0x031B => AttachedAboveRight, |
| 252 | 0x0F39 => AttachedAboveRight, |
| 253 | 0x1D165..=0x1D166 => AttachedAboveRight, |
| 254 | 0x1D16E..=0x1D172 => AttachedAboveRight, |
| 255 | 0x1DFA => BelowLeft, |
| 256 | 0x302A => BelowLeft, |
| 257 | 0x0316..=0x0319 => Below, |
| 258 | 0x031C..=0x0320 => Below, |
| 259 | 0x0323..=0x0326 => Below, |
| 260 | 0x0329..=0x0333 => Below, |
| 261 | 0x0339..=0x033C => Below, |
| 262 | 0x0347..=0x0349 => Below, |
| 263 | 0x034D..=0x034E => Below, |
| 264 | 0x0353..=0x0356 => Below, |
| 265 | 0x0359..=0x035A => Below, |
| 266 | 0x0591 => Below, |
| 267 | 0x0596 => Below, |
| 268 | 0x059B => Below, |
| 269 | 0x05A2..=0x05A7 => Below, |
| 270 | 0x05AA => Below, |
| 271 | 0x05C5 => Below, |
| 272 | 0x0655..=0x0656 => Below, |
| 273 | 0x065C => Below, |
| 274 | 0x065F => Below, |
| 275 | 0x06E3 => Below, |
| 276 | 0x06EA => Below, |
| 277 | 0x06ED => Below, |
| 278 | 0x0731 => Below, |
| 279 | 0x0734 => Below, |
| 280 | 0x0737..=0x0739 => Below, |
| 281 | 0x073B..=0x073C => Below, |
| 282 | 0x073E => Below, |
| 283 | 0x0742 => Below, |
| 284 | 0x0744 => Below, |
| 285 | 0x0746 => Below, |
| 286 | 0x0748 => Below, |
| 287 | 0x07F2 => Below, |
| 288 | 0x07FD => Below, |
| 289 | 0x0859..=0x085B => Below, |
| 290 | 0x0899..=0x089B => Below, |
| 291 | 0x08CF..=0x08D3 => Below, |
| 292 | 0x08E3 => Below, |
| 293 | 0x08E6 => Below, |
| 294 | 0x08E9 => Below, |
| 295 | 0x08ED..=0x08EF => Below, |
| 296 | 0x08F6 => Below, |
| 297 | 0x08F9..=0x08FA => Below, |
| 298 | 0x0952 => Below, |
| 299 | 0x0F18..=0x0F19 => Below, |
| 300 | 0x0F35 => Below, |
| 301 | 0x0F37 => Below, |
| 302 | 0x0FC6 => Below, |
| 303 | 0x108D => Below, |
| 304 | 0x193B => Below, |
| 305 | 0x1A18 => Below, |
| 306 | 0x1A7F => Below, |
| 307 | 0x1AB5..=0x1ABA => Below, |
| 308 | 0x1ABD => Below, |
| 309 | 0x1ABF..=0x1AC0 => Below, |
| 310 | 0x1AC3..=0x1AC4 => Below, |
| 311 | 0x1ACA => Below, |
| 312 | 0x1B6C => Below, |
| 313 | 0x1CD5..=0x1CD9 => Below, |
| 314 | 0x1CDC..=0x1CDF => Below, |
| 315 | 0x1CED => Below, |
| 316 | 0x1DC2 => Below, |
| 317 | 0x1DCA => Below, |
| 318 | 0x1DCF => Below, |
| 319 | 0x1DF9 => Below, |
| 320 | 0x1DFD => Below, |
| 321 | 0x1DFF => Below, |
| 322 | 0x20E8 => Below, |
| 323 | 0x20EC..=0x20EF => Below, |
| 324 | 0xA92B..=0xA92D => Below, |
| 325 | 0xAAB4 => Below, |
| 326 | 0xFE27..=0xFE2D => Below, |
| 327 | 0x101FD => Below, |
| 328 | 0x102E0 => Below, |
| 329 | 0x10A0D => Below, |
| 330 | 0x10A3A => Below, |
| 331 | 0x10AE6 => Below, |
| 332 | 0x10EFD..=0x10EFF => Below, |
| 333 | 0x10F46..=0x10F47 => Below, |
| 334 | 0x10F4B => Below, |
| 335 | 0x10F4D..=0x10F50 => Below, |
| 336 | 0x10F83 => Below, |
| 337 | 0x10F85 => Below, |
| 338 | 0x1D17B..=0x1D182 => Below, |
| 339 | 0x1D18A..=0x1D18B => Below, |
| 340 | 0x1E4EE => Below, |
| 341 | 0x1E5EF => Below, |
| 342 | 0x1E8D0..=0x1E8D6 => Below, |
| 343 | 0x059A => BelowRight, |
| 344 | 0x05AD => BelowRight, |
| 345 | 0x1939 => BelowRight, |
| 346 | 0x302D => BelowRight, |
| 347 | 0x302E..=0x302F => Left, |
| 348 | 0x1D16D => Right, |
| 349 | 0x05AE => AboveLeft, |
| 350 | 0x18A9 => AboveLeft, |
| 351 | 0x1DF7..=0x1DF8 => AboveLeft, |
| 352 | 0x302B => AboveLeft, |
| 353 | 0x0300..=0x0314 => Above, |
| 354 | 0x033D..=0x0344 => Above, |
| 355 | 0x0346 => Above, |
| 356 | 0x034A..=0x034C => Above, |
| 357 | 0x0350..=0x0352 => Above, |
| 358 | 0x0357 => Above, |
| 359 | 0x035B => Above, |
| 360 | 0x0363..=0x036F => Above, |
| 361 | 0x0483..=0x0487 => Above, |
| 362 | 0x0592..=0x0595 => Above, |
| 363 | 0x0597..=0x0599 => Above, |
| 364 | 0x059C..=0x05A1 => Above, |
| 365 | 0x05A8..=0x05A9 => Above, |
| 366 | 0x05AB..=0x05AC => Above, |
| 367 | 0x05AF => Above, |
| 368 | 0x05C4 => Above, |
| 369 | 0x0610..=0x0617 => Above, |
| 370 | 0x0653..=0x0654 => Above, |
| 371 | 0x0657..=0x065B => Above, |
| 372 | 0x065D..=0x065E => Above, |
| 373 | 0x06D6..=0x06DC => Above, |
| 374 | 0x06DF..=0x06E2 => Above, |
| 375 | 0x06E4 => Above, |
| 376 | 0x06E7..=0x06E8 => Above, |
| 377 | 0x06EB..=0x06EC => Above, |
| 378 | 0x0730 => Above, |
| 379 | 0x0732..=0x0733 => Above, |
| 380 | 0x0735..=0x0736 => Above, |
| 381 | 0x073A => Above, |
| 382 | 0x073D => Above, |
| 383 | 0x073F..=0x0741 => Above, |
| 384 | 0x0743 => Above, |
| 385 | 0x0745 => Above, |
| 386 | 0x0747 => Above, |
| 387 | 0x0749..=0x074A => Above, |
| 388 | 0x07EB..=0x07F1 => Above, |
| 389 | 0x07F3 => Above, |
| 390 | 0x0816..=0x0819 => Above, |
| 391 | 0x081B..=0x0823 => Above, |
| 392 | 0x0825..=0x0827 => Above, |
| 393 | 0x0829..=0x082D => Above, |
| 394 | 0x0897..=0x0898 => Above, |
| 395 | 0x089C..=0x089F => Above, |
| 396 | 0x08CA..=0x08CE => Above, |
| 397 | 0x08D4..=0x08E1 => Above, |
| 398 | 0x08E4..=0x08E5 => Above, |
| 399 | 0x08E7..=0x08E8 => Above, |
| 400 | 0x08EA..=0x08EC => Above, |
| 401 | 0x08F3..=0x08F5 => Above, |
| 402 | 0x08F7..=0x08F8 => Above, |
| 403 | 0x08FB..=0x08FF => Above, |
| 404 | 0x0951 => Above, |
| 405 | 0x0953..=0x0954 => Above, |
| 406 | 0x09FE => Above, |
| 407 | 0x0F82..=0x0F83 => Above, |
| 408 | 0x0F86..=0x0F87 => Above, |
| 409 | 0x135D..=0x135F => Above, |
| 410 | 0x17DD => Above, |
| 411 | 0x193A => Above, |
| 412 | 0x1A17 => Above, |
| 413 | 0x1A75..=0x1A7C => Above, |
| 414 | 0x1AB0..=0x1AB4 => Above, |
| 415 | 0x1ABB..=0x1ABC => Above, |
| 416 | 0x1AC1..=0x1AC2 => Above, |
| 417 | 0x1AC5..=0x1AC9 => Above, |
| 418 | 0x1ACB..=0x1ACE => Above, |
| 419 | 0x1B6B => Above, |
| 420 | 0x1B6D..=0x1B73 => Above, |
| 421 | 0x1CD0..=0x1CD2 => Above, |
| 422 | 0x1CDA..=0x1CDB => Above, |
| 423 | 0x1CE0 => Above, |
| 424 | 0x1CF4 => Above, |
| 425 | 0x1CF8..=0x1CF9 => Above, |
| 426 | 0x1DC0..=0x1DC1 => Above, |
| 427 | 0x1DC3..=0x1DC9 => Above, |
| 428 | 0x1DCB..=0x1DCC => Above, |
| 429 | 0x1DD1..=0x1DF5 => Above, |
| 430 | 0x1DFB => Above, |
| 431 | 0x1DFE => Above, |
| 432 | 0x20D0..=0x20D1 => Above, |
| 433 | 0x20D4..=0x20D7 => Above, |
| 434 | 0x20DB..=0x20DC => Above, |
| 435 | 0x20E1 => Above, |
| 436 | 0x20E7 => Above, |
| 437 | 0x20E9 => Above, |
| 438 | 0x20F0 => Above, |
| 439 | 0x2CEF..=0x2CF1 => Above, |
| 440 | 0x2DE0..=0x2DFF => Above, |
| 441 | 0xA66F => Above, |
| 442 | 0xA674..=0xA67D => Above, |
| 443 | 0xA69E..=0xA69F => Above, |
| 444 | 0xA6F0..=0xA6F1 => Above, |
| 445 | 0xA8E0..=0xA8F1 => Above, |
| 446 | 0xAAB0 => Above, |
| 447 | 0xAAB2..=0xAAB3 => Above, |
| 448 | 0xAAB7..=0xAAB8 => Above, |
| 449 | 0xAABE..=0xAABF => Above, |
| 450 | 0xAAC1 => Above, |
| 451 | 0xFE20..=0xFE26 => Above, |
| 452 | 0xFE2E..=0xFE2F => Above, |
| 453 | 0x10376..=0x1037A => Above, |
| 454 | 0x10A0F => Above, |
| 455 | 0x10A38 => Above, |
| 456 | 0x10AE5 => Above, |
| 457 | 0x10D24..=0x10D27 => Above, |
| 458 | 0x10D69..=0x10D6D => Above, |
| 459 | 0x10EAB..=0x10EAC => Above, |
| 460 | 0x10F48..=0x10F4A => Above, |
| 461 | 0x10F4C => Above, |
| 462 | 0x10F82 => Above, |
| 463 | 0x10F84 => Above, |
| 464 | 0x11100..=0x11102 => Above, |
| 465 | 0x11366..=0x1136C => Above, |
| 466 | 0x11370..=0x11374 => Above, |
| 467 | 0x1145E => Above, |
| 468 | 0x16B30..=0x16B36 => Above, |
| 469 | 0x1D185..=0x1D189 => Above, |
| 470 | 0x1D1AA..=0x1D1AD => Above, |
| 471 | 0x1D242..=0x1D244 => Above, |
| 472 | 0x1E000..=0x1E006 => Above, |
| 473 | 0x1E008..=0x1E018 => Above, |
| 474 | 0x1E01B..=0x1E021 => Above, |
| 475 | 0x1E023..=0x1E024 => Above, |
| 476 | 0x1E026..=0x1E02A => Above, |
| 477 | 0x1E08F => Above, |
| 478 | 0x1E130..=0x1E136 => Above, |
| 479 | 0x1E2AE => Above, |
| 480 | 0x1E2EC..=0x1E2EF => Above, |
| 481 | 0x1E4EF => Above, |
| 482 | 0x1E5EE => Above, |
| 483 | 0x1E944..=0x1E949 => Above, |
| 484 | 0x0315 => AboveRight, |
| 485 | 0x031A => AboveRight, |
| 486 | 0x0358 => AboveRight, |
| 487 | 0x1DF6 => AboveRight, |
| 488 | 0x302C => AboveRight, |
| 489 | 0x1E4EC..=0x1E4ED => AboveRight, |
| 490 | 0x035C => DoubleBelow, |
| 491 | 0x035F => DoubleBelow, |
| 492 | 0x0362 => DoubleBelow, |
| 493 | 0x1DFC => DoubleBelow, |
| 494 | 0x035D..=0x035E => DoubleAbove, |
| 495 | 0x0360..=0x0361 => DoubleAbove, |
| 496 | 0x1DCD => DoubleAbove, |
| 497 | 0x0345 => IotaSubscript, |
| 498 | _ => NotReordered, |
| 499 | } |
| 500 | } |
| 501 | |