Warning: This file is not a C or C++ file. It does not have highlighting.
| 1 | // -*- C++ -*- |
|---|---|
| 2 | //===----------------------------------------------------------------------===// |
| 3 | // |
| 4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 5 | // See https://llvm.org/LICENSE.txt for license information. |
| 6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | // WARNING, this entire header is generated by |
| 11 | // utils/generate_indic_conjunct_break_table.py |
| 12 | // DO NOT MODIFY! |
| 13 | |
| 14 | // UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE |
| 15 | // |
| 16 | // See Terms of Use <https://www.unicode.org/copyright.html> |
| 17 | // for definitions of Unicode Inc.'s Data Files and Software. |
| 18 | // |
| 19 | // NOTICE TO USER: Carefully read the following legal agreement. |
| 20 | // BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S |
| 21 | // DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), |
| 22 | // YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE |
| 23 | // TERMS AND CONDITIONS OF THIS AGREEMENT. |
| 24 | // IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE |
| 25 | // THE DATA FILES OR SOFTWARE. |
| 26 | // |
| 27 | // COPYRIGHT AND PERMISSION NOTICE |
| 28 | // |
| 29 | // Copyright (c) 1991-2022 Unicode, Inc. All rights reserved. |
| 30 | // Distributed under the Terms of Use in https://www.unicode.org/copyright.html. |
| 31 | // |
| 32 | // Permission is hereby granted, free of charge, to any person obtaining |
| 33 | // a copy of the Unicode data files and any associated documentation |
| 34 | // (the "Data Files") or Unicode software and any associated documentation |
| 35 | // (the "Software") to deal in the Data Files or Software |
| 36 | // without restriction, including without limitation the rights to use, |
| 37 | // copy, modify, merge, publish, distribute, and/or sell copies of |
| 38 | // the Data Files or Software, and to permit persons to whom the Data Files |
| 39 | // or Software are furnished to do so, provided that either |
| 40 | // (a) this copyright and permission notice appear with all copies |
| 41 | // of the Data Files or Software, or |
| 42 | // (b) this copyright and permission notice appear in associated |
| 43 | // Documentation. |
| 44 | // |
| 45 | // THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF |
| 46 | // ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE |
| 47 | // WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 48 | // NONINFRINGEMENT OF THIRD PARTY RIGHTS. |
| 49 | // IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS |
| 50 | // NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL |
| 51 | // DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, |
| 52 | // DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER |
| 53 | // TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR |
| 54 | // PERFORMANCE OF THE DATA FILES OR SOFTWARE. |
| 55 | // |
| 56 | // Except as contained in this notice, the name of a copyright holder |
| 57 | // shall not be used in advertising or otherwise to promote the sale, |
| 58 | // use or other dealings in these Data Files or Software without prior |
| 59 | // written authorization of the copyright holder. |
| 60 | |
| 61 | #ifndef _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H |
| 62 | #define _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H |
| 63 | |
| 64 | #include <__algorithm/ranges_upper_bound.h> |
| 65 | #include <__config> |
| 66 | #include <__cstddef/ptrdiff_t.h> |
| 67 | #include <__iterator/access.h> |
| 68 | #include <cstdint> |
| 69 | |
| 70 | #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
| 71 | # pragma GCC system_header |
| 72 | #endif |
| 73 | |
| 74 | _LIBCPP_BEGIN_NAMESPACE_STD |
| 75 | |
| 76 | #if _LIBCPP_STD_VER >= 20 |
| 77 | |
| 78 | namespace __indic_conjunct_break { |
| 79 | |
| 80 | enum class __property : uint8_t { |
| 81 | // Values generated from the data files; stored in bits [0, 1] of a table entry. |
| 82 | __Consonant, |
| 83 | __Extend, |
| 84 | __Linker, |
| 85 | |
| 86 | // The code point has none of the above properties. |
| 87 | __none |
| 88 | }; |
| 89 | |
| 90 | /// The entries of the Indic conjunct break property table. |
| 91 | /// |
| 92 | /// The data is generated from |
| 93 | /// - https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt |
| 94 | /// |
| 95 | /// The entries are sorted in ascending order. Every entry packs 3 values |
| 96 | /// - bits [0, 1] The property. One of the values generated from the data files |
| 97 | /// of \ref __property |
| 98 | /// - bits [2, 10] The size of the range, that is upper bound - lower bound. |
| 99 | /// - bits [11, 31] The lower bound code point of the range. The upper bound of |
| 100 | /// the range is lower bound + size; the upper bound is part of the range. |
| 101 | /// |
| 102 | /// The 9 bits for the size allow a maximum range of 512 elements. Some ranges |
| 103 | /// in the Unicode tables are larger. They are stored in multiple consecutive |
| 104 | /// ranges in the data table. An alternative would be to store the sizes in a |
| 105 | /// separate 16-bit value. The original MSVC STL code had such an approach, but |
| 106 | /// this approach uses less space for the data and is about 4% faster in the |
| 107 | /// following benchmark. |
| 108 | /// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp |
| 109 | // clang-format off |
| 110 | _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[403] = { |
| 111 | 0x001801bd, |
| 112 | 0x00241819, |
| 113 | 0x002c88b1, |
| 114 | 0x002df801, |
| 115 | 0x002e0805, |
| 116 | 0x002e2005, |
| 117 | 0x002e3801, |
| 118 | 0x00308029, |
| 119 | 0x00325851, |
| 120 | 0x00338001, |
| 121 | 0x0036b019, |
| 122 | 0x0036f815, |
| 123 | 0x00373805, |
| 124 | 0x0037500d, |
| 125 | 0x00388801, |
| 126 | 0x00398069, |
| 127 | 0x003d3029, |
| 128 | 0x003f5821, |
| 129 | 0x003fe801, |
| 130 | 0x0040b00d, |
| 131 | 0x0040d821, |
| 132 | 0x00412809, |
| 133 | 0x00414811, |
| 134 | 0x0042c809, |
| 135 | 0x0044b821, |
| 136 | 0x0046505d, |
| 137 | 0x0047187d, |
| 138 | 0x0048a890, |
| 139 | 0x0049d001, |
| 140 | 0x0049e001, |
| 141 | 0x004a081d, |
| 142 | 0x004a6802, |
| 143 | 0x004a8819, |
| 144 | 0x004ac01c, |
| 145 | 0x004b1005, |
| 146 | 0x004bc01c, |
| 147 | 0x004c0801, |
| 148 | 0x004ca84c, |
| 149 | 0x004d5018, |
| 150 | 0x004d9000, |
| 151 | 0x004db00c, |
| 152 | 0x004de001, |
| 153 | 0x004df001, |
| 154 | 0x004e080d, |
| 155 | 0x004e6802, |
| 156 | 0x004eb801, |
| 157 | 0x004ee004, |
| 158 | 0x004ef800, |
| 159 | 0x004f1005, |
| 160 | 0x004f8004, |
| 161 | 0x004ff001, |
| 162 | 0x00500805, |
| 163 | 0x0051e001, |
| 164 | 0x00520805, |
| 165 | 0x00523805, |
| 166 | 0x00525809, |
| 167 | 0x00528801, |
| 168 | 0x00538005, |
| 169 | 0x0053a801, |
| 170 | 0x00540805, |
| 171 | 0x0054a84c, |
| 172 | 0x00555018, |
| 173 | 0x00559004, |
| 174 | 0x0055a810, |
| 175 | 0x0055e001, |
| 176 | 0x00560811, |
| 177 | 0x00563805, |
| 178 | 0x00566802, |
| 179 | 0x00571005, |
| 180 | 0x0057c800, |
| 181 | 0x0057d015, |
| 182 | 0x00580801, |
| 183 | 0x0058a84c, |
| 184 | 0x00595018, |
| 185 | 0x00599004, |
| 186 | 0x0059a810, |
| 187 | 0x0059e001, |
| 188 | 0x0059f005, |
| 189 | 0x005a080d, |
| 190 | 0x005a6802, |
| 191 | 0x005aa809, |
| 192 | 0x005ae004, |
| 193 | 0x005af800, |
| 194 | 0x005b1005, |
| 195 | 0x005b8800, |
| 196 | 0x005c1001, |
| 197 | 0x005df001, |
| 198 | 0x005e0001, |
| 199 | 0x005e6801, |
| 200 | 0x005eb801, |
| 201 | 0x00600001, |
| 202 | 0x00602001, |
| 203 | 0x0060a84c, |
| 204 | 0x0061503c, |
| 205 | 0x0061e001, |
| 206 | 0x0061f009, |
| 207 | 0x00623009, |
| 208 | 0x00625009, |
| 209 | 0x00626802, |
| 210 | 0x0062a805, |
| 211 | 0x0062c008, |
| 212 | 0x00631005, |
| 213 | 0x00640801, |
| 214 | 0x0065e001, |
| 215 | 0x0065f805, |
| 216 | 0x00661001, |
| 217 | 0x00663009, |
| 218 | 0x0066500d, |
| 219 | 0x0066a805, |
| 220 | 0x00671005, |
| 221 | 0x00680005, |
| 222 | 0x0068a894, |
| 223 | 0x0069d805, |
| 224 | 0x0069f001, |
| 225 | 0x006a080d, |
| 226 | 0x006a6802, |
| 227 | 0x006ab801, |
| 228 | 0x006b1005, |
| 229 | 0x006c0801, |
| 230 | 0x006e5001, |
| 231 | 0x006e7801, |
| 232 | 0x006e9009, |
| 233 | 0x006eb001, |
| 234 | 0x006ef801, |
| 235 | 0x00718801, |
| 236 | 0x0071a019, |
| 237 | 0x0072381d, |
| 238 | 0x00758801, |
| 239 | 0x0075a021, |
| 240 | 0x00764019, |
| 241 | 0x0078c005, |
| 242 | 0x0079a801, |
| 243 | 0x0079b801, |
| 244 | 0x0079c801, |
| 245 | 0x007b8835, |
| 246 | 0x007c0011, |
| 247 | 0x007c3005, |
| 248 | 0x007c6829, |
| 249 | 0x007cc88d, |
| 250 | 0x007e3001, |
| 251 | 0x0081680d, |
| 252 | 0x00819015, |
| 253 | 0x0081c805, |
| 254 | 0x0081e805, |
| 255 | 0x0082c005, |
| 256 | 0x0082f009, |
| 257 | 0x0083880d, |
| 258 | 0x00841001, |
| 259 | 0x00842805, |
| 260 | 0x00846801, |
| 261 | 0x0084e801, |
| 262 | 0x009ae809, |
| 263 | 0x00b8900d, |
| 264 | 0x00b99009, |
| 265 | 0x00ba9005, |
| 266 | 0x00bb9005, |
| 267 | 0x00bda005, |
| 268 | 0x00bdb819, |
| 269 | 0x00be3001, |
| 270 | 0x00be4829, |
| 271 | 0x00bee801, |
| 272 | 0x00c05809, |
| 273 | 0x00c07801, |
| 274 | 0x00c42805, |
| 275 | 0x00c54801, |
| 276 | 0x00c90009, |
| 277 | 0x00c93805, |
| 278 | 0x00c99001, |
| 279 | 0x00c9c809, |
| 280 | 0x00d0b805, |
| 281 | 0x00d0d801, |
| 282 | 0x00d2b001, |
| 283 | 0x00d2c019, |
| 284 | 0x00d30001, |
| 285 | 0x00d31001, |
| 286 | 0x00d3281d, |
| 287 | 0x00d39825, |
| 288 | 0x00d3f801, |
| 289 | 0x00d58079, |
| 290 | 0x00d8000d, |
| 291 | 0x00d9a025, |
| 292 | 0x00da1009, |
| 293 | 0x00db5821, |
| 294 | 0x00dc0005, |
| 295 | 0x00dd100d, |
| 296 | 0x00dd4015, |
| 297 | 0x00df3001, |
| 298 | 0x00df4005, |
| 299 | 0x00df6801, |
| 300 | 0x00df7811, |
| 301 | 0x00e1601d, |
| 302 | 0x00e1b005, |
| 303 | 0x00e68009, |
| 304 | 0x00e6a031, |
| 305 | 0x00e71019, |
| 306 | 0x00e76801, |
| 307 | 0x00e7a001, |
| 308 | 0x00e7c005, |
| 309 | 0x00ee00fd, |
| 310 | 0x01006801, |
| 311 | 0x01068081, |
| 312 | 0x01677809, |
| 313 | 0x016bf801, |
| 314 | 0x016f007d, |
| 315 | 0x01815015, |
| 316 | 0x0184c805, |
| 317 | 0x0533780d, |
| 318 | 0x0533a025, |
| 319 | 0x0534f005, |
| 320 | 0x05378005, |
| 321 | 0x05401001, |
| 322 | 0x05403001, |
| 323 | 0x05405801, |
| 324 | 0x05412805, |
| 325 | 0x05416001, |
| 326 | 0x05462005, |
| 327 | 0x05470045, |
| 328 | 0x0547f801, |
| 329 | 0x0549301d, |
| 330 | 0x054a3829, |
| 331 | 0x054a9801, |
| 332 | 0x054c0009, |
| 333 | 0x054d9801, |
| 334 | 0x054db00d, |
| 335 | 0x054de005, |
| 336 | 0x054e0001, |
| 337 | 0x054f2801, |
| 338 | 0x05514815, |
| 339 | 0x05518805, |
| 340 | 0x0551a805, |
| 341 | 0x05521801, |
| 342 | 0x05526001, |
| 343 | 0x0553e001, |
| 344 | 0x05558001, |
| 345 | 0x05559009, |
| 346 | 0x0555b805, |
| 347 | 0x0555f005, |
| 348 | 0x05560801, |
| 349 | 0x05576005, |
| 350 | 0x0557b001, |
| 351 | 0x055f2801, |
| 352 | 0x055f4001, |
| 353 | 0x055f6801, |
| 354 | 0x07d8f001, |
| 355 | 0x07f0003d, |
| 356 | 0x07f1003d, |
| 357 | 0x07fcf005, |
| 358 | 0x080fe801, |
| 359 | 0x08170001, |
| 360 | 0x081bb011, |
| 361 | 0x08500809, |
| 362 | 0x08502805, |
| 363 | 0x0850600d, |
| 364 | 0x0851c009, |
| 365 | 0x0851f801, |
| 366 | 0x08572805, |
| 367 | 0x0869200d, |
| 368 | 0x086b4811, |
| 369 | 0x08755805, |
| 370 | 0x0877e00d, |
| 371 | 0x087a3029, |
| 372 | 0x087c100d, |
| 373 | 0x08800801, |
| 374 | 0x0881c039, |
| 375 | 0x08838001, |
| 376 | 0x08839805, |
| 377 | 0x0883f809, |
| 378 | 0x0885980d, |
| 379 | 0x0885c805, |
| 380 | 0x08861001, |
| 381 | 0x08880009, |
| 382 | 0x08893811, |
| 383 | 0x0889681d, |
| 384 | 0x088b9801, |
| 385 | 0x088c0005, |
| 386 | 0x088db021, |
| 387 | 0x088e0001, |
| 388 | 0x088e480d, |
| 389 | 0x088e7801, |
| 390 | 0x08917809, |
| 391 | 0x0891a00d, |
| 392 | 0x0891f001, |
| 393 | 0x08920801, |
| 394 | 0x0896f801, |
| 395 | 0x0897181d, |
| 396 | 0x08980005, |
| 397 | 0x0899d805, |
| 398 | 0x0899f001, |
| 399 | 0x089a0001, |
| 400 | 0x089a6801, |
| 401 | 0x089ab801, |
| 402 | 0x089b3019, |
| 403 | 0x089b8011, |
| 404 | 0x089dc001, |
| 405 | 0x089dd815, |
| 406 | 0x089e1001, |
| 407 | 0x089e2801, |
| 408 | 0x089e3809, |
| 409 | 0x089e7009, |
| 410 | 0x089e9001, |
| 411 | 0x089f0805, |
| 412 | 0x08a1c01d, |
| 413 | 0x08a21009, |
| 414 | 0x08a23001, |
| 415 | 0x08a2f001, |
| 416 | 0x08a58001, |
| 417 | 0x08a59815, |
| 418 | 0x08a5d001, |
| 419 | 0x08a5e801, |
| 420 | 0x08a5f805, |
| 421 | 0x08a61005, |
| 422 | 0x08ad7801, |
| 423 | 0x08ad900d, |
| 424 | 0x08ade005, |
| 425 | 0x08adf805, |
| 426 | 0x08aee005, |
| 427 | 0x08b1981d, |
| 428 | 0x08b1e801, |
| 429 | 0x08b1f805, |
| 430 | 0x08b55801, |
| 431 | 0x08b56801, |
| 432 | 0x08b5801d, |
| 433 | 0x08b8e801, |
| 434 | 0x08b8f801, |
| 435 | 0x08b9100d, |
| 436 | 0x08b93811, |
| 437 | 0x08c17821, |
| 438 | 0x08c1c805, |
| 439 | 0x08c98001, |
| 440 | 0x08c9d80d, |
| 441 | 0x08ca1801, |
| 442 | 0x08cea00d, |
| 443 | 0x08ced005, |
| 444 | 0x08cf0001, |
| 445 | 0x08d00825, |
| 446 | 0x08d19815, |
| 447 | 0x08d1d80d, |
| 448 | 0x08d23801, |
| 449 | 0x08d28815, |
| 450 | 0x08d2c809, |
| 451 | 0x08d45031, |
| 452 | 0x08d4c005, |
| 453 | 0x08e18019, |
| 454 | 0x08e1c015, |
| 455 | 0x08e1f801, |
| 456 | 0x08e49055, |
| 457 | 0x08e55019, |
| 458 | 0x08e59005, |
| 459 | 0x08e5a805, |
| 460 | 0x08e98815, |
| 461 | 0x08e9d001, |
| 462 | 0x08e9e005, |
| 463 | 0x08e9f819, |
| 464 | 0x08ea3801, |
| 465 | 0x08ec8005, |
| 466 | 0x08eca801, |
| 467 | 0x08ecb801, |
| 468 | 0x08f79805, |
| 469 | 0x08f80005, |
| 470 | 0x08f9b011, |
| 471 | 0x08fa0009, |
| 472 | 0x08fad001, |
| 473 | 0x09a20001, |
| 474 | 0x09a23839, |
| 475 | 0x0b08f02d, |
| 476 | 0x0b096809, |
| 477 | 0x0b578011, |
| 478 | 0x0b598019, |
| 479 | 0x0b7a7801, |
| 480 | 0x0b7c780d, |
| 481 | 0x0b7f2001, |
| 482 | 0x0b7f8005, |
| 483 | 0x0de4e805, |
| 484 | 0x0e7800b5, |
| 485 | 0x0e798059, |
| 486 | 0x0e8b2811, |
| 487 | 0x0e8b6815, |
| 488 | 0x0e8bd81d, |
| 489 | 0x0e8c2819, |
| 490 | 0x0e8d500d, |
| 491 | 0x0e921009, |
| 492 | 0x0ed000d9, |
| 493 | 0x0ed1d8c5, |
| 494 | 0x0ed3a801, |
| 495 | 0x0ed42001, |
| 496 | 0x0ed4d811, |
| 497 | 0x0ed50839, |
| 498 | 0x0f000019, |
| 499 | 0x0f004041, |
| 500 | 0x0f00d819, |
| 501 | 0x0f011805, |
| 502 | 0x0f013011, |
| 503 | 0x0f047801, |
| 504 | 0x0f098019, |
| 505 | 0x0f157001, |
| 506 | 0x0f17600d, |
| 507 | 0x0f27600d, |
| 508 | 0x0f2f7005, |
| 509 | 0x0f468019, |
| 510 | 0x0f4a2019, |
| 511 | 0x0f9fd811, |
| 512 | 0x7001017d, |
| 513 | 0x700803bd}; |
| 514 | // clang-format on |
| 515 | |
| 516 | /// Returns the Indic conjunct break property of a code point, __none when no entry contains it. |
| 517 | [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __property __get_property(const char32_t __code_point) noexcept { |
| 518 | // The algorithm searches for the upper bound of the range and, when found, |
| 519 | // steps back one entry. This algorithm is used since the code point can be |
| 520 | // anywhere in the range. When there is no entry to step back to, or the code |
| 521 | // point is past the upper bound of the entry found, the result is __none. |
| 522 | // |
| 523 | // Since the entry contains a code point, size, and property the code point |
| 524 | // being sought needs to be adjusted. Just shifting the code point to the |
| 525 | // proper position doesn't work; suppose an entry has property 0, size 1, |
| 526 | // and lower bound 3. This results in the entry 0x1804. |
| 527 | // When searching for code point 3 it will search for 0x1800, find 0x1804 |
| 528 | // and move to the previous entry. Thus the lower bound value will never |
| 529 | // be found. |
| 530 | // The simple solution is to set the bits belonging to the property and |
| 531 | // size (the low 11 bits, 0x7ff). Then the upper bound for code point 3 will |
| 532 | // return the entry after 0x1804. After moving to the previous entry the |
| 533 | // algorithm arrives at the correct entry. |
| 534 | ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 11) | 0x7ffu) - __entries; |
| 535 | if (__i == 0) |
| 536 | return __property::__none; |
| 537 | |
| 538 | --__i; |
| 539 | uint32_t __upper_bound = (__entries[__i] >> 11) + ((__entries[__i] >> 2) & 0b1'1111'1111); |
| 540 | if (__code_point <= __upper_bound) |
| 541 | return static_cast<__property>(__entries[__i] & 0b11); |
| 542 | |
| 543 | return __property::__none; |
| 544 | } |
| 545 | |
| 546 | } // namespace __indic_conjunct_break |
| 547 | |
| 548 | #endif // _LIBCPP_STD_VER >= 20 |
| 549 | |
| 550 | _LIBCPP_END_NAMESPACE_STD |
| 551 | |
| 552 | #endif // _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H |
| 553 |
Warning: This file is not a C or C++ file. It does not have highlighting.
