Warning: This file is not a C or C++ file. It does not have highlighting.
1 | // -*- C++ -*- |
---|---|
2 | //===----------------------------------------------------------------------===// |
3 | // |
4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
5 | // See https://llvm.org/LICENSE.txt for license information. |
6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | |
10 | // WARNING, this entire header is generated by |
11 | // utils/generate_indic_conjunct_break_table.py |
12 | // DO NOT MODIFY! |
13 | |
14 | // UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE |
15 | // |
16 | // See Terms of Use <https://www.unicode.org/copyright.html> |
17 | // for definitions of Unicode Inc.'s Data Files and Software. |
18 | // |
19 | // NOTICE TO USER: Carefully read the following legal agreement. |
20 | // BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S |
21 | // DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), |
22 | // YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE |
23 | // TERMS AND CONDITIONS OF THIS AGREEMENT. |
24 | // IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE |
25 | // THE DATA FILES OR SOFTWARE. |
26 | // |
27 | // COPYRIGHT AND PERMISSION NOTICE |
28 | // |
29 | // Copyright (c) 1991-2022 Unicode, Inc. All rights reserved. |
30 | // Distributed under the Terms of Use in https://www.unicode.org/copyright.html. |
31 | // |
32 | // Permission is hereby granted, free of charge, to any person obtaining |
33 | // a copy of the Unicode data files and any associated documentation |
34 | // (the "Data Files") or Unicode software and any associated documentation |
35 | // (the "Software") to deal in the Data Files or Software |
36 | // without restriction, including without limitation the rights to use, |
37 | // copy, modify, merge, publish, distribute, and/or sell copies of |
38 | // the Data Files or Software, and to permit persons to whom the Data Files |
39 | // or Software are furnished to do so, provided that either |
40 | // (a) this copyright and permission notice appear with all copies |
41 | // of the Data Files or Software, or |
42 | // (b) this copyright and permission notice appear in associated |
43 | // Documentation. |
44 | // |
45 | // THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF |
46 | // ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE |
47 | // WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
48 | // NONINFRINGEMENT OF THIRD PARTY RIGHTS. |
49 | // IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS |
50 | // NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL |
51 | // DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, |
52 | // DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER |
53 | // TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR |
54 | // PERFORMANCE OF THE DATA FILES OR SOFTWARE. |
55 | // |
56 | // Except as contained in this notice, the name of a copyright holder |
57 | // shall not be used in advertising or otherwise to promote the sale, |
58 | // use or other dealings in these Data Files or Software without prior |
59 | // written authorization of the copyright holder. |
60 | |
61 | #ifndef _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H |
62 | #define _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H |
63 | |
64 | #include <__algorithm/ranges_upper_bound.h> |
65 | #include <__config> |
66 | #include <__cstddef/ptrdiff_t.h> |
67 | #include <__iterator/access.h> |
68 | #include <cstdint> |
69 | |
70 | #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
71 | # pragma GCC system_header |
72 | #endif |
73 | |
74 | _LIBCPP_BEGIN_NAMESPACE_STD |
75 | |
76 | #if _LIBCPP_STD_VER >= 20 |
77 | |
78 | namespace __indic_conjunct_break { |
79 | |
/// Classification of a code point in the Indic conjunct break table.
///
/// The numeric values matter: \ref __get_property converts the two low bits
/// of a table entry directly into this enumeration, so the enumerators must
/// keep their current order.
enum class __property : uint8_t {
  // Values generated from the data files.
  __Consonant = 0,
  __Extend    = 1,
  __Linker    = 2,

  // The code point has none of the properties above.
  __none = 3
};
89 | |
90 | /// The entries of the Indic conjunct break property table. |
91 | /// The entries are sorted in ascending order so they can be binary searched. |
92 | /// The data is generated from |
93 | /// - https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt |
94 | /// |
95 | /// Each 32-bit entry packs 3 values |
96 | /// - bits [0, 1] The property. One of the values generated from the data files |
97 | /// of \ref __property |
98 | /// - bits [2, 10] The size of the range; the range covers size + 1 code points. |
99 | /// - bits [11, 31] The lower bound code point of the range. The upper bound of |
100 | /// the range is lower bound + size (inclusive). |
101 | /// |
102 | /// The 9 bits for the size allow a maximum range of 512 elements. Some ranges |
103 | /// in the Unicode tables are larger. They are stored in multiple consecutive |
104 | /// ranges in the data table. An alternative would be to store the sizes in a |
105 | /// separate 16-bit value. The original MSVC STL code had such an approach, but |
106 | /// this approach uses less space for the data and is about 4% faster in the |
107 | /// following benchmark: |
108 | /// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp |
109 | // clang-format off |
110 | _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[403] = { |
111 | 0x001801bd, |
112 | 0x00241819, |
113 | 0x002c88b1, |
114 | 0x002df801, |
115 | 0x002e0805, |
116 | 0x002e2005, |
117 | 0x002e3801, |
118 | 0x00308029, |
119 | 0x00325851, |
120 | 0x00338001, |
121 | 0x0036b019, |
122 | 0x0036f815, |
123 | 0x00373805, |
124 | 0x0037500d, |
125 | 0x00388801, |
126 | 0x00398069, |
127 | 0x003d3029, |
128 | 0x003f5821, |
129 | 0x003fe801, |
130 | 0x0040b00d, |
131 | 0x0040d821, |
132 | 0x00412809, |
133 | 0x00414811, |
134 | 0x0042c809, |
135 | 0x0044b821, |
136 | 0x0046505d, |
137 | 0x0047187d, |
138 | 0x0048a890, |
139 | 0x0049d001, |
140 | 0x0049e001, |
141 | 0x004a081d, |
142 | 0x004a6802, |
143 | 0x004a8819, |
144 | 0x004ac01c, |
145 | 0x004b1005, |
146 | 0x004bc01c, |
147 | 0x004c0801, |
148 | 0x004ca84c, |
149 | 0x004d5018, |
150 | 0x004d9000, |
151 | 0x004db00c, |
152 | 0x004de001, |
153 | 0x004df001, |
154 | 0x004e080d, |
155 | 0x004e6802, |
156 | 0x004eb801, |
157 | 0x004ee004, |
158 | 0x004ef800, |
159 | 0x004f1005, |
160 | 0x004f8004, |
161 | 0x004ff001, |
162 | 0x00500805, |
163 | 0x0051e001, |
164 | 0x00520805, |
165 | 0x00523805, |
166 | 0x00525809, |
167 | 0x00528801, |
168 | 0x00538005, |
169 | 0x0053a801, |
170 | 0x00540805, |
171 | 0x0054a84c, |
172 | 0x00555018, |
173 | 0x00559004, |
174 | 0x0055a810, |
175 | 0x0055e001, |
176 | 0x00560811, |
177 | 0x00563805, |
178 | 0x00566802, |
179 | 0x00571005, |
180 | 0x0057c800, |
181 | 0x0057d015, |
182 | 0x00580801, |
183 | 0x0058a84c, |
184 | 0x00595018, |
185 | 0x00599004, |
186 | 0x0059a810, |
187 | 0x0059e001, |
188 | 0x0059f005, |
189 | 0x005a080d, |
190 | 0x005a6802, |
191 | 0x005aa809, |
192 | 0x005ae004, |
193 | 0x005af800, |
194 | 0x005b1005, |
195 | 0x005b8800, |
196 | 0x005c1001, |
197 | 0x005df001, |
198 | 0x005e0001, |
199 | 0x005e6801, |
200 | 0x005eb801, |
201 | 0x00600001, |
202 | 0x00602001, |
203 | 0x0060a84c, |
204 | 0x0061503c, |
205 | 0x0061e001, |
206 | 0x0061f009, |
207 | 0x00623009, |
208 | 0x00625009, |
209 | 0x00626802, |
210 | 0x0062a805, |
211 | 0x0062c008, |
212 | 0x00631005, |
213 | 0x00640801, |
214 | 0x0065e001, |
215 | 0x0065f805, |
216 | 0x00661001, |
217 | 0x00663009, |
218 | 0x0066500d, |
219 | 0x0066a805, |
220 | 0x00671005, |
221 | 0x00680005, |
222 | 0x0068a894, |
223 | 0x0069d805, |
224 | 0x0069f001, |
225 | 0x006a080d, |
226 | 0x006a6802, |
227 | 0x006ab801, |
228 | 0x006b1005, |
229 | 0x006c0801, |
230 | 0x006e5001, |
231 | 0x006e7801, |
232 | 0x006e9009, |
233 | 0x006eb001, |
234 | 0x006ef801, |
235 | 0x00718801, |
236 | 0x0071a019, |
237 | 0x0072381d, |
238 | 0x00758801, |
239 | 0x0075a021, |
240 | 0x00764019, |
241 | 0x0078c005, |
242 | 0x0079a801, |
243 | 0x0079b801, |
244 | 0x0079c801, |
245 | 0x007b8835, |
246 | 0x007c0011, |
247 | 0x007c3005, |
248 | 0x007c6829, |
249 | 0x007cc88d, |
250 | 0x007e3001, |
251 | 0x0081680d, |
252 | 0x00819015, |
253 | 0x0081c805, |
254 | 0x0081e805, |
255 | 0x0082c005, |
256 | 0x0082f009, |
257 | 0x0083880d, |
258 | 0x00841001, |
259 | 0x00842805, |
260 | 0x00846801, |
261 | 0x0084e801, |
262 | 0x009ae809, |
263 | 0x00b8900d, |
264 | 0x00b99009, |
265 | 0x00ba9005, |
266 | 0x00bb9005, |
267 | 0x00bda005, |
268 | 0x00bdb819, |
269 | 0x00be3001, |
270 | 0x00be4829, |
271 | 0x00bee801, |
272 | 0x00c05809, |
273 | 0x00c07801, |
274 | 0x00c42805, |
275 | 0x00c54801, |
276 | 0x00c90009, |
277 | 0x00c93805, |
278 | 0x00c99001, |
279 | 0x00c9c809, |
280 | 0x00d0b805, |
281 | 0x00d0d801, |
282 | 0x00d2b001, |
283 | 0x00d2c019, |
284 | 0x00d30001, |
285 | 0x00d31001, |
286 | 0x00d3281d, |
287 | 0x00d39825, |
288 | 0x00d3f801, |
289 | 0x00d58079, |
290 | 0x00d8000d, |
291 | 0x00d9a025, |
292 | 0x00da1009, |
293 | 0x00db5821, |
294 | 0x00dc0005, |
295 | 0x00dd100d, |
296 | 0x00dd4015, |
297 | 0x00df3001, |
298 | 0x00df4005, |
299 | 0x00df6801, |
300 | 0x00df7811, |
301 | 0x00e1601d, |
302 | 0x00e1b005, |
303 | 0x00e68009, |
304 | 0x00e6a031, |
305 | 0x00e71019, |
306 | 0x00e76801, |
307 | 0x00e7a001, |
308 | 0x00e7c005, |
309 | 0x00ee00fd, |
310 | 0x01006801, |
311 | 0x01068081, |
312 | 0x01677809, |
313 | 0x016bf801, |
314 | 0x016f007d, |
315 | 0x01815015, |
316 | 0x0184c805, |
317 | 0x0533780d, |
318 | 0x0533a025, |
319 | 0x0534f005, |
320 | 0x05378005, |
321 | 0x05401001, |
322 | 0x05403001, |
323 | 0x05405801, |
324 | 0x05412805, |
325 | 0x05416001, |
326 | 0x05462005, |
327 | 0x05470045, |
328 | 0x0547f801, |
329 | 0x0549301d, |
330 | 0x054a3829, |
331 | 0x054a9801, |
332 | 0x054c0009, |
333 | 0x054d9801, |
334 | 0x054db00d, |
335 | 0x054de005, |
336 | 0x054e0001, |
337 | 0x054f2801, |
338 | 0x05514815, |
339 | 0x05518805, |
340 | 0x0551a805, |
341 | 0x05521801, |
342 | 0x05526001, |
343 | 0x0553e001, |
344 | 0x05558001, |
345 | 0x05559009, |
346 | 0x0555b805, |
347 | 0x0555f005, |
348 | 0x05560801, |
349 | 0x05576005, |
350 | 0x0557b001, |
351 | 0x055f2801, |
352 | 0x055f4001, |
353 | 0x055f6801, |
354 | 0x07d8f001, |
355 | 0x07f0003d, |
356 | 0x07f1003d, |
357 | 0x07fcf005, |
358 | 0x080fe801, |
359 | 0x08170001, |
360 | 0x081bb011, |
361 | 0x08500809, |
362 | 0x08502805, |
363 | 0x0850600d, |
364 | 0x0851c009, |
365 | 0x0851f801, |
366 | 0x08572805, |
367 | 0x0869200d, |
368 | 0x086b4811, |
369 | 0x08755805, |
370 | 0x0877e00d, |
371 | 0x087a3029, |
372 | 0x087c100d, |
373 | 0x08800801, |
374 | 0x0881c039, |
375 | 0x08838001, |
376 | 0x08839805, |
377 | 0x0883f809, |
378 | 0x0885980d, |
379 | 0x0885c805, |
380 | 0x08861001, |
381 | 0x08880009, |
382 | 0x08893811, |
383 | 0x0889681d, |
384 | 0x088b9801, |
385 | 0x088c0005, |
386 | 0x088db021, |
387 | 0x088e0001, |
388 | 0x088e480d, |
389 | 0x088e7801, |
390 | 0x08917809, |
391 | 0x0891a00d, |
392 | 0x0891f001, |
393 | 0x08920801, |
394 | 0x0896f801, |
395 | 0x0897181d, |
396 | 0x08980005, |
397 | 0x0899d805, |
398 | 0x0899f001, |
399 | 0x089a0001, |
400 | 0x089a6801, |
401 | 0x089ab801, |
402 | 0x089b3019, |
403 | 0x089b8011, |
404 | 0x089dc001, |
405 | 0x089dd815, |
406 | 0x089e1001, |
407 | 0x089e2801, |
408 | 0x089e3809, |
409 | 0x089e7009, |
410 | 0x089e9001, |
411 | 0x089f0805, |
412 | 0x08a1c01d, |
413 | 0x08a21009, |
414 | 0x08a23001, |
415 | 0x08a2f001, |
416 | 0x08a58001, |
417 | 0x08a59815, |
418 | 0x08a5d001, |
419 | 0x08a5e801, |
420 | 0x08a5f805, |
421 | 0x08a61005, |
422 | 0x08ad7801, |
423 | 0x08ad900d, |
424 | 0x08ade005, |
425 | 0x08adf805, |
426 | 0x08aee005, |
427 | 0x08b1981d, |
428 | 0x08b1e801, |
429 | 0x08b1f805, |
430 | 0x08b55801, |
431 | 0x08b56801, |
432 | 0x08b5801d, |
433 | 0x08b8e801, |
434 | 0x08b8f801, |
435 | 0x08b9100d, |
436 | 0x08b93811, |
437 | 0x08c17821, |
438 | 0x08c1c805, |
439 | 0x08c98001, |
440 | 0x08c9d80d, |
441 | 0x08ca1801, |
442 | 0x08cea00d, |
443 | 0x08ced005, |
444 | 0x08cf0001, |
445 | 0x08d00825, |
446 | 0x08d19815, |
447 | 0x08d1d80d, |
448 | 0x08d23801, |
449 | 0x08d28815, |
450 | 0x08d2c809, |
451 | 0x08d45031, |
452 | 0x08d4c005, |
453 | 0x08e18019, |
454 | 0x08e1c015, |
455 | 0x08e1f801, |
456 | 0x08e49055, |
457 | 0x08e55019, |
458 | 0x08e59005, |
459 | 0x08e5a805, |
460 | 0x08e98815, |
461 | 0x08e9d001, |
462 | 0x08e9e005, |
463 | 0x08e9f819, |
464 | 0x08ea3801, |
465 | 0x08ec8005, |
466 | 0x08eca801, |
467 | 0x08ecb801, |
468 | 0x08f79805, |
469 | 0x08f80005, |
470 | 0x08f9b011, |
471 | 0x08fa0009, |
472 | 0x08fad001, |
473 | 0x09a20001, |
474 | 0x09a23839, |
475 | 0x0b08f02d, |
476 | 0x0b096809, |
477 | 0x0b578011, |
478 | 0x0b598019, |
479 | 0x0b7a7801, |
480 | 0x0b7c780d, |
481 | 0x0b7f2001, |
482 | 0x0b7f8005, |
483 | 0x0de4e805, |
484 | 0x0e7800b5, |
485 | 0x0e798059, |
486 | 0x0e8b2811, |
487 | 0x0e8b6815, |
488 | 0x0e8bd81d, |
489 | 0x0e8c2819, |
490 | 0x0e8d500d, |
491 | 0x0e921009, |
492 | 0x0ed000d9, |
493 | 0x0ed1d8c5, |
494 | 0x0ed3a801, |
495 | 0x0ed42001, |
496 | 0x0ed4d811, |
497 | 0x0ed50839, |
498 | 0x0f000019, |
499 | 0x0f004041, |
500 | 0x0f00d819, |
501 | 0x0f011805, |
502 | 0x0f013011, |
503 | 0x0f047801, |
504 | 0x0f098019, |
505 | 0x0f157001, |
506 | 0x0f17600d, |
507 | 0x0f27600d, |
508 | 0x0f2f7005, |
509 | 0x0f468019, |
510 | 0x0f4a2019, |
511 | 0x0f9fd811, |
512 | 0x7001017d, |
513 | 0x700803bd}; |
514 | // clang-format on |
515 | |
516 | /// Returns the indic conjuct break property of a code point. |
517 | [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __property __get_property(const char32_t __code_point) noexcept { |
518 | // The algorithm searches for the upper bound of the range and, when found, |
519 | // steps back one entry. This algorithm is used since the code point can be |
520 | // anywhere in the range. After a lower bound is found the next step is to |
521 | // compare whether the code unit is indeed in the range. |
522 | // |
523 | // Since the entry contains a code unit, size, and property the code point |
524 | // being sought needs to be adjusted. Just shifting the code point to the |
525 | // proper position doesn't work; suppose an entry has property 0, size 1, |
526 | // and lower bound 3. This results in the entry 0x1810. |
527 | // When searching for code point 3 it will search for 0x1800, find 0x1810 |
528 | // and moves to the previous entry. Thus the lower bound value will never |
529 | // be found. |
530 | // The simple solution is to set the bits belonging to the property and |
531 | // size. Then the upper bound for code point 3 will return the entry after |
532 | // 0x1810. After moving to the previous entry the algorithm arrives at the |
533 | // correct entry. |
534 | ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 11) | 0x7ffu) - __entries; |
535 | if (__i == 0) |
536 | return __property::__none; |
537 | |
538 | --__i; |
539 | uint32_t __upper_bound = (__entries[__i] >> 11) + ((__entries[__i] >> 2) & 0b1'1111'1111); |
540 | if (__code_point <= __upper_bound) |
541 | return static_cast<__property>(__entries[__i] & 0b11); |
542 | |
543 | return __property::__none; |
544 | } |
545 | |
546 | } // namespace __indic_conjunct_break |
547 | |
548 | #endif // _LIBCPP_STD_VER >= 20 |
549 | |
550 | _LIBCPP_END_NAMESPACE_STD |
551 | |
552 | #endif // _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H |
553 |
Warning: This file is not a C or C++ file. It does not have highlighting.