Warning: This file is not a C or C++ file. It does not have highlighting.

1// -*- C++ -*-
2//===----------------------------------------------------------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9
10// WARNING, this entire header is generated by
11// utils/generate_indic_conjunct_break_table.py
12// DO NOT MODIFY!
13
14// UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
15//
16// See Terms of Use <https://www.unicode.org/copyright.html>
17// for definitions of Unicode Inc.'s Data Files and Software.
18//
19// NOTICE TO USER: Carefully read the following legal agreement.
20// BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
21// DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
22// YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
23// TERMS AND CONDITIONS OF THIS AGREEMENT.
24// IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
25// THE DATA FILES OR SOFTWARE.
26//
27// COPYRIGHT AND PERMISSION NOTICE
28//
29// Copyright (c) 1991-2022 Unicode, Inc. All rights reserved.
30// Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
31//
32// Permission is hereby granted, free of charge, to any person obtaining
33// a copy of the Unicode data files and any associated documentation
34// (the "Data Files") or Unicode software and any associated documentation
35// (the "Software") to deal in the Data Files or Software
36// without restriction, including without limitation the rights to use,
37// copy, modify, merge, publish, distribute, and/or sell copies of
38// the Data Files or Software, and to permit persons to whom the Data Files
39// or Software are furnished to do so, provided that either
40// (a) this copyright and permission notice appear with all copies
41// of the Data Files or Software, or
42// (b) this copyright and permission notice appear in associated
43// Documentation.
44//
45// THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
46// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
47// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
48// NONINFRINGEMENT OF THIRD PARTY RIGHTS.
49// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
50// NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
51// DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
52// DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
53// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
54// PERFORMANCE OF THE DATA FILES OR SOFTWARE.
55//
56// Except as contained in this notice, the name of a copyright holder
57// shall not be used in advertising or otherwise to promote the sale,
58// use or other dealings in these Data Files or Software without prior
59// written authorization of the copyright holder.
60
61#ifndef _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H
62#define _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H
63
64#include <__algorithm/ranges_upper_bound.h>
65#include <__config>
66#include <__cstddef/ptrdiff_t.h>
67#include <__iterator/access.h>
68#include <cstdint>
69
70#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
71# pragma GCC system_header
72#endif
73
74_LIBCPP_BEGIN_NAMESPACE_STD
75
76#if _LIBCPP_STD_VER >= 20
77
78namespace __indic_conjunct_break {
79
/// The Indic_Conjunct_Break property values a table entry can carry.
///
/// The numeric value of every enumerator is significant: __get_property
/// converts the two lowest bits of a table entry directly to this type.
enum class __property : uint8_t {
  // Values generated from the data files.
  __Consonant = 0,
  __Extend = 1,
  __Linker = 2,

  // The code point has none of the above properties.
  __none = 3
};
89
90/// The entries of the Indic_Conjunct_Break property table.
91///
92/// The data is generated from
93/// - https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
94///
95/// The entries are sorted in ascending order (\ref __get_property binary-searches them) and each packs 3 values
96/// - bits [0, 1] The property. One of the values of \ref __property that are
97/// generated from the data files.
98/// - bits [2, 10] The size of the range; the number of code points following the lower bound.
99/// - bits [11, 31] The lower bound code point of the range. The upper bound of
100/// the range is lower bound + size and belongs to the range. For example, 0x001801bd is the range [U+0300, U+036F] with the property __Extend.
101///
102/// The 9 bits for the size allow a maximum range of 512 elements. Some ranges
103/// in the Unicode tables are larger. They are stored in multiple consecutive
104/// entries in the data table. An alternative would be to store the sizes in a
105/// separate 16-bit value. The original MSVC STL code had such an approach, but
106/// this approach uses less space for the data and is about 4% faster in the
107/// following benchmark.
108/// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp
109// clang-format off
110_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[403] = {
111 0x001801bd,
112 0x00241819,
113 0x002c88b1,
114 0x002df801,
115 0x002e0805,
116 0x002e2005,
117 0x002e3801,
118 0x00308029,
119 0x00325851,
120 0x00338001,
121 0x0036b019,
122 0x0036f815,
123 0x00373805,
124 0x0037500d,
125 0x00388801,
126 0x00398069,
127 0x003d3029,
128 0x003f5821,
129 0x003fe801,
130 0x0040b00d,
131 0x0040d821,
132 0x00412809,
133 0x00414811,
134 0x0042c809,
135 0x0044b821,
136 0x0046505d,
137 0x0047187d,
138 0x0048a890,
139 0x0049d001,
140 0x0049e001,
141 0x004a081d,
142 0x004a6802,
143 0x004a8819,
144 0x004ac01c,
145 0x004b1005,
146 0x004bc01c,
147 0x004c0801,
148 0x004ca84c,
149 0x004d5018,
150 0x004d9000,
151 0x004db00c,
152 0x004de001,
153 0x004df001,
154 0x004e080d,
155 0x004e6802,
156 0x004eb801,
157 0x004ee004,
158 0x004ef800,
159 0x004f1005,
160 0x004f8004,
161 0x004ff001,
162 0x00500805,
163 0x0051e001,
164 0x00520805,
165 0x00523805,
166 0x00525809,
167 0x00528801,
168 0x00538005,
169 0x0053a801,
170 0x00540805,
171 0x0054a84c,
172 0x00555018,
173 0x00559004,
174 0x0055a810,
175 0x0055e001,
176 0x00560811,
177 0x00563805,
178 0x00566802,
179 0x00571005,
180 0x0057c800,
181 0x0057d015,
182 0x00580801,
183 0x0058a84c,
184 0x00595018,
185 0x00599004,
186 0x0059a810,
187 0x0059e001,
188 0x0059f005,
189 0x005a080d,
190 0x005a6802,
191 0x005aa809,
192 0x005ae004,
193 0x005af800,
194 0x005b1005,
195 0x005b8800,
196 0x005c1001,
197 0x005df001,
198 0x005e0001,
199 0x005e6801,
200 0x005eb801,
201 0x00600001,
202 0x00602001,
203 0x0060a84c,
204 0x0061503c,
205 0x0061e001,
206 0x0061f009,
207 0x00623009,
208 0x00625009,
209 0x00626802,
210 0x0062a805,
211 0x0062c008,
212 0x00631005,
213 0x00640801,
214 0x0065e001,
215 0x0065f805,
216 0x00661001,
217 0x00663009,
218 0x0066500d,
219 0x0066a805,
220 0x00671005,
221 0x00680005,
222 0x0068a894,
223 0x0069d805,
224 0x0069f001,
225 0x006a080d,
226 0x006a6802,
227 0x006ab801,
228 0x006b1005,
229 0x006c0801,
230 0x006e5001,
231 0x006e7801,
232 0x006e9009,
233 0x006eb001,
234 0x006ef801,
235 0x00718801,
236 0x0071a019,
237 0x0072381d,
238 0x00758801,
239 0x0075a021,
240 0x00764019,
241 0x0078c005,
242 0x0079a801,
243 0x0079b801,
244 0x0079c801,
245 0x007b8835,
246 0x007c0011,
247 0x007c3005,
248 0x007c6829,
249 0x007cc88d,
250 0x007e3001,
251 0x0081680d,
252 0x00819015,
253 0x0081c805,
254 0x0081e805,
255 0x0082c005,
256 0x0082f009,
257 0x0083880d,
258 0x00841001,
259 0x00842805,
260 0x00846801,
261 0x0084e801,
262 0x009ae809,
263 0x00b8900d,
264 0x00b99009,
265 0x00ba9005,
266 0x00bb9005,
267 0x00bda005,
268 0x00bdb819,
269 0x00be3001,
270 0x00be4829,
271 0x00bee801,
272 0x00c05809,
273 0x00c07801,
274 0x00c42805,
275 0x00c54801,
276 0x00c90009,
277 0x00c93805,
278 0x00c99001,
279 0x00c9c809,
280 0x00d0b805,
281 0x00d0d801,
282 0x00d2b001,
283 0x00d2c019,
284 0x00d30001,
285 0x00d31001,
286 0x00d3281d,
287 0x00d39825,
288 0x00d3f801,
289 0x00d58079,
290 0x00d8000d,
291 0x00d9a025,
292 0x00da1009,
293 0x00db5821,
294 0x00dc0005,
295 0x00dd100d,
296 0x00dd4015,
297 0x00df3001,
298 0x00df4005,
299 0x00df6801,
300 0x00df7811,
301 0x00e1601d,
302 0x00e1b005,
303 0x00e68009,
304 0x00e6a031,
305 0x00e71019,
306 0x00e76801,
307 0x00e7a001,
308 0x00e7c005,
309 0x00ee00fd,
310 0x01006801,
311 0x01068081,
312 0x01677809,
313 0x016bf801,
314 0x016f007d,
315 0x01815015,
316 0x0184c805,
317 0x0533780d,
318 0x0533a025,
319 0x0534f005,
320 0x05378005,
321 0x05401001,
322 0x05403001,
323 0x05405801,
324 0x05412805,
325 0x05416001,
326 0x05462005,
327 0x05470045,
328 0x0547f801,
329 0x0549301d,
330 0x054a3829,
331 0x054a9801,
332 0x054c0009,
333 0x054d9801,
334 0x054db00d,
335 0x054de005,
336 0x054e0001,
337 0x054f2801,
338 0x05514815,
339 0x05518805,
340 0x0551a805,
341 0x05521801,
342 0x05526001,
343 0x0553e001,
344 0x05558001,
345 0x05559009,
346 0x0555b805,
347 0x0555f005,
348 0x05560801,
349 0x05576005,
350 0x0557b001,
351 0x055f2801,
352 0x055f4001,
353 0x055f6801,
354 0x07d8f001,
355 0x07f0003d,
356 0x07f1003d,
357 0x07fcf005,
358 0x080fe801,
359 0x08170001,
360 0x081bb011,
361 0x08500809,
362 0x08502805,
363 0x0850600d,
364 0x0851c009,
365 0x0851f801,
366 0x08572805,
367 0x0869200d,
368 0x086b4811,
369 0x08755805,
370 0x0877e00d,
371 0x087a3029,
372 0x087c100d,
373 0x08800801,
374 0x0881c039,
375 0x08838001,
376 0x08839805,
377 0x0883f809,
378 0x0885980d,
379 0x0885c805,
380 0x08861001,
381 0x08880009,
382 0x08893811,
383 0x0889681d,
384 0x088b9801,
385 0x088c0005,
386 0x088db021,
387 0x088e0001,
388 0x088e480d,
389 0x088e7801,
390 0x08917809,
391 0x0891a00d,
392 0x0891f001,
393 0x08920801,
394 0x0896f801,
395 0x0897181d,
396 0x08980005,
397 0x0899d805,
398 0x0899f001,
399 0x089a0001,
400 0x089a6801,
401 0x089ab801,
402 0x089b3019,
403 0x089b8011,
404 0x089dc001,
405 0x089dd815,
406 0x089e1001,
407 0x089e2801,
408 0x089e3809,
409 0x089e7009,
410 0x089e9001,
411 0x089f0805,
412 0x08a1c01d,
413 0x08a21009,
414 0x08a23001,
415 0x08a2f001,
416 0x08a58001,
417 0x08a59815,
418 0x08a5d001,
419 0x08a5e801,
420 0x08a5f805,
421 0x08a61005,
422 0x08ad7801,
423 0x08ad900d,
424 0x08ade005,
425 0x08adf805,
426 0x08aee005,
427 0x08b1981d,
428 0x08b1e801,
429 0x08b1f805,
430 0x08b55801,
431 0x08b56801,
432 0x08b5801d,
433 0x08b8e801,
434 0x08b8f801,
435 0x08b9100d,
436 0x08b93811,
437 0x08c17821,
438 0x08c1c805,
439 0x08c98001,
440 0x08c9d80d,
441 0x08ca1801,
442 0x08cea00d,
443 0x08ced005,
444 0x08cf0001,
445 0x08d00825,
446 0x08d19815,
447 0x08d1d80d,
448 0x08d23801,
449 0x08d28815,
450 0x08d2c809,
451 0x08d45031,
452 0x08d4c005,
453 0x08e18019,
454 0x08e1c015,
455 0x08e1f801,
456 0x08e49055,
457 0x08e55019,
458 0x08e59005,
459 0x08e5a805,
460 0x08e98815,
461 0x08e9d001,
462 0x08e9e005,
463 0x08e9f819,
464 0x08ea3801,
465 0x08ec8005,
466 0x08eca801,
467 0x08ecb801,
468 0x08f79805,
469 0x08f80005,
470 0x08f9b011,
471 0x08fa0009,
472 0x08fad001,
473 0x09a20001,
474 0x09a23839,
475 0x0b08f02d,
476 0x0b096809,
477 0x0b578011,
478 0x0b598019,
479 0x0b7a7801,
480 0x0b7c780d,
481 0x0b7f2001,
482 0x0b7f8005,
483 0x0de4e805,
484 0x0e7800b5,
485 0x0e798059,
486 0x0e8b2811,
487 0x0e8b6815,
488 0x0e8bd81d,
489 0x0e8c2819,
490 0x0e8d500d,
491 0x0e921009,
492 0x0ed000d9,
493 0x0ed1d8c5,
494 0x0ed3a801,
495 0x0ed42001,
496 0x0ed4d811,
497 0x0ed50839,
498 0x0f000019,
499 0x0f004041,
500 0x0f00d819,
501 0x0f011805,
502 0x0f013011,
503 0x0f047801,
504 0x0f098019,
505 0x0f157001,
506 0x0f17600d,
507 0x0f27600d,
508 0x0f2f7005,
509 0x0f468019,
510 0x0f4a2019,
511 0x0f9fd811,
512 0x7001017d,
513 0x700803bd};
514// clang-format on
515
516/// Returns the indic conjuct break property of a code point.
517[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __property __get_property(const char32_t __code_point) noexcept {
518 // The algorithm searches for the upper bound of the range and, when found,
519 // steps back one entry. This algorithm is used since the code point can be
520 // anywhere in the range. After a lower bound is found the next step is to
521 // compare whether the code unit is indeed in the range.
522 //
523 // Since the entry contains a code unit, size, and property the code point
524 // being sought needs to be adjusted. Just shifting the code point to the
525 // proper position doesn't work; suppose an entry has property 0, size 1,
526 // and lower bound 3. This results in the entry 0x1810.
527 // When searching for code point 3 it will search for 0x1800, find 0x1810
528 // and moves to the previous entry. Thus the lower bound value will never
529 // be found.
530 // The simple solution is to set the bits belonging to the property and
531 // size. Then the upper bound for code point 3 will return the entry after
532 // 0x1810. After moving to the previous entry the algorithm arrives at the
533 // correct entry.
534 ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 11) | 0x7ffu) - __entries;
535 if (__i == 0)
536 return __property::__none;
537
538 --__i;
539 uint32_t __upper_bound = (__entries[__i] >> 11) + ((__entries[__i] >> 2) & 0b1'1111'1111);
540 if (__code_point <= __upper_bound)
541 return static_cast<__property>(__entries[__i] & 0b11);
542
543 return __property::__none;
544}
545
546} // namespace __indic_conjunct_break
547
548#endif // _LIBCPP_STD_VER >= 20
549
550_LIBCPP_END_NAMESPACE_STD
551
552#endif // _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H
553

Warning: This file is not a C or C++ file. It does not have highlighting.

source code of libcxx/include/__format/indic_conjunct_break_table.h