Warning: This file is not a C or C++ file. It does not have highlighting.
| 1 | // -*- C++ -*- |
|---|---|
| 2 | //===----------------------------------------------------------------------===// |
| 3 | // |
| 4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 5 | // See https://llvm.org/LICENSE.txt for license information. |
| 6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | // WARNING, this entire header is generated by |
| 11 | // utils/generate_width_estimation_table.py |
| 12 | // DO NOT MODIFY! |
| 13 | |
| 14 | // UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE |
| 15 | // |
| 16 | // See Terms of Use <https://www.unicode.org/copyright.html> |
| 17 | // for definitions of Unicode Inc.'s Data Files and Software. |
| 18 | // |
| 19 | // NOTICE TO USER: Carefully read the following legal agreement. |
| 20 | // BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S |
| 21 | // DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), |
| 22 | // YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE |
| 23 | // TERMS AND CONDITIONS OF THIS AGREEMENT. |
| 24 | // IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE |
| 25 | // THE DATA FILES OR SOFTWARE. |
| 26 | // |
| 27 | // COPYRIGHT AND PERMISSION NOTICE |
| 28 | // |
| 29 | // Copyright (c) 1991-2022 Unicode, Inc. All rights reserved. |
| 30 | // Distributed under the Terms of Use in https://www.unicode.org/copyright.html. |
| 31 | // |
| 32 | // Permission is hereby granted, free of charge, to any person obtaining |
| 33 | // a copy of the Unicode data files and any associated documentation |
| 34 | // (the "Data Files") or Unicode software and any associated documentation |
| 35 | // (the "Software") to deal in the Data Files or Software |
| 36 | // without restriction, including without limitation the rights to use, |
| 37 | // copy, modify, merge, publish, distribute, and/or sell copies of |
| 38 | // the Data Files or Software, and to permit persons to whom the Data Files |
| 39 | // or Software are furnished to do so, provided that either |
| 40 | // (a) this copyright and permission notice appear with all copies |
| 41 | // of the Data Files or Software, or |
| 42 | // (b) this copyright and permission notice appear in associated |
| 43 | // Documentation. |
| 44 | // |
| 45 | // THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF |
| 46 | // ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE |
| 47 | // WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 48 | // NONINFRINGEMENT OF THIRD PARTY RIGHTS. |
| 49 | // IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS |
| 50 | // NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL |
| 51 | // DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, |
| 52 | // DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER |
| 53 | // TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR |
| 54 | // PERFORMANCE OF THE DATA FILES OR SOFTWARE. |
| 55 | // |
| 56 | // Except as contained in this notice, the name of a copyright holder |
| 57 | // shall not be used in advertising or otherwise to promote the sale, |
| 58 | // use or other dealings in these Data Files or Software without prior |
| 59 | // written authorization of the copyright holder. |
| 60 | |
| 61 | #ifndef _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H |
| 62 | #define _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H |
| 63 | |
| 64 | #include <__algorithm/ranges_upper_bound.h> |
| 65 | #include <__config> |
| 66 | #include <__cstddef/ptrdiff_t.h> |
| 67 | #include <cstdint> |
| 68 | |
| 69 | #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
| 70 | # pragma GCC system_header |
| 71 | #endif |
| 72 | |
| 73 | _LIBCPP_BEGIN_NAMESPACE_STD |
| 74 | |
| 75 | #if _LIBCPP_STD_VER >= 20 |
| 76 | |
| 77 | namespace __width_estimation_table { |
| 78 | |
| 79 | /// The entries of the characters with an estimated width of 2. |
| 80 | /// |
| 81 | /// Contains the entries for [format.string.std]/12 |
| 82 | /// - Any code point with the East_Asian_Width="W" or East_Asian_Width="F" |
| 83 | /// Derived Extracted Property as described by UAX #44 |
| 84 | /// - U+4DC0 - U+4DFF (Yijing Hexagram Symbols) |
| 85 | /// - U+1F300 - U+1F5FF (Miscellaneous Symbols and Pictographs) |
| 86 | /// - U+1F900 - U+1F9FF (Supplemental Symbols and Pictographs) |
| 87 | /// |
| 88 | /// The data is generated from |
| 89 | /// - https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt |
| 90 | /// - The "overrides" in [format.string.std]/12 |
| 91 | /// |
| 92 | /// The format of EastAsianWidth.txt is two fields separated by a semicolon. |
| 93 | /// Field 0: Unicode code point value or range of code point values |
| 94 | /// Field 1: East_Asian_Width property, consisting of one of the following values: |
| 95 | /// "A", "F", "H", "N", "Na", "W" |
| 96 | /// - All code points, assigned or unassigned, that are not listed |
| 97 | /// explicitly are given the value "N". |
| 98 | /// - The unassigned code points in the following blocks default to "W": |
| 99 | /// CJK Unified Ideographs Extension A: U+3400..U+4DBF |
| 100 | /// CJK Unified Ideographs: U+4E00..U+9FFF |
| 101 | /// CJK Compatibility Ideographs: U+F900..U+FAFF |
| 102 | /// - All undesignated code points in Planes 2 and 3, whether inside or |
| 103 | /// outside of allocated blocks, default to "W": |
| 104 | /// Plane 2: U+20000..U+2FFFD |
| 105 | /// Plane 3: U+30000..U+3FFFD |
| 106 | /// |
| 107 | /// The table is similar to the table |
| 108 | /// __extended_grapheme_custer_property_boundary::__entries |
| 109 | /// which explains the details of these classes. The only difference is this |
| 110 | /// table lacks a property, thus having more bits available for the size. |
| 111 | /// |
| 112 | /// The maximum code point that has an estimated width of 2 is U+3FFFD. This |
| 113 | /// value can be encoded in 18 bits. Thus the upper 3 bits of the code point |
| 114 | /// are always 0. These 3 bits are used to enlarge the offset range. This |
| 115 | /// optimization reduces the table in Unicode 15 from 184 to 104 entries, |
| 116 | /// saving 320 bytes. |
| 117 | /// |
| 118 | /// The data has 2 values: |
| 119 | /// - bits [0, 13] The size of the range minus one, allowing 16384 elements. |
| 120 | /// - bits [14, 31] The lower bound code point of the range. The (inclusive) |
| 121 | /// upper bound of the range is lower bound + the value in bits [0, 13]. |
| 122 | _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[110] = { |
| 123 | 0x0440005f /* 00001100 - 0000115f [ 96] */, // |
| 124 | 0x08c68001 /* 0000231a - 0000231b [ 2] */, // |
| 125 | 0x08ca4001 /* 00002329 - 0000232a [ 2] */, // |
| 126 | 0x08fa4003 /* 000023e9 - 000023ec [ 4] */, // |
| 127 | 0x08fc0000 /* 000023f0 - 000023f0 [ 1] */, // |
| 128 | 0x08fcc000 /* 000023f3 - 000023f3 [ 1] */, // |
| 129 | 0x097f4001 /* 000025fd - 000025fe [ 2] */, // |
| 130 | 0x09850001 /* 00002614 - 00002615 [ 2] */, // |
| 131 | 0x098c0007 /* 00002630 - 00002637 [ 8] */, // |
| 132 | 0x0992000b /* 00002648 - 00002653 [ 12] */, // |
| 133 | 0x099fc000 /* 0000267f - 0000267f [ 1] */, // |
| 134 | 0x09a28005 /* 0000268a - 0000268f [ 6] */, // |
| 135 | 0x09a4c000 /* 00002693 - 00002693 [ 1] */, // |
| 136 | 0x09a84000 /* 000026a1 - 000026a1 [ 1] */, // |
| 137 | 0x09aa8001 /* 000026aa - 000026ab [ 2] */, // |
| 138 | 0x09af4001 /* 000026bd - 000026be [ 2] */, // |
| 139 | 0x09b10001 /* 000026c4 - 000026c5 [ 2] */, // |
| 140 | 0x09b38000 /* 000026ce - 000026ce [ 1] */, // |
| 141 | 0x09b50000 /* 000026d4 - 000026d4 [ 1] */, // |
| 142 | 0x09ba8000 /* 000026ea - 000026ea [ 1] */, // |
| 143 | 0x09bc8001 /* 000026f2 - 000026f3 [ 2] */, // |
| 144 | 0x09bd4000 /* 000026f5 - 000026f5 [ 1] */, // |
| 145 | 0x09be8000 /* 000026fa - 000026fa [ 1] */, // |
| 146 | 0x09bf4000 /* 000026fd - 000026fd [ 1] */, // |
| 147 | 0x09c14000 /* 00002705 - 00002705 [ 1] */, // |
| 148 | 0x09c28001 /* 0000270a - 0000270b [ 2] */, // |
| 149 | 0x09ca0000 /* 00002728 - 00002728 [ 1] */, // |
| 150 | 0x09d30000 /* 0000274c - 0000274c [ 1] */, // |
| 151 | 0x09d38000 /* 0000274e - 0000274e [ 1] */, // |
| 152 | 0x09d4c002 /* 00002753 - 00002755 [ 3] */, // |
| 153 | 0x09d5c000 /* 00002757 - 00002757 [ 1] */, // |
| 154 | 0x09e54002 /* 00002795 - 00002797 [ 3] */, // |
| 155 | 0x09ec0000 /* 000027b0 - 000027b0 [ 1] */, // |
| 156 | 0x09efc000 /* 000027bf - 000027bf [ 1] */, // |
| 157 | 0x0ac6c001 /* 00002b1b - 00002b1c [ 2] */, // |
| 158 | 0x0ad40000 /* 00002b50 - 00002b50 [ 1] */, // |
| 159 | 0x0ad54000 /* 00002b55 - 00002b55 [ 1] */, // |
| 160 | 0x0ba00019 /* 00002e80 - 00002e99 [ 26] */, // |
| 161 | 0x0ba6c058 /* 00002e9b - 00002ef3 [ 89] */, // |
| 162 | 0x0bc000d5 /* 00002f00 - 00002fd5 [ 214] */, // |
| 163 | 0x0bfc004e /* 00002ff0 - 0000303e [ 79] */, // |
| 164 | 0x0c104055 /* 00003041 - 00003096 [ 86] */, // |
| 165 | 0x0c264066 /* 00003099 - 000030ff [ 103] */, // |
| 166 | 0x0c41402a /* 00003105 - 0000312f [ 43] */, // |
| 167 | 0x0c4c405d /* 00003131 - 0000318e [ 94] */, // |
| 168 | 0x0c640055 /* 00003190 - 000031e5 [ 86] */, // |
| 169 | 0x0c7bc02f /* 000031ef - 0000321e [ 48] */, // |
| 170 | 0x0c880027 /* 00003220 - 00003247 [ 40] */, // |
| 171 | 0x0c943fff /* 00003250 - 0000724f [16384] */, // |
| 172 | 0x1c94323c /* 00007250 - 0000a48c [12861] */, // |
| 173 | 0x29240036 /* 0000a490 - 0000a4c6 [ 55] */, // |
| 174 | 0x2a58001c /* 0000a960 - 0000a97c [ 29] */, // |
| 175 | 0x2b002ba3 /* 0000ac00 - 0000d7a3 [11172] */, // |
| 176 | 0x3e4001ff /* 0000f900 - 0000faff [ 512] */, // |
| 177 | 0x3f840009 /* 0000fe10 - 0000fe19 [ 10] */, // |
| 178 | 0x3f8c0022 /* 0000fe30 - 0000fe52 [ 35] */, // |
| 179 | 0x3f950012 /* 0000fe54 - 0000fe66 [ 19] */, // |
| 180 | 0x3f9a0003 /* 0000fe68 - 0000fe6b [ 4] */, // |
| 181 | 0x3fc0405f /* 0000ff01 - 0000ff60 [ 96] */, // |
| 182 | 0x3ff80006 /* 0000ffe0 - 0000ffe6 [ 7] */, // |
| 183 | 0x5bf80004 /* 00016fe0 - 00016fe4 [ 5] */, // |
| 184 | 0x5bfc0001 /* 00016ff0 - 00016ff1 [ 2] */, // |
| 185 | 0x5c0017f7 /* 00017000 - 000187f7 [ 6136] */, // |
| 186 | 0x620004d5 /* 00018800 - 00018cd5 [ 1238] */, // |
| 187 | 0x633fc009 /* 00018cff - 00018d08 [ 10] */, // |
| 188 | 0x6bfc0003 /* 0001aff0 - 0001aff3 [ 4] */, // |
| 189 | 0x6bfd4006 /* 0001aff5 - 0001affb [ 7] */, // |
| 190 | 0x6bff4001 /* 0001affd - 0001affe [ 2] */, // |
| 191 | 0x6c000122 /* 0001b000 - 0001b122 [ 291] */, // |
| 192 | 0x6c4c8000 /* 0001b132 - 0001b132 [ 1] */, // |
| 193 | 0x6c540002 /* 0001b150 - 0001b152 [ 3] */, // |
| 194 | 0x6c554000 /* 0001b155 - 0001b155 [ 1] */, // |
| 195 | 0x6c590003 /* 0001b164 - 0001b167 [ 4] */, // |
| 196 | 0x6c5c018b /* 0001b170 - 0001b2fb [ 396] */, // |
| 197 | 0x74c00056 /* 0001d300 - 0001d356 [ 87] */, // |
| 198 | 0x74d80016 /* 0001d360 - 0001d376 [ 23] */, // |
| 199 | 0x7c010000 /* 0001f004 - 0001f004 [ 1] */, // |
| 200 | 0x7c33c000 /* 0001f0cf - 0001f0cf [ 1] */, // |
| 201 | 0x7c638000 /* 0001f18e - 0001f18e [ 1] */, // |
| 202 | 0x7c644009 /* 0001f191 - 0001f19a [ 10] */, // |
| 203 | 0x7c800002 /* 0001f200 - 0001f202 [ 3] */, // |
| 204 | 0x7c84002b /* 0001f210 - 0001f23b [ 44] */, // |
| 205 | 0x7c900008 /* 0001f240 - 0001f248 [ 9] */, // |
| 206 | 0x7c940001 /* 0001f250 - 0001f251 [ 2] */, // |
| 207 | 0x7c980005 /* 0001f260 - 0001f265 [ 6] */, // |
| 208 | 0x7cc0034f /* 0001f300 - 0001f64f [ 848] */, // |
| 209 | 0x7da00045 /* 0001f680 - 0001f6c5 [ 70] */, // |
| 210 | 0x7db30000 /* 0001f6cc - 0001f6cc [ 1] */, // |
| 211 | 0x7db40002 /* 0001f6d0 - 0001f6d2 [ 3] */, // |
| 212 | 0x7db54002 /* 0001f6d5 - 0001f6d7 [ 3] */, // |
| 213 | 0x7db70003 /* 0001f6dc - 0001f6df [ 4] */, // |
| 214 | 0x7dbac001 /* 0001f6eb - 0001f6ec [ 2] */, // |
| 215 | 0x7dbd0008 /* 0001f6f4 - 0001f6fc [ 9] */, // |
| 216 | 0x7df8000b /* 0001f7e0 - 0001f7eb [ 12] */, // |
| 217 | 0x7dfc0000 /* 0001f7f0 - 0001f7f0 [ 1] */, // |
| 218 | 0x7e4000ff /* 0001f900 - 0001f9ff [ 256] */, // |
| 219 | 0x7e9c000c /* 0001fa70 - 0001fa7c [ 13] */, // |
| 220 | 0x7ea00009 /* 0001fa80 - 0001fa89 [ 10] */, // |
| 221 | 0x7ea3c037 /* 0001fa8f - 0001fac6 [ 56] */, // |
| 222 | 0x7eb3800e /* 0001face - 0001fadc [ 15] */, // |
| 223 | 0x7eb7c00a /* 0001fadf - 0001fae9 [ 11] */, // |
| 224 | 0x7ebc0008 /* 0001faf0 - 0001faf8 [ 9] */, // |
| 225 | 0x80003fff /* 00020000 - 00023fff [16384] */, // |
| 226 | 0x90003fff /* 00024000 - 00027fff [16384] */, // |
| 227 | 0xa0003fff /* 00028000 - 0002bfff [16384] */, // |
| 228 | 0xb0003ffd /* 0002c000 - 0002fffd [16382] */, // |
| 229 | 0xc0003fff /* 00030000 - 00033fff [16384] */, // |
| 230 | 0xd0003fff /* 00034000 - 00037fff [16384] */, // |
| 231 | 0xe0003fff /* 00038000 - 0003bfff [16384] */, // |
| 232 | 0xf0003ffd /* 0003c000 - 0003fffd [16382] */}; |
| 233 | |
/// The largest code point listed in the table, i.e. the upper bound entry of
/// EastAsianWidth.txt.
///
/// Code points above this value may need more than 18 significant bits, but
/// they always have an estimated width of 1 and are therefore never stored.
/// That is what makes the compact 18-bit lower bound encoding of the table
/// possible.
inline constexpr uint32_t __table_upper_bound = 0x3fffd;
| 240 | |
| 241 | /// Returns the estimated width of a Unicode code point. |
| 242 | /// |
| 243 | /// \\pre The code point is a valid Unicode code point. |
| 244 | [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int __estimated_width(const char32_t __code_point) noexcept { |
| 245 | // Since __table_upper_bound contains the unshifted range do the |
| 246 | // comparison without shifting. |
| 247 | if (__code_point > __table_upper_bound) [[unlikely]] |
| 248 | return 1; |
| 249 | |
| 250 | // When the code-point is less than the first element in the table |
| 251 | // the lookup is quite expensive. Since quite some scripts are in |
| 252 | // that range, it makes sense to validate that first. |
| 253 | // The std_format_spec_string_unicode benchmark gives a measurable |
| 254 | // improvement. |
| 255 | if (__code_point < (__entries[0] >> 14)) |
| 256 | return 1; |
| 257 | |
| 258 | ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 14) | 0x3fffu) - __entries; |
| 259 | if (__i == 0) |
| 260 | return 1; |
| 261 | |
| 262 | --__i; |
| 263 | uint32_t __upper_bound = (__entries[__i] >> 14) + (__entries[__i] & 0x3fffu); |
| 264 | return 1 + (__code_point <= __upper_bound); |
| 265 | } |
| 266 | |
| 267 | } // namespace __width_estimation_table |
| 268 | |
| 269 | #endif // _LIBCPP_STD_VER >= 20 |
| 270 | |
| 271 | _LIBCPP_END_NAMESPACE_STD |
| 272 | |
| 273 | #endif // _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H |
| 274 |
Warning: This file is not a C or C++ file. It does not have highlighting.
