1 | /* Declarations for internal libc locale interfaces |
2 | Copyright (C) 1995-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #ifndef _LOCALEINFO_H |
20 | #define _LOCALEINFO_H 1 |
21 | |
#include <stddef.h>
#include <langinfo.h>
#include <limits.h>
#include <locale.h>
#include <time.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/types.h>

#include <intl/loadinfo.h>	/* For loaded_l10nfile definition.  */
31 | |
/* Magic number at the beginning of a locale data file for CATEGORY.
   LC_COLLATE and LC_CTYPE each use their own base constant; every
   other category shares a third one.  The base is XORed with the
   category number, so the result depends on the category.

   CATEGORY is parenthesized at every use so that an argument which is
   itself an expression (for example a conditional expression) is
   compared as a whole.  It is evaluated more than once and therefore
   must not have side effects.  */
#define LIMAGIC(category) \
  ((category) == LC_COLLATE						\
   ? ((unsigned int) (0x20051014 ^ (category)))				\
   : (category) == LC_CTYPE						\
   ? ((unsigned int) (0x20090720 ^ (category)))				\
   : ((unsigned int) (0x20031115 ^ (category))))
39 | |
/* Special weight constant for the collation data.  (Only this one
   constant is defined in this header.)  */
#define IGNORE_CHAR	2
42 | |
/* We use a special value for the usage counter in `__locale_data' to
   signal that this data must never be removed anymore.  UNDELETABLE
   (UINT_MAX) is that special value; MAX_USAGE_COUNT is one less and is
   presumably the largest value an ordinary usage count may reach.  */
#define MAX_USAGE_COUNT (UINT_MAX - 1)
#define UNDELETABLE	UINT_MAX
47 | |
/* Structure describing locale data in core for a category.  A fixed
   header is followed by the trailing `values' array, whose element
   count is recorded in `nstrings'.  */
struct __locale_data
{
  const char *name;		/* Storage is described by `alloc' below.  */
  const char *filedata;		/* Region mapping the file data.  */
  off_t filesize;		/* Size of the file (and the region).  */
  enum				/* Flavor of storage used for those.  */
  {
    ld_malloced,		/* Both are malloc'd.  */
    ld_mapped,			/* name is malloc'd, filedata mmap'd */
    ld_archive			/* Both point into mmap'd archive regions.  */
  } alloc;

  /* This provides a slot for category-specific code to cache data
     computed about this locale.  Type of the data pointed to:

     LC_CTYPE   struct lc_ctype_data (_nl_intern_locale_data)
     LC_TIME    struct lc_time_data (_nl_init_alt_digit, _nl_init_era_entries)

     This data is deallocated at the start of _nl_unload_locale.  */
  void *private;

  unsigned int usage_count;	/* Counter for users.  The special value
				   UNDELETABLE marks data that must never
				   be removed.  */

  int use_translit;		/* Nonzero if the mb*towc*() and wc*tomb()
				   functions should use transliteration.  */

  unsigned int nstrings;	/* Number of strings below.  */
  /* One item of locale data.  Which member is meaningful depends on
     the item; see the _NL_CURRENT, _NL_CURRENT_WSTR and
     _NL_CURRENT_WORD accessors.  */
  union locale_data_value
  {
    const uint32_t *wstr;
    const char *string;
    unsigned int word;		/* Note endian issues vs 64-bit pointers.  */
  }
  values __flexarr;	/* Items, usually pointers into `filedata'.
			   NOTE(review): __flexarr presumably expands to
			   the flexible array member declarator -- confirm.  */
};
84 | |
/* Alignment applied to 32-bit integers stored in locale files.  It
   covers values that are explicitly int32_t or uint32_t as well as
   wchar_t values, and it is used even on hosts whose own alignment
   requirement for such integers is smaller.  */
#define LOCFILE_ALIGN		(sizeof (int32_t))
/* Mask selecting the low bits that must be clear in an aligned value.  */
#define LOCFILE_ALIGN_MASK	(LOCFILE_ALIGN - 1)
/* Round X up to the next multiple of LOCFILE_ALIGN.  */
#define LOCFILE_ALIGN_UP(x)	(((x) + LOCFILE_ALIGN_MASK)	\
				 & ~LOCFILE_ALIGN_MASK)
/* Nonzero if X is a multiple of LOCFILE_ALIGN.  */
#define LOCFILE_ALIGNED_P(x)	(0 == ((x) & LOCFILE_ALIGN_MASK))
94 | |
/* We know three kinds of collation sorting rules: forward, backward
   and position.  The enumerators are numbered so that forward and
   backward occupy the two low bits and position the third bit; the
   combined enumerators are the bitwise OR of their parts and sort_mask
   covers all three bits.  The two illegal_* names fill the values that
   select neither or both directions.  */
enum coll_sort_rule
{
  illegal_0__ = 0,
  sort_forward = 1,
  sort_backward = 2,
  illegal_3__ = sort_forward | sort_backward,
  sort_position = 4,
  sort_forward_position = sort_forward | sort_position,
  sort_backward_position = sort_backward | sort_position,
  sort_mask = sort_forward | sort_backward | sort_position
};
107 | |
/* The types of the locale data entries fall into a few categories.
   The numbering is spelled out; it is the same sequence the
   enumerators receive implicitly when listed in this order.  */
enum value_type
{
  none = 0,
  string = 1,
  stringarray = 2,
  byte = 3,
  bytearray = 4,
  word = 5,
  stringlist = 6,
  wordarray = 7,
  wstring = 8,
  wstringarray = 9,
  wstringlist = 10
};
123 | |
124 | |
/* Definitions for `era' information from LC_TIME.
   ERA_NAME_FORMAT_MEMBERS is the number of name/format members, and
   the four ERA_* constants below are their indices 0..3.
   NOTE(review): the M and W infixes presumably stand for the
   multibyte and wide character variants, matching the
   era_name/era_format and era_wname/era_wformat members of
   `struct era_entry' -- confirm against the users.  */
#define ERA_NAME_FORMAT_MEMBERS 4
#define ERA_M_NAME   0
#define ERA_M_FORMAT 1
#define ERA_W_NAME   2
#define ERA_W_FORMAT 3
131 | |
132 | |
/* Structure to access `era' information from LC_TIME.  One object
   describes a single era.  */
struct era_entry
{
  uint32_t direction;		/* Contains '+' or '-'.  */
  int32_t offset;
  /* Start and end of the era, three numbers each.
     NOTE(review): presumably year, month and day, in that order --
     confirm against the code that fills these in.  */
  int32_t start_date[3];
  int32_t stop_date[3];
  /* Name and format of the era as narrow strings ...  */
  const char *era_name;
  const char *era_format;
  /* ... and the same as wide character strings.  */
  const wchar_t *era_wname;
  const wchar_t *era_wformat;
  int absolute_direction;
  /* absolute direction:
     +1 indicates that year number is higher in the future. (like A.D.)
     -1 indicates that year number is higher in the past. (like B.C.)  */
};
149 | |
/* Structure caching computed data about information from LC_TIME.
   The `private' member of `struct __locale_data' points to this for
   LC_TIME data.  */
struct lc_time_data
{
  /* Cached era entries and their count.
     NOTE(review): era_initialized is presumably nonzero once `eras'
     and `num_eras' have been computed -- confirm.  */
  struct era_entry *eras;
  size_t num_eras;
  int era_initialized;

  /* Cached alternative digit strings, narrow and wide, each with its
     own flag.
     NOTE(review): the flags presumably record whether the respective
     array has been set up yet -- confirm.  */
  const char **alt_digits;
  const wchar_t **walt_digits;
  int alt_digits_initialized;
  int walt_digits_initialized;
};
163 | |
/* Ancillary data for LC_CTYPE.  Co-allocated after struct
   __locale_data by _nl_intern_locale_data.  The
   outdigit_translation_needed member has type `bool', so this header
   needs <stdbool.h>.  */
struct lc_ctype_data
{
  /* See get_gconv_fcts and __wcsmbs_load_conv.  */
  const struct gconv_fcts *fcts;

  /* If false, outdigit just maps to the ASCII digits.  */
  bool outdigit_translation_needed;

  /* Cached multi-byte string lengths.  This could be added to the
     locale data itself if the format is changed (which impacts
     existing statically linked binaries).  */

  /* For the outdigit decimal digits (copied from LC_CTYPE).  One
     element per decimal digit 0-9.  */
  unsigned char outdigit_bytes[10];

  /* If all outdigit_bytes elements are equal, this is that value,
     otherwise it is 0.  */
  unsigned char outdigit_bytes_all_equal;
};
185 | |
/* LC_CTYPE specific:
   Hardwired indices for standard wide character translation mappings:
   index 0 is the toupper mapping and index 1 the tolower mapping.  */
enum
{
  __TOW_toupper = 0,
  __TOW_tolower = 1
};
193 | |
194 | |
/* LC_CTYPE specific:
   Test a single-byte character against a wide character class:
   _ISCTYPE (c, desc) = iswctype (btowc (c), desc).
   c must be an `unsigned char'.  desc must be a nonzero wctype_t.
   The result is bit (c & 31) of the 32-bit word at index (c >> 5) in
   the array that starts eight words before DESC.  */
#define _ISCTYPE(c, desc) \
  ((*(((const uint32_t *) (desc)) - 8 + ((c) >> 5)) >> ((c) & 31)) & 1)
201 | |
/* Category name handling variables.  Concatenate all the strings in a
   single object to minimize relocations.  Individual strings can be
   accessed using _nl_category_names_get.

   CATNAMEMF (__LINE__) goes through CATNAMEMF1 so that __LINE__ is
   expanded before the ## paste; the result is a member name of the
   form strNNN that differs for every source line.  */
#define CATNAMEMF(line) CATNAMEMF1 (line)
#define CATNAMEMF1(line) str##line
extern const struct catnamestr_t
{
  /* Each DEFINE_CATEGORY entry in categories.def contributes one char
     array member whose size is sizeof (category_name).
     NOTE(review): category_name is presumably a string literal, so the
     size includes the terminating null byte -- confirm.  */
#define DEFINE_CATEGORY(category, category_name, items, a) \
  char CATNAMEMF (__LINE__)[sizeof (category_name)];
#include "categories.def"
#undef DEFINE_CATEGORY
} _nl_category_names attribute_hidden;
/* Byte offset of each category's name within _nl_category_names, as
   used by _nl_category_names_get.  */
extern const uint8_t _nl_category_name_idxs[__LC_LAST] attribute_hidden;
/* NOTE(review): presumably the length of each category's name --
   confirm at the definition.  */
extern const uint8_t _nl_category_name_sizes[__LC_LAST] attribute_hidden;
216 | |
217 | /* Return the name of the category INDEX, which must be nonnegative |
218 | and less than _LC_LAST. */ |
219 | static inline const char * |
220 | _nl_category_names_get (int index) |
221 | { |
222 | return (const char *) &_nl_category_names + _nl_category_name_idxs[index]; |
223 | } |
224 | |
/* Names of the standard locales.  These are defined elsewhere and are
   only declared here.  */
extern const char _nl_C_name[] attribute_hidden;
extern const char _nl_POSIX_name[] attribute_hidden;

/* The standard codeset.  */
extern const char _nl_C_codeset[] attribute_hidden;
231 | |
/* This is the internal locale_t object that holds the global locale
   controlled by calls to setlocale.  A thread's TSD locale pointer
   points to this when `uselocale (LC_GLOBAL_LOCALE)' is in effect.
   Its __locales array is indexed by category.  */
extern struct __locale_struct _nl_global_locale attribute_hidden;
236 | |
/* This fetches the thread-local locale_t pointer, either one set with
   uselocale or &_nl_global_locale.  The macro may be defined ahead of
   the include below because its body is only expanded where the macro
   is used.
   NOTE(review): <libc-tsd.h> presumably supplies __libc_tsd_get and
   __libc_tsd_define -- confirm.  */
#define _NL_CURRENT_LOCALE	(__libc_tsd_get (locale_t, LOCALE))
#include <libc-tsd.h>
__libc_tsd_define (extern, locale_t, LOCALE)
242 | |
243 | |
/* For static linking it is desirable to avoid always linking in the code
   and data for every category when we can tell at link time that they are
   unused.  We can manage this playing some tricks with weak references.
   But with thread-local locale settings, it becomes quite ungainly unless
   we can use __thread variables.  So only in that case do we attempt this.
   NL_CURRENT_INDIRECT is defined whenever SHARED is not.  */
#ifndef SHARED
# include <tls.h>
# define NL_CURRENT_INDIRECT	1
#endif
253 | |
254 | #ifdef NL_CURRENT_INDIRECT |
255 | |
/* For each category declare the thread-local variable for the current
   locale data.  This has an extra indirection so it points at the
   __locales[CATEGORY] element in either _nl_global_locale or the current
   locale object set by uselocale, which points at the actual data.  The
   reason for having these variables is so that references to particular
   categories will link in the lc-CATEGORY.c module to define this symbol,
   and we arrange that linking that module is what brings in all the code
   associated with this category.

   One `_nl_current_CATEGORY' declaration is generated for every
   DEFINE_CATEGORY entry in categories.def; the temporary macro is
   removed again afterwards.  */
#define DEFINE_CATEGORY(category, category_name, items, a) \
extern __thread struct __locale_data *const *_nl_current_##category \
  attribute_hidden attribute_tls_model_ie;
#include "categories.def"
#undef DEFINE_CATEGORY
269 | |
/* Return a pointer to the current `struct __locale_data' for CATEGORY.
   In all four macros below CATEGORY is pasted onto `_nl_current_' with
   ##, so it must be the bare category name.
   NOTE(review): each accessor pastes the name itself instead of calling
   _NL_CURRENT_DATA, presumably because passing the argument on would
   macro-expand a name such as LC_TIME before the paste -- confirm
   before refactoring.  */
#define _NL_CURRENT_DATA(category)	(*_nl_current_##category)

/* Extract the current CATEGORY locale's string for ITEM.  */
#define _NL_CURRENT(category, item) \
  ((*_nl_current_##category)->values[_NL_ITEM_INDEX (item)].string)

/* Extract the current CATEGORY locale's wide string for ITEM.  The
   stored `const uint32_t *' is cast to `wchar_t *'.  */
#define _NL_CURRENT_WSTR(category, item) \
  ((wchar_t *) (*_nl_current_##category)->values[_NL_ITEM_INDEX (item)].wstr)

/* Extract the current CATEGORY locale's word for ITEM, converted to
   uint32_t.  */
#define _NL_CURRENT_WORD(category, item) \
  ((uint32_t) (*_nl_current_##category)->values[_NL_ITEM_INDEX (item)].word)
284 | |
/* This is used in lc-CATEGORY.c to define _nl_current_CATEGORY.  The
   variable is initialized to point at the CATEGORY element of
   _nl_global_locale.__locales.  The symbol _nl_current_CATEGORY_used
   is made global and set to a value unequal to zero to mark this
   category as used.  On S390 the relocation used to load the symbol
   address can only handle even addresses, hence the value 2.  */
#define _NL_CURRENT_DEFINE(category) \
  __thread struct __locale_data *const *_nl_current_##category \
    attribute_hidden = &_nl_global_locale.__locales[category]; \
  asm (".globl " __SYMBOL_PREFIX "_nl_current_" #category "_used\n" \
       _NL_CURRENT_DEFINE_ABS (_nl_current_##category##_used, 2));
/* Build the assembler text that gives SYM the absolute value VAL,
   using the `.set' directive when HAVE_ASM_SET_DIRECTIVE is defined
   and the `SYM = VAL' form otherwise.  This helper may follow its use
   above because it is only expanded where _NL_CURRENT_DEFINE itself
   is used.  */
#ifdef HAVE_ASM_SET_DIRECTIVE
# define _NL_CURRENT_DEFINE_ABS(sym, val) ".set " #sym ", " #val
#else
# define _NL_CURRENT_DEFINE_ABS(sym, val) #sym " = " #val
#endif
299 | |
300 | #else |
301 | |
/* All categories are always loaded in the shared library, so there is no
   point in having lots of separate symbols for linking.  The accessors
   below therefore index the __locales array of the current locale
   object directly.  */

/* Return a pointer to the current `struct __locale_data' for CATEGORY.  */
# define _NL_CURRENT_DATA(category) \
  (_NL_CURRENT_LOCALE->__locales[category])

/* Extract the current CATEGORY locale's string for ITEM.  */
# define _NL_CURRENT(category, item) \
  (_NL_CURRENT_DATA (category)->values[_NL_ITEM_INDEX (item)].string)

/* Extract the current CATEGORY locale's wide string for ITEM.  The
   stored `const uint32_t *' is cast to `wchar_t *'.  */
# define _NL_CURRENT_WSTR(category, item) \
  ((wchar_t *) _NL_CURRENT_DATA (category)->values[_NL_ITEM_INDEX (item)].wstr)

/* Extract the current CATEGORY locale's word for ITEM, converted to
   uint32_t.  */
# define _NL_CURRENT_WORD(category, item) \
  ((uint32_t) _NL_CURRENT_DATA (category)->values[_NL_ITEM_INDEX (item)].word)
320 | |
/* This is used in lc-CATEGORY.c to define _nl_current_CATEGORY.  In
   this configuration the macro expands to nothing; the comment on the
   continuation line is all its replacement text contains.  */
# define _NL_CURRENT_DEFINE(category) \
  /* No per-category variable here.  */
324 | |
325 | #endif |
326 | |
327 | /* Extract CATEGORY locale's string for ITEM. */ |
328 | static inline const char * |
329 | _nl_lookup (locale_t l, int category, int item) |
330 | { |
331 | return l->__locales[category]->values[_NL_ITEM_INDEX (item)].string; |
332 | } |
333 | |
334 | /* Extract CATEGORY locale's wide string for ITEM. */ |
335 | static inline const wchar_t * |
336 | _nl_lookup_wstr (locale_t l, int category, int item) |
337 | { |
338 | return (wchar_t *) l->__locales[category] |
339 | ->values[_NL_ITEM_INDEX (item)].wstr; |
340 | } |
341 | |
342 | /* Extract the CATEGORY locale's word for ITEM. */ |
343 | static inline uint32_t |
344 | _nl_lookup_word (locale_t l, int category, int item) |
345 | { |
346 | return l->__locales[category]->values[_NL_ITEM_INDEX (item)].word; |
347 | } |
348 | |
/* Default search path used when the LOCPATH environment variable is
   not set.  */
extern const char _nl_default_locale_path[] attribute_hidden;
351 | |
/* Load the locale data for CATEGORY from the file specified by *NAME.
   If *NAME is "", use environment variables as specified by POSIX, and
   fill in *NAME with the actual name used.  If LOCALE_PATH is not null,
   those directories are searched for the locale files.  If it's null,
   the locale archive is checked first and then _nl_default_locale_path
   is searched for locale files.
   NOTE(review): LOCALE_PATH_LEN is presumably the length of
   LOCALE_PATH; the result on failure is not stated here -- confirm at
   the definition.  */
extern struct __locale_data *_nl_find_locale (const char *locale_path,
					      size_t locale_path_len,
					      int category, const char **name)
     attribute_hidden;
362 | |
/* Try to load the file described by FILE as data for CATEGORY.  There
   is no return value.
   NOTE(review): the outcome is presumably recorded in *FILE -- confirm
   at the definition.  */
extern void _nl_load_locale (struct loaded_l10nfile *file, int category)
     attribute_hidden;
366 | |
/* Free all resources held by LOCALE, which is data for CATEGORY.  */
extern void _nl_unload_locale (int category, struct __locale_data *locale)
     attribute_hidden;
370 | |
/* Free the locale and give back all memory if the usage count is one.
   NOTE(review): the first parameter is named `locale' but has type
   int; it presumably is the category of DATA, as in the neighbouring
   declarations -- confirm at the definition.  */
extern void _nl_remove_locale (int locale, struct __locale_data *data)
     attribute_hidden;
374 | |
/* Find the locale *NAMEP in the locale archive, and return the
   internalized data structure for its CATEGORY data.  If this locale has
   already been loaded from the archive, just return the existing data
   structure.  If successful, set *NAMEP to point directly into the mapped
   archive string table; that way, the next call can short-circuit strcmp.  */
extern struct __locale_data *_nl_load_locale_from_archive (int category,
							   const char **namep)
     attribute_hidden;
383 | |
/* Subroutine of setlocale's resource freeing.  Takes no arguments and
   returns nothing.  */
extern void _nl_archive_subfreeres (void) attribute_hidden;
386 | |
/* Subroutine of gconv-db's resource freeing.  Takes no arguments and
   returns nothing.  */
extern void _nl_locale_subfreeres (void) attribute_hidden;
389 | |
/* Validate the contents of a locale file and set up the in-core
   data structure to point into the data.  DATA is the file contents
   for CATEGORY and DATASIZE its size.  This leaves the `alloc'
   and `name' fields uninitialized, for the caller to fill in.
   If any bogons are detected in the data, this will refuse to
   intern it, and return a null pointer instead.  */
extern struct __locale_data *_nl_intern_locale_data (int category,
						     const void *data,
						     size_t datasize)
     attribute_hidden;
399 | |
400 | |
/* Return the `era' entry which corresponds to TP, looked up in the
   LC_TIME data LC_TIME.  Used in strftime.  */
extern struct era_entry *_nl_get_era_entry (const struct tm *tp,
					    struct __locale_data *lc_time)
     attribute_hidden;
405 | |
/* Return the CNT'th `era' entry of the LC_TIME data LC_TIME.  Used in
   strptime.  */
extern struct era_entry *_nl_select_era_entry (int cnt,
					       struct __locale_data *lc_time)
	  attribute_hidden;
410 | |
/* Return the `alt_digit' string which corresponds to NUMBER in the
   LC_TIME data LC_TIME.  Used in strftime.  */
extern const char *_nl_get_alt_digit (unsigned int number,
				      struct __locale_data *lc_time)
	  attribute_hidden;
415 | |
/* Similar to _nl_get_alt_digit, but for wide characters: the result
   is a wide character string.  */
extern const wchar_t *_nl_get_walt_digit (unsigned int number,
					  struct __locale_data *lc_time)
     attribute_hidden;
420 | |
/* Parse the string at *STRP as an alternative digit of the LC_TIME
   data LC_TIME and return its numeric value.
   NOTE(review): *STRP is passed by reference, presumably so it can be
   advanced past the parsed text; the result for unparsable input is
   not stated here -- confirm at the definition.  */
extern int _nl_parse_alt_digit (const char **strp,
				struct __locale_data *lc_time)
     attribute_hidden;
425 | |
/* Postload processing for LC_CTYPE.
   NOTE(review): unlike the neighbouring declarations this one carries
   no attribute_hidden; confirm whether that is intentional before
   changing it.  */
extern void _nl_postload_ctype (void);
428 | |
/* Deallocate category-specific data, one function each for LC_CTYPE
   and LC_TIME.  Used in _nl_unload_locale.  */
extern void _nl_cleanup_ctype (struct __locale_data *) attribute_hidden;
extern void _nl_cleanup_time (struct __locale_data *) attribute_hidden;
432 | |
433 | |
434 | #endif /* localeinfo.h */ |
435 | |