/* Copyright (C) 1996-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>. */
16 | |
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

#include <argp.h>
#include <assert.h>
#include <ctype.h>
#include <endian.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <iconv.h>
#include <langinfo.h>
#include <locale.h>
#include <libintl.h>
#include <limits.h>
#include <nl_types.h>
#include <obstack.h>
#include <scratch_buffer.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <unistd_ext.h>
#include <wchar.h>

#include "version.h"

#include "catgetsinfo.h"
47 | |
48 | |
/* Reverse the byte order of the 32-bit value W.  The operand is
   converted to uint32_t first, so a signed or wider argument neither
   shifts into a sign bit nor leaks high-order bits into the result.
   W is evaluated several times; do not pass an expression with side
   effects.  */
#define SWAPU32(w) \
  ((((uint32_t) (w)) << 24) | ((((uint32_t) (w)) & 0xff00) << 8)             \
   | ((((uint32_t) (w)) >> 8) & 0xff00) | (((uint32_t) (w)) >> 24))
51 | |
/* One message of a message set.  The messages of a set form a singly
   linked list which is kept sorted by NUMBER.  */
struct message_list
{
  /* Message number inside its set.  */
  int number;
  /* Text of the message.  */
  const char *message;

  /* File name and line where the message was defined; used for
     diagnostics about duplicate definitions.  */
  const char *fname;
  size_t line;
  /* Symbolic name of the message, or NULL if it was given a number.  */
  const char *symbol;

  /* Next message of the same set.  */
  struct message_list *next;
};
63 | |
64 | |
/* One message set of a catalog.  */
struct set_list
{
  /* Set number.  */
  int number;
  /* Non-zero once the set was named in a `$delset' directive.  */
  int deleted;
  /* Messages of this set.  */
  struct message_list *messages;
  /* Highest message number handed out in this set so far; symbolic
     messages receive LAST_MESSAGE + 1.  */
  int last_message;

  /* File name and line of the `$set' directive, for diagnostics.  */
  const char *fname;
  size_t line;
  /* Symbolic name of the set, or NULL if it was given a number.  */
  const char *symbol;

  /* Next set of the catalog.  */
  struct set_list *next;
};
78 | |
79 | |
/* Everything collected from the input files read so far.  */
struct catalog
{
  /* All message sets seen so far.  */
  struct set_list *all_sets;
  /* Set which receives the messages that follow in the input.  */
  struct set_list *current_set;
  /* Counter incremented for every message line processed.  */
  size_t total_messages;
  /* Quote character selected with `$quote'; zero means none.  */
  wint_t quote_char;
  /* Highest set number used so far; symbolic sets receive
     LAST_SET + 1.  */
  int last_set;

  /* Storage for the input lines and the converted message texts.  */
  struct obstack mem_pool;
};
90 | |
91 | |
/* If non-zero force creation of new file, not using existing one.  */
static int force_new;

/* Name of output file.  */
static const char *output_name;

/* Name of generated C header file.  NULL means that no header file
   is written.  */
static const char *header_name;
100 | |
/* Name and version of program.  The hook is the variable argp
   consults to print the version information; it is pointed at
   print_version here.  */
static void print_version (FILE *stream, struct argp_state *state);
void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
104 | |
105 | #define OPT_NEW 1 |
106 | |
107 | /* Definitions of arguments for argp functions. */ |
108 | static const struct argp_option options[] = |
109 | { |
110 | { "header" , 'H', N_("NAME" ), 0, |
111 | N_("Create C header file NAME containing symbol definitions" ) }, |
112 | { "new" , OPT_NEW, NULL, 0, |
113 | N_("Do not use existing catalog, force new output file" ) }, |
114 | { "output" , 'o', N_("NAME" ), 0, N_("Write output to file NAME" ) }, |
115 | { NULL, 0, NULL, 0, NULL } |
116 | }; |
117 | |
118 | /* Short description of program. */ |
119 | static const char doc[] = N_("Generate message catalog.\ |
120 | \vIf INPUT-FILE is -, input is read from standard input. If OUTPUT-FILE\n\ |
121 | is -, output is written to standard output.\n" ); |
122 | |
123 | /* Strings for arguments in help texts. */ |
124 | static const char args_doc[] = N_("\ |
125 | -o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]" ); |
126 | |
127 | /* Prototype for option handler. */ |
128 | static error_t parse_opt (int key, char *arg, struct argp_state *state); |
129 | |
130 | /* Function to print some extra text in the help message. */ |
131 | static char *more_help (int key, const char *text, void *input); |
132 | |
133 | /* Data structure to communicate with argp functions. */ |
134 | static struct argp argp = |
135 | { |
136 | options, parse_opt, args_doc, doc, NULL, more_help |
137 | }; |
138 | |
139 | |
140 | /* Wrapper functions with error checking for standard functions. */ |
141 | #include <programs/xmalloc.h> |
142 | |
143 | /* Prototypes for local functions. */ |
144 | static void error_print (void); |
145 | static struct catalog *read_input_file (struct catalog *current, |
146 | const char *fname); |
147 | static void write_out (struct catalog *result, const char *output_name, |
148 | const char *); |
149 | static struct set_list *find_set (struct catalog *current, int number); |
150 | static void normalize_line (const char *fname, size_t line, iconv_t cd, |
151 | wchar_t *string, wchar_t quote_char, |
152 | wchar_t escape_char); |
153 | static void read_old (struct catalog *catalog, const char *file_name); |
154 | static int open_conversion (const char *codesetp, iconv_t *cd_towcp, |
155 | iconv_t *cd_tombp, wchar_t *escape_charp); |
156 | |
157 | |
158 | int |
159 | main (int argc, char *argv[]) |
160 | { |
161 | struct catalog *result; |
162 | int remaining; |
163 | |
164 | /* Set program name for messages. */ |
165 | error_print_progname = error_print; |
166 | |
167 | /* Set locale via LC_ALL. */ |
168 | setlocale (LC_ALL, locale: "" ); |
169 | |
170 | /* Set the text message domain. */ |
171 | textdomain (PACKAGE); |
172 | |
173 | /* Initialize local variables. */ |
174 | result = NULL; |
175 | |
176 | /* Parse and process arguments. */ |
177 | argp_parse (argp: &argp, argc: argc, argv: argv, flags: 0, arg_index: &remaining, NULL); |
178 | |
179 | /* Determine output file. */ |
180 | if (output_name == NULL) |
181 | output_name = remaining < argc ? argv[remaining++] : "-" ; |
182 | |
183 | /* Process all input files. */ |
184 | setlocale (LC_CTYPE, locale: "C" ); |
185 | if (remaining < argc) |
186 | do |
187 | result = read_input_file (current: result, fname: argv[remaining]); |
188 | while (++remaining < argc); |
189 | else |
190 | result = read_input_file (NULL, fname: "-" ); |
191 | |
192 | /* Write out the result. */ |
193 | if (result != NULL) |
194 | write_out (result, output_name, header_name); |
195 | |
196 | return error_message_count != 0; |
197 | } |
198 | |
199 | |
200 | /* Handle program arguments. */ |
201 | static error_t |
202 | parse_opt (int key, char *arg, struct argp_state *state) |
203 | { |
204 | switch (key) |
205 | { |
206 | case 'H': |
207 | header_name = arg; |
208 | break; |
209 | case OPT_NEW: |
210 | force_new = 1; |
211 | break; |
212 | case 'o': |
213 | output_name = arg; |
214 | break; |
215 | default: |
216 | return ARGP_ERR_UNKNOWN; |
217 | } |
218 | return 0; |
219 | } |
220 | |
221 | |
222 | static char * |
223 | more_help (int key, const char *text, void *input) |
224 | { |
225 | char *tp = NULL; |
226 | switch (key) |
227 | { |
228 | case ARGP_KEY_HELP_EXTRA: |
229 | /* We print some extra information. */ |
230 | if (asprintf (ptr: &tp, gettext ("\ |
231 | For bug reporting instructions, please see:\n\ |
232 | %s.\n" ), REPORT_BUGS_TO) < 0) |
233 | return NULL; |
234 | return tp; |
235 | default: |
236 | break; |
237 | } |
238 | return (char *) text; |
239 | } |
240 | |
241 | /* Print the version information. */ |
242 | static void |
243 | print_version (FILE *stream, struct argp_state *state) |
244 | { |
245 | fprintf (stream: stream, format: "gencat %s%s\n" , PKGVERSION, VERSION); |
246 | fprintf (stream: stream, gettext ("\ |
247 | Copyright (C) %s Free Software Foundation, Inc.\n\ |
248 | This is free software; see the source for copying conditions. There is NO\n\ |
249 | warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ |
250 | " ), "2024" ); |
251 | fprintf (stream: stream, gettext ("Written by %s.\n" ), "Ulrich Drepper" ); |
252 | } |
253 | |
254 | |
/* The address of this function will be assigned to the hook in the
   error functions.  It deliberately prints nothing.  */
static void
error_print (void)
{
  /* We don't want the program name to be printed in messages.  Emacs'
     compile.el does not like this.  */
}
263 | |
264 | |
265 | static struct catalog * |
266 | read_input_file (struct catalog *current, const char *fname) |
267 | { |
268 | FILE *fp; |
269 | char *buf; |
270 | size_t len; |
271 | size_t line_number; |
272 | wchar_t *wbuf; |
273 | size_t wbufsize; |
274 | iconv_t cd_towc = (iconv_t) -1; |
275 | iconv_t cd_tomb = (iconv_t) -1; |
276 | wchar_t escape_char = L'\\'; |
277 | char *codeset = NULL; |
278 | |
279 | if (strcmp (s1: fname, s2: "-" ) == 0 || strcmp (s1: fname, s2: "/dev/stdin" ) == 0) |
280 | { |
281 | fp = stdin; |
282 | fname = gettext ("*standard input*" ); |
283 | } |
284 | else |
285 | fp = fopen (filename: fname, modes: "r" ); |
286 | if (fp == NULL) |
287 | { |
288 | error (status: 0, errno, gettext ("cannot open input file `%s'" ), fname); |
289 | return current; |
290 | } |
291 | |
292 | /* If we haven't seen anything yet, allocate result structure. */ |
293 | if (current == NULL) |
294 | { |
295 | current = (struct catalog *) xcalloc (n: 1, s: sizeof (*current)); |
296 | |
297 | #define obstack_chunk_alloc malloc |
298 | #define obstack_chunk_free free |
299 | obstack_init (¤t->mem_pool); |
300 | |
301 | current->current_set = find_set (current, NL_SETD); |
302 | } |
303 | |
304 | buf = NULL; |
305 | len = 0; |
306 | line_number = 0; |
307 | |
308 | wbufsize = 1024; |
309 | wbuf = (wchar_t *) xmalloc (n: wbufsize); |
310 | |
311 | while (!feof (stream: fp)) |
312 | { |
313 | int continued; |
314 | int used; |
315 | size_t start_line = line_number + 1; |
316 | char *this_line; |
317 | |
318 | do |
319 | { |
320 | int act_len; |
321 | |
322 | act_len = getline (lineptr: &buf, n: &len, stream: fp); |
323 | if (act_len <= 0) |
324 | break; |
325 | ++line_number; |
326 | |
327 | /* It the line continued? */ |
328 | continued = 0; |
329 | if (buf[act_len - 1] == '\n') |
330 | { |
331 | --act_len; |
332 | |
333 | /* There might be more than one backslash at the end of |
334 | the line. Only if there is an odd number of them is |
335 | the line continued. */ |
336 | if (act_len > 0 && buf[act_len - 1] == '\\') |
337 | { |
338 | int temp_act_len = act_len; |
339 | |
340 | do |
341 | { |
342 | --temp_act_len; |
343 | continued = !continued; |
344 | } |
345 | while (temp_act_len > 0 && buf[temp_act_len - 1] == '\\'); |
346 | |
347 | if (continued) |
348 | --act_len; |
349 | } |
350 | } |
351 | |
352 | /* Append to currently selected line. */ |
353 | obstack_grow (¤t->mem_pool, buf, act_len); |
354 | } |
355 | while (continued); |
356 | |
357 | obstack_1grow (¤t->mem_pool, '\0'); |
358 | this_line = (char *) obstack_finish (¤t->mem_pool); |
359 | |
360 | used = 0; |
361 | if (this_line[0] == '$') |
362 | { |
363 | if (isblank (this_line[1])) |
364 | { |
365 | int cnt = 1; |
366 | while (isblank (this_line[cnt])) |
367 | ++cnt; |
368 | if (strncmp (s1: &this_line[cnt], s2: "codeset=" , n: 8) != 0) |
369 | /* This is a comment line. Do nothing. */; |
370 | else if (codeset != NULL) |
371 | /* Ignore multiple codeset. */; |
372 | else |
373 | { |
374 | int start = cnt + 8; |
375 | cnt = start; |
376 | while (this_line[cnt] != '\0' && !isspace (this_line[cnt])) |
377 | ++cnt; |
378 | if (cnt != start) |
379 | { |
380 | int len = cnt - start; |
381 | codeset = xmalloc (n: len + 1); |
382 | *((char *) mempcpy (codeset, &this_line[start], len)) |
383 | = '\0'; |
384 | } |
385 | } |
386 | } |
387 | else if (strncmp (s1: &this_line[1], s2: "set" , n: 3) == 0) |
388 | { |
389 | int cnt = sizeof ("set" ); |
390 | int set_number; |
391 | const char *symbol = NULL; |
392 | while (isspace (this_line[cnt])) |
393 | ++cnt; |
394 | |
395 | if (isdigit (this_line[cnt])) |
396 | { |
397 | set_number = atol (nptr: &this_line[cnt]); |
398 | |
399 | /* If the given number for the character set is |
400 | higher than any we used for symbolic set names |
401 | avoid clashing by using only higher numbers for |
402 | the following symbolic definitions. */ |
403 | if (set_number > current->last_set) |
404 | current->last_set = set_number; |
405 | } |
406 | else |
407 | { |
408 | /* See whether it is a reasonable identifier. */ |
409 | int start = cnt; |
410 | while (isalnum (this_line[cnt]) || this_line[cnt] == '_') |
411 | ++cnt; |
412 | |
413 | if (cnt == start) |
414 | { |
415 | /* No correct character found. */ |
416 | error_at_line (status: 0, errnum: 0, fname: fname, lineno: start_line, |
417 | gettext ("illegal set number" )); |
418 | set_number = 0; |
419 | } |
420 | else |
421 | { |
422 | /* We have found seomthing that looks like a |
423 | correct identifier. */ |
424 | struct set_list *runp; |
425 | |
426 | this_line[cnt] = '\0'; |
427 | used = 1; |
428 | symbol = &this_line[start]; |
429 | |
430 | /* Test whether the identifier was already used. */ |
431 | runp = current->all_sets; |
432 | while (runp != 0) |
433 | if (runp->symbol != NULL |
434 | && strcmp (s1: runp->symbol, s2: symbol) == 0) |
435 | break; |
436 | else |
437 | runp = runp->next; |
438 | |
439 | if (runp != NULL) |
440 | { |
441 | /* We cannot allow duplicate identifiers for |
442 | message sets. */ |
443 | error_at_line (status: 0, errnum: 0, fname: fname, lineno: start_line, |
444 | gettext ("duplicate set definition" )); |
445 | error_at_line (status: 0, errnum: 0, fname: runp->fname, lineno: runp->line, |
446 | gettext ("\ |
447 | this is the first definition" )); |
448 | set_number = 0; |
449 | } |
450 | else |
451 | /* Allocate next free message set for identifier. */ |
452 | set_number = ++current->last_set; |
453 | } |
454 | } |
455 | |
456 | if (set_number != 0) |
457 | { |
458 | /* We found a legal set number. */ |
459 | current->current_set = find_set (current, number: set_number); |
460 | if (symbol != NULL) |
461 | used = 1; |
462 | current->current_set->symbol = symbol; |
463 | current->current_set->fname = fname; |
464 | current->current_set->line = start_line; |
465 | } |
466 | } |
467 | else if (strncmp (s1: &this_line[1], s2: "delset" , n: 6) == 0) |
468 | { |
469 | int cnt = sizeof ("delset" ); |
470 | while (isspace (this_line[cnt])) |
471 | ++cnt; |
472 | |
473 | if (isdigit (this_line[cnt])) |
474 | { |
475 | size_t set_number = atol (nptr: &this_line[cnt]); |
476 | struct set_list *set; |
477 | |
478 | /* Mark the message set with the given number as |
479 | deleted. */ |
480 | set = find_set (current, number: set_number); |
481 | set->deleted = 1; |
482 | } |
483 | else |
484 | { |
485 | /* See whether it is a reasonable identifier. */ |
486 | int start = cnt; |
487 | while (isalnum (this_line[cnt]) || this_line[cnt] == '_') |
488 | ++cnt; |
489 | |
490 | if (cnt == start) |
491 | error_at_line (status: 0, errnum: 0, fname: fname, lineno: start_line, |
492 | gettext ("illegal set number" )); |
493 | else |
494 | { |
495 | const char *symbol; |
496 | struct set_list *runp; |
497 | |
498 | this_line[cnt] = '\0'; |
499 | used = 1; |
500 | symbol = &this_line[start]; |
501 | |
502 | /* We have a symbolic set name. This name must |
503 | appear somewhere else in the catalogs read so |
504 | far. */ |
505 | for (runp = current->all_sets; runp != NULL; |
506 | runp = runp->next) |
507 | { |
508 | if (strcmp (s1: runp->symbol, s2: symbol) == 0) |
509 | { |
510 | runp->deleted = 1; |
511 | break; |
512 | } |
513 | } |
514 | if (runp == NULL) |
515 | /* Name does not exist before. */ |
516 | error_at_line (status: 0, errnum: 0, fname: fname, lineno: start_line, |
517 | gettext ("unknown set `%s'" ), symbol); |
518 | } |
519 | } |
520 | } |
521 | else if (strncmp (s1: &this_line[1], s2: "quote" , n: 5) == 0) |
522 | { |
523 | char buf[2]; |
524 | char *bufptr; |
525 | size_t buflen; |
526 | char *wbufptr; |
527 | size_t wbuflen; |
528 | int cnt; |
529 | |
530 | cnt = sizeof ("quote" ); |
531 | while (isspace (this_line[cnt])) |
532 | ++cnt; |
533 | |
534 | /* We need the conversion. */ |
535 | if (cd_towc == (iconv_t) -1 |
536 | && open_conversion (codesetp: codeset, cd_towcp: &cd_towc, cd_tombp: &cd_tomb, |
537 | escape_charp: &escape_char) != 0) |
538 | /* Something is wrong. */ |
539 | goto out; |
540 | |
541 | /* Yes, the quote char can be '\0'; this means no quote |
542 | char. The function using the information works on |
543 | wide characters so we have to convert it here. */ |
544 | buf[0] = this_line[cnt]; |
545 | buf[1] = '\0'; |
546 | bufptr = buf; |
547 | buflen = 2; |
548 | |
549 | wbufptr = (char *) wbuf; |
550 | wbuflen = wbufsize; |
551 | |
552 | /* Flush the state. */ |
553 | iconv (cd: cd_towc, NULL, NULL, NULL, NULL); |
554 | |
555 | iconv (cd: cd_towc, inbuf: &bufptr, inbytesleft: &buflen, outbuf: &wbufptr, outbytesleft: &wbuflen); |
556 | if (buflen != 0 || (wchar_t *) wbufptr != &wbuf[2]) |
557 | error_at_line (status: 0, errnum: 0, fname: fname, lineno: start_line, |
558 | gettext ("invalid quote character" )); |
559 | else |
560 | /* Use the converted wide character. */ |
561 | current->quote_char = wbuf[0]; |
562 | } |
563 | else |
564 | { |
565 | int cnt; |
566 | cnt = 2; |
567 | while (this_line[cnt] != '\0' && !isspace (this_line[cnt])) |
568 | ++cnt; |
569 | this_line[cnt] = '\0'; |
570 | error_at_line (status: 0, errnum: 0, fname: fname, lineno: start_line, |
571 | gettext ("unknown directive `%s': line ignored" ), |
572 | &this_line[1]); |
573 | } |
574 | } |
575 | else if (isalnum (this_line[0]) || this_line[0] == '_') |
576 | { |
577 | const char *ident = this_line; |
578 | char *line = this_line; |
579 | int message_number; |
580 | |
581 | do |
582 | ++line; |
583 | while (line[0] != '\0' && !isspace (line[0])); |
584 | if (line[0] != '\0') |
585 | *line++ = '\0'; /* Terminate the identifier. */ |
586 | |
587 | /* Now we found the beginning of the message itself. */ |
588 | |
589 | if (isdigit (ident[0])) |
590 | { |
591 | struct message_list *runp; |
592 | struct message_list *lastp; |
593 | |
594 | message_number = atoi (ident); |
595 | |
596 | /* Find location to insert the new message. */ |
597 | runp = current->current_set->messages; |
598 | lastp = NULL; |
599 | while (runp != NULL) |
600 | if (runp->number == message_number) |
601 | break; |
602 | else |
603 | { |
604 | lastp = runp; |
605 | runp = runp->next; |
606 | } |
607 | if (runp != NULL) |
608 | { |
609 | /* Oh, oh. There is already a message with this |
610 | number in the message set. */ |
611 | if (runp->symbol == NULL) |
612 | { |
613 | /* The existing message had its number specified |
614 | by the user. Fatal collision type uh, oh. */ |
615 | error_at_line (status: 0, errnum: 0, fname: fname, lineno: start_line, |
616 | gettext ("duplicated message number" )); |
617 | error_at_line (status: 0, errnum: 0, fname: runp->fname, lineno: runp->line, |
618 | gettext ("this is the first definition" )); |
619 | message_number = 0; |
620 | } |
621 | else |
622 | { |
623 | /* Collision was with number auto-assigned to a |
624 | symbolic. Change existing symbolic number |
625 | and move to end the list (if not already there). */ |
626 | runp->number = ++current->current_set->last_message; |
627 | |
628 | if (runp->next != NULL) |
629 | { |
630 | struct message_list *endp; |
631 | |
632 | if (lastp == NULL) |
633 | current->current_set->messages=runp->next; |
634 | else |
635 | lastp->next=runp->next; |
636 | |
637 | endp = runp->next; |
638 | while (endp->next != NULL) |
639 | endp = endp->next; |
640 | |
641 | endp->next = runp; |
642 | runp->next = NULL; |
643 | } |
644 | } |
645 | } |
646 | ident = NULL; /* We don't have a symbol. */ |
647 | |
648 | if (message_number != 0 |
649 | && message_number > current->current_set->last_message) |
650 | current->current_set->last_message = message_number; |
651 | } |
652 | else if (ident[0] != '\0') |
653 | { |
654 | struct message_list *runp; |
655 | |
656 | /* Test whether the symbolic name was not used for |
657 | another message in this message set. */ |
658 | runp = current->current_set->messages; |
659 | while (runp != NULL) |
660 | if (runp->symbol != NULL && strcmp (s1: ident, s2: runp->symbol) == 0) |
661 | break; |
662 | else |
663 | runp = runp->next; |
664 | if (runp != NULL) |
665 | { |
666 | /* The name is already used. */ |
667 | error_at_line (status: 0, errnum: 0, fname: fname, lineno: start_line, gettext ("\ |
668 | duplicated message identifier" )); |
669 | error_at_line (status: 0, errnum: 0, fname: runp->fname, lineno: runp->line, |
670 | gettext ("this is the first definition" )); |
671 | message_number = 0; |
672 | } |
673 | else |
674 | /* Give the message the next unused number. */ |
675 | message_number = ++current->current_set->last_message; |
676 | } |
677 | else |
678 | message_number = 0; |
679 | |
680 | if (message_number != 0) |
681 | { |
682 | char *inbuf; |
683 | size_t inlen; |
684 | char *outbuf; |
685 | size_t outlen; |
686 | struct message_list *newp; |
687 | size_t line_len = strlen (s: line) + 1; |
688 | size_t ident_len = 0; |
689 | |
690 | /* We need the conversion. */ |
691 | if (cd_towc == (iconv_t) -1 |
692 | && open_conversion (codesetp: codeset, cd_towcp: &cd_towc, cd_tombp: &cd_tomb, |
693 | escape_charp: &escape_char) != 0) |
694 | /* Something is wrong. */ |
695 | goto out; |
696 | |
697 | /* Convert to a wide character string. We have to |
698 | interpret escape sequences which will be impossible |
699 | without doing the conversion if the codeset of the |
700 | message is stateful. */ |
701 | while (1) |
702 | { |
703 | inbuf = line; |
704 | inlen = line_len; |
705 | outbuf = (char *) wbuf; |
706 | outlen = wbufsize; |
707 | |
708 | /* Flush the state. */ |
709 | iconv (cd: cd_towc, NULL, NULL, NULL, NULL); |
710 | |
711 | iconv (cd: cd_towc, inbuf: &inbuf, inbytesleft: &inlen, outbuf: &outbuf, outbytesleft: &outlen); |
712 | if (inlen == 0) |
713 | { |
714 | /* The string is converted. */ |
715 | assert (outlen < wbufsize); |
716 | assert (wbuf[(wbufsize - outlen) / sizeof (wchar_t) - 1] |
717 | == L'\0'); |
718 | break; |
719 | } |
720 | |
721 | if (outlen != 0) |
722 | { |
723 | /* Something is wrong with this string, we ignore it. */ |
724 | error_at_line (status: 0, errnum: 0, fname: fname, lineno: start_line, gettext ("\ |
725 | invalid character: message ignored" )); |
726 | goto ignore; |
727 | } |
728 | |
729 | /* The output buffer is too small. */ |
730 | wbufsize *= 2; |
731 | wbuf = (wchar_t *) xrealloc (o: wbuf, n: wbufsize); |
732 | } |
733 | |
734 | /* Strip quote characters, change escape sequences into |
735 | correct characters etc. */ |
736 | normalize_line (fname, line: start_line, cd: cd_towc, string: wbuf, |
737 | quote_char: current->quote_char, escape_char); |
738 | |
739 | if (ident) |
740 | ident_len = line - this_line; |
741 | |
742 | /* Now the string is free of escape sequences. Convert it |
743 | back into a multibyte character string. First free the |
744 | memory allocated for the original string. */ |
745 | obstack_free (¤t->mem_pool, this_line); |
746 | |
747 | used = 1; /* Yes, we use the line. */ |
748 | |
749 | /* Now fill in the new string. It should never happen that |
750 | the replaced string is longer than the original. */ |
751 | inbuf = (char *) wbuf; |
752 | inlen = (wcslen (s: wbuf) + 1) * sizeof (wchar_t); |
753 | |
754 | outlen = obstack_room (¤t->mem_pool); |
755 | obstack_blank (¤t->mem_pool, outlen); |
756 | this_line = (char *) obstack_base (¤t->mem_pool); |
757 | outbuf = this_line + ident_len; |
758 | outlen -= ident_len; |
759 | |
760 | /* Flush the state. */ |
761 | iconv (cd: cd_tomb, NULL, NULL, NULL, NULL); |
762 | |
763 | iconv (cd: cd_tomb, inbuf: &inbuf, inbytesleft: &inlen, outbuf: &outbuf, outbytesleft: &outlen); |
764 | if (inlen != 0) |
765 | { |
766 | error_at_line (status: 0, errnum: 0, fname: fname, lineno: start_line, |
767 | gettext ("invalid line" )); |
768 | goto ignore; |
769 | } |
770 | assert (outbuf[-1] == '\0'); |
771 | |
772 | /* Free the memory in the obstack we don't use. */ |
773 | obstack_blank (¤t->mem_pool, -(int) outlen); |
774 | line = obstack_finish (¤t->mem_pool); |
775 | |
776 | newp = (struct message_list *) xmalloc (n: sizeof (*newp)); |
777 | newp->number = message_number; |
778 | newp->message = line + ident_len; |
779 | /* Remember symbolic name; is NULL if no is given. */ |
780 | newp->symbol = ident ? line : NULL; |
781 | /* Remember where we found the character. */ |
782 | newp->fname = fname; |
783 | newp->line = start_line; |
784 | |
785 | /* Find place to insert to message. We keep them in a |
786 | sorted single linked list. */ |
787 | if (current->current_set->messages == NULL |
788 | || current->current_set->messages->number > message_number) |
789 | { |
790 | newp->next = current->current_set->messages; |
791 | current->current_set->messages = newp; |
792 | } |
793 | else |
794 | { |
795 | struct message_list *runp; |
796 | runp = current->current_set->messages; |
797 | while (runp->next != NULL) |
798 | if (runp->next->number > message_number) |
799 | break; |
800 | else |
801 | runp = runp->next; |
802 | newp->next = runp->next; |
803 | runp->next = newp; |
804 | } |
805 | } |
806 | ++current->total_messages; |
807 | } |
808 | else |
809 | { |
810 | size_t cnt; |
811 | |
812 | cnt = 0; |
813 | /* See whether we have any non-white space character in this |
814 | line. */ |
815 | while (this_line[cnt] != '\0' && isspace (this_line[cnt])) |
816 | ++cnt; |
817 | |
818 | if (this_line[cnt] != '\0') |
819 | /* Yes, some unknown characters found. */ |
820 | error_at_line (status: 0, errnum: 0, fname: fname, lineno: start_line, |
821 | gettext ("malformed line ignored" )); |
822 | } |
823 | |
824 | ignore: |
825 | /* We can save the memory for the line if it was not used. */ |
826 | if (!used) |
827 | obstack_free (¤t->mem_pool, this_line); |
828 | } |
829 | |
830 | /* Close the conversion modules. */ |
831 | iconv_close (cd: cd_towc); |
832 | iconv_close (cd: cd_tomb); |
833 | free (ptr: codeset); |
834 | |
835 | out: |
836 | free (ptr: wbuf); |
837 | |
838 | if (fp != stdin) |
839 | fclose (stream: fp); |
840 | return current; |
841 | } |
842 | |
843 | static void |
844 | write_out (struct catalog *catalog, const char *output_name, |
845 | const char *) |
846 | { |
847 | /* Computing the "optimal" size. */ |
848 | struct set_list *set_run; |
849 | size_t best_total, best_size, best_depth; |
850 | size_t act_size, act_depth; |
851 | struct catalog_obj obj; |
852 | struct obstack string_pool; |
853 | const char *strings; |
854 | size_t strings_size; |
855 | uint32_t *array1, *array2; |
856 | size_t cnt; |
857 | int fd; |
858 | struct scratch_buffer buf1; |
859 | scratch_buffer_init (buffer: &buf1); |
860 | struct scratch_buffer buf2; |
861 | scratch_buffer_init (buffer: &buf2); |
862 | |
863 | /* If not otherwise told try to read file with existing |
864 | translations. */ |
865 | if (!force_new) |
866 | read_old (catalog, file_name: output_name); |
867 | |
868 | /* Initialize best_size with a very high value. */ |
869 | best_total = best_size = best_depth = UINT_MAX; |
870 | |
871 | /* We need some start size for testing. Let's start with |
872 | TOTAL_MESSAGES / 5, which theoretically provides a mean depth of |
873 | 5. */ |
874 | act_size = 1 + catalog->total_messages / 5; |
875 | |
876 | /* We determine the size of a hash table here. Because the message |
877 | numbers can be chosen arbitrary by the programmer we cannot use |
878 | the simple method of accessing the array using the message |
879 | number. The algorithm is based on the trivial hash function |
880 | NUMBER % TABLE_SIZE, where collisions are stored in a second |
881 | dimension up to TABLE_DEPTH. We here compute TABLE_SIZE so that |
882 | the needed space (= TABLE_SIZE * TABLE_DEPTH) is minimal. */ |
883 | while (act_size <= best_total) |
884 | { |
885 | size_t deep[act_size]; |
886 | |
887 | act_depth = 1; |
888 | memset (s: deep, c: '\0', n: act_size * sizeof (size_t)); |
889 | set_run = catalog->all_sets; |
890 | while (set_run != NULL) |
891 | { |
892 | struct message_list *message_run; |
893 | |
894 | message_run = set_run->messages; |
895 | while (message_run != NULL) |
896 | { |
897 | size_t idx = (message_run->number * set_run->number) % act_size; |
898 | |
899 | ++deep[idx]; |
900 | if (deep[idx] > act_depth) |
901 | { |
902 | act_depth = deep[idx]; |
903 | if (act_depth * act_size > best_total) |
904 | break; |
905 | } |
906 | message_run = message_run->next; |
907 | } |
908 | set_run = set_run->next; |
909 | } |
910 | |
911 | if (act_depth * act_size <= best_total) |
912 | { |
913 | /* We have found a better solution. */ |
914 | best_total = act_depth * act_size; |
915 | best_size = act_size; |
916 | best_depth = act_depth; |
917 | } |
918 | |
919 | ++act_size; |
920 | } |
921 | |
922 | /* let's be prepared for an empty message file. */ |
923 | if (best_size == UINT_MAX) |
924 | { |
925 | best_size = 1; |
926 | best_depth = 1; |
927 | } |
928 | |
929 | /* OK, now we have the size we will use. Fill in the header, build |
930 | the table and the second one with swapped byte order. */ |
931 | obj.magic = CATGETS_MAGIC; |
932 | obj.plane_size = best_size; |
933 | obj.plane_depth = best_depth; |
934 | |
935 | uint32_t array_size = best_size * best_depth * sizeof (uint32_t) * 3; |
936 | /* Allocate room for all needed arrays. */ |
937 | if (!scratch_buffer_set_array_size (buffer: &buf1, nelem: best_size * best_depth * 3, |
938 | size: sizeof (uint32_t))) |
939 | error (EXIT_FAILURE, ENOMEM, gettext ("cannot allocate memory" )); |
940 | array1 = buf1.data; |
941 | memset (s: array1, c: '\0', n: array_size); |
942 | |
943 | if (!scratch_buffer_set_array_size (buffer: &buf2, nelem: best_size * best_depth * 3, |
944 | size: sizeof (uint32_t))) |
945 | { |
946 | scratch_buffer_free (buffer: &buf1); |
947 | error (EXIT_FAILURE, ENOMEM, gettext ("cannot allocate memory" )); |
948 | } |
949 | array2 = buf2.data; |
950 | obstack_init (&string_pool); |
951 | |
952 | set_run = catalog->all_sets; |
953 | while (set_run != NULL) |
954 | { |
955 | struct message_list *message_run; |
956 | |
957 | message_run = set_run->messages; |
958 | while (message_run != NULL) |
959 | { |
960 | size_t idx = (((message_run->number * set_run->number) % best_size) |
961 | * 3); |
962 | /* Determine collision depth. */ |
963 | while (array1[idx] != 0) |
964 | idx += best_size * 3; |
965 | |
966 | /* Store set number, message number and pointer into string |
967 | space, relative to the first string. */ |
968 | array1[idx + 0] = set_run->number; |
969 | array1[idx + 1] = message_run->number; |
970 | array1[idx + 2] = obstack_object_size (&string_pool); |
971 | |
972 | /* Add current string to the continuous space containing all |
973 | strings. */ |
974 | obstack_grow0 (&string_pool, message_run->message, |
975 | strlen (message_run->message)); |
976 | |
977 | message_run = message_run->next; |
978 | } |
979 | |
980 | set_run = set_run->next; |
981 | } |
982 | strings_size = obstack_object_size (&string_pool); |
983 | strings = obstack_finish (&string_pool); |
984 | |
985 | /* Compute ARRAY2 by changing the byte order. */ |
986 | for (cnt = 0; cnt < best_size * best_depth * 3; ++cnt) |
987 | array2[cnt] = SWAPU32 (array1[cnt]); |
988 | |
989 | /* Now we can write out the whole data. */ |
990 | if (strcmp (s1: output_name, s2: "-" ) == 0 |
991 | || strcmp (s1: output_name, s2: "/dev/stdout" ) == 0) |
992 | fd = STDOUT_FILENO; |
993 | else |
994 | { |
995 | fd = creat (file: output_name, mode: 0666); |
996 | if (fd < 0) |
997 | { |
998 | scratch_buffer_free (buffer: &buf1); |
999 | scratch_buffer_free (buffer: &buf2); |
1000 | error (EXIT_FAILURE, errno, gettext ("cannot open output file `%s'" ), |
1001 | output_name); |
1002 | } |
1003 | } |
1004 | |
1005 | /* Write out header. */ |
1006 | write_all(fd, buffer: &obj, length: sizeof (obj)); |
1007 | |
1008 | /* We always write out the little endian version of the index |
1009 | arrays. */ |
1010 | #if __BYTE_ORDER == __LITTLE_ENDIAN |
1011 | write_all(fd, buffer: array1, length: array_size); |
1012 | write_all(fd, buffer: array2, length: array_size); |
1013 | #elif __BYTE_ORDER == __BIG_ENDIAN |
1014 | write_all(fd, array2, array_size); |
1015 | write_all(fd, array1, array_size); |
1016 | #else |
1017 | # error Cannot handle __BYTE_ORDER byte order |
1018 | #endif |
1019 | |
1020 | /* Finally write the strings. */ |
1021 | write_all(fd, buffer: strings, length: strings_size); |
1022 | |
1023 | if (fd != STDOUT_FILENO) |
1024 | close (fd: fd); |
1025 | |
1026 | /* If requested now write out the header file. */ |
1027 | if (header_name != NULL) |
1028 | { |
1029 | int first = 1; |
1030 | FILE *fp; |
1031 | |
1032 | /* Open output file. "-" or "/dev/stdout" means write to |
1033 | standard output. */ |
1034 | if (strcmp (s1: header_name, s2: "-" ) == 0 |
1035 | || strcmp (s1: header_name, s2: "/dev/stdout" ) == 0) |
1036 | fp = stdout; |
1037 | else |
1038 | { |
1039 | fp = fopen (filename: header_name, modes: "w" ); |
1040 | if (fp == NULL) |
1041 | { |
1042 | scratch_buffer_free (buffer: &buf1); |
1043 | scratch_buffer_free (buffer: &buf2); |
1044 | error (EXIT_FAILURE, errno, |
1045 | gettext ("cannot open output file `%s'" ), header_name); |
1046 | } |
1047 | } |
1048 | |
1049 | /* Iterate over all sets and all messages. */ |
1050 | set_run = catalog->all_sets; |
1051 | while (set_run != NULL) |
1052 | { |
1053 | struct message_list *message_run; |
1054 | |
1055 | /* If the current message set has a symbolic name write this |
1056 | out first. */ |
1057 | if (set_run->symbol != NULL) |
1058 | fprintf (stream: fp, format: "%s#define %sSet %#x\t/* %s:%zu */\n" , |
1059 | first ? "" : "\n" , set_run->symbol, set_run->number - 1, |
1060 | set_run->fname, set_run->line); |
1061 | first = 0; |
1062 | |
1063 | message_run = set_run->messages; |
1064 | while (message_run != NULL) |
1065 | { |
1066 | /* If the current message has a symbolic name write |
1067 | #define out. But we have to take care for the set |
1068 | not having a symbolic name. */ |
1069 | if (message_run->symbol != NULL) |
1070 | { |
1071 | if (set_run->symbol == NULL) |
1072 | fprintf (stream: fp, format: "#define AutomaticSet%d%s %#x\t/* %s:%zu */\n" , |
1073 | set_run->number, message_run->symbol, |
1074 | message_run->number, message_run->fname, |
1075 | message_run->line); |
1076 | else |
1077 | fprintf (stream: fp, format: "#define %s%s %#x\t/* %s:%zu */\n" , |
1078 | set_run->symbol, message_run->symbol, |
1079 | message_run->number, message_run->fname, |
1080 | message_run->line); |
1081 | } |
1082 | |
1083 | message_run = message_run->next; |
1084 | } |
1085 | |
1086 | set_run = set_run->next; |
1087 | } |
1088 | |
1089 | if (fp != stdout) |
1090 | fclose (stream: fp); |
1091 | } |
1092 | scratch_buffer_free (buffer: &buf1); |
1093 | scratch_buffer_free (buffer: &buf2); |
1094 | } |
1095 | |
1096 | |
1097 | static struct set_list * |
1098 | find_set (struct catalog *current, int number) |
1099 | { |
1100 | struct set_list *result = current->all_sets; |
1101 | |
1102 | /* We must avoid set number 0 because a set of this number signals |
1103 | in the tables that the entry is not occupied. */ |
1104 | ++number; |
1105 | |
1106 | while (result != NULL) |
1107 | if (result->number == number) |
1108 | return result; |
1109 | else |
1110 | result = result->next; |
1111 | |
1112 | /* Prepare new message set. */ |
1113 | result = (struct set_list *) xcalloc (n: 1, s: sizeof (*result)); |
1114 | result->number = number; |
1115 | result->next = current->all_sets; |
1116 | current->all_sets = result; |
1117 | |
1118 | return result; |
1119 | } |
1120 | |
1121 | |
/* Rewrite STRING *in*place*, resolving escape sequences and removing
   the quote characters.

   FNAME and LINE are only used to locate diagnostics.  CD is the iconv
   descriptor used to turn the byte value of an octal escape (together
   with a terminating NUL byte) into wide characters.  QUOTE_CHAR is the
   active quote character or L'\0' if there is none; ESCAPE_CHAR
   introduces an escape sequence.

   Every character stored corresponds to at least one character read,
   so the write position never overtakes the read position.  */
static void
normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string,
                wchar_t quote_char, wchar_t escape_char)
{
  wchar_t *src = string;
  wchar_t *dst = string;
  const int have_quote = quote_char != L'\0';
  /* A leading quote character means a closing one is expected.  */
  const int is_quoted = have_quote && *src == quote_char;

  if (is_quoted)
    ++src;

  for (;;)
    {
      wchar_t simple;

      /* Stop at the end of the input or at the first quote character
         which is not escaped.  */
      if (*src == L'\0' || *src == quote_char)
        break;

      /* Ordinary characters are copied unchanged.  */
      if (*src != escape_char)
        {
          *dst++ = *src++;
          continue;
        }

      /* Skip the escape character and look at what follows.  */
      ++src;

      if (have_quote && *src == quote_char)
        {
          /* An escaped quote character.  This is an extension to XPG.  */
          *dst++ = *src++;
          continue;
        }

      switch (*src)
        {
        case L'n':
          simple = L'\n';
          break;
        case L't':
          simple = L'\t';
          break;
        case L'v':
          simple = L'\v';
          break;
        case L'b':
          simple = L'\b';
          break;
        case L'r':
          simple = L'\r';
          break;
        case L'f':
          simple = L'\f';
          break;

        case L'0' ... L'7':
          {
            int value;
            char mb[2];
            char *in;
            size_t in_left;
            wchar_t wc[2];
            char *out;
            size_t out_left;

            /* Collect octal digits for as long as one more digit
               cannot push the value above 255.  */
            value = *src++ - L'0';
            while (value <= (255 / 8) && *src >= L'0' && *src <= L'7')
              value = value * 8 + (*src++ - L'0');

            /* Convert the byte and a terminating NUL in one go.  */
            mb[0] = (char) value;
            mb[1] = '\0';
            in = mb;
            in_left = sizeof (mb);

            out = (char *) wc;
            out_left = sizeof (wc);

            /* Flush the state.  */
            iconv (cd, NULL, NULL, NULL, NULL);

            iconv (cd, &in, &in_left, &out, &out_left);

            /* The conversion is only accepted if it consumed both
               bytes and produced exactly two wide characters.  */
            if (in == &mb[2] && (wchar_t *) out == &wc[2])
              *dst++ = wc[0];
            else
              error_at_line (0, 0, fname, line,
                             gettext ("invalid escape sequence"));
          }
          continue;

        default:
          if (*src == escape_char)
            {
              /* A doubled escape character stands for itself.  */
              *dst++ = escape_char;
              ++src;
            }
          /* Otherwise the escape character is simply dropped and the
             following character is handled by the next iteration.  */
          continue;
        }

      /* One of the single-letter escapes: store its replacement and
         step over the letter.  */
      *dst++ = simple;
      ++src;
    }

  /* If we saw a quote character at the beginning we expect another
     one at the end.  */
  if (is_quoted && *src != quote_char)
    error_at_line (0, 0, fname, line, gettext ("unterminated message"));

  /* Terminate string.  */
  *dst = L'\0';
}
1240 | |
1241 | |
1242 | static void |
1243 | read_old (struct catalog *catalog, const char *file_name) |
1244 | { |
1245 | struct catalog_info old_cat_obj; |
1246 | struct set_list *set = NULL; |
1247 | int last_set = -1; |
1248 | size_t cnt; |
1249 | |
1250 | /* Try to open catalog, but don't look through the NLSPATH. */ |
1251 | if (__open_catalog (cat_name: file_name, NULL, NULL, catalog: &old_cat_obj) != 0) |
1252 | { |
1253 | if (errno == ENOENT) |
1254 | /* No problem, the catalog simply does not exist. */ |
1255 | return; |
1256 | else |
1257 | error (EXIT_FAILURE, errno, |
1258 | gettext ("while opening old catalog file" )); |
1259 | } |
1260 | |
1261 | /* OK, we have the catalog loaded. Now read all messages and merge |
1262 | them. When set and message number clash for any message the new |
1263 | one is used. If the new one is empty it indicates that the |
1264 | message should be deleted. */ |
1265 | for (cnt = 0; cnt < old_cat_obj.plane_size * old_cat_obj.plane_depth; ++cnt) |
1266 | { |
1267 | struct message_list *message, *last; |
1268 | |
1269 | if (old_cat_obj.name_ptr[cnt * 3 + 0] == 0) |
1270 | /* No message in this slot. */ |
1271 | continue; |
1272 | |
1273 | if (old_cat_obj.name_ptr[cnt * 3 + 0] - 1 != (uint32_t) last_set) |
1274 | { |
1275 | last_set = old_cat_obj.name_ptr[cnt * 3 + 0] - 1; |
1276 | set = find_set (current: catalog, number: old_cat_obj.name_ptr[cnt * 3 + 0] - 1); |
1277 | } |
1278 | |
1279 | last = NULL; |
1280 | message = set->messages; |
1281 | while (message != NULL) |
1282 | { |
1283 | if ((uint32_t) message->number >= old_cat_obj.name_ptr[cnt * 3 + 1]) |
1284 | break; |
1285 | last = message; |
1286 | message = message->next; |
1287 | } |
1288 | |
1289 | if (message == NULL |
1290 | || (uint32_t) message->number > old_cat_obj.name_ptr[cnt * 3 + 1]) |
1291 | { |
1292 | /* We have found a message which is not yet in the catalog. |
1293 | Insert it at the right position. */ |
1294 | struct message_list *newp; |
1295 | |
1296 | newp = (struct message_list *) xmalloc (n: sizeof (*newp)); |
1297 | newp->number = old_cat_obj.name_ptr[cnt * 3 + 1]; |
1298 | newp->message = |
1299 | &old_cat_obj.strings[old_cat_obj.name_ptr[cnt * 3 + 2]]; |
1300 | newp->fname = NULL; |
1301 | newp->line = 0; |
1302 | newp->symbol = NULL; |
1303 | newp->next = message; |
1304 | |
1305 | if (last == NULL) |
1306 | set->messages = newp; |
1307 | else |
1308 | last->next = newp; |
1309 | |
1310 | ++catalog->total_messages; |
1311 | } |
1312 | else if (*message->message == '\0') |
1313 | { |
1314 | /* The new empty message has overridden the old one thus |
1315 | "deleting" it as required. Now remove the empty remains. */ |
1316 | if (last == NULL) |
1317 | set->messages = message->next; |
1318 | else |
1319 | last->next = message->next; |
1320 | } |
1321 | } |
1322 | } |
1323 | |
1324 | |
/* Open the iconv descriptors used to convert between CODESET and
   wchar_t and determine the escape character.

   CODESET names the character set of the input; if it is NULL the
   codeset of the locale selected by the environment is used.

   On success 0 is returned, *CD_TOWCP converts from CODESET to wchar_t,
   *CD_TOMBP converts from wchar_t to CODESET, and *ESCAPE_CHARP holds
   the wide character the byte 0x5c is converted to (L'\\' plus a
   diagnostic if this cannot be determined).

   On failure a diagnostic is printed and 1 is returned.  Any descriptor
   which could be opened is closed again and both *CD_TOWCP and
   *CD_TOMBP are set to (iconv_t) -1, so the caller never sees a closed
   or half-opened descriptor.  *ESCAPE_CHARP is not touched.  */
static int
open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp,
                 wchar_t *escape_charp)
{
  char buf[2];
  char *bufptr;
  size_t bufsize;
  wchar_t wbuf[2];
  char *wbufptr;
  size_t wbufsize;

  /* If the input file does not specify the codeset use the locale's.  */
  if (codeset == NULL)
    {
      /* NOTE(review): POSIX allows a later setlocale call to invalidate
         the string returned by nl_langinfo; this code relies on the
         string staying usable after switching back to "C".  */
      setlocale (LC_ALL, "");
      codeset = nl_langinfo (CODESET);
      setlocale (LC_ALL, "C");
    }

  /* Get the conversion modules.  */
  *cd_towcp = iconv_open ("WCHAR_T", codeset);
  *cd_tombp = iconv_open (codeset, "WCHAR_T");
  if (*cd_towcp == (iconv_t) -1 || *cd_tombp == (iconv_t) -1)
    {
      error (0, 0, gettext ("conversion modules not available"));

      /* Release whichever direction could be opened and mark both
         descriptors as unusable.  Leaving a closed descriptor in the
         output would invite a second iconv_close by the caller, and
         leaving an open one behind would leak it.  */
      if (*cd_towcp != (iconv_t) -1)
        {
          iconv_close (*cd_towcp);
          *cd_towcp = (iconv_t) -1;
        }
      if (*cd_tombp != (iconv_t) -1)
        {
          iconv_close (*cd_tombp);
          *cd_tombp = (iconv_t) -1;
        }

      return 1;
    }

  /* One special case for historical reasons is the backslash
     character.  In some codesets the byte value 0x5c is not mapped to
     U005c in Unicode.  These charsets then don't have a backslash
     character at all.  Therefore we have to live with whatever the
     codeset provides and recognize, instead of the U005c, the character
     the byte value 0x5c is mapped to.  */
  buf[0] = '\\';
  buf[1] = '\0';
  bufptr = buf;
  bufsize = 2;

  wbufptr = (char *) wbuf;
  wbufsize = sizeof (wbuf);

  /* Convert the byte together with its terminating NUL; success means
     that all input was consumed and the output buffer is exactly
     full.  */
  iconv (*cd_towcp, &bufptr, &bufsize, &wbufptr, &wbufsize);
  if (bufsize != 0 || wbufsize != 0)
    {
      /* Something went wrong, we couldn't convert the byte 0x5c.  Go
         on with using U005c.  */
      error (0, 0, gettext ("cannot determine escape character"));
      *escape_charp = L'\\';
    }
  else
    *escape_charp = wbuf[0];

  return 0;
}
1383 | |