gencat.c source code [glibc/catgets/gencat.c]

/* Copyright (C) 1996-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */
16
17	#ifdef HAVE_CONFIG_H
18	# include "config.h"
19	#endif
20
21	#include <argp.h>
22	#include <assert.h>
23	#include <ctype.h>
24	#include <endian.h>
25	#include <errno.h>
26	#include <error.h>
27	#include <fcntl.h>
28	#include <iconv.h>
29	#include <langinfo.h>
30	#include <locale.h>
31	#include <libintl.h>
32	#include <limits.h>
33	#include <nl_types.h>
34	#include <obstack.h>
35	#include <scratch_buffer.h>
36	#include <stdint.h>
37	#include <stdio.h>
38	#include <stdlib.h>
39	#include <string.h>
40	#include <unistd.h>
41	#include <unistd_ext.h>
42	#include <wchar.h>
43
44	#include "version.h"
45
46	#include "catgetsinfo.h"
47
48
/* Reverse the byte order of the 32-bit value W.  The operand is coerced
   to uint32_t first so that a (signed) int argument cannot overflow in
   the left shifts.  W is evaluated more than once.  */
#define SWAPU32(w) \
  ((((uint32_t) (w)) << 24) | ((((uint32_t) (w)) & 0xff00) << 8)	      \
   | ((((uint32_t) (w)) >> 8) & 0xff00) | (((uint32_t) (w)) >> 24))
51
/* One message of a message set.  The messages of a set are kept in a
   singly linked list sorted by NUMBER.  */
struct message_list
{
  int number;			/* Message number inside its set.  */
  const char *message;		/* Text of the message.  */

  const char *fname;		/* File in which the message was defined.  */
  size_t line;			/* Line of the definition in FNAME.  */
  const char *symbol;		/* Symbolic name, NULL if a number was
				   given.  */

  struct message_list *next;	/* Next message of the same set.  */
};
63
64
/* One message set of a catalog.  */
struct set_list
{
  int number;			/* Internal set number; find_set stores the
				   requested number plus one so that zero
				   never names a set.  */
  int deleted;			/* Non-zero once a `$delset' named this set.  */
  struct message_list *messages; /* Messages of the set, sorted by number.  */
  int last_message;		/* Highest message number handed out so
				   far.  */

  const char *fname;		/* File in which the set was selected.  */
  size_t line;			/* Line of that `$set' directive.  */
  const char *symbol;		/* Symbolic name, NULL if a number was
				   given.  */

  struct set_list *next;	/* Next set of the catalog.  */
};
78
79
80	struct catalog
81	{
82	struct set_list *all_sets;
83	struct set_list *current_set;
84	size_t total_messages;
85	wint_t quote_char;
86	int last_set;
87
88	struct obstack mem_pool;
89	};
90
91
/* If non-zero force creation of new file, not using existing one.  */
static int force_new;

/* Name of output file.  */
static const char *output_name;

/* Name of generated C header file.  */
static const char *header_name;

/* Name and version of program.  */
static void print_version (FILE *stream, struct argp_state *state);
void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
104
105	#define OPT_NEW 1
106
107	/ Definitions of arguments for argp functions. /
108	static const struct argp_option options[] =
109	{
110	{ "header", `'H'`, N_("NAME"), `0`,
111	N_("Create C header file NAME containing symbol definitions") },
112	{ "new", OPT_NEW, NULL, `0`,
113	N_("Do not use existing catalog, force new output file") },
114	{ "output", `'o'`, N_("NAME"), `0`, N_("Write output to file NAME") },
115	{ NULL, `0`, NULL, `0`, NULL }
116	};
117
118	/ Short description of program. /
119	static const char doc[] = N_("Generate message catalog.\
120	\vIf INPUT-FILE is -, input is read from standard input. If OUTPUT-FILE\n\
121	is -, output is written to standard output.\n");
122
123	/ Strings for arguments in help texts. /
124	static const char args_doc[] = N_("\
125	-o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]");
126
127	/ Prototype for option handler. /
128	static error_t parse_opt (int key, char arg, struct* argp_state *state);
129
130	/ Function to print some extra text in the help message. /
131	static char more_help (int* key, const char text, void* *input);
132
133	/ Data structure to communicate with argp functions. /
134	static struct argp argp =
135	{
136	options, parse_opt, args_doc, doc, NULL, more_help
137	};
138
139
140	/ Wrapper functions with error checking for standard functions. /
141	#include <programs/xmalloc.h>
142
143	/ Prototypes for local functions. /
144	static void error_print (void);
145	static struct catalog read_input_file (struct* catalog *current,
146	const char *fname);
147	static void write_out (struct catalog result, const* char *output_name,
148	const char *header_name);
149	static struct set_list find_set (struct* catalog current, int* number);
150	static void normalize_line (const char *fname, size_t line, iconv_t cd,
151	wchar_t *string, wchar_t quote_char,
152	wchar_t escape_char);
153	static void read_old (struct catalog catalog, const* char *file_name);
154	static int open_conversion (const char codesetp, iconv_t cd_towcp,
155	iconv_t cd_tombp, wchar_t escape_charp);
156
157
158	int
159	main (int argc, char *argv[])
160	{
161	struct catalog *result;
162	int remaining;
163
164	/ Set program name for messages. /
165	error_print_progname = error_print;
166
167	/ Set locale via LC_ALL. /
168	setlocale (LC_ALL, locale: "");
169
170	/ Set the text message domain. /
171	textdomain (PACKAGE);
172
173	/ Initialize local variables. /
174	result = NULL;
175
176	/ Parse and process arguments. /
177	argp_parse (argp: &argp, argc: argc, argv: argv, flags: `0`, arg_index: &remaining, NULL);
178
179	/ Determine output file. /
180	if (output_name == NULL)
181	output_name = remaining < argc ? argv[remaining++] : "-";
182
183	/ Process all input files. /
184	setlocale (LC_CTYPE, locale: "C");
185	if (remaining < argc)
186	do
187	result = read_input_file (current: result, fname: argv[remaining]);
188	while (++remaining < argc);
189	else
190	result = read_input_file (NULL, fname: "-");
191
192	/ Write out the result. /
193	if (result != NULL)
194	write_out (result, output_name, header_name);
195
196	return error_message_count != `0`;
197	}
198
199
200	/ Handle program arguments. /
201	static error_t
202	parse_opt (int key, char arg, struct* argp_state *state)
203	{
204	switch (key)
205	{
206	case `'H'`:
207	header_name = arg;
208	break;
209	case OPT_NEW:
210	force_new = `1`;
211	break;
212	case `'o'`:
213	output_name = arg;
214	break;
215	default:
216	return ARGP_ERR_UNKNOWN;
217	}
218	return `0`;
219	}
220
221
222	static char *
223	more_help (int key, const char text, void* *input)
224	{
225	char *tp = NULL;
226	switch (key)
227	{
228	case ARGP_KEY_HELP_EXTRA:
229	/ We print some extra information. /
230	if (asprintf (ptr: &tp, gettext ("\
231	For bug reporting instructions, please see:\n\
232	%s.\n"), REPORT_BUGS_TO) < `0`)
233	return NULL;
234	return tp;
235	default:
236	break;
237	}
238	return (char *) text;
239	}
240
241	/ Print the version information. /
242	static void
243	print_version (FILE stream, struct* argp_state *state)
244	{
245	fprintf (stream: stream, format: "gencat %s%s\n", PKGVERSION, VERSION);
246	fprintf (stream: stream, gettext ("\
247	Copyright (C) %s Free Software Foundation, Inc.\n\
248	This is free software; see the source for copying conditions. There is NO\n\
249	warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
250	"), "2024");
251	fprintf (stream: stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
252	}
253
254
/* The address of this function will be assigned to the hook in the
   error functions.  It deliberately does nothing.  */
static void
error_print (void)
{
  /* We don't want the program name to be printed in messages.  Emacs'
     compile.el does not like this.  */
}
263
264
265	static struct catalog *
266	read_input_file (struct catalog current, const* char *fname)
267	{
268	FILE *fp;
269	char *buf;
270	size_t len;
271	size_t line_number;
272	wchar_t *wbuf;
273	size_t wbufsize;
274	iconv_t cd_towc = (iconv_t) -`1`;
275	iconv_t cd_tomb = (iconv_t) -`1`;
276	wchar_t escape_char = L`'\\'`;
277	char *codeset = NULL;
278
279	if (strcmp (s1: fname, s2: "-") == `0` \|\| strcmp (s1: fname, s2: "/dev/stdin") == `0`)
280	{
281	fp = stdin;
282	fname = gettext ("standard input");
283	}
284	else
285	fp = fopen (filename: fname, modes: "r");
286	if (fp == NULL)
287	{
288	error (status: `0`, errno, gettext ("cannot open input file `%s'"), fname);
289	return current;
290	}
291
292	/ If we haven't seen anything yet, allocate result structure. /
293	if (current == NULL)
294	{
295	current = (struct catalog ) xcalloc (n: `1`, s: sizeof* (*current));
296
297	#define obstack_chunk_alloc malloc
298	#define obstack_chunk_free free
299	obstack_init (&current->mem_pool);
300
301	current->current_set = find_set (current, NL_SETD);
302	}
303
304	buf = NULL;
305	len = `0`;
306	line_number = `0`;
307
308	wbufsize = `1024`;
309	wbuf = (wchar_t *) xmalloc (n: wbufsize);
310
311	while (!feof (stream: fp))
312	{
313	int continued;
314	int used;
315	size_t start_line = line_number + `1`;
316	char *this_line;
317
318	do
319	{
320	int act_len;
321
322	act_len = getline (lineptr: &buf, n: &len, stream: fp);
323	if (act_len <= `0`)
324	break;
325	++line_number;
326
327	/ It the line continued? /
328	continued = `0`;
329	if (buf[act_len - `1`] == `'\n'`)
330	{
331	--act_len;
332
333	/ There might be more than one backslash at the end of*
334	the line. Only if there is an odd number of them is
335	the line continued. /*
336	if (act_len > `0` && buf[act_len - `1`] == `'\\'`)
337	{
338	int temp_act_len = act_len;
339
340	do
341	{
342	--temp_act_len;
343	continued = !continued;
344	}
345	while (temp_act_len > `0` && buf[temp_act_len - `1`] == `'\\'`);
346
347	if (continued)
348	--act_len;
349	}
350	}
351
352	/ Append to currently selected line. /
353	obstack_grow (&current->mem_pool, buf, act_len);
354	}
355	while (continued);
356
357	obstack_1grow (&current->mem_pool, `'\0'`);
358	this_line = (char *) obstack_finish (&current->mem_pool);
359
360	used = `0`;
361	if (this_line[`0`] == `'$'`)
362	{
363	if (isblank (this_line[`1`]))
364	{
365	int cnt = `1`;
366	while (isblank (this_line[cnt]))
367	++cnt;
368	if (strncmp (s1: &this_line[cnt], s2: "codeset=", n: `8`) != `0`)
369	/ This is a comment line. Do nothing. /;
370	else if (codeset != NULL)
371	/ Ignore multiple codeset. /;
372	else
373	{
374	int start = cnt + `8`;
375	cnt = start;
376	while (this_line[cnt] != `'\0'` && !isspace (this_line[cnt]))
377	++cnt;
378	if (cnt != start)
379	{
380	int len = cnt - start;
381	codeset = xmalloc (n: len + `1`);
382	((char* *) mempcpy (codeset, &this_line[start], len))
383	= `'\0'`;
384	}
385	}
386	}
387	else if (strncmp (s1: &this_line[`1`], s2: "set", n: `3`) == `0`)
388	{
389	int cnt = sizeof ("set");
390	int set_number;
391	const char *symbol = NULL;
392	while (isspace (this_line[cnt]))
393	++cnt;
394
395	if (isdigit (this_line[cnt]))
396	{
397	set_number = atol (nptr: &this_line[cnt]);
398
399	/ If the given number for the character set is*
400	higher than any we used for symbolic set names
401	avoid clashing by using only higher numbers for
402	the following symbolic definitions. /*
403	if (set_number > current->last_set)
404	current->last_set = set_number;
405	}
406	else
407	{
408	/ See whether it is a reasonable identifier. /
409	int start = cnt;
410	while (isalnum (this_line[cnt]) \|\| this_line[cnt] == `'_'`)
411	++cnt;
412
413	if (cnt == start)
414	{
415	/ No correct character found. /
416	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
417	gettext ("illegal set number"));
418	set_number = `0`;
419	}
420	else
421	{
422	/ We have found seomthing that looks like a*
423	correct identifier. /*
424	struct set_list *runp;
425
426	this_line[cnt] = `'\0'`;
427	used = `1`;
428	symbol = &this_line[start];
429
430	/ Test whether the identifier was already used. /
431	runp = current->all_sets;
432	while (runp != `0`)
433	if (runp->symbol != NULL
434	&& strcmp (s1: runp->symbol, s2: symbol) == `0`)
435	break;
436	else
437	runp = runp->next;
438
439	if (runp != NULL)
440	{
441	/ We cannot allow duplicate identifiers for*
442	message sets. /*
443	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
444	gettext ("duplicate set definition"));
445	error_at_line (status: `0`, errnum: `0`, fname: runp->fname, lineno: runp->line,
446	gettext ("\
447	this is the first definition"));
448	set_number = `0`;
449	}
450	else
451	/ Allocate next free message set for identifier. /
452	set_number = ++current->last_set;
453	}
454	}
455
456	if (set_number != `0`)
457	{
458	/ We found a legal set number. /
459	current->current_set = find_set (current, number: set_number);
460	if (symbol != NULL)
461	used = `1`;
462	current->current_set->symbol = symbol;
463	current->current_set->fname = fname;
464	current->current_set->line = start_line;
465	}
466	}
467	else if (strncmp (s1: &this_line[`1`], s2: "delset", n: `6`) == `0`)
468	{
469	int cnt = sizeof ("delset");
470	while (isspace (this_line[cnt]))
471	++cnt;
472
473	if (isdigit (this_line[cnt]))
474	{
475	size_t set_number = atol (nptr: &this_line[cnt]);
476	struct set_list *set;
477
478	/ Mark the message set with the given number as*
479	deleted. /*
480	set = find_set (current, number: set_number);
481	set->deleted = `1`;
482	}
483	else
484	{
485	/ See whether it is a reasonable identifier. /
486	int start = cnt;
487	while (isalnum (this_line[cnt]) \|\| this_line[cnt] == `'_'`)
488	++cnt;
489
490	if (cnt == start)
491	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
492	gettext ("illegal set number"));
493	else
494	{
495	const char *symbol;
496	struct set_list *runp;
497
498	this_line[cnt] = `'\0'`;
499	used = `1`;
500	symbol = &this_line[start];
501
502	/ We have a symbolic set name. This name must*
503	appear somewhere else in the catalogs read so
504	far. /*
505	for (runp = current->all_sets; runp != NULL;
506	runp = runp->next)
507	{
508	if (strcmp (s1: runp->symbol, s2: symbol) == `0`)
509	{
510	runp->deleted = `1`;
511	break;
512	}
513	}
514	if (runp == NULL)
515	/ Name does not exist before. /
516	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
517	gettext ("unknown set `%s'"), symbol);
518	}
519	}
520	}
521	else if (strncmp (s1: &this_line[`1`], s2: "quote", n: `5`) == `0`)
522	{
523	char buf[`2`];
524	char *bufptr;
525	size_t buflen;
526	char *wbufptr;
527	size_t wbuflen;
528	int cnt;
529
530	cnt = sizeof ("quote");
531	while (isspace (this_line[cnt]))
532	++cnt;
533
534	/ We need the conversion. /
535	if (cd_towc == (iconv_t) -`1`
536	&& open_conversion (codesetp: codeset, cd_towcp: &cd_towc, cd_tombp: &cd_tomb,
537	escape_charp: &escape_char) != `0`)
538	/ Something is wrong. /
539	goto out;
540
541	/ Yes, the quote char can be '\0'; this means no quote*
542	char. The function using the information works on
543	wide characters so we have to convert it here. /*
544	buf[`0`] = this_line[cnt];
545	buf[`1`] = `'\0'`;
546	bufptr = buf;
547	buflen = `2`;
548
549	wbufptr = (char *) wbuf;
550	wbuflen = wbufsize;
551
552	/ Flush the state. /
553	iconv (cd: cd_towc, NULL, NULL, NULL, NULL);
554
555	iconv (cd: cd_towc, inbuf: &bufptr, inbytesleft: &buflen, outbuf: &wbufptr, outbytesleft: &wbuflen);
556	if (buflen != `0` \|\| (wchar_t *) wbufptr != &wbuf[`2`])
557	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
558	gettext ("invalid quote character"));
559	else
560	/ Use the converted wide character. /
561	current->quote_char = wbuf[`0`];
562	}
563	else
564	{
565	int cnt;
566	cnt = `2`;
567	while (this_line[cnt] != `'\0'` && !isspace (this_line[cnt]))
568	++cnt;
569	this_line[cnt] = `'\0'`;
570	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
571	gettext ("unknown directive `%s': line ignored"),
572	&this_line[`1`]);
573	}
574	}
575	else if (isalnum (this_line[`0`]) \|\| this_line[`0`] == `'_'`)
576	{
577	const char *ident = this_line;
578	char *line = this_line;
579	int message_number;
580
581	do
582	++line;
583	while (line[`0`] != `'\0'` && !isspace (line[`0`]));
584	if (line[`0`] != `'\0'`)
585	line++ = `'\0'`; /* Terminate the identifier. /
586
587	/ Now we found the beginning of the message itself. /
588
589	if (isdigit (ident[`0`]))
590	{
591	struct message_list *runp;
592	struct message_list *lastp;
593
594	message_number = atoi (ident);
595
596	/ Find location to insert the new message. /
597	runp = current->current_set->messages;
598	lastp = NULL;
599	while (runp != NULL)
600	if (runp->number == message_number)
601	break;
602	else
603	{
604	lastp = runp;
605	runp = runp->next;
606	}
607	if (runp != NULL)
608	{
609	/ Oh, oh. There is already a message with this*
610	number in the message set. /*
611	if (runp->symbol == NULL)
612	{
613	/ The existing message had its number specified*
614	by the user. Fatal collision type uh, oh. /*
615	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
616	gettext ("duplicated message number"));
617	error_at_line (status: `0`, errnum: `0`, fname: runp->fname, lineno: runp->line,
618	gettext ("this is the first definition"));
619	message_number = `0`;
620	}
621	else
622	{
623	/ Collision was with number auto-assigned to a*
624	symbolic. Change existing symbolic number
625	and move to end the list (if not already there). /*
626	runp->number = ++current->current_set->last_message;
627
628	if (runp->next != NULL)
629	{
630	struct message_list *endp;
631
632	if (lastp == NULL)
633	current->current_set->messages=runp->next;
634	else
635	lastp->next=runp->next;
636
637	endp = runp->next;
638	while (endp->next != NULL)
639	endp = endp->next;
640
641	endp->next = runp;
642	runp->next = NULL;
643	}
644	}
645	}
646	ident = NULL; / We don't have a symbol. /
647
648	if (message_number != `0`
649	&& message_number > current->current_set->last_message)
650	current->current_set->last_message = message_number;
651	}
652	else if (ident[`0`] != `'\0'`)
653	{
654	struct message_list *runp;
655
656	/ Test whether the symbolic name was not used for*
657	another message in this message set. /*
658	runp = current->current_set->messages;
659	while (runp != NULL)
660	if (runp->symbol != NULL && strcmp (s1: ident, s2: runp->symbol) == `0`)
661	break;
662	else
663	runp = runp->next;
664	if (runp != NULL)
665	{
666	/ The name is already used. /
667	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line, gettext ("\
668	duplicated message identifier"));
669	error_at_line (status: `0`, errnum: `0`, fname: runp->fname, lineno: runp->line,
670	gettext ("this is the first definition"));
671	message_number = `0`;
672	}
673	else
674	/ Give the message the next unused number. /
675	message_number = ++current->current_set->last_message;
676	}
677	else
678	message_number = `0`;
679
680	if (message_number != `0`)
681	{
682	char *inbuf;
683	size_t inlen;
684	char *outbuf;
685	size_t outlen;
686	struct message_list *newp;
687	size_t line_len = strlen (s: line) + `1`;
688	size_t ident_len = `0`;
689
690	/ We need the conversion. /
691	if (cd_towc == (iconv_t) -`1`
692	&& open_conversion (codesetp: codeset, cd_towcp: &cd_towc, cd_tombp: &cd_tomb,
693	escape_charp: &escape_char) != `0`)
694	/ Something is wrong. /
695	goto out;
696
697	/ Convert to a wide character string. We have to*
698	interpret escape sequences which will be impossible
699	without doing the conversion if the codeset of the
700	message is stateful. /*
701	while (`1`)
702	{
703	inbuf = line;
704	inlen = line_len;
705	outbuf = (char *) wbuf;
706	outlen = wbufsize;
707
708	/ Flush the state. /
709	iconv (cd: cd_towc, NULL, NULL, NULL, NULL);
710
711	iconv (cd: cd_towc, inbuf: &inbuf, inbytesleft: &inlen, outbuf: &outbuf, outbytesleft: &outlen);
712	if (inlen == `0`)
713	{
714	/ The string is converted. /
715	assert (outlen < wbufsize);
716	assert (wbuf[(wbufsize - outlen) / sizeof (wchar_t) - `1`]
717	== L`'\0'`);
718	break;
719	}
720
721	if (outlen != `0`)
722	{
723	/ Something is wrong with this string, we ignore it. /
724	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line, gettext ("\
725	invalid character: message ignored"));
726	goto ignore;
727	}
728
729	/ The output buffer is too small. /
730	wbufsize *= `2`;
731	wbuf = (wchar_t *) xrealloc (o: wbuf, n: wbufsize);
732	}
733
734	/ Strip quote characters, change escape sequences into*
735	correct characters etc. /*
736	normalize_line (fname, line: start_line, cd: cd_towc, string: wbuf,
737	quote_char: current->quote_char, escape_char);
738
739	if (ident)
740	ident_len = line - this_line;
741
742	/ Now the string is free of escape sequences. Convert it*
743	back into a multibyte character string. First free the
744	memory allocated for the original string. /*
745	obstack_free (&current->mem_pool, this_line);
746
747	used = `1`; / Yes, we use the line. /
748
749	/ Now fill in the new string. It should never happen that*
750	the replaced string is longer than the original. /*
751	inbuf = (char *) wbuf;
752	inlen = (wcslen (s: wbuf) + `1`) * sizeof (wchar_t);
753
754	outlen = obstack_room (&current->mem_pool);
755	obstack_blank (&current->mem_pool, outlen);
756	this_line = (char *) obstack_base (&current->mem_pool);
757	outbuf = this_line + ident_len;
758	outlen -= ident_len;
759
760	/ Flush the state. /
761	iconv (cd: cd_tomb, NULL, NULL, NULL, NULL);
762
763	iconv (cd: cd_tomb, inbuf: &inbuf, inbytesleft: &inlen, outbuf: &outbuf, outbytesleft: &outlen);
764	if (inlen != `0`)
765	{
766	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
767	gettext ("invalid line"));
768	goto ignore;
769	}
770	assert (outbuf[-`1`] == `'\0'`);
771
772	/ Free the memory in the obstack we don't use. /
773	obstack_blank (&current->mem_pool, -(int) outlen);
774	line = obstack_finish (&current->mem_pool);
775
776	newp = (struct message_list ) xmalloc (n: sizeof* (*newp));
777	newp->number = message_number;
778	newp->message = line + ident_len;
779	/ Remember symbolic name; is NULL if no is given. /
780	newp->symbol = ident ? line : NULL;
781	/ Remember where we found the character. /
782	newp->fname = fname;
783	newp->line = start_line;
784
785	/ Find place to insert to message. We keep them in a*
786	sorted single linked list. /*
787	if (current->current_set->messages == NULL
788	\|\| current->current_set->messages->number > message_number)
789	{
790	newp->next = current->current_set->messages;
791	current->current_set->messages = newp;
792	}
793	else
794	{
795	struct message_list *runp;
796	runp = current->current_set->messages;
797	while (runp->next != NULL)
798	if (runp->next->number > message_number)
799	break;
800	else
801	runp = runp->next;
802	newp->next = runp->next;
803	runp->next = newp;
804	}
805	}
806	++current->total_messages;
807	}
808	else
809	{
810	size_t cnt;
811
812	cnt = `0`;
813	/ See whether we have any non-white space character in this*
814	line. /*
815	while (this_line[cnt] != `'\0'` && isspace (this_line[cnt]))
816	++cnt;
817
818	if (this_line[cnt] != `'\0'`)
819	/ Yes, some unknown characters found. /
820	error_at_line (status: `0`, errnum: `0`, fname: fname, lineno: start_line,
821	gettext ("malformed line ignored"));
822	}
823
824	ignore:
825	/ We can save the memory for the line if it was not used. /
826	if (!used)
827	obstack_free (&current->mem_pool, this_line);
828	}
829
830	/ Close the conversion modules. /
831	iconv_close (cd: cd_towc);
832	iconv_close (cd: cd_tomb);
833	free (ptr: codeset);
834
835	out:
836	free (ptr: wbuf);
837
838	if (fp != stdin)
839	fclose (stream: fp);
840	return current;
841	}
842
843	static void
844	write_out (struct catalog catalog, const* char *output_name,
845	const char *header_name)
846	{
847	/ Computing the "optimal" size. /
848	struct set_list *set_run;
849	size_t best_total, best_size, best_depth;
850	size_t act_size, act_depth;
851	struct catalog_obj obj;
852	struct obstack string_pool;
853	const char *strings;
854	size_t strings_size;
855	uint32_t array1, array2;
856	size_t cnt;
857	int fd;
858	struct scratch_buffer buf1;
859	scratch_buffer_init (buffer: &buf1);
860	struct scratch_buffer buf2;
861	scratch_buffer_init (buffer: &buf2);
862
863	/ If not otherwise told try to read file with existing*
864	translations. /*
865	if (!force_new)
866	read_old (catalog, file_name: output_name);
867
868	/ Initialize best_size with a very high value. /
869	best_total = best_size = best_depth = UINT_MAX;
870
871	/ We need some start size for testing. Let's start with*
872	TOTAL_MESSAGES / 5, which theoretically provides a mean depth of
873	5. /*
874	act_size = `1` + catalog->total_messages / `5`;
875
876	/ We determine the size of a hash table here. Because the message*
877	numbers can be chosen arbitrary by the programmer we cannot use
878	the simple method of accessing the array using the message
879	number. The algorithm is based on the trivial hash function
880	NUMBER % TABLE_SIZE, where collisions are stored in a second
881	dimension up to TABLE_DEPTH. We here compute TABLE_SIZE so that
882	the needed space (= TABLE_SIZE TABLE_DEPTH) is minimal. /
883	while (act_size <= best_total)
884	{
885	size_t deep[act_size];
886
887	act_depth = `1`;
888	memset (s: deep, c: `'\0'`, n: act_size * sizeof (size_t));
889	set_run = catalog->all_sets;
890	while (set_run != NULL)
891	{
892	struct message_list *message_run;
893
894	message_run = set_run->messages;
895	while (message_run != NULL)
896	{
897	size_t idx = (message_run->number * set_run->number) % act_size;
898
899	++deep[idx];
900	if (deep[idx] > act_depth)
901	{
902	act_depth = deep[idx];
903	if (act_depth * act_size > best_total)
904	break;
905	}
906	message_run = message_run->next;
907	}
908	set_run = set_run->next;
909	}
910
911	if (act_depth * act_size <= best_total)
912	{
913	/ We have found a better solution. /
914	best_total = act_depth * act_size;
915	best_size = act_size;
916	best_depth = act_depth;
917	}
918
919	++act_size;
920	}
921
922	/ let's be prepared for an empty message file. /
923	if (best_size == UINT_MAX)
924	{
925	best_size = `1`;
926	best_depth = `1`;
927	}
928
929	/ OK, now we have the size we will use. Fill in the header, build*
930	the table and the second one with swapped byte order. /*
931	obj.magic = CATGETS_MAGIC;
932	obj.plane_size = best_size;
933	obj.plane_depth = best_depth;
934
935	uint32_t array_size = best_size * best_depth * sizeof (uint32_t) * `3`;
936	/ Allocate room for all needed arrays. /
937	if (!scratch_buffer_set_array_size (buffer: &buf1, nelem: best_size * best_depth * `3`,
938	size: sizeof (uint32_t)))
939	error (EXIT_FAILURE, ENOMEM, gettext ("cannot allocate memory"));
940	array1 = buf1.data;
941	memset (s: array1, c: `'\0'`, n: array_size);
942
943	if (!scratch_buffer_set_array_size (buffer: &buf2, nelem: best_size * best_depth * `3`,
944	size: sizeof (uint32_t)))
945	{
946	scratch_buffer_free (buffer: &buf1);
947	error (EXIT_FAILURE, ENOMEM, gettext ("cannot allocate memory"));
948	}
949	array2 = buf2.data;
950	obstack_init (&string_pool);
951
952	set_run = catalog->all_sets;
953	while (set_run != NULL)
954	{
955	struct message_list *message_run;
956
957	message_run = set_run->messages;
958	while (message_run != NULL)
959	{
960	size_t idx = (((message_run->number * set_run->number) % best_size)
961	* `3`);
962	/ Determine collision depth. /
963	while (array1[idx] != `0`)
964	idx += best_size * `3`;
965
966	/ Store set number, message number and pointer into string*
967	space, relative to the first string. /*
968	array1[idx + `0`] = set_run->number;
969	array1[idx + `1`] = message_run->number;
970	array1[idx + `2`] = obstack_object_size (&string_pool);
971
972	/ Add current string to the continuous space containing all*
973	strings. /*
974	obstack_grow0 (&string_pool, message_run->message,
975	strlen (message_run->message));
976
977	message_run = message_run->next;
978	}
979
980	set_run = set_run->next;
981	}
982	strings_size = obstack_object_size (&string_pool);
983	strings = obstack_finish (&string_pool);
984
985	/ Compute ARRAY2 by changing the byte order. /
986	for (cnt = `0`; cnt < best_size * best_depth * `3`; ++cnt)
987	array2[cnt] = SWAPU32 (array1[cnt]);
988
989	/ Now we can write out the whole data. /
990	if (strcmp (s1: output_name, s2: "-") == `0`
991	\|\| strcmp (s1: output_name, s2: "/dev/stdout") == `0`)
992	fd = STDOUT_FILENO;
993	else
994	{
995	fd = creat (file: output_name, mode: `0666`);
996	if (fd < `0`)
997	{
998	scratch_buffer_free (buffer: &buf1);
999	scratch_buffer_free (buffer: &buf2);
1000	error (EXIT_FAILURE, errno, gettext ("cannot open output file `%s'"),
1001	output_name);
1002	}
1003	}
1004
1005	/ Write out header. /
1006	write_all(fd, buffer: &obj, length: sizeof (obj));
1007
1008	/ We always write out the little endian version of the index*
1009	arrays. /*
1010	#if __BYTE_ORDER == __LITTLE_ENDIAN
1011	write_all(fd, buffer: array1, length: array_size);
1012	write_all(fd, buffer: array2, length: array_size);
1013	#elif __BYTE_ORDER == __BIG_ENDIAN
1014	write_all(fd, array2, array_size);
1015	write_all(fd, array1, array_size);
1016	#else
1017	# error Cannot handle __BYTE_ORDER byte order
1018	#endif
1019
1020	/ Finally write the strings. /
1021	write_all(fd, buffer: strings, length: strings_size);
1022
1023	if (fd != STDOUT_FILENO)
1024	close (fd: fd);
1025
1026	/ If requested now write out the header file. /
1027	if (header_name != NULL)
1028	{
1029	int first = `1`;
1030	FILE *fp;
1031
1032	/ Open output file. "-" or "/dev/stdout" means write to*
1033	standard output. /*
1034	if (strcmp (s1: header_name, s2: "-") == `0`
1035	\|\| strcmp (s1: header_name, s2: "/dev/stdout") == `0`)
1036	fp = stdout;
1037	else
1038	{
1039	fp = fopen (filename: header_name, modes: "w");
1040	if (fp == NULL)
1041	{
1042	scratch_buffer_free (buffer: &buf1);
1043	scratch_buffer_free (buffer: &buf2);
1044	error (EXIT_FAILURE, errno,
1045	gettext ("cannot open output file `%s'"), header_name);
1046	}
1047	}
1048
1049	/ Iterate over all sets and all messages. /
1050	set_run = catalog->all_sets;
1051	while (set_run != NULL)
1052	{
1053	struct message_list *message_run;
1054
1055	/ If the current message set has a symbolic name write this*
1056	out first. /*
1057	if (set_run->symbol != NULL)
1058	fprintf (stream: fp, format: "%s#define %sSet %#x\t/* %s:%zu */\n",
1059	first ? "" : "\n", set_run->symbol, set_run->number - `1`,
1060	set_run->fname, set_run->line);
1061	first = `0`;
1062
1063	message_run = set_run->messages;
1064	while (message_run != NULL)
1065	{
1066	/ If the current message has a symbolic name write*
1067	#define out. But we have to take care for the set
1068	not having a symbolic name. /*
1069	if (message_run->symbol != NULL)
1070	{
1071	if (set_run->symbol == NULL)
1072	fprintf (stream: fp, format: "#define AutomaticSet%d%s %#x\t/* %s:%zu */\n",
1073	set_run->number, message_run->symbol,
1074	message_run->number, message_run->fname,
1075	message_run->line);
1076	else
1077	fprintf (stream: fp, format: "#define %s%s %#x\t/* %s:%zu */\n",
1078	set_run->symbol, message_run->symbol,
1079	message_run->number, message_run->fname,
1080	message_run->line);
1081	}
1082
1083	message_run = message_run->next;
1084	}
1085
1086	set_run = set_run->next;
1087	}
1088
1089	if (fp != stdout)
1090	fclose (stream: fp);
1091	}
1092	scratch_buffer_free (buffer: &buf1);
1093	scratch_buffer_free (buffer: &buf2);
1094	}
1095
1096
1097	static struct set_list *
1098	find_set (struct catalog current, int* number)
1099	{
1100	struct set_list *result = current->all_sets;
1101
1102	/ We must avoid set number 0 because a set of this number signals*
1103	in the tables that the entry is not occupied. /*
1104	++number;
1105
1106	while (result != NULL)
1107	if (result->number == number)
1108	return result;
1109	else
1110	result = result->next;
1111
1112	/ Prepare new message set. /
1113	result = (struct set_list ) xcalloc (n: `1`, s: sizeof* (*result));
1114	result->number = number;
1115	result->next = current->all_sets;
1116	current->all_sets = result;
1117
1118	return result;
1119	}
1120
1121
/* Normalize STRING in place by processing escape sequences and quote
   characters.

   FNAME and LINE are only used to locate diagnostics.  CD is the
   conversion descriptor used to turn the byte named by an octal escape
   into a wide character.  QUOTE_CHAR is the quote character, or L'\0'
   if there is none.  ESCAPE_CHAR introduces an escape sequence.

   A leading quote character is dropped.  Processing stops at the first
   unescaped quote character; anything after it is discarded.  The
   result is never longer than the input, so it is written over the
   input and terminated with L'\0'.  Problems are reported through
   error_at_line with status 0, i.e. processing continues.  */
static void
normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string,
                wchar_t quote_char, wchar_t escape_char)
{
  wchar_t *rp = string;         /* Read position.  */
  wchar_t *wp = string;         /* Write position, never ahead of RP.  */
  int is_quoted = 0;

  if (quote_char != L'\0' && *rp == quote_char)
    {
      is_quoted = 1;
      ++rp;
    }

  while (*rp != L'\0')
    {
      if (*rp == quote_char)
        /* We simply end the string when we find the first unescaped
           quote character.  */
        break;

      if (*rp != escape_char)
        {
          /* Ordinary character.  */
          *wp++ = *rp++;
          continue;
        }

      /* Skip the escape character and look at what follows.  */
      ++rp;

      if (quote_char != L'\0' && *rp == quote_char)
        {
          /* Escaped quote character.  This is an extension to XPG.  */
          *wp++ = *rp++;
          continue;
        }

      /* Recognize escape sequences.  */
      switch (*rp)
        {
        case L'n':
          *wp++ = L'\n';
          ++rp;
          break;
        case L't':
          *wp++ = L'\t';
          ++rp;
          break;
        case L'v':
          *wp++ = L'\v';
          ++rp;
          break;
        case L'b':
          *wp++ = L'\b';
          ++rp;
          break;
        case L'r':
          *wp++ = L'\r';
          ++rp;
          break;
        case L'f':
          *wp++ = L'\f';
          ++rp;
          break;
        case L'0' ... L'7':
          {
            int number;
            char cbuf[2];
            char *cbufptr;
            size_t cbufin;
            wchar_t wcbuf[2];
            char *wcbufptr;
            size_t wcbufin;

            /* Collect octal digits.  Another digit is only accepted
               while the value so far is at most 255 / 8, so the result
               always fits into one byte.  */
            number = *rp++ - L'0';
            while (number <= (255 / 8) && *rp >= L'0' && *rp <= L'7')
              {
                number *= 8;
                number += *rp++ - L'0';
              }

            /* Convert the byte together with a terminating NUL so that
               a successful conversion consumes exactly two input bytes
               and produces exactly two wide characters.  */
            cbuf[0] = (char) number;
            cbuf[1] = '\0';
            cbufptr = cbuf;
            cbufin = 2;

            wcbufptr = (char *) wcbuf;
            wcbufin = sizeof (wcbuf);

            /* Flush the state.  */
            iconv (cd, NULL, NULL, NULL, NULL);

            iconv (cd, &cbufptr, &cbufin, &wcbufptr, &wcbufin);
            if (cbufptr != &cbuf[2] || (wchar_t *) wcbufptr != &wcbuf[2])
              error_at_line (0, 0, fname, line,
                             gettext ("invalid escape sequence"));
            else
              *wp++ = wcbuf[0];
          }
          break;
        default:
          if (*rp == escape_char)
            {
              /* A doubled escape character stands for itself.  */
              *wp++ = escape_char;
              ++rp;
            }
          /* Otherwise simply ignore the escape character; the character
             following it is handled by the next loop iteration.  */
          break;
        }
    }

  /* If we saw a quote character at the beginning we expect another
     one at the end.  */
  if (is_quoted && *rp != quote_char)
    error_at_line (0, 0, fname, line, gettext ("unterminated message"));

  /* Terminate string.  */
  *wp = L'\0';
}
1240
1241
1242	static void
1243	read_old (struct catalog catalog, const* char *file_name)
1244	{
1245	struct catalog_info old_cat_obj;
1246	struct set_list *set = NULL;
1247	int last_set = -`1`;
1248	size_t cnt;
1249
1250	/ Try to open catalog, but don't look through the NLSPATH. /
1251	if (__open_catalog (cat_name: file_name, NULL, NULL, catalog: &old_cat_obj) != `0`)
1252	{
1253	if (errno == ENOENT)
1254	/ No problem, the catalog simply does not exist. /
1255	return;
1256	else
1257	error (EXIT_FAILURE, errno,
1258	gettext ("while opening old catalog file"));
1259	}
1260
1261	/ OK, we have the catalog loaded. Now read all messages and merge*
1262	them. When set and message number clash for any message the new
1263	one is used. If the new one is empty it indicates that the
1264	message should be deleted. /*
1265	for (cnt = `0`; cnt < old_cat_obj.plane_size * old_cat_obj.plane_depth; ++cnt)
1266	{
1267	struct message_list message, last;
1268
1269	if (old_cat_obj.name_ptr[cnt * `3` + `0`] == `0`)
1270	/ No message in this slot. /
1271	continue;
1272
1273	if (old_cat_obj.name_ptr[cnt * `3` + `0`] - `1` != (uint32_t) last_set)
1274	{
1275	last_set = old_cat_obj.name_ptr[cnt * `3` + `0`] - `1`;
1276	set = find_set (current: catalog, number: old_cat_obj.name_ptr[cnt * `3` + `0`] - `1`);
1277	}
1278
1279	last = NULL;
1280	message = set->messages;
1281	while (message != NULL)
1282	{
1283	if ((uint32_t) message->number >= old_cat_obj.name_ptr[cnt * `3` + `1`])
1284	break;
1285	last = message;
1286	message = message->next;
1287	}
1288
1289	if (message == NULL
1290	\|\| (uint32_t) message->number > old_cat_obj.name_ptr[cnt * `3` + `1`])
1291	{
1292	/ We have found a message which is not yet in the catalog.*
1293	Insert it at the right position. /*
1294	struct message_list *newp;
1295
1296	newp = (struct message_list ) xmalloc (n: sizeof* (*newp));
1297	newp->number = old_cat_obj.name_ptr[cnt * `3` + `1`];
1298	newp->message =
1299	&old_cat_obj.strings[old_cat_obj.name_ptr[cnt * `3` + `2`]];
1300	newp->fname = NULL;
1301	newp->line = `0`;
1302	newp->symbol = NULL;
1303	newp->next = message;
1304
1305	if (last == NULL)
1306	set->messages = newp;
1307	else
1308	last->next = newp;
1309
1310	++catalog->total_messages;
1311	}
1312	else if (*message->message == `'\0'`)
1313	{
1314	/ The new empty message has overridden the old one thus*
1315	"deleting" it as required. Now remove the empty remains. /*
1316	if (last == NULL)
1317	set->messages = message->next;
1318	else
1319	last->next = message->next;
1320	}
1321	}
1322	}
1323
1324
/* Open the conversion descriptors between CODESET and wchar_t and
   determine the escape character.

   If CODESET is NULL the codeset of the locale selected by the
   environment is used; the global locale is set to "C" again
   afterwards.  On success 0 is returned, *CD_TOWCP converts from
   CODESET to wchar_t, *CD_TOMBP converts from wchar_t to CODESET, and
   *ESCAPE_CHARP is the wide character the byte 0x5c maps to (L'\\' if
   that cannot be determined).

   On failure a diagnostic is printed, 1 is returned, any descriptor
   that was opened is closed again, and both *CD_TOWCP and *CD_TOMBP
   are set to (iconv_t) -1 so that the caller is never left with a
   stale or leaked descriptor.  */
static int
open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp,
                 wchar_t *escape_charp)
{
  char buf[2];
  char *bufptr;
  size_t bufsize;
  wchar_t wbuf[2];
  char *wbufptr;
  size_t wbufsize;

  /* If the input file does not specify the codeset use the locale's.  */
  if (codeset == NULL)
    {
      /* NOTE(review): the string returned by nl_langinfo is used after
         the following setlocale call; POSIX allows setlocale to
         invalidate it -- confirm this is safe on all targets.  */
      setlocale (LC_ALL, "");
      codeset = nl_langinfo (CODESET);
      setlocale (LC_ALL, "C");
    }

  /* Get the conversion modules.  */
  *cd_towcp = iconv_open ("WCHAR_T", codeset);
  *cd_tombp = iconv_open (codeset, "WCHAR_T");
  if (*cd_towcp == (iconv_t) -1 || *cd_tombp == (iconv_t) -1)
    {
      error (0, 0, gettext ("conversion modules not available"));

      /* Release whichever descriptor could be opened.  */
      if (*cd_towcp != (iconv_t) -1)
        {
          iconv_close (*cd_towcp);
          *cd_towcp = (iconv_t) -1;
        }
      if (*cd_tombp != (iconv_t) -1)
        {
          iconv_close (*cd_tombp);
          *cd_tombp = (iconv_t) -1;
        }

      return 1;
    }

  /* One special case for historical reasons is the backslash
     character.  In some codesets the byte value 0x5c is not mapped to
     U005c in Unicode.  These charsets then don't have a backslash
     character at all.  Therefore we have to live with whatever the
     codeset provides and recognize, instead of the U005c, the character
     the byte value 0x5c is mapped to.  */
  buf[0] = '\\';
  buf[1] = '\0';
  bufptr = buf;
  bufsize = 2;

  wbufptr = (char *) wbuf;
  wbufsize = sizeof (wbuf);

  /* Both input bytes must be consumed and both output slots filled
     for the conversion to count as successful.  */
  iconv (*cd_towcp, &bufptr, &bufsize, &wbufptr, &wbufsize);
  if (bufsize != 0 || wbufsize != 0)
    {
      /* Something went wrong, we couldn't convert the byte 0x5c.  Go
         on with using U005c.  */
      error (0, 0, gettext ("cannot determine escape character"));
      *escape_charp = L'\\';
    }
  else
    *escape_charp = wbuf[0];

  return 0;
}
1383

source code of glibc/catgets/gencat.c