1/* Copyright (C) 2000-2022 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <https://www.gnu.org/licenses/>. */
17
18/* Create a table from CHARSET to Unicode.
19 This is a good test for CHARSET's iconv() module, in particular the
20 FROM_LOOP BODY macro. */
21
22#include <stddef.h>
23#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
26#include <iconv.h>
27#include <errno.h>
28
/* If nonzero, ignore conversions outside Unicode plane 0: utf8_decode
   then returns NULL for any output containing a value that needs more
   than four hexadecimal digits.  Set by main from the charset name.  */
static int bmp_only;
31
/* Formats the first BUFLEN bytes of BUF as "0x" followed by two uppercase
   hexadecimal digits per byte, e.g. { 0x8E, 0xA1 } gives "0x8EA1".
   BUFLEN must be between 1 and 4; any other value aborts the program.
   The returned string lives in a static buffer that the next call
   overwrites.  */
static const char*
hexbuf (unsigned char buf[], unsigned int buflen)
{
  /* "0x" plus two digits for each of at most 4 bytes plus the NUL
     terminator needs 11 bytes, so 50 is ample.  */
  static char msg[50];
  size_t used;
  unsigned int i;

  if (buflen < 1 || buflen > 4)
    abort ();

  used = (size_t) snprintf (msg, sizeof msg, "0x");
  for (i = 0; i < buflen; i++)
    used += (size_t) snprintf (msg + used, sizeof msg - used,
			       "%02X", buf[i]);

  return msg;
}
57
/* Attempts to convert the byte buffer BUF (BUFLEN bytes) to OUT, which
   must have room for 12 bytes, using the conversion descriptor CD.

   Returns the number of bytes written to OUT.  Returns 0 if iconv
   rejected the input with EINVAL (the sequence is incomplete, so the
   caller should try again with more bytes); 0 is also what comes back
   when the conversion succeeds without producing output.  Returns -1 if
   iconv rejected the input with EILSEQ (invalid sequence).

   Any other iconv failure, or a successful conversion that leaves input
   bytes unconsumed, prints a diagnostic to stderr and exits with
   status 1.  */
static int
try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned char *out)
{
  enum { OUT_SIZE = 12 };
  char *inbuf = (char *) buf;
  size_t inbytesleft = buflen;
  char *outbuf = (char *) out;
  size_t outbytesleft = OUT_SIZE;
  size_t result;

  /* Put CD back into its initial conversion state so that every attempt
     is independent of the previous one.  */
  iconv (cd, NULL, NULL, NULL, NULL);

  result = iconv (cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
  if (result != (size_t) -1)
    /* Flush: let the converter emit whatever it still holds back.  */
    result = iconv (cd, NULL, NULL, &outbuf, &outbytesleft);

  if (result == (size_t) -1)
    {
      /* Capture errno before any stdio call can overwrite it.  */
      int saved_errno = errno;

      if (saved_errno == EILSEQ)
	return -1;
      if (saved_errno == EINVAL)
	return 0;

      fprintf (stderr, "%s: iconv error: ", hexbuf (buf, buflen));
      errno = saved_errno;
      perror ("");
      exit (1);
    }

  if (inbytesleft != 0)
    {
      fprintf (stderr, "%s: inbytes = %ld, outbytes = %ld\n",
	       hexbuf (buf, buflen),
	       (long) (buflen - inbytesleft),
	       (long) (OUT_SIZE - outbytesleft));
      exit (1);
    }

  return (int) (OUT_SIZE - outbytesleft);
}
107
108/* Returns the out[] buffer as a Unicode value, formatted as 0x%04X. */
109static const char *
110utf8_decode (const unsigned char *out, unsigned int outlen)
111{
112 static char hexbuf[84];
113 char *p = hexbuf;
114
115 while (outlen > 0)
116 {
117 if (p > hexbuf)
118 *p++ = ' ';
119
120 if (out[0] < 0x80)
121 {
122 sprintf (s: p, format: "0x%04X", out[0]);
123 out += 1; outlen -= 1;
124 }
125 else if (out[0] >= 0xc0 && out[0] < 0xe0 && outlen >= 2)
126 {
127 sprintf (s: p, format: "0x%04X", ((out[0] & 0x1f) << 6) + (out[1] & 0x3f));
128 out += 2; outlen -= 2;
129 }
130 else if (out[0] >= 0xe0 && out[0] < 0xf0 && outlen >= 3)
131 {
132 sprintf (s: p, format: "0x%04X", ((out[0] & 0x0f) << 12)
133 + ((out[1] & 0x3f) << 6) + (out[2] & 0x3f));
134 out += 3; outlen -= 3;
135 }
136 else if (out[0] >= 0xf0 && out[0] < 0xf8 && outlen >= 4)
137 {
138 sprintf (s: p, format: "0x%04X", ((out[0] & 0x07) << 18)
139 + ((out[1] & 0x3f) << 12)
140 + ((out[2] & 0x3f) << 6) + (out[3] & 0x3f));
141 out += 4; outlen -= 4;
142 }
143 else if (out[0] >= 0xf8 && out[0] < 0xfc && outlen >= 5)
144 {
145 sprintf (s: p, format: "0x%04X", ((out[0] & 0x03) << 24)
146 + ((out[1] & 0x3f) << 18)
147 + ((out[2] & 0x3f) << 12)
148 + ((out[3] & 0x3f) << 6) + (out[4] & 0x3f));
149 out += 5; outlen -= 5;
150 }
151 else if (out[0] >= 0xfc && out[0] < 0xfe && outlen >= 6)
152 {
153 sprintf (s: p, format: "0x%04X", ((out[0] & 0x01) << 30)
154 + ((out[1] & 0x3f) << 24)
155 + ((out[2] & 0x3f) << 18)
156 + ((out[3] & 0x3f) << 12)
157 + ((out[4] & 0x3f) << 6) + (out[5] & 0x3f));
158 out += 6; outlen -= 6;
159 }
160 else
161 {
162 sprintf (s: p, format: "0x????");
163 out += 1; outlen -= 1;
164 }
165
166 if (bmp_only && strlen (s: p) > 6)
167 /* Ignore conversions outside Unicode plane 0. */
168 return NULL;
169
170 p += strlen (s: p);
171 }
172
173 return hexbuf;
174}
175
176int
177main (int argc, char *argv[])
178{
179 const char *charset;
180 iconv_t cd;
181 int search_depth;
182
183 if (argc != 2)
184 {
185 fprintf (stderr, format: "Usage: tst-table-from charset\n");
186 exit (status: 1);
187 }
188 charset = argv[1];
189
190 cd = iconv_open (tocode: "UTF-8", fromcode: charset);
191 if (cd == (iconv_t)(-1))
192 {
193 perror (s: "iconv_open");
194 exit (status: 1);
195 }
196
197 /* When testing UTF-8 or GB18030, stop at 0x10000, otherwise the output
198 file gets too big. */
199 bmp_only = (strcmp (s1: charset, s2: "UTF-8") == 0
200 || strcmp (s1: charset, s2: "GB18030") == 0);
201 search_depth = (strcmp (s1: charset, s2: "UTF-8") == 0 ? 3 : 4);
202
203 {
204 unsigned char out[12];
205 unsigned char buf[4];
206 unsigned int i0, i1, i2, i3;
207 int result;
208
209 for (i0 = 0; i0 < 0x100; i0++)
210 {
211 buf[0] = i0;
212 result = try (cd, buf, buflen: 1, out);
213 if (result < 0)
214 {
215 }
216 else if (result > 0)
217 {
218 const char *unicode = utf8_decode (out, outlen: result);
219 if (unicode != NULL)
220 printf (format: "0x%02X\t%s\n", i0, unicode);
221 }
222 else
223 {
224 for (i1 = 0; i1 < 0x100; i1++)
225 {
226 buf[1] = i1;
227 result = try (cd, buf, buflen: 2, out);
228 if (result < 0)
229 {
230 }
231 else if (result > 0)
232 {
233 const char *unicode = utf8_decode (out, outlen: result);
234 if (unicode != NULL)
235 printf (format: "0x%02X%02X\t%s\n", i0, i1, unicode);
236 }
237 else
238 {
239 for (i2 = 0; i2 < 0x100; i2++)
240 {
241 buf[2] = i2;
242 result = try (cd, buf, buflen: 3, out);
243 if (result < 0)
244 {
245 }
246 else if (result > 0)
247 {
248 const char *unicode = utf8_decode (out, outlen: result);
249 if (unicode != NULL)
250 printf (format: "0x%02X%02X%02X\t%s\n",
251 i0, i1, i2, unicode);
252 }
253 else if (search_depth > 3)
254 {
255 for (i3 = 0; i3 < 0x100; i3++)
256 {
257 buf[3] = i3;
258 result = try (cd, buf, buflen: 4, out);
259 if (result < 0)
260 {
261 }
262 else if (result > 0)
263 {
264 const char *unicode =
265 utf8_decode (out, outlen: result);
266 if (unicode != NULL)
267 printf (format: "0x%02X%02X%02X%02X\t%s\n",
268 i0, i1, i2, i3, unicode);
269 }
270 else
271 {
272 fprintf (stderr,
273 format: "%s: incomplete byte sequence\n",
274 hexbuf (buf, buflen: 4));
275 exit (status: 1);
276 }
277 }
278 }
279 }
280 }
281 }
282 }
283 }
284 }
285
286 if (iconv_close (cd: cd) < 0)
287 {
288 perror (s: "iconv_close");
289 exit (status: 1);
290 }
291
292 if (ferror (stdin) || fflush (stdout) || ferror (stdout))
293 {
294 fprintf (stderr, format: "I/O error\n");
295 exit (status: 1);
296 }
297
298 return 0;
299}
300

/* Source: glibc/iconvdata/tst-table-from.c  */