1 | /* bug 19432: iconv rejects redundant escape sequences in IBM903, |
2 | IBM905, IBM907, and IBM909 |
3 | |
4 | Copyright (C) 2016-2022 Free Software Foundation, Inc. |
5 | This file is part of the GNU C Library. |
6 | |
7 | The GNU C Library is free software; you can redistribute it and/or |
8 | modify it under the terms of the GNU Lesser General Public |
9 | License as published by the Free Software Foundation; either |
10 | version 2.1 of the License, or (at your option) any later version. |
11 | |
12 | The GNU C Library is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | Lesser General Public License for more details. |
16 | |
17 | You should have received a copy of the GNU Lesser General Public |
18 | License along with the GNU C Library; if not, see |
19 | <https://www.gnu.org/licenses/>. */ |
20 | |
21 | #include <iconv.h> |
22 | #include <stdio.h> |
23 | #include <stdlib.h> |
24 | #include <string.h> |
25 | #include <errno.h> |
26 | #include <limits.h> |
27 | |
/* The longest test input sequence, in bytes.  */
#define MAXINBYTES 8

/* Size of the conversion output buffer: MB_LEN_MAX output bytes are
   reserved for each input byte.  */
#define MAXOUTBYTES (MAXINBYTES * MB_LEN_MAX)

/* Convert the INBYTESLEFT bytes at INPUT from the codeset named by
   FROM_SET to UTF-8, logging the iconv call and its outcome on stdout.

   Return 0 on success.  Return non-zero if the conversion descriptor
   cannot be opened, if iconv reports failure, or if iconv leaves errno
   set.  The descriptor is always closed before returning.  */

static int
test_ibm93x (const char *from_set, const char *input, size_t inbytesleft)
{
  const char to_set[] = "UTF-8";

  iconv_t cd = iconv_open (to_set, from_set);
  if (cd == (iconv_t) -1)
    {
      /* Capture errno before stdio gets a chance to overwrite it.  */
      const int open_errno = errno;

      /* Print the names in the same order iconv_open takes them
         (tocode first, then fromcode).  */
      printf ("iconv_open(\"%s\", \"%s\"): %s\n",
              to_set, from_set, strerror (open_errno));
      return 1;
    }

  char output[MAXOUTBYTES];
  size_t outbytesleft = sizeof output;

  /* iconv's interface takes a char **, so the const qualifier has to
     be cast away here.  */
  char *inbuf = (char *) input;
  char *outbuf = output;

  /* %p requires void * arguments.  */
  printf ("iconv(cd, %p, %zu, %p, %zu)\n",
          (void *) inbuf, inbytesleft, (void *) outbuf, outbytesleft);

  errno = 0;
  const size_t ret = iconv (cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);

  /* Save errno right away: the printf below may modify it, and the
     return value must reflect the state left by iconv.  */
  const int conv_errno = errno;

  printf (" ==> %zu: %s\n"
          " inbuf%+td, inbytesleft=%zu, outbuf%+td, outbytesleft=%zu\n",
          ret, strerror (conv_errno),
          inbuf - input, inbytesleft, outbuf - output, outbytesleft);

  /* Release the descriptor; it is not needed past this point.  */
  iconv_close (cd);

  return ret == (size_t) -1 || conv_errno != 0;
}
68 | |
69 | static int |
70 | do_test (void) |
71 | { |
72 | // State-dependent encodings to exercise. |
73 | static const char* const to_code[] = { |
74 | "IBM930" , "IBM933" , "IBM935" , "IBM937" , "IBM939" |
75 | }; |
76 | |
77 | static const size_t ncodesets = sizeof to_code / sizeof *to_code; |
78 | |
79 | static const struct { |
80 | char txt[MAXINBYTES]; |
81 | size_t len; |
82 | } input[] = { |
83 | #define DATA(s) { s, sizeof s - 1 } |
84 | /* <SI>: denotes the shift-in 1-byte escape sequence, changing |
85 | the encoder from a sigle-byte encoding to multibyte |
86 | <SO>: denotes the shift-out 1-byte escape sequence, switching |
87 | the encoder from a multibyte to a single-byte state */ |
88 | |
89 | DATA ("\x0e" ), // <SI> (not redundant) |
90 | DATA ("\x0f" ), // <S0> (redundant with initial state) |
91 | DATA ("\x0e\x0e" ), // <SI><SI> |
92 | DATA ("\x0e\x0f\x0f" ), // <SI><SO><SO> |
93 | DATA ("\x0f\x0f" ), // <SO><SO> |
94 | DATA ("\x0f\x0e\x0e" ), // <SO><SI><SI> |
95 | DATA ("\x0e\x0f\xc7\x0f" ), // <SI><SO><G><SO> |
96 | DATA ("\xc7\x0f" ) // <G><SO> (redundant with initial state) |
97 | }; |
98 | |
99 | static const size_t ninputs = sizeof input / sizeof *input; |
100 | |
101 | int ret = 0; |
102 | |
103 | size_t i, j; |
104 | |
105 | /* Iterate over the IBM93x codesets above and exercise each with |
106 | the input sequences above. */ |
107 | for (i = 0; i != ncodesets; ++i) |
108 | for (j = 0; j != ninputs; ++j) |
109 | ret += test_ibm93x (from_set: to_code [i], input: input [i].txt, inbytesleft: input [i].len); |
110 | |
111 | return ret; |
112 | } |
113 | |
/* Name do_test as the test entry point for the shared test skeleton
   included below (presumably the skeleton expands TEST_FUNCTION from
   its own main; confirm against ../test-skeleton.c).  */
#define TEST_FUNCTION do_test ()
#include "../test-skeleton.c"
116 | |