1 | /* Verify the BIG5HKSCS outputs that generate 2 wchar_t's (Bug 25734). |
2 | Copyright (C) 2020-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <stdio.h> |
20 | #include <string.h> |
21 | #include <locale.h> |
22 | #include <wchar.h> |
23 | #include <support/check.h> |
24 | #include <support/support.h> |
25 | |
/* A few BIG5-HKSCS characters map to two Unicode code points.
   They are:
   /x88/x62 => <U00CA><U0304>
   /x88/x64 => <U00CA><U030C>
   /x88/xa3 => <U00EA><U0304>
   /x88/xa5 => <U00EA><U030C>
   Each of these is special cased in iconvdata/big5hkscs.c.
   This test ensures that we correctly reset the shift state after
   outputting any of these characters.  We do this by converting
   each of them followed by converting an ASCII character.  If we fail
   to reset the shift state (bug 25734) then we'll see the last
   character in the queue output again.  */
38 | |
/* One conversion scenario: a label used in diagnostics, the multibyte
   input to decode, and the wide characters that decoding must yield.  */
struct testdata
{
  /* Label printed in progress and failure messages.  */
  const char *name;
  /* Exactly three bytes of multibyte input.  */
  const char input[3];
  /* Wide character expected from each of the three successive
     conversions, in order.  */
  wchar_t expected[3];
};
46 | |
47 | /* In BIG5-HKSCS (2008) there are 4 characters that generate multiple |
48 | wide characters. */ |
49 | struct testdata tests[4] = { |
50 | /* <H-8862>X => <U+00CA><U+0304>X */ |
51 | { "<H-8862>" , "\x88\x62\x58" , { 0x00CA, 0x0304, 0x0058 } }, |
52 | /* <H-8864>X => <U+00CA><U+030C>X */ |
53 | { "<H-8864>" , "\x88\x64\x58" , { 0x00CA, 0x030C, 0x0058 } }, |
54 | /* <H-88A3>X => <U+00EA><U+0304>X */ |
55 | { "<H-88A3>" , "\x88\xa3\x58" , { 0x00EA, 0x0304, 0x0058 } }, |
56 | /* <H-88A5>X => <U+00EA><U+030C>X */ |
57 | { "<H-88A5>" , "\x88\xa5\x58" , { 0x00EA, 0x030C, 0x0058 } } |
58 | }; |
59 | |
60 | /* Each test is of the form: |
61 | - Translate first code sequence (two bytes) |
62 | - Translate second (zero bytes) |
63 | - Translate the third (one byte). */ |
64 | static int |
65 | check_conversion (struct testdata test) |
66 | { |
67 | int err = 0; |
68 | wchar_t wc; |
69 | mbstate_t st; |
70 | size_t ret; |
71 | const char *mbs = test.input; |
72 | int consumed = 0; |
73 | /* Input is always 3 bytes long. */ |
74 | int inlen = 3; |
75 | |
76 | memset (s: &st, c: 0, n: sizeof (st)); |
77 | /* First conversion: Consumes first 2 bytes. */ |
78 | ret = mbrtowc (pwc: &wc, s: mbs, n: inlen - consumed, p: &st); |
79 | if (ret != 2) |
80 | { |
81 | printf (format: "error: First conversion consumed only %zd bytes.\n" , ret); |
82 | err++; |
83 | } |
84 | /* Advance the two consumed bytes. */ |
85 | mbs += ret; |
86 | consumed += ret; |
87 | if (wc != test.expected[0]) |
88 | { |
89 | printf (format: "error: Result of first conversion was wrong.\n" ); |
90 | err++; |
91 | } |
92 | /* Second conversion: Consumes 0 bytes. */ |
93 | ret = mbrtowc (pwc: &wc, s: mbs, n: inlen - consumed, p: &st); |
94 | if (ret != 0) |
95 | { |
96 | printf (format: "error: Second conversion consumed only %zd bytes.\n" , ret); |
97 | err++; |
98 | } |
99 | /* Advance the zero consumed bytes. */ |
100 | mbs += ret; |
101 | consumed += ret; |
102 | if (wc != test.expected[1]) |
103 | { |
104 | printf (format: "error: Result of second conversion was wrong.\n" ); |
105 | err++; |
106 | } |
107 | /* After the second conversion the state of the converter should be |
108 | in the initial state. It is in the initial state because the two |
109 | input BIG5-HKSCS bytes have been consumed and the 2 wchar_t's have |
110 | been output. */ |
111 | if (mbsinit (&st) == 0) |
112 | { |
113 | printf (format: "error: Converter not in initial state.\n" ); |
114 | err++; |
115 | } |
116 | /* Third conversion: Consumes 1 byte (it's an ASCII character). */ |
117 | ret = mbrtowc (pwc: &wc, s: mbs, n: inlen - consumed, p: &st); |
118 | if (ret != 1) |
119 | { |
120 | printf (format: "error: Third conversion consumed only %zd bytes.\n" , ret); |
121 | err++; |
122 | } |
123 | /* Advance the one byte. */ |
124 | mbs += ret; |
125 | consumed += ret; |
126 | if (wc != test.expected[2]) |
127 | { |
128 | printf (format: "error: Result of third conversion was wrong.\n" ); |
129 | err++; |
130 | } |
131 | /* Return 0 if we saw no errors. */ |
132 | return err; |
133 | } |
134 | |
135 | static int |
136 | do_test (void) |
137 | { |
138 | int err = 0; |
139 | int ret; |
140 | /* Testing BIG5-HKSCS. */ |
141 | xsetlocale (LC_ALL, locale: "zh_HK.BIG5-HKSCS" ); |
142 | |
143 | /* Run all the special conversions. */ |
144 | for (int i = 0; i < (sizeof (tests) / sizeof (struct testdata)); i++) |
145 | { |
146 | printf (format: "Running test for %s\n" , tests[i].name); |
147 | ret = check_conversion (test: tests[i]); |
148 | if (ret > 0) |
149 | printf (format: "Test %s failed.\n" , tests[i].name); |
150 | err += ret; |
151 | } |
152 | |
153 | /* Fail if any conversion had an error. */ |
154 | if (err > 0) |
155 | FAIL_EXIT1 ("One or more conversions failed." ); |
156 | |
157 | return 0; |
158 | } |
159 | |
160 | #include <support/test-driver.c> |
161 | |