1 | /* Generic conversion to and from 8bit charsets - S390 version. |
2 | Copyright (C) 2016-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #if defined HAVE_S390_VX_ASM_SUPPORT |
20 | |
/* ASM_CLOBBER_VR (NR) is used in the clobber lists of the inline assembly
   below.  If HAVE_S390_VX_GCC_SUPPORT is defined it expands to ", NR", i.e.
   the vector register NR is appended to the clobber list.  Otherwise it
   expands to nothing and the register is not mentioned.  */
21 | # if defined HAVE_S390_VX_GCC_SUPPORT |
22 | # define ASM_CLOBBER_VR(NR) , NR |
23 | # else |
24 | # define ASM_CLOBBER_VR(NR) |
25 | # endif |
26 | |
27 | /* Generate the conversion loop routines without vector instructions as |
28 | fallback, if vector instructions aren't available at runtime. */ |
/* While the common implementation is included, from_generic and to_generic
   are renamed to __from_generic_c and __to_generic_c.  These _c names are
   the non-vector candidates used by the ifunc expressions near the end of
   this file.  IGNORE_ICONV_SKELETON is defined only for the duration of the
   include (presumably it keeps the included file from including
   <iconv/skeleton.c> itself, as this file does that at its end - confirm
   against iconvdata/8bit-generic.c).  All three macros are removed again
   right after the include.  */
29 | # define IGNORE_ICONV_SKELETON |
30 | # define from_generic __from_generic_c |
31 | # define to_generic __to_generic_c |
32 | # include "iconvdata/8bit-generic.c" |
33 | # undef IGNORE_ICONV_SKELETON |
34 | # undef from_generic |
35 | # undef to_generic |
36 | |
37 | /* Generate the conversion routines with vector instructions. The vector |
38 | routines can only be used with charsets where the maximum UCS4 value |
39 | fits in 1 byte size. Then the hardware translate-instruction is used |
40 | to translate between multiple generic characters and "1 byte UCS4" |
41 | characters at once. The vector instructions are used to convert between |
42 | the "1 byte UCS4" and UCS4. */ |
43 | # include <ifunc-resolve.h> |
44 | |
/* Drop any earlier FROM_LOOP / TO_LOOP definitions and name the two loop
   functions defined below with a _vx suffix.  The 8bit side needs at least
   one byte per character (MIN_NEEDED_FROM), the UCS4 side four bytes
   (MIN_NEEDED_TO).  ONE_DIRECTION is set to 0; both directions are defined
   below.  */
45 | # undef FROM_LOOP |
46 | # undef TO_LOOP |
47 | # define FROM_LOOP __from_generic_vx |
48 | # define TO_LOOP __to_generic_vx |
49 | |
50 | # define MIN_NEEDED_FROM 1 |
51 | # define MIN_NEEDED_TO 4 |
52 | # define ONE_DIRECTION 0 |
53 | |
54 | /* First define the conversion function from the 8bit charset to UCS4. */ |
/* Parameters of the 8bit -> UCS4 loop: at least MIN_NEEDED_FROM input bytes
   and MIN_NEEDED_TO bytes of output space per character; the loop function
   is named FROM_LOOP.  */
55 | # define MIN_NEEDED_INPUT MIN_NEEDED_FROM |
56 | # define MIN_NEEDED_OUTPUT MIN_NEEDED_TO |
57 | # define LOOPFCT FROM_LOOP |
/* BODY_FROM_ORIG converts exactly one input byte in plain C.  The byte is
   looked up in to_ucs4.  If HAS_HOLES is set and the table yields 0 for a
   non-zero input byte, the byte is reported as illegal via
   STANDARD_FROM_LOOP_ERR_HANDLER (1).  Otherwise the 32bit value is stored
   at outptr with put32, outptr is advanced by 4 and inptr by 1.
   BODY below uses it whenever fewer than 16 input bytes or fewer than 64
   bytes of output space remain.  */
58 | # define BODY_FROM_ORIG \ |
59 | { \ |
60 | uint32_t ch = to_ucs4[*inptr]; \ |
61 | \ |
62 | if (HAS_HOLES && __builtin_expect (ch == L'\0', 0) && *inptr != '\0') \ |
63 | { \ |
64 | /* This is an illegal character. */ \ |
65 | STANDARD_FROM_LOOP_ERR_HANDLER (1); \ |
66 | } \ |
67 | \ |
68 | put32 (outptr, ch); \ |
69 | outptr += 4; \ |
70 | ++inptr; \ |
71 | } |
72 | |
/* BODY for the 8bit -> UCS4 direction.

   If fewer than 16 input bytes or fewer than 64 bytes of output space are
   left, one character is converted with BODY_FROM_ORIG.  Otherwise
   loop_count blocks of 16 input bytes (at most 16 blocks, limited by both
   the remaining input and the remaining output space) are handled by one
   asm statement:
   - mvc copies the input bytes of all blocks to buf and tr translates them
     in place via to_ucs1.  Both instructions are run through exrl, which
     supplies the length loop_count * 16 - 1 computed in R_I.
   - Each translated 16-byte block is checked for zero bytes.  A zero byte
     is only invalid if the corresponding input byte was not zero, too.
   - Valid blocks are enlarged from bytes to 32bit elements and stored as
     64 bytes at outptr.
   - At the first invalid byte, inptr is advanced to that byte, the
     characters preceding it within the block are stored with vstl and
     outptr is advanced behind them.
   If all blocks were valid, the brct loop has counted loop_count down to
   zero.  On the error path loop_count is left non-zero, which is what the
   C code after the asm statement tests before it calls
   STANDARD_FROM_LOOP_ERR_HANDLER (1).  */
73 | # define BODY \ |
74 | { \ |
75 | if (__builtin_expect (inend - inptr < 16, 1) \ |
76 | || outend - outptr < 64) \ |
77 | /* Convert remaining bytes with c code. */ \ |
78 | BODY_FROM_ORIG \ |
79 | else \ |
80 | { \ |
81 | /* Convert 16 ... 256 bytes at once with tr-instruction. */ \ |
82 | size_t index; \ |
83 | char buf[256]; \ |
84 | size_t loop_count = (inend - inptr) / 16; /* Number of 16-byte blocks. */ \ |
85 | if (loop_count > (outend - outptr) / 64) \ |
86 | loop_count = (outend - outptr) / 64; \ |
87 | if (loop_count > 16) \ |
88 | loop_count = 16; \ |
89 | __asm__ volatile (".machine push\n\t" \ |
90 | ".machine \"z13\"\n\t" \ |
91 | ".machinemode \"zarch_nohighgprs\"\n\t" \ |
92 | " sllk %[R_I],%[R_LI],4\n\t" \ |
93 | " ahi %[R_I],-1\n\t" \ |
94 | /* Execute mvc and tr with correct len. */ \ |
95 | " exrl %[R_I],21f\n\t" \ |
96 | " exrl %[R_I],22f\n\t" \ |
97 | /* Post-processing. */ \ |
98 | " lghi %[R_I],0\n\t" \ |
99 | " vzero %%v0\n\t" \ |
100 | "0: \n\t" \ |
101 | /* Find invalid character - value is zero. */ \ |
102 | " vl %%v16,0(%[R_I],%[R_BUF])\n\t" \ |
103 | " vceqbs %%v23,%%v0,%%v16\n\t" \ |
104 | " jle 10f\n\t" \ |
105 | "1: \n\t" \ |
106 | /* Enlarge to UCS4. */ \ |
107 | " vuplhb %%v17,%%v16\n\t" \ |
108 | " vupllb %%v18,%%v16\n\t" \ |
109 | " vuplhh %%v19,%%v17\n\t" \ |
110 | " vupllh %%v20,%%v17\n\t" \ |
111 | " vuplhh %%v21,%%v18\n\t" \ |
112 | " vupllh %%v22,%%v18\n\t" \ |
113 | /* Store 64bytes to buf_out. */ \ |
114 | " vstm %%v19,%%v22,0(%[R_OUT])\n\t" \ |
115 | " aghi %[R_I],16\n\t" \ |
116 | " la %[R_OUT],64(%[R_OUT])\n\t" \ |
117 | " brct %[R_LI],0b\n\t" \ |
118 | " la %[R_IN],0(%[R_I],%[R_IN])\n\t" \ |
119 | " j 20f\n\t" \ |
120 | "21: mvc 0(1,%[R_BUF]),0(%[R_IN])\n\t" \ |
121 | "22: tr 0(1,%[R_BUF]),0(%[R_TBL])\n\t" \ |
122 | /* Possibly invalid character found. */ \ |
123 | "10: \n\t" \ |
124 | /* Test if input was zero, too. */ \ |
125 | " vl %%v24,0(%[R_I],%[R_IN])\n\t" \ |
126 | " vceqb %%v24,%%v0,%%v24\n\t" \ |
127 | /* Zeros in buf (v23) and inptr (v24) are marked \ |
128 | with one bits. After xor, invalid characters \ |
129 | are marked as one bits. Proceed, if no \ |
130 | invalid characters are found. */ \ |
131 | " vx %%v24,%%v23,%%v24\n\t" \ |
132 | " vfenebs %%v24,%%v24,%%v0\n\t" \ |
133 | " jo 1b\n\t" \ |
134 | /* Found an invalid translation. \ |
135 | Store the preceding chars. */ \ |
136 | " la %[R_IN],0(%[R_I],%[R_IN])\n\t" \ |
137 | " vlgvb %[R_I],%%v24,7\n\t" /* Index of the invalid byte in the block. */ \ |
138 | " la %[R_IN],0(%[R_I],%[R_IN])\n\t" \ |
139 | " sll %[R_I],2\n\t" \ |
140 | " ahi %[R_I],-1\n\t" /* Output bytes to store minus one. */ \ |
141 | " jl 20f\n\t" \ |
142 | " lgr %[R_LI],%[R_I]\n\t" \ |
143 | " vuplhb %%v17,%%v16\n\t" \ |
144 | " vuplhh %%v19,%%v17\n\t" \ |
145 | " vstl %%v19,%[R_I],0(%[R_OUT])\n\t" \ |
146 | " ahi %[R_I],-16\n\t" \ |
147 | " jl 11f\n\t" \ |
148 | " vupllh %%v20,%%v17\n\t" \ |
149 | " vstl %%v20,%[R_I],16(%[R_OUT])\n\t" \ |
150 | " ahi %[R_I],-16\n\t" \ |
151 | " jl 11f\n\t" \ |
152 | " vupllb %%v18,%%v16\n\t" \ |
153 | " vuplhh %%v21,%%v18\n\t" \ |
154 | " vstl %%v21,%[R_I],32(%[R_OUT])\n\t" \ |
155 | " ahi %[R_I],-16\n\t" \ |
156 | " jl 11f\n\t" \ |
157 | " vupllh %%v22,%%v18\n\t" \ |
158 | " vstl %%v22,%[R_I],48(%[R_OUT])\n\t" \ |
159 | "11: \n\t" \ |
160 | " la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t" /* Advance behind the stored chars. */ \ |
161 | "20: \n\t" \ |
162 | ".machine pop" \ |
163 | : /* outputs */ [R_IN] "+a" (inptr) \ |
164 | , [R_OUT] "+a" (outptr), [R_I] "=&a" (index) \ |
165 | , [R_LI] "+a" (loop_count) \ |
166 | : /* inputs */ [R_BUF] "a" (buf) \ |
167 | , [R_TBL] "a" (to_ucs1) \ |
168 | : /* clobber list*/ "memory", "cc" \ |
169 | ASM_CLOBBER_VR ("v0") ASM_CLOBBER_VR ("v16") \ |
170 | ASM_CLOBBER_VR ("v17") ASM_CLOBBER_VR ("v18") \ |
171 | ASM_CLOBBER_VR ("v19") ASM_CLOBBER_VR ("v20") \ |
172 | ASM_CLOBBER_VR ("v21") ASM_CLOBBER_VR ("v22") \ |
173 | ASM_CLOBBER_VR ("v23") ASM_CLOBBER_VR ("v24") \ |
174 | ); \ |
175 | /* Error occurred? */ \ |
176 | if (loop_count != 0) \ |
177 | { \ |
178 | /* Found an invalid character! */ \ |
179 | STANDARD_FROM_LOOP_ERR_HANDLER (1); \ |
180 | } \ |
181 | } \ |
182 | } |
183 | |
/* Instantiate the 8bit -> UCS4 loop: <iconv/loop.c> is included while the
   LOOPFCT, MIN_NEEDED_INPUT, MIN_NEEDED_OUTPUT and BODY macros defined above
   are in effect.  LOOP_NEED_FLAGS is defined for this include as well.  */
184 | # define LOOP_NEED_FLAGS |
185 | # include <iconv/loop.c> |
186 | |
187 | /* Next, define the other direction - from UCS4 to 8bit charset. */ |
/* Parameters of the UCS4 -> 8bit loop: at least MIN_NEEDED_TO input bytes
   and MIN_NEEDED_FROM bytes of output space per character; the loop function
   is named TO_LOOP.  */
188 | # define MIN_NEEDED_INPUT MIN_NEEDED_TO |
189 | # define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM |
190 | # define LOOPFCT TO_LOOP |
/* BODY_TO_ORIG converts exactly one UCS4 character in plain C.  The
   character is read from inptr with get32.  If it is not smaller than the
   number of entries in from_ucs4, or if from_ucs4 yields '\0' for a
   character other than 0, UNICODE_TAG_HANDLER (ch, 4) is invoked and the
   character is then reported as illegal via STANDARD_TO_LOOP_ERR_HANDLER (4).
   Otherwise the translated byte is stored at outptr, outptr is advanced by 1
   and inptr by 4.
   BODY below uses it whenever fewer than 64 input bytes or fewer than 16
   bytes of output space remain.  */
191 | # define BODY_TO_ORIG \ |
192 | { \ |
193 | uint32_t ch = get32 (inptr); \ |
194 | \ |
195 | if (__builtin_expect (ch >= sizeof (from_ucs4) / sizeof (from_ucs4[0]), 0)\ |
196 | || (__builtin_expect (from_ucs4[ch], '\1') == '\0' && ch != 0)) \ |
197 | { \ |
198 | UNICODE_TAG_HANDLER (ch, 4); \ |
199 | \ |
200 | /* This is an illegal character. */ \ |
201 | STANDARD_TO_LOOP_ERR_HANDLER (4); \ |
202 | } \ |
203 | \ |
204 | *outptr++ = from_ucs4[ch]; \ |
205 | inptr += 4; \ |
206 | } |
/* BODY for the UCS4 -> 8bit direction.

   If fewer than 64 input bytes or fewer than 16 bytes of output space are
   left, one character is converted with BODY_TO_ORIG.  Otherwise up to 16
   blocks of 16 characters (64 input bytes each, limited by both the
   remaining input and the remaining output space) are handled in two asm
   steps:
   Step 1 scans each block for characters which are >= max (the number of
     entries in from_ucs4) or equal to 0.  Blocks without such a character
     are shortened to one byte per character and stored in buf, and inptr is
     advanced by 64.  The step stops at the first block with a hit: index is
     then the position (0 ... 15) of that character within the block and
     remaining_loop_count the number of blocks not processed.  Without a hit
     index ends up as 16 times the number of blocks, i.e. >= 16.
   Step 2 translates the blocks collected in buf in place with tr via
     from_ucs4 and copies every block without a zero byte to outptr.  If a
     translated block contains a zero byte, tmp is set to its position within
     that block and loop_count is left at the number of blocks not copied.
     Without a hit tmp ends up >= 16.
   A hit in step 2 lies in front of any hit of step 1.  Thus it replaces
   index, and inptr is moved back to the start of the affected block.
   The C code at the end converts the characters in front of index one by
   one.  All of them passed the ch >= max test of step 1, so from_ucs4 is
   indexed without a further range check.  A character 0 at index is stored
   as a regular character and the loop continues; everything else is
   reported as illegal character.  */
207 | # define BODY \ |
208 | { \ |
209 | if (__builtin_expect (inend - inptr < 64, 1) \ |
210 | || outend - outptr < 16) \ |
211 | /* Convert remaining bytes with c code. */ \ |
212 | BODY_TO_ORIG \ |
213 | else \ |
214 | { \ |
215 | /* Convert 64 ... 1024 bytes at once with tr-instruction. */ \ |
216 | size_t index, tmp; \ |
217 | char buf[256]; \ |
218 | size_t loop_count = (inend - inptr) / 64; \ |
219 | uint32_t max = sizeof (from_ucs4) / sizeof (from_ucs4[0]); \ |
220 | if (loop_count > (outend - outptr) / 16) \ |
221 | loop_count = (outend - outptr) / 16; \ |
222 | if (loop_count > 16) \ |
223 | loop_count = 16; \ |
224 | size_t remaining_loop_count = loop_count; \ |
225 | /* Step 1: Check for ch>=max, ch == 0 and shorten to bytes. \ |
226 | (ch == 0 is no error, but is handled differently) */ \ |
227 | __asm__ volatile (".machine push\n\t" \ |
228 | ".machine \"z13\"\n\t" \ |
229 | ".machinemode \"zarch_nohighgprs\"\n\t" \ |
230 | /* Setup to check for ch >= max. */ \ |
231 | " vzero %%v21\n\t" \ |
232 | " vleih %%v21,-24576,0\n\t" /* element 0: > */ \ |
233 | " vleih %%v21,-8192,2\n\t" /* element 1: =<> */ \ |
234 | " vlvgf %%v20,%[R_MAX],0\n\t" /* element 0: val */ \ |
235 | /* Process in 64byte - 16 characters blocks. */ \ |
236 | " lghi %[R_I],0\n\t" \ |
237 | " lghi %[R_TMP],0\n\t" \ |
238 | "0: \n\t" \ |
239 | " vlm %%v16,%%v19,0(%[R_IN])\n\t" \ |
240 | /* Test for ch >= max and ch == 0. */ \ |
241 | " vstrczfs %%v22,%%v16,%%v20,%%v21\n\t" \ |
242 | " jno 10f\n\t" \ |
243 | " vstrczfs %%v22,%%v17,%%v20,%%v21\n\t" \ |
244 | " jno 11f\n\t" \ |
245 | " vstrczfs %%v22,%%v18,%%v20,%%v21\n\t" \ |
246 | " jno 12f\n\t" \ |
247 | " vstrczfs %%v22,%%v19,%%v20,%%v21\n\t" \ |
248 | " jno 13f\n\t" \ |
249 | /* Shorten to byte values. */ \ |
250 | " vpkf %%v16,%%v16,%%v17\n\t" \ |
251 | " vpkf %%v18,%%v18,%%v19\n\t" \ |
252 | " vpkh %%v16,%%v16,%%v18\n\t" \ |
253 | /* Store 16bytes to buf. */ \ |
254 | " vst %%v16,0(%[R_I],%[R_BUF])\n\t" \ |
255 | /* Loop until all blocks are processed. */ \ |
256 | " la %[R_IN],64(%[R_IN])\n\t" \ |
257 | " aghi %[R_I],16\n\t" \ |
258 | " brct %[R_LI],0b\n\t" \ |
259 | " j 20f\n\t" \ |
260 | /* Found error ch >= max or ch == 0. */ \ |
261 | "13: aghi %[R_TMP],4\n\t" \ |
262 | "12: aghi %[R_TMP],4\n\t" \ |
263 | "11: aghi %[R_TMP],4\n\t" \ |
264 | "10: vlgvb %[R_I],%%v22,7\n\t" /* Byte index of the hit in its vector. */ \ |
265 | " srlg %[R_I],%[R_I],2\n\t" /* Byte index to character index. */ \ |
266 | " agr %[R_I],%[R_TMP]\n\t" /* Add the offset of the vector in the block. */ \ |
267 | "20: \n\t" \ |
268 | ".machine pop" \ |
269 | : /* outputs */ [R_IN] "+a" (inptr) \ |
270 | , [R_I] "=&a" (index) \ |
271 | , [R_TMP] "=d" (tmp) \ |
272 | , [R_LI] "+d" (remaining_loop_count) \ |
273 | : /* inputs */ [R_BUF] "a" (buf) \ |
274 | , [R_MAX] "d" (max) \ |
275 | : /* clobber list*/ "memory", "cc" \ |
276 | ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ |
277 | ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ |
278 | ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ |
279 | ASM_CLOBBER_VR ("v22") \ |
280 | ); \ |
281 | /* Error occurred in step 1? An error (ch >= max || ch == 0) \ |
282 | occurred, if remaining_loop_count > 0. The error occurred \ |
283 | at character-index (index) after already processed blocks. */ \ |
284 | loop_count -= remaining_loop_count; \ |
285 | if (loop_count > 0) \ |
286 | { \ |
287 | /* Step 2: Translate already processed blocks in buf and \ |
288 | check for errors (from_ucs4[ch] == 0). */ \ |
289 | __asm__ volatile (".machine push\n\t" \ |
290 | ".machine \"z13\"\n\t" \ |
291 | ".machinemode \"zarch_nohighgprs\"\n\t" \ |
292 | " sllk %[R_I],%[R_LI],4\n\t" \ |
293 | " ahi %[R_I],-1\n\t" \ |
294 | /* Execute tr with correct len. */ \ |
295 | " exrl %[R_I],21f\n\t" \ |
296 | /* Post-processing. */ \ |
297 | " lghi %[R_I],0\n\t" \ |
298 | "0: \n\t" \ |
299 | /* Find invalid character - value == 0. */ \ |
300 | " vl %%v16,0(%[R_I],%[R_BUF])\n\t" \ |
301 | " vfenezbs %%v17,%%v16,%%v16\n\t" \ |
302 | " je 10f\n\t" \ |
303 | /* Store 16bytes to buf_out. */ \ |
304 | " vst %%v16,0(%[R_I],%[R_OUT])\n\t" \ |
305 | " aghi %[R_I],16\n\t" \ |
306 | " brct %[R_LI],0b\n\t" \ |
307 | " la %[R_OUT],0(%[R_I],%[R_OUT])\n\t" \ |
308 | " j 20f\n\t" \ |
309 | "21: tr 0(1,%[R_BUF]),0(%[R_TBL])\n\t" \ |
310 | /* Found an error: from_ucs4[ch] == 0. */ \ |
311 | "10: la %[R_OUT],0(%[R_I],%[R_OUT])\n\t" \ |
312 | " vlgvb %[R_I],%%v17,7\n\t" /* Index of the zero byte in the block. */ \ |
313 | "20: \n\t" \ |
314 | ".machine pop" \ |
315 | : /* outputs */ [R_OUT] "+a" (outptr) \ |
316 | , [R_I] "=&a" (tmp) \ |
317 | , [R_LI] "+d" (loop_count) \ |
318 | : /* inputs */ [R_BUF] "a" (buf) \ |
319 | , [R_TBL] "a" (from_ucs4) \ |
320 | : /* clobber list*/ "memory", "cc" \ |
321 | ASM_CLOBBER_VR ("v16") \ |
322 | ASM_CLOBBER_VR ("v17") \ |
323 | ); \ |
324 | /* Error occurred in processed bytes of step 2? \ |
325 | Thus possible error in step 1 is obsolete. */ \ |
326 | if (tmp < 16) \ |
327 | { \ |
328 | index = tmp; \ |
329 | inptr -= loop_count * 64; /* Back to the start of the failing block. */ \ |
330 | } \ |
331 | } \ |
332 | /* Error occurred in step 1/2? */ \ |
333 | if (index < 16) \ |
334 | { \ |
335 | /* Found an invalid character (see step 2) or zero \ |
336 | (see step 1) at index! Convert the chars before index \ |
337 | manually. If there is a zero at index detected by step 1, \ |
338 | there could be invalid characters before this zero. */ \ |
339 | int i; \ |
340 | uint32_t ch; \ |
341 | for (i = 0; i < index; i++) \ |
342 | { \ |
343 | ch = get32 (inptr); \ |
344 | if (__builtin_expect (from_ucs4[ch], '\1') == '\0') \ |
345 | break; \ |
346 | *outptr++ = from_ucs4[ch]; \ |
347 | inptr += 4; \ |
348 | } \ |
349 | if (i == index) \ |
350 | { \ |
351 | ch = get32 (inptr); \ |
352 | if (ch == 0) \ |
353 | { \ |
354 | /* This is no error, but handled differently. */ \ |
355 | *outptr++ = from_ucs4[ch]; \ |
356 | inptr += 4; \ |
357 | continue; \ |
358 | } \ |
359 | } \ |
360 | \ |
361 | /* iconv/loop.c disables -Wmaybe-uninitialized for a false \ |
362 | positive warning in this code with -Os and has a \ |
363 | comment referencing this code accordingly. Updates in \ |
364 | one place may require updates in the other. */ \ |
365 | UNICODE_TAG_HANDLER (ch, 4); \ |
366 | \ |
367 | /* This is an illegal character. */ \ |
368 | STANDARD_TO_LOOP_ERR_HANDLER (4); \ |
369 | } \ |
370 | } \ |
371 | } |
372 | |
/* Instantiate the UCS4 -> 8bit loop: <iconv/loop.c> is included a second
   time, now with the LOOPFCT, MIN_NEEDED_INPUT, MIN_NEEDED_OUTPUT and BODY
   macros of the other direction in effect.  LOOP_NEED_FLAGS is defined for
   this include as well.  */
373 | # define LOOP_NEED_FLAGS |
374 | # include <iconv/loop.c> |
375 | |
376 | |
377 | /* Generate ifunc'ed loop function. */ |
/* The selection expression passed to s390_libc_ifunc_expr chooses the _vx
   variant only if from_ucs4 has at most 256 entries and hwcap contains
   HWCAP_S390_VX.  In all other cases the plain C variant is chosen.  The
   same condition is used for both directions.  */
378 | s390_libc_ifunc_expr (__from_generic_c, __from_generic, |
379 | (sizeof (from_ucs4) / sizeof (from_ucs4[0]) <= 256 |
380 | && hwcap & HWCAP_S390_VX) |
381 | ? __from_generic_vx |
382 | : __from_generic_c); |
383 | |
384 | s390_libc_ifunc_expr (__to_generic_c, __to_generic, |
385 | (sizeof (from_ucs4) / sizeof (from_ucs4[0]) <= 256 |
386 | && hwcap & HWCAP_S390_VX) |
387 | ? __to_generic_vx |
388 | : __to_generic_c); |
389 | |
/* __to_generic_single is provided as an alias of __to_generic_c_single
   (presumably because only the C build supplies a *_single function -
   confirm against iconv/loop.c).  */
390 | strong_alias (__to_generic_c_single, __to_generic_single) |
391 | |
/* Point FROM_LOOP and TO_LOOP at the ifunc'ed symbols __from_generic and
   __to_generic before <iconv/skeleton.c> is included.  */
392 | # undef FROM_LOOP |
393 | # undef TO_LOOP |
394 | # define FROM_LOOP __from_generic |
395 | # define TO_LOOP __to_generic |
396 | # include <iconv/skeleton.c> |
397 | |
398 | #else |
399 | /* Generate this module without ifunc if build environment lacks vector |
400 | support. Instead the common 8bit-generic.c is used. */ |
401 | # include "iconvdata/8bit-generic.c" |
402 | #endif /* !defined HAVE_S390_VX_ASM_SUPPORT */ |
403 | |