/* Conversion between UTF-8 and UTF-16 - s390 version.

   This module uses the Z9-109 variants of the Convert Unicode
   instructions.
   Copyright (C) 1997-2024 Free Software Foundation, Inc.

   This is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   This is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
| 20 | |
| 21 | #include <dlfcn.h> |
| 22 | #include <stdint.h> |
| 23 | #include <unistd.h> |
| 24 | #include <gconv.h> |
| 25 | #include <string.h> |
| 26 | |
/* Select which versions should be defined depending on support
   for multiarch, vector and used minimum architecture level.

   Every HAVE_* macro below is always defined (to 0 or 1) so that the
   rest of the file can test it with "#if HAVE_xxx == 1".  */
#ifdef HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT
/* The convert-instruction loop is the default for UTF-8 -> UTF-16;
   the C loop is not built.  */
# define HAVE_FROM_C 0
# define FROM_LOOP_DEFAULT FROM_LOOP_CU
#else
# define HAVE_FROM_C 1
# define FROM_LOOP_DEFAULT FROM_LOOP_C
#endif

/* The C loop is always built and is the default for UTF-16 -> UTF-8.  */
#define HAVE_TO_C 1
#define TO_LOOP_DEFAULT TO_LOOP_C

/* Loop based on the convert instruction for UTF-8 -> UTF-16.  */
#if defined HAVE_S390_MIN_Z196_ZARCH_ASM_SUPPORT || defined USE_MULTIARCH
# define HAVE_FROM_CU 1
#else
# define HAVE_FROM_CU 0
#endif

/* Vector loops are only built for multiarch builds whose assembler
   supports the vector instructions.  */
#if defined HAVE_S390_VX_ASM_SUPPORT && defined USE_MULTIARCH
# define HAVE_FROM_VX 1
# define HAVE_TO_VX 1
# define HAVE_TO_VX_CU 1
#else
# define HAVE_FROM_VX 0
# define HAVE_TO_VX 0
# define HAVE_TO_VX_CU 0
#endif

/* Append vector register NR to an asm clobber list, but only if the
   compiler knows about vector registers; otherwise expand to nothing.  */
#if defined HAVE_S390_VX_GCC_SUPPORT
# define ASM_CLOBBER_VR(NR) , NR
#else
# define ASM_CLOBBER_VR(NR)
#endif

/* Asm fragment applied to the size_t length operands before they are
   used with 64-bit instructions.  Empty on s390x; otherwise an llgfr
   of REG onto itself.  */
#if defined __s390x__
# define CONVERT_32BIT_SIZE_T(REG)
#else
# define CONVERT_32BIT_SIZE_T(REG) "llgfr %" #REG ",%" #REG "\n\t"
#endif
| 67 | |
/* Defines for skeleton.c.  This module supplies its own gconv_init and
   gconv_end, hence DEFINE_INIT and DEFINE_FINI are 0.  "FROM" is the
   UTF-8 side (1 to 4 bytes per character), "TO" is the UTF-16 side
   (2 or 4 bytes per character).  */
#define DEFINE_INIT 0
#define DEFINE_FINI 0
#define MIN_NEEDED_FROM 1
#define MAX_NEEDED_FROM 4
#define MIN_NEEDED_TO 2
#define MAX_NEEDED_TO 4
#define FROM_LOOP FROM_LOOP_DEFAULT
#define TO_LOOP TO_LOOP_DEFAULT
/* DIR is the local variable set up by PREPARE_LOOP.  */
#define FROM_DIRECTION (dir == from_utf8)
#define ONE_DIRECTION 0


/* UTF-16 big endian byte order mark.  */
#define BOM_UTF16 0xfeff
| 83 | |
/* Direction of the transformation.  */
enum direction
{
  illegal_dir,	/* Name pair not handled by this module.  */
  to_utf8,	/* UTF-16 -> UTF-8.  */
  from_utf8	/* UTF-8 -> UTF-16.  */
};

/* Per-step private data, allocated by gconv_init, stored in the
   step's __data member and released by gconv_end.  */
struct utf8_data
{
  enum direction dir;	/* Direction selected for this step.  */
  int emit_bom;		/* Nonzero if a byte order mark is to be written.  */
};
| 97 | |
| 98 | |
| 99 | extern int gconv_init (struct __gconv_step *step); |
| 100 | int |
| 101 | gconv_init (struct __gconv_step *step) |
| 102 | { |
| 103 | /* Determine which direction. */ |
| 104 | struct utf8_data *new_data; |
| 105 | enum direction dir = illegal_dir; |
| 106 | int emit_bom; |
| 107 | int result; |
| 108 | |
| 109 | emit_bom = (__strcasecmp (s1: step->__to_name, s2: "UTF-16//" ) == 0); |
| 110 | |
| 111 | if (__strcasecmp (s1: step->__from_name, s2: "ISO-10646/UTF8/" ) == 0 |
| 112 | && (__strcasecmp (s1: step->__to_name, s2: "UTF-16//" ) == 0 |
| 113 | || __strcasecmp (s1: step->__to_name, s2: "UTF-16BE//" ) == 0)) |
| 114 | { |
| 115 | dir = from_utf8; |
| 116 | } |
| 117 | else if (__strcasecmp (s1: step->__from_name, s2: "UTF-16BE//" ) == 0 |
| 118 | && __strcasecmp (s1: step->__to_name, s2: "ISO-10646/UTF8/" ) == 0) |
| 119 | { |
| 120 | dir = to_utf8; |
| 121 | } |
| 122 | |
| 123 | result = __GCONV_NOCONV; |
| 124 | if (dir != illegal_dir) |
| 125 | { |
| 126 | new_data = (struct utf8_data *) malloc (size: sizeof (struct utf8_data)); |
| 127 | |
| 128 | result = __GCONV_NOMEM; |
| 129 | if (new_data != NULL) |
| 130 | { |
| 131 | new_data->dir = dir; |
| 132 | new_data->emit_bom = emit_bom; |
| 133 | step->__data = new_data; |
| 134 | |
| 135 | if (dir == from_utf8) |
| 136 | { |
| 137 | step->__min_needed_from = MIN_NEEDED_FROM; |
| 138 | step->__max_needed_from = MIN_NEEDED_FROM; |
| 139 | step->__min_needed_to = MIN_NEEDED_TO; |
| 140 | step->__max_needed_to = MIN_NEEDED_TO; |
| 141 | } |
| 142 | else |
| 143 | { |
| 144 | step->__min_needed_from = MIN_NEEDED_TO; |
| 145 | step->__max_needed_from = MIN_NEEDED_TO; |
| 146 | step->__min_needed_to = MIN_NEEDED_FROM; |
| 147 | step->__max_needed_to = MIN_NEEDED_FROM; |
| 148 | } |
| 149 | |
| 150 | step->__stateful = 0; |
| 151 | |
| 152 | result = __GCONV_OK; |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | return result; |
| 157 | } |
| 158 | |
| 159 | |
| 160 | extern void gconv_end (struct __gconv_step *data); |
| 161 | void |
| 162 | gconv_end (struct __gconv_step *data) |
| 163 | { |
| 164 | free (ptr: data->__data); |
| 165 | } |
| 166 | |
/* The macro for the hardware loop.  This is used for both
   directions.

   INSTRUCTION is the asm text of the convert instruction; it refers to
   the output pointer as %0 and to the input pointer as %1.  The macro
   expects inptr, inend, outptr, outend and result in scope.  Pointers
   and lengths are pinned to the register pairs 8/9 (input) and 10/11
   (output).  After the asm, inptr and outptr are updated and the
   condition code is mapped onto RESULT:
     1 -> __GCONV_FULL_OUTPUT
     2 -> __GCONV_ILLEGAL_INPUT
   Any other condition code leaves RESULT untouched.  */
#define HARDWARE_CONVERT(INSTRUCTION) \
  { \
    register const unsigned char* pInput __asm__ ("8") = inptr; \
    register size_t inlen __asm__ ("9") = inend - inptr; \
    register unsigned char* pOutput __asm__ ("10") = outptr; \
    register size_t outlen __asm__("11") = outend - outptr; \
    unsigned long cc = 0; \
    \
    /* "jo 0b" re-issues the instruction while it reports condition \
       code 3; "ipm" then copies the final condition code into CC.  */ \
    __asm__ __volatile__ (".machine push \n\t" \
                          ".machine \"z9-109\" \n\t" \
                          ".machinemode \"zarch_nohighgprs\"\n\t" \
                          "0: " INSTRUCTION " \n\t" \
                          ".machine pop \n\t" \
                          " jo 0b \n\t" \
                          " ipm %2 \n" \
                          : "+a" (pOutput), "+a" (pInput), "+d" (cc), \
                            "+d" (outlen), "+d" (inlen) \
                          : \
                          : "cc", "memory"); \
    \
    inptr = pInput; \
    outptr = pOutput; \
    /* The condition code is taken from bits 28 and up of CC.  */ \
    cc >>= 28; \
    \
    if (cc == 1) \
      { \
        result = __GCONV_FULL_OUTPUT; \
      } \
    else if (cc == 2) \
      { \
        result = __GCONV_ILLEGAL_INPUT; \
      } \
  }
| 202 | |
/* Per-call setup: fetch the direction and the BOM flag from the step's
   private data and, on the very first invocation of a non-internal
   conversion that requested it, write the UTF-16 byte order mark.
   Returns __GCONV_FULL_OUTPUT from the enclosing function if the
   output buffer cannot even hold the two BOM bytes.  Expects step,
   data, outbuf and outend in scope.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct utf8_data *) step->__data)->dir; \
  int emit_bom = ((struct utf8_data *) step->__data)->emit_bom; \
  \
  if (emit_bom && !data->__internal_use \
      && data->__invocation_counter == 0) \
    { \
      /* Emit the UTF-16 Byte Order Mark.  */ \
      if (__glibc_unlikely (outbuf + 2 > outend)) \
        return __GCONV_FULL_OUTPUT; \
      \
      put16 (outbuf, BOM_UTF16); \
      outbuf += 2; \
    }
| 217 | |
/* Conversion function from UTF-8 to UTF-16.

   ASM is the statement block performing the bulk conversion (it
   updates inptr, outptr and result).  If it stopped before the end of
   the input for a reason other than a full output buffer, the code
   below counts the continuation bytes (10xxxxxx) following the
   offending lead byte, looking at no more than 5 bytes in total.  When
   the sequence runs right up to the end of the input and RESULT is
   still __GCONV_EMPTY_INPUT the character is reported as incomplete;
   otherwise the standard error handler deals with the I bytes.  */
#define BODY_FROM_HW(ASM) \
  { \
    ASM; \
    if (__glibc_likely (inptr == inend) \
        || result == __GCONV_FULL_OUTPUT) \
      break; \
    \
    int i; \
    for (i = 1; inptr + i < inend && i < 5; ++i) \
      if ((inptr[i] & 0xc0) != 0x80) \
        break; \
    \
    if (__glibc_likely (inptr + i == inend \
                        && result == __GCONV_EMPTY_INPUT)) \
      { \
        result = __GCONV_INCOMPLETE_INPUT; \
        break; \
      } \
    STANDARD_FROM_LOOP_ERR_HANDLER (i); \
  }
| 239 | |
#if HAVE_FROM_VX == 1
/* UTF-8 -> UTF-16 using vector instructions plus cu12.

   While at least 16 input bytes and 32 output bytes are available, a
   16-byte chunk is loaded and, if all of its bytes are <= 0x7f, widened
   to 32 bytes of UTF-16.  If a chunk contains a byte > 0x7f, only the
   preceding one-byte characters are stored and cu12 takes over (label
   20); when cu12 ends with "jo" taken, the vector loop is tried again.
   On exit RESULT is set to __GCONV_FULL_OUTPUT or
   __GCONV_ILLEGAL_INPUT according to the cu12 condition code, and
   inptr/outptr are updated.  Expects inptr, inend, outptr, outend and
   result in scope.  */
# define HW_FROM_VX \
  { \
    register const unsigned char* pInput asm ("8") = inptr; \
    register size_t inlen asm ("9") = inend - inptr; \
    register unsigned char* pOutput asm ("10") = outptr; \
    register size_t outlen asm("11") = outend - outptr; \
    unsigned long tmp, tmp2, tmp3; \
    asm volatile (".machine push\n\t" \
                  ".machine \"z13\"\n\t" \
                  ".machinemode \"zarch_nohighgprs\"\n\t" \
                  " vrepib %%v30,0x7f\n\t" /* For compare > 0x7f.  */ \
                  " vrepib %%v31,0x20\n\t" \
                  CONVERT_32BIT_SIZE_T ([R_INLEN]) \
                  CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \
                  /* Loop which handles UTF-8 chars <=0x7f.  */ \
                  "0: clgijl %[R_INLEN],16,20f\n\t" \
                  " clgijl %[R_OUTLEN],32,20f\n\t" \
                  "1: vl %%v16,0(%[R_IN])\n\t" \
                  " vstrcbs %%v17,%%v16,%%v30,%%v31\n\t" \
                  " jno 10f\n\t" /* Jump away if not all bytes are 1byte \
                                    UTF8 chars.  */ \
                  /* Enlarge to UTF-16.  */ \
                  " vuplhb %%v18,%%v16\n\t" \
                  " la %[R_IN],16(%[R_IN])\n\t" \
                  " vupllb %%v19,%%v16\n\t" \
                  " aghi %[R_INLEN],-16\n\t" \
                  /* Store 32 bytes to buf_out.  */ \
                  " vstm %%v18,%%v19,0(%[R_OUT])\n\t" \
                  " aghi %[R_OUTLEN],-32\n\t" \
                  " la %[R_OUT],32(%[R_OUT])\n\t" \
                  " clgijl %[R_INLEN],16,20f\n\t" \
                  " clgijl %[R_OUTLEN],32,20f\n\t" \
                  " j 1b\n\t" \
                  "10:\n\t" \
                  /* At least one byte is > 0x7f. \
                     Store the preceding 1-byte chars.  */ \
                  " vlgvb %[R_TMP],%%v17,7\n\t" \
                  " sllk %[R_TMP2],%[R_TMP],1\n\t" /* Compute highest \
                                                      index to store.  */ \
                  " llgfr %[R_TMP3],%[R_TMP2]\n\t" \
                  " ahi %[R_TMP2],-1\n\t" \
                  " jl 20f\n\t" \
                  " vuplhb %%v18,%%v16\n\t" \
                  " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \
                  " ahi %[R_TMP2],-16\n\t" \
                  " jl 11f\n\t" \
                  " vupllb %%v19,%%v16\n\t" \
                  " vstl %%v19,%[R_TMP2],16(%[R_OUT])\n\t" \
                  "11: \n\t" /* Update pointers.  */ \
                  " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \
                  " slgr %[R_INLEN],%[R_TMP]\n\t" \
                  " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \
                  " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \
                  /* Handle multibyte utf8-char with convert instruction.  */ \
                  "20: cu12 %[R_OUT],%[R_IN],1\n\t" \
                  " jo 0b\n\t" /* Try vector implementation again.  */ \
                  " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1.  */ \
                  " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2.  */ \
                  ".machine pop" \
                  : /* outputs */ [R_IN] "+a" (pInput) \
                    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \
                    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \
                    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \
                    , [R_RES] "+d" (result) \
                  : /* inputs */ \
                    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \
                    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \
                  : /* clobber list */ "memory", "cc" \
                    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
                    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
                    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \
                  ); \
    inptr = pInput; \
    outptr = pOutput; \
  }
# define BODY_FROM_VX BODY_FROM_HW (HW_FROM_VX)

/* Generate loop-function with hardware vector and utf-convert instructions.  */
# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
# define MAX_NEEDED_INPUT MAX_NEEDED_FROM
# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
# define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
# define FROM_LOOP_VX __from_utf8_loop_vx
# define LOOPFCT FROM_LOOP_VX
# define LOOP_NEED_FLAGS
# define BODY BODY_FROM_VX
# include <iconv/loop.c>
#else
# define FROM_LOOP_VX NULL
#endif /* HAVE_FROM_VX != 1 */
| 331 | |
#if HAVE_FROM_CU == 1
/* UTF-8 -> UTF-16 with the cu12 instruction only: %0 is the output
   pointer and %1 the input pointer of HARDWARE_CONVERT.  */
# define BODY_FROM_ETF3EH BODY_FROM_HW (HARDWARE_CONVERT ("cu12 %0, %1, 1"))

/* Generate loop-function with hardware utf-convert instruction.  */
# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
# define MAX_NEEDED_INPUT MAX_NEEDED_FROM
# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
# define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
# define FROM_LOOP_CU __from_utf8_loop_etf3eh
# define LOOPFCT FROM_LOOP_CU
# define LOOP_NEED_FLAGS
# define BODY BODY_FROM_ETF3EH
# include <iconv/loop.c>
#else
# define FROM_LOOP_CU NULL
#endif /* HAVE_FROM_CU != 1 */
| 348 | |
#if HAVE_FROM_C == 1
/* The software implementation is based on the code in gconv_simple.c.

   Converts one UTF-8 character per iteration:
     - 1 byte  (< 0x80)         -> one UTF-16 unit,
     - 2 bytes (lead 0xc2-0xdf) -> one UTF-16 unit,
     - 3 bytes (lead 0xe0-0xef) -> one UTF-16 unit; overlong forms and
                                   encoded surrogates are rejected,
     - 4 bytes (lead 0xf0-0xf7) -> a high/low surrogate pair.
   Any other lead byte, a bad continuation byte or an out-of-range
   4-byte value goes through STANDARD_FROM_LOOP_ERR_HANDLER.  A
   sequence cut off by the end of the input yields
   __GCONV_INCOMPLETE_INPUT; missing room for a surrogate pair yields
   __GCONV_FULL_OUTPUT.  */
# define BODY_FROM_C \
  { \
    /* Next input byte.  */ \
    uint16_t ch = *inptr; \
    \
    if (__glibc_likely (ch < 0x80)) \
      { \
        /* One byte sequence.  */ \
        ++inptr; \
      } \
    else \
      { \
        uint_fast32_t cnt; \
        uint_fast32_t i; \
        \
        if (ch >= 0xc2 && ch < 0xe0) \
          { \
            /* We expect two bytes.  The first byte cannot be 0xc0 \
               or 0xc1, otherwise the wide character could have been \
               represented using a single byte.  */ \
            cnt = 2; \
            ch &= 0x1f; \
          } \
        else if (__glibc_likely ((ch & 0xf0) == 0xe0)) \
          { \
            /* We expect three bytes.  */ \
            cnt = 3; \
            ch &= 0x0f; \
          } \
        else if (__glibc_likely ((ch & 0xf8) == 0xf0)) \
          { \
            /* We expect four bytes.  */ \
            cnt = 4; \
            ch &= 0x07; \
          } \
        else \
          { \
            /* Search the end of this ill-formed UTF-8 character.  This \
               is the next byte with (x & 0xc0) != 0x80.  */ \
            i = 0; \
            do \
              ++i; \
            while (inptr + i < inend \
                   && (*(inptr + i) & 0xc0) == 0x80 \
                   && i < 5); \
            \
          errout: \
            STANDARD_FROM_LOOP_ERR_HANDLER (i); \
          } \
        \
        if (__glibc_unlikely (inptr + cnt > inend)) \
          { \
            /* We don't have enough input.  But before we report \
               that check that all the bytes are correct.  */ \
            for (i = 1; inptr + i < inend; ++i) \
              if ((inptr[i] & 0xc0) != 0x80) \
                break; \
            \
            if (__glibc_likely (inptr + i == inend)) \
              { \
                result = __GCONV_INCOMPLETE_INPUT; \
                break; \
              } \
            \
            goto errout; \
          } \
        \
        if (cnt == 4) \
          { \
            /* For 4 byte UTF-8 chars two UTF-16 chars (high and \
               low) are needed.  */ \
            uint16_t zabcd, high, low; \
            \
            if (__glibc_unlikely (outptr + 4 > outend)) \
              { \
                /* Overflow in the output buffer.  */ \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            \
            /* Check if tail-bytes >= 0x80, < 0xc0.  */ \
            for (i = 1; i < cnt; ++i) \
              { \
                if ((inptr[i] & 0xc0) != 0x80) \
                  /* This is an illegal encoding.  */ \
                  goto errout; \
              } \
            \
            /* See Principles of Operations cu12.  */ \
            zabcd = (((inptr[0] & 0x7) << 2) \
                     | ((inptr[1] & 0x30) >> 4)) - 1; \
            \
            /* z-bit must be zero after subtracting 1.  */ \
            if (zabcd & 0x10) \
              STANDARD_FROM_LOOP_ERR_HANDLER (4) \
            \
            high = (uint16_t)(0xd8 << 8); /* high surrogate id */ \
            high |= zabcd << 6; /* abcd bits */ \
            high |= (inptr[1] & 0xf) << 2; /* efgh bits */ \
            high |= (inptr[2] & 0x30) >> 4; /* ij bits */ \
            \
            low = (uint16_t)(0xdc << 8); /* low surrogate id */ \
            low |= ((uint16_t)inptr[2] & 0xc) << 6; /* kl bits */ \
            low |= (inptr[2] & 0x3) << 6; /* mn bits */ \
            low |= inptr[3] & 0x3f; /* opqrst bits */ \
            \
            put16 (outptr, high); \
            outptr += 2; \
            put16 (outptr, low); \
            outptr += 2; \
            inptr += 4; \
            continue; \
          } \
        else \
          { \
            /* Read the possible remaining bytes.  */ \
            for (i = 1; i < cnt; ++i) \
              { \
                uint16_t byte = inptr[i]; \
                \
                if ((byte & 0xc0) != 0x80) \
                  /* This is an illegal encoding.  */ \
                  break; \
                \
                ch <<= 6; \
                ch |= byte & 0x3f; \
              } \
            \
            /* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
               If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
               have been represented with fewer than cnt bytes.  */ \
            if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0) \
                /* Do not accept UTF-16 surrogates.  */ \
                || (ch >= 0xd800 && ch <= 0xdfff)) \
              { \
                /* This is an illegal encoding.  */ \
                goto errout; \
              } \
            \
            inptr += cnt; \
          } \
      } \
    /* Now adjust the pointers and store the result.  put16 is used, \
       as for the surrogate pair above, instead of a store through a \
       cast uint16_t pointer.  */ \
    put16 (outptr, ch); \
    outptr += sizeof (uint16_t); \
  }

/* Generate loop-function with software implementation.  */
# define MIN_NEEDED_INPUT MIN_NEEDED_FROM
# define MAX_NEEDED_INPUT MAX_NEEDED_FROM
# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
# define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
# define FROM_LOOP_C __from_utf8_loop_c
# define LOOPFCT FROM_LOOP_C
# define LOOP_NEED_FLAGS
# define BODY BODY_FROM_C
# include <iconv/loop.c>
#else
# define FROM_LOOP_C NULL
#endif /* HAVE_FROM_C != 1 */
| 511 | |
/* Conversion from UTF-16 to UTF-8.  */

#if HAVE_TO_C == 1
/* The software routine is based on the functionality of the S/390
   hardware instruction (cu21) as described in the Principles of
   Operation.

   Converts one UTF-16 character per iteration:
     - 0x0000-0x007f                -> 1 byte,
     - 0x0080-0x07ff                -> 2 bytes,
     - 0x0800-0xd7ff, 0xe000-0xffff -> 3 bytes,
     - high surrogate 0xd800-0xdbff followed by a low surrogate
                                    -> 4 bytes.
   A low surrogate without a preceding high surrogate, or a high
   surrogate not followed by a low surrogate, goes through
   STANDARD_TO_LOOP_ERR_HANDLER.  A high surrogate at the very end of
   the input yields __GCONV_INCOMPLETE_INPUT; missing output room
   yields __GCONV_FULL_OUTPUT.  */
# define BODY_TO_C \
  { \
    uint16_t c = get16 (inptr); \
    \
    if (__glibc_likely (c <= 0x007f)) \
      { \
        /* Single byte UTF-8 char.  */ \
        *outptr = c & 0xff; \
        outptr++; \
      } \
    else if (c >= 0x0080 && c <= 0x07ff) \
      { \
        /* Two byte UTF-8 char.  */ \
        \
        if (__glibc_unlikely (outptr + 2 > outend)) \
          { \
            /* Overflow in the output buffer.  */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        \
        outptr[0] = 0xc0; \
        outptr[0] |= c >> 6; \
        \
        outptr[1] = 0x80; \
        outptr[1] |= c & 0x3f; \
        \
        outptr += 2; \
      } \
    else if ((c >= 0x0800 && c <= 0xd7ff) || c > 0xdfff) \
      { \
        /* Three byte UTF-8 char.  */ \
        \
        if (__glibc_unlikely (outptr + 3 > outend)) \
          { \
            /* Overflow in the output buffer.  */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        outptr[0] = 0xe0; \
        outptr[0] |= c >> 12; \
        \
        outptr[1] = 0x80; \
        outptr[1] |= (c >> 6) & 0x3f; \
        \
        outptr[2] = 0x80; \
        outptr[2] |= c & 0x3f; \
        \
        outptr += 3; \
      } \
    else if (c >= 0xd800 && c <= 0xdbff) \
      { \
        /* Four byte UTF-8 char.  */ \
        uint16_t low, uvwxy; \
        \
        if (__glibc_unlikely (outptr + 4 > outend)) \
          { \
            /* Overflow in the output buffer.  */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        if (__glibc_unlikely (inptr + 4 > inend)) \
          { \
            /* The low surrogate is not available yet.  */ \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        \
        inptr += 2; \
        low = get16 (inptr); \
        \
        if ((low & 0xfc00) != 0xdc00) \
          { \
            /* Point back at the high surrogate before reporting.  */ \
            inptr -= 2; \
            STANDARD_TO_LOOP_ERR_HANDLER (2); \
          } \
        uvwxy = ((c >> 6) & 0xf) + 1; \
        outptr[0] = 0xf0; \
        outptr[0] |= uvwxy >> 2; \
        \
        outptr[1] = 0x80; \
        outptr[1] |= (uvwxy << 4) & 0x30; \
        outptr[1] |= (c >> 2) & 0x0f; \
        \
        outptr[2] = 0x80; \
        outptr[2] |= (c & 0x03) << 4; \
        outptr[2] |= (low >> 6) & 0x0f; \
        \
        outptr[3] = 0x80; \
        outptr[3] |= low & 0x3f; \
        \
        outptr += 4; \
      } \
    else \
      { \
        STANDARD_TO_LOOP_ERR_HANDLER (2); \
      } \
    /* Skip the unit just handled (the low surrogate in the four \
       byte case; the high one was skipped above).  */ \
    inptr += 2; \
  }

/* Generate loop-function with software implementation.  */
# define MIN_NEEDED_INPUT MIN_NEEDED_TO
# define MAX_NEEDED_INPUT MAX_NEEDED_TO
# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
# define TO_LOOP_C __to_utf8_loop_c
# define LOOPFCT TO_LOOP_C
# define BODY BODY_TO_C
# define LOOP_NEED_FLAGS
# include <iconv/loop.c>
#else
# define TO_LOOP_C NULL
#endif /* HAVE_TO_C != 1 */
| 630 | |
#if HAVE_TO_VX == 1
/* UTF-16 -> UTF-8 using vector instructions and scalar code, without
   the cu21 instruction.

   While at least 32 input bytes and 16 output bytes are available, 16
   UTF-16 units are loaded and, if all are <= 0x7f, packed to 16 bytes.
   Otherwise the leading units <= 0x7f are stored and the remaining
   units are converted one by one (labels 20-34) before the vector loop
   is re-entered.  RESULT is set to __GCONV_FULL_OUTPUT,
   __GCONV_INCOMPLETE_INPUT or __GCONV_ILLEGAL_INPUT inside the asm;
   only the illegal-input case reaches the standard error handler.
   Expects inptr, inend, outptr, outend and result in scope.  */
# define BODY_TO_VX \
  { \
    size_t inlen = inend - inptr; \
    size_t outlen = outend - outptr; \
    unsigned long tmp, tmp2, tmp3; \
    asm volatile (".machine push\n\t" \
                  ".machine \"z13\"\n\t" \
                  ".machinemode \"zarch_nohighgprs\"\n\t" \
                  /* Setup to check for values <= 0x7f.  */ \
                  " larl %[R_TMP],9f\n\t" \
                  " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \
                  CONVERT_32BIT_SIZE_T ([R_INLEN]) \
                  CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \
                  /* Loop which handles UTF-16 chars <=0x7f.  */ \
                  "0: clgijl %[R_INLEN],32,2f\n\t" \
                  " clgijl %[R_OUTLEN],16,2f\n\t" \
                  "1: vlm %%v16,%%v17,0(%[R_IN])\n\t" \
                  " lghi %[R_TMP2],0\n\t" \
                  /* Check for > 1byte UTF-8 chars.  */ \
                  " vstrchs %%v19,%%v16,%%v30,%%v31\n\t" \
                  " jno 10f\n\t" /* Jump away if not all bytes are 1byte \
                                    UTF8 chars.  */ \
                  " vstrchs %%v19,%%v17,%%v30,%%v31\n\t" \
                  " jno 11f\n\t" /* Jump away if not all bytes are 1byte \
                                    UTF8 chars.  */ \
                  /* Shorten to UTF-8.  */ \
                  " vpkh %%v18,%%v16,%%v17\n\t" \
                  " la %[R_IN],32(%[R_IN])\n\t" \
                  " aghi %[R_INLEN],-32\n\t" \
                  /* Store 16 bytes to buf_out.  */ \
                  " vst %%v18,0(%[R_OUT])\n\t" \
                  " aghi %[R_OUTLEN],-16\n\t" \
                  " la %[R_OUT],16(%[R_OUT])\n\t" \
                  " clgijl %[R_INLEN],32,2f\n\t" \
                  " clgijl %[R_OUTLEN],16,2f\n\t" \
                  " j 1b\n\t" \
                  /* Setup to check for ch > 0x7f. (v30, v31)  */ \
                  "9: .short 0x7f,0x7f,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
                  " .short 0x2000,0x2000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
                  /* At least one byte is > 0x7f. \
                     Store the preceding 1-byte chars.  */ \
                  "11: lghi %[R_TMP2],16\n\t" /* match was found in v17.  */ \
                  "10:\n\t" \
                  " vlgvb %[R_TMP],%%v19,7\n\t" \
                  /* Shorten to UTF-8.  */ \
                  " vpkh %%v18,%%v16,%%v17\n\t" \
                  " ar %[R_TMP],%[R_TMP2]\n\t" /* Number of in bytes.  */ \
                  " srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes.  */ \
                  " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store.  */ \
                  " jl 13f\n\t" \
                  " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \
                  /* Update pointers.  */ \
                  " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \
                  " slgr %[R_INLEN],%[R_TMP]\n\t" \
                  " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \
                  " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \
                  "13: \n\t" \
                  /* Calculate remaining uint16_t values in loaded vrs.  */ \
                  " lghi %[R_TMP2],16\n\t" \
                  " slgr %[R_TMP2],%[R_TMP3]\n\t" \
                  " llh %[R_TMP],0(%[R_IN])\n\t" \
                  " aghi %[R_INLEN],-2\n\t" \
                  " j 22f\n\t" \
                  /* Handle remaining bytes.  */ \
                  "2: \n\t" \
                  /* Zero, one or more bytes available?  */ \
                  " clgfi %[R_INLEN],1\n\t" \
                  " locghie %[R_RES],%[RES_IN_FULL]\n\t" /* Only one byte.  */ \
                  " jle 99f\n\t" /* End if less than two bytes.  */ \
                  /* Calculate remaining uint16_t values in inptr.  */ \
                  " srlg %[R_TMP2],%[R_INLEN],1\n\t" \
                  /* Handle multibyte utf8-char.  */ \
                  "20: llh %[R_TMP],0(%[R_IN])\n\t" \
                  " aghi %[R_INLEN],-2\n\t" \
                  /* Test if ch is 1-byte UTF-8 char.  */ \
                  "21: clijh %[R_TMP],0x7f,22f\n\t" \
                  /* Handle 1-byte UTF-8 char.  */ \
                  "31: slgfi %[R_OUTLEN],1\n\t" \
                  " jl 90f \n\t" \
                  " stc %[R_TMP],0(%[R_OUT])\n\t" \
                  " la %[R_IN],2(%[R_IN])\n\t" \
                  " la %[R_OUT],1(%[R_OUT])\n\t" \
                  " brctg %[R_TMP2],20b\n\t" \
                  " j 0b\n\t" /* Switch to vx-loop.  */ \
                  /* Test if ch is 2-byte UTF-8 char.  */ \
                  "22: clfi %[R_TMP],0x7ff\n\t" \
                  " jh 23f\n\t" \
                  /* Handle 2-byte UTF-8 char.  */ \
                  "32: slgfi %[R_OUTLEN],2\n\t" \
                  " jl 90f \n\t" \
                  " llill %[R_TMP3],0xc080\n\t" \
                  " la %[R_IN],2(%[R_IN])\n\t" \
                  " risbgn %[R_TMP3],%[R_TMP],51,55,2\n\t" /* 1. byte.  */ \
                  " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 2. byte.  */ \
                  " sth %[R_TMP3],0(%[R_OUT])\n\t" \
                  " la %[R_OUT],2(%[R_OUT])\n\t" \
                  " brctg %[R_TMP2],20b\n\t" \
                  " j 0b\n\t" /* Switch to vx-loop.  */ \
                  /* Test if ch is 3-byte UTF-8 char.  */ \
                  "23: clfi %[R_TMP],0xd7ff\n\t" \
                  " jh 24f\n\t" \
                  /* Handle 3-byte UTF-8 char.  */ \
                  "33: slgfi %[R_OUTLEN],3\n\t" \
                  " jl 90f \n\t" \
                  " llilf %[R_TMP3],0xe08080\n\t" \
                  " la %[R_IN],2(%[R_IN])\n\t" \
                  " risbgn %[R_TMP3],%[R_TMP],44,47,4\n\t" /* 1. byte.  */ \
                  " risbgn %[R_TMP3],%[R_TMP],50,55,2\n\t" /* 2. byte.  */ \
                  " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 3. byte.  */ \
                  " stcm %[R_TMP3],7,0(%[R_OUT])\n\t" \
                  " la %[R_OUT],3(%[R_OUT])\n\t" \
                  " brctg %[R_TMP2],20b\n\t" \
                  " j 0b\n\t" /* Switch to vx-loop.  */ \
                  /* Test if ch is 4-byte UTF-8 char.  */ \
                  "24: clfi %[R_TMP],0xdfff\n\t" \
                  " jh 33b\n\t" /* Handle this 3-byte UTF-8 char.  */ \
                  " clfi %[R_TMP],0xdbff\n\t" \
                  " locghih %[R_RES],%[RES_IN_ILL]\n\t" \
                  " jh 99f\n\t" /* Jump away if this is a low surrogate \
                                   without a preceding high surrogate.  */ \
                  /* Handle 4-byte UTF-8 char.  */ \
                  "34: slgfi %[R_OUTLEN],4\n\t" \
                  " jl 90f \n\t" \
                  " slgfi %[R_INLEN],2\n\t" \
                  " locghil %[R_RES],%[RES_IN_FULL]\n\t" \
                  " jl 99f\n\t" /* Jump away if low surrogate is missing.  */ \
                  " llilf %[R_TMP3],0xf0808080\n\t" \
                  " aghi %[R_TMP],0x40\n\t" \
                  " risbgn %[R_TMP3],%[R_TMP],37,39,16\n\t" /* 1. byte: uvw */ \
                  " risbgn %[R_TMP3],%[R_TMP],42,43,14\n\t" /* 2. byte: xy */ \
                  " risbgn %[R_TMP3],%[R_TMP],44,47,14\n\t" /* 2. byte: efgh */ \
                  " risbgn %[R_TMP3],%[R_TMP],50,51,12\n\t" /* 3. byte: ij */ \
                  " llh %[R_TMP],2(%[R_IN])\n\t" /* Load low surrogate.  */ \
                  " risbgn %[R_TMP3],%[R_TMP],52,55,2\n\t" /* 3. byte: klmn */ \
                  " risbgn %[R_TMP3],%[R_TMP],58,63,0\n\t" /* 4. byte: opqrst */ \
                  " nilf %[R_TMP],0xfc00\n\t" \
                  " clfi %[R_TMP],0xdc00\n\t" /* Check if it starts with 0xdc00.  */ \
                  " locghine %[R_RES],%[RES_IN_ILL]\n\t" \
                  " jne 99f\n\t" /* Jump away if low surrogate is invalid.  */ \
                  " st %[R_TMP3],0(%[R_OUT])\n\t" \
                  " la %[R_IN],4(%[R_IN])\n\t" \
                  " la %[R_OUT],4(%[R_OUT])\n\t" \
                  " aghi %[R_TMP2],-2\n\t" \
                  " jh 20b\n\t" \
                  " j 0b\n\t" /* Switch to vx-loop.  */ \
                  /* Exit with __GCONV_FULL_OUTPUT.  */ \
                  "90: lghi %[R_RES],%[RES_OUT_FULL]\n\t" \
                  "99: \n\t" \
                  ".machine pop" \
                  : /* outputs */ [R_IN] "+a" (inptr) \
                    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (outptr) \
                    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \
                    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \
                    , [R_RES] "+d" (result) \
                  : /* inputs */ \
                    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \
                    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \
                    , [RES_IN_FULL] "i" (__GCONV_INCOMPLETE_INPUT) \
                  : /* clobber list */ "memory", "cc" \
                    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
                    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
                    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \
                  ); \
    /* Everything except illegal input ends this round.  */ \
    if (__glibc_likely (inptr == inend) \
        || result != __GCONV_ILLEGAL_INPUT) \
      break; \
    \
    STANDARD_TO_LOOP_ERR_HANDLER (2); \
  }

/* Generate loop-function with vector implementation.  */
# define MIN_NEEDED_INPUT MIN_NEEDED_TO
# define MAX_NEEDED_INPUT MAX_NEEDED_TO
# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
# define TO_LOOP_VX __to_utf8_loop_vx
# define LOOPFCT TO_LOOP_VX
# define BODY BODY_TO_VX
# define LOOP_NEED_FLAGS
# include <iconv/loop.c>
#else
# define TO_LOOP_VX NULL
#endif /* HAVE_TO_VX != 1 */
| 815 | |
#if HAVE_TO_VX_CU == 1
/* UTF-16 -> UTF-8 using vector instructions plus cu21.

   The vector loop is the same as in the pure vector variant: 16 UTF-16
   units that are all <= 0x7f are packed to 16 bytes.  Everything else
   is handed to cu21 (label 20); when cu21 ends with "jo" taken, the
   vector loop is tried again.  After the asm, RESULT reflects the cu21
   condition code (1: output full, 2: illegal input) and the leftover
   input length decides between "done", "incomplete input" and the
   standard error handler.  Expects inptr, inend, outptr, outend and
   result in scope.  */
# define BODY_TO_VX_CU \
  { \
    register const unsigned char* pInput asm ("8") = inptr; \
    register size_t inlen asm ("9") = inend - inptr; \
    register unsigned char* pOutput asm ("10") = outptr; \
    register size_t outlen asm ("11") = outend - outptr; \
    unsigned long tmp, tmp2, tmp3; \
    asm volatile (".machine push\n\t" \
                  ".machine \"z13\"\n\t" \
                  ".machinemode \"zarch_nohighgprs\"\n\t" \
                  /* Setup to check for values <= 0x7f.  */ \
                  " larl %[R_TMP],9f\n\t" \
                  " vlm %%v30,%%v31,0(%[R_TMP])\n\t" \
                  CONVERT_32BIT_SIZE_T ([R_INLEN]) \
                  CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \
                  /* Loop which handles UTF-16 chars <=0x7f.  */ \
                  "0: clgijl %[R_INLEN],32,20f\n\t" \
                  " clgijl %[R_OUTLEN],16,20f\n\t" \
                  "1: vlm %%v16,%%v17,0(%[R_IN])\n\t" \
                  " lghi %[R_TMP2],0\n\t" \
                  /* Check for > 1byte UTF-8 chars.  */ \
                  " vstrchs %%v19,%%v16,%%v30,%%v31\n\t" \
                  " jno 10f\n\t" /* Jump away if not all bytes are 1byte \
                                    UTF8 chars.  */ \
                  " vstrchs %%v19,%%v17,%%v30,%%v31\n\t" \
                  " jno 11f\n\t" /* Jump away if not all bytes are 1byte \
                                    UTF8 chars.  */ \
                  /* Shorten to UTF-8.  */ \
                  " vpkh %%v18,%%v16,%%v17\n\t" \
                  " la %[R_IN],32(%[R_IN])\n\t" \
                  " aghi %[R_INLEN],-32\n\t" \
                  /* Store 16 bytes to buf_out.  */ \
                  " vst %%v18,0(%[R_OUT])\n\t" \
                  " aghi %[R_OUTLEN],-16\n\t" \
                  " la %[R_OUT],16(%[R_OUT])\n\t" \
                  " clgijl %[R_INLEN],32,20f\n\t" \
                  " clgijl %[R_OUTLEN],16,20f\n\t" \
                  " j 1b\n\t" \
                  /* Setup to check for ch > 0x7f. (v30, v31)  */ \
                  "9: .short 0x7f,0x7f,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
                  " .short 0x2000,0x2000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
                  /* At least one byte is > 0x7f. \
                     Store the preceding 1-byte chars.  */ \
                  "11: lghi %[R_TMP2],16\n\t" /* match was found in v17.  */ \
                  "10: vlgvb %[R_TMP],%%v19,7\n\t" \
                  /* Shorten to UTF-8.  */ \
                  " vpkh %%v18,%%v16,%%v17\n\t" \
                  " ar %[R_TMP],%[R_TMP2]\n\t" /* Number of in bytes.  */ \
                  " srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes.  */ \
                  " ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store.  */ \
                  " jl 20f\n\t" \
                  " vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t" \
                  /* Update pointers.  */ \
                  " la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \
                  " slgr %[R_INLEN],%[R_TMP]\n\t" \
                  " la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t" \
                  " slgr %[R_OUTLEN],%[R_TMP3]\n\t" \
                  /* Handles UTF16 surrogates with convert instruction.  */ \
                  "20: cu21 %[R_OUT],%[R_IN],1\n\t" \
                  " jo 0b\n\t" /* Try vector implementation again.  */ \
                  " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1.  */ \
                  " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2.  */ \
                  ".machine pop" \
                  : /* outputs */ [R_IN] "+a" (pInput) \
                    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \
                    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp) \
                    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3) \
                    , [R_RES] "+d" (result) \
                  : /* inputs */ \
                    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \
                    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \
                  : /* clobber list */ "memory", "cc" \
                    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
                    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
                    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31") \
                  ); \
    inptr = pInput; \
    outptr = pOutput; \
    \
    if (__glibc_likely (inlen == 0) \
        || result == __GCONV_FULL_OUTPUT) \
      break; \
    if (inlen == 1) \
      { \
        /* Input does not contain a complete utf16 character.  */ \
        result = __GCONV_INCOMPLETE_INPUT; \
        break; \
      } \
    else if (result != __GCONV_ILLEGAL_INPUT) \
      { \
        /* Input is >= 2 and < 4 bytes (as cu21 would have processed \
           a possible next utf16 character) and not illegal. \
           => we have a single high surrogate at end of input.  */ \
        result = __GCONV_INCOMPLETE_INPUT; \
        break; \
      } \
    \
    STANDARD_TO_LOOP_ERR_HANDLER (2); \
  }

/* Generate loop-function with vector and utf-convert instructions.  */
# define MIN_NEEDED_INPUT MIN_NEEDED_TO
# define MAX_NEEDED_INPUT MAX_NEEDED_TO
# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
# define TO_LOOP_VX_CU __to_utf8_loop_vx_cu
# define LOOPFCT TO_LOOP_VX_CU
# define BODY BODY_TO_VX_CU
# define LOOP_NEED_FLAGS
# include <iconv/loop.c>
#else
# define TO_LOOP_VX_CU NULL
#endif /* HAVE_TO_VX_CU != 1 */
| 930 | |
| 931 | /* This file also exists in sysdeps/s390/multiarch/ which |
| 932 | generates ifunc resolvers for FROM/TO_LOOP functions |
| 933 | and includes iconv/skeleton.c afterwards. */ |
| 934 | #if ! defined USE_MULTIARCH |
| 935 | # include <iconv/skeleton.c> |
| 936 | #endif |
| 937 | |