1/* strcpy with SSE2 and unaligned load
2 Copyright (C) 2011-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <isa-level.h>
20
21/* MINIMUM_X86_ISA_LEVEL <= 2 because there is no V2 implementation
22 so we need this to build for ISA V2 builds. */
23#if ISA_SHOULD_BUILD (2)
24
25
26# ifndef USE_AS_STRCAT
27# include <sysdep.h>
28
29# ifndef STRCPY
30# define STRCPY __strcpy_sse2_unaligned
31# endif
32
33# endif
34
/* Jump-table helpers.
   JMPTBL (I, B) expands to the difference I - B.
   BRANCH_TO_JMPTBL_ENTRY (TABLE, INDEX, SCALE) loads the address of TABLE
   into %r11 (RIP-relative), sign-extends the 32-bit value stored at
   TABLE + INDEX * SCALE into %rcx, adds the table address to it and jumps
   indirectly to the result.  It overwrites %r11 and %rcx.
   NOTE(review): the tables themselves and _CET_NOTRACK are defined outside
   this part of the file; presumably each entry is JMPTBL (label, TABLE)
   -- confirm.  */
35# define JMPTBL(I, B) I - B
36# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
37 lea TABLE(%rip), %r11; \
38 movslq (%r11, INDEX, SCALE), %rcx; \
39 lea (%r11, %rcx), %rcx; \
40 _CET_NOTRACK jmp *%rcx
41
/* Function entry and the first 32 source bytes.
   Register use visible below: %rsi = source, %rdi = destination,
   %r8 = remaining length (USE_AS_STRNCPY only), %rax = return value
   (set to the destination here unless USE_AS_STPCPY), %rcx = scratch
   holding low source address bits, %rdx = null-byte bit mask.
   NOTE(review): when USE_AS_STRCAT is defined the entry sequence is
   compiled out, so the including file presumably sets up %rcx, %rax and
   %r8 itself before this code -- confirm.  */
42# ifndef USE_AS_STRCAT
43
44.text
45ENTRY (STRCPY)
46# ifdef USE_AS_STRNCPY
/* Keep the length (third argument) in %r8; a zero length leaves through
   L(ExitZero).  */
47 mov %RDX_LP, %R8_LP
48 test %R8_LP, %R8_LP
49 jz L(ExitZero)
50# endif
51 mov %rsi, %rcx
52# ifndef USE_AS_STPCPY
53 mov %rdi, %rax /* save result */
54# endif
55
56# endif
57
/* %rcx = source offset inside its 64-byte block.  With an offset of at most
   32 the 32 bytes starting at the source stay inside that block, and
   L(SourceStringAlignmentLess32) loads them directly.  */
58 and $63, %rcx
59 cmp $32, %rcx
60 jbe L(SourceStringAlignmentLess32)
61
/* Otherwise round %rsi down to 16 bytes and keep the low four address bits
   in %rcx.  Compare the first aligned block with zero and shift the mask
   right by %cl to drop the bytes that precede the string.  */
62 and $-16, %rsi
63 and $15, %rcx
64 pxor %xmm0, %xmm0
65 pxor %xmm1, %xmm1
66
67 pcmpeqb (%rsi), %xmm1
68 pmovmskb %xmm1, %rdx
69 shr %cl, %rdx
70
71# ifdef USE_AS_STRNCPY
/* %r10 = 16 - %rcx (17 - %rcx for plain strncpy); if the length in %r8 is
   not above it (unsigned), the length limit is handled by the
   Case2/Case3 code.  */
72# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
73 mov $16, %r10
74 sub %rcx, %r10
75 cmp %r10, %r8
76# else
77 mov $17, %r10
78 sub %rcx, %r10
79 cmp %r10, %r8
80# endif
81 jbe L(CopyFrom1To16BytesTailCase2OrCase3)
82# endif
/* A null byte inside the first aligned block ends the copy there.  */
83 test %rdx, %rdx
84 jnz L(CopyFrom1To16BytesTail)
85
/* Same check for the second aligned block; %xmm0 is still all zero.  */
86 pcmpeqb 16(%rsi), %xmm0
87 pmovmskb %xmm0, %rdx
88
89# ifdef USE_AS_STRNCPY
90 add $16, %r10
91 cmp %r10, %r8
92 jbe L(CopyFrom1To32BytesCase2OrCase3)
93# endif
94 test %rdx, %rdx
95 jnz L(CopyFrom1To32Bytes)
96
/* No null byte was found in either block: copy the 16 bytes that start at
   the original source address (%rsi + %rcx) to the destination, then fall
   through to L(Unalign16Both).  */
97 movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */
98 movdqu %xmm1, (%rdi)
99
100/* Main 16-bytes-per-step path.  Both routes to L(Unalign16Both) have rounded %rsi down to a 16-byte boundary and left the original low four source address bits in %rcx, so source loads are aligned while destination stores are unaligned.  */
101 .p2align 4
102L(Unalign16Both):
/* Rebase %rdi so that (%rdi, k) is the destination of (%rsi, k).  */
103 sub %rcx, %rdi
104# ifdef USE_AS_STRNCPY
/* Add the same amount to the remaining length; on carry, sbb produces
   all ones and the or saturates %r8.  */
105 add %rcx, %r8
106 sbb %rcx, %rcx
107 or %rcx, %r8
108# endif
/* %rcx becomes the running offset from %rsi.  Each step below loads the
   next aligned 16 bytes, stores the block loaded before it, and tests the
   new block for a zero byte.  %xmm0 is both the zero constant and the
   compare result: falling through means no byte matched, so it is all
   zero again.  When a step branches away, %rcx is the offset of the block
   just tested and %rdx its null-byte mask.  */
109 mov $16, %rcx
110 movdqa (%rsi, %rcx), %xmm1
111 movaps 16(%rsi, %rcx), %xmm2
112 movdqu %xmm1, (%rdi, %rcx)
113 pcmpeqb %xmm2, %xmm0
114 pmovmskb %xmm0, %rdx
115 add $16, %rcx
116# ifdef USE_AS_STRNCPY
/* Length accounting: 48 is subtracted on the first step and 16 on each
   later one; a result of zero or a borrow goes to the Case2/Case3 code.  */
117 sub $48, %r8
118 jbe L(CopyFrom1To16BytesCase2OrCase3)
119# endif
120 test %rdx, %rdx
121# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
122 jnz L(CopyFrom1To16BytesUnalignedXmm2)
123# else
124 jnz L(CopyFrom1To16Bytes)
125# endif
126
127 movaps 16(%rsi, %rcx), %xmm3
128 movdqu %xmm2, (%rdi, %rcx)
129 pcmpeqb %xmm3, %xmm0
130 pmovmskb %xmm0, %rdx
131 add $16, %rcx
132# ifdef USE_AS_STRNCPY
133 sub $16, %r8
134 jbe L(CopyFrom1To16BytesCase2OrCase3)
135# endif
136 test %rdx, %rdx
137# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
138 jnz L(CopyFrom1To16BytesUnalignedXmm3)
139# else
140 jnz L(CopyFrom1To16Bytes)
141# endif
142
143 movaps 16(%rsi, %rcx), %xmm4
144 movdqu %xmm3, (%rdi, %rcx)
145 pcmpeqb %xmm4, %xmm0
146 pmovmskb %xmm0, %rdx
147 add $16, %rcx
148# ifdef USE_AS_STRNCPY
149 sub $16, %r8
150 jbe L(CopyFrom1To16BytesCase2OrCase3)
151# endif
152 test %rdx, %rdx
153# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
154 jnz L(CopyFrom1To16BytesUnalignedXmm4)
155# else
156 jnz L(CopyFrom1To16Bytes)
157# endif
158
159 movaps 16(%rsi, %rcx), %xmm1
160 movdqu %xmm4, (%rdi, %rcx)
161 pcmpeqb %xmm1, %xmm0
162 pmovmskb %xmm0, %rdx
163 add $16, %rcx
164# ifdef USE_AS_STRNCPY
165 sub $16, %r8
166 jbe L(CopyFrom1To16BytesCase2OrCase3)
167# endif
168 test %rdx, %rdx
169# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
170 jnz L(CopyFrom1To16BytesUnalignedXmm1)
171# else
172 jnz L(CopyFrom1To16Bytes)
173# endif
174
175 movaps 16(%rsi, %rcx), %xmm2
176 movdqu %xmm1, (%rdi, %rcx)
177 pcmpeqb %xmm2, %xmm0
178 pmovmskb %xmm0, %rdx
179 add $16, %rcx
180# ifdef USE_AS_STRNCPY
181 sub $16, %r8
182 jbe L(CopyFrom1To16BytesCase2OrCase3)
183# endif
184 test %rdx, %rdx
185# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
186 jnz L(CopyFrom1To16BytesUnalignedXmm2)
187# else
188 jnz L(CopyFrom1To16Bytes)
189# endif
190
191 movaps 16(%rsi, %rcx), %xmm3
192 movdqu %xmm2, (%rdi, %rcx)
193 pcmpeqb %xmm3, %xmm0
194 pmovmskb %xmm0, %rdx
195 add $16, %rcx
196# ifdef USE_AS_STRNCPY
197 sub $16, %r8
198 jbe L(CopyFrom1To16BytesCase2OrCase3)
199# endif
200 test %rdx, %rdx
201# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
202 jnz L(CopyFrom1To16BytesUnalignedXmm3)
203# else
204 jnz L(CopyFrom1To16Bytes)
205# endif
206
/* Store the last tested block, then switch to 64-byte steps:
   new %rsi = (%rsi + %rcx + 16) rounded down to a multiple of 64,
   %rdx = old %rsi - new %rsi, and %rdi is moved by the same distance, so
   afterwards (%rdi) is the destination of the byte at (%rsi).  */
207 movdqu %xmm3, (%rdi, %rcx)
208 mov %rsi, %rdx
209 lea 16(%rsi, %rcx), %rsi
210 and $-0x40, %rsi
211 sub %rsi, %rdx
212 sub %rdx, %rdi
213# ifdef USE_AS_STRNCPY
/* Re-express the remaining length relative to the new %rsi.  */
214 lea 128(%r8, %rdx), %r8
215# endif
/* 64-bytes-per-iteration loop.  %rsi was rounded down to a 64-byte
   boundary just before this label, so the four 16-byte loads are aligned.
   Copies of the four blocks are kept in %xmm4, %xmm5, %xmm6 and %xmm7.
   The byte-wise minimum of all four is compared with %xmm0 (all zero), so
   %rdx is nonzero exactly when at least one of the 64 bytes is zero.  */
216L(Unaligned64Loop):
217 movaps (%rsi), %xmm2
218 movaps %xmm2, %xmm4
219 movaps 16(%rsi), %xmm5
220 movaps 32(%rsi), %xmm3
221 movaps %xmm3, %xmm6
222 movaps 48(%rsi), %xmm7
223 pminub %xmm5, %xmm2
224 pminub %xmm7, %xmm3
225 pminub %xmm2, %xmm3
226 pcmpeqb %xmm0, %xmm3
227 pmovmskb %xmm3, %rdx
228# ifdef USE_AS_STRNCPY
/* Fewer than 65 bytes of length left: handled by the Case2/Case3 code.  */
229 sub $64, %r8
230 jbe L(UnalignedLeaveCase2OrCase3)
231# endif
232 test %rdx, %rdx
233 jnz L(Unaligned64Leave)
234
/* Steady state: advance both pointers, store the 64 bytes checked in the
   previous round (interleaved with the loads of the next 64 bytes) and
   repeat the zero-byte test.  */
235L(Unaligned64Loop_start):
236 add $64, %rdi
237 add $64, %rsi
238 movdqu %xmm4, -64(%rdi)
239 movaps (%rsi), %xmm2
240 movdqa %xmm2, %xmm4
241 movdqu %xmm5, -48(%rdi)
242 movaps 16(%rsi), %xmm5
243 pminub %xmm5, %xmm2
244 movaps 32(%rsi), %xmm3
245 movdqu %xmm6, -32(%rdi)
246 movaps %xmm3, %xmm6
247 movdqu %xmm7, -16(%rdi)
248 movaps 48(%rsi), %xmm7
249 pminub %xmm7, %xmm3
250 pminub %xmm2, %xmm3
251 pcmpeqb %xmm0, %xmm3
252 pmovmskb %xmm3, %rdx
253# ifdef USE_AS_STRNCPY
254 sub $64, %r8
255 jbe L(UnalignedLeaveCase2OrCase3)
256# endif
257 test %rdx, %rdx
258 jz L(Unaligned64Loop_start)
259
/* A zero byte is somewhere in %xmm4..%xmm7 (not yet stored).  Test the
   blocks in order to find the first one containing it.  */
260L(Unaligned64Leave):
261 pxor %xmm1, %xmm1
262
263 pcmpeqb %xmm4, %xmm0
264 pcmpeqb %xmm5, %xmm1
265 pmovmskb %xmm0, %rdx
266 pmovmskb %xmm1, %rcx
267 test %rdx, %rdx
268 jnz L(CopyFrom1To16BytesUnaligned_0)
269 test %rcx, %rcx
270 jnz L(CopyFrom1To16BytesUnaligned_16)
271
/* Both masks were zero, so %xmm0 and %xmm1 are all zero again and can be
   reused as compare operands for the third and fourth blocks.  */
272 pcmpeqb %xmm6, %xmm0
273 pcmpeqb %xmm7, %xmm1
274 pmovmskb %xmm0, %rdx
275 pmovmskb %xmm1, %rcx
276 test %rdx, %rdx
277 jnz L(CopyFrom1To16BytesUnaligned_32)
278
/* The zero byte is in the fourth block: %rdx = its index inside that block.
   Store the first three blocks in full.  */
279 bsf %rcx, %rdx
280 movdqu %xmm4, (%rdi)
281 movdqu %xmm5, 16(%rdi)
282 movdqu %xmm6, 32(%rdi)
283# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
284# ifdef USE_AS_STPCPY
/* Return value: destination address of the zero byte.  */
285 lea 48(%rdi, %rdx), %rax
286# endif
/* Store the whole fourth block, then point %rdi just past the zero byte
   and adjust %r8 (+15 - %rdx) before entering the zero-fill code.  */
287 movdqu %xmm7, 48(%rdi)
288 add $15, %r8
289 sub %rdx, %r8
290 lea 49(%rdi, %rdx), %rdi
291 jmp L(StrncpyFillTailWithZero)
292# else
/* Point both pointers at the fourth block and dispatch on the index.  */
293 add $48, %rsi
294 add $48, %rdi
295 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
296# endif
297
298/* Reached when the source offset inside its 64-byte block is at most 32: the first 32 source bytes are loaded directly with two unaligned loads.  */
299
300L(SourceStringAlignmentLess32):
301 pxor %xmm0, %xmm0
302 movdqu (%rsi), %xmm1
303 movdqu 16(%rsi), %xmm2
/* %rdx = null-byte mask of source bytes 0..15.  */
304 pcmpeqb %xmm1, %xmm0
305 pmovmskb %xmm0, %rdx
306
307# ifdef USE_AS_STRNCPY
/* A length of at most 16 (17 for plain strncpy) is handled by the
   Case2/Case3 code.  */
308# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
309 cmp $16, %r8
310# else
311 cmp $17, %r8
312# endif
313 jbe L(CopyFrom1To16BytesTail1Case2OrCase3)
314# endif
315 test %rdx, %rdx
316 jnz L(CopyFrom1To16BytesTail1)
317
/* Bytes 0..15 hold no zero: store them and test bytes 16..31 (%xmm0 is all
   zero again because the previous compare matched nothing).  */
318 pcmpeqb %xmm2, %xmm0
319 movdqu %xmm1, (%rdi)
320 pmovmskb %xmm0, %rdx
321
322# ifdef USE_AS_STRNCPY
323# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
324 cmp $32, %r8
325# else
326 cmp $33, %r8
327# endif
328 jbe L(CopyFrom1To32Bytes1Case2OrCase3)
329# endif
330 test %rdx, %rdx
331 jnz L(CopyFrom1To32Bytes1)
332
/* No zero in the first 32 bytes: round %rsi down to 16 bytes, keep the low
   four address bits in %rcx, and continue in the 16-byte step code.  */
333 and $-16, %rsi
334 and $15, %rcx
335 jmp L(Unalign16Both)
336
337/*------End of main part with loops---------------------*/
338
339/* Case1: a null byte was found (mask in %rdx or %rcx) and no length limit was hit.  Each label turns the mask into a byte index with bsf, makes %rsi/%rdi point at the start of the bytes still to copy, and dispatches through L(ExitTable) with that index.  NOTE(review): L(ExitTable) is outside this part of the file; presumably entry i is L(Exit<i+1>) -- confirm.  */
340
341# if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
342 .p2align 4
/* %rcx = offset of the block holding the null byte, applied to both
   pointers.  */
343L(CopyFrom1To16Bytes):
344 add %rcx, %rdi
345 add %rcx, %rsi
346 bsf %rdx, %rdx
347 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
348# endif
349 .p2align 4
/* Only %rsi needs %rcx added; %rdi is used as it is.  */
350L(CopyFrom1To16BytesTail):
351 add %rcx, %rsi
352 bsf %rdx, %rdx
353 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
354
355 .p2align 4
/* Null byte in the second 16 bytes: step both pointers (and, for strncpy,
   the length) past the first 16 bytes, then share the code below.  */
356L(CopyFrom1To32Bytes1):
357 add $16, %rsi
358 add $16, %rdi
359# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
360 sub $16, %r8
361# endif
362L(CopyFrom1To16BytesTail1):
363 bsf %rdx, %rdx
364 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
365
366 .p2align 4
/* Index = bit position + 16 - %rcx; %rsi gets %rcx added.  */
367L(CopyFrom1To32Bytes):
368 bsf %rdx, %rdx
369 add %rcx, %rsi
370 add $16, %rdx
371 sub %rcx, %rdx
372 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
373
374 .p2align 4
/* The three labels below finish a 64-byte group whose blocks are held in
   %xmm4, %xmm5 and %xmm6; the suffix is the offset of the block that holds
   the null byte.  For strncpy (not strcat) the blocks up to and including
   that one are stored in full, %rax (stpcpy) is set to the destination of
   the null byte, %rdi is set just past it, %r8 is adjusted and control
   goes to the zero-fill code.  Otherwise the earlier blocks are stored, the
   pointers are advanced to the block with the null byte and the exit table
   is used.  */
375L(CopyFrom1To16BytesUnaligned_0):
376 bsf %rdx, %rdx
377# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
378# ifdef USE_AS_STPCPY
379 lea (%rdi, %rdx), %rax
380# endif
381 movdqu %xmm4, (%rdi)
382 add $63, %r8
383 sub %rdx, %r8
384 lea 1(%rdi, %rdx), %rdi
385 jmp L(StrncpyFillTailWithZero)
386# else
387 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
388# endif
389
390 .p2align 4
/* Here the mask arrives in %rcx.  */
391L(CopyFrom1To16BytesUnaligned_16):
392 bsf %rcx, %rdx
393 movdqu %xmm4, (%rdi)
394# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
395# ifdef USE_AS_STPCPY
396 lea 16(%rdi, %rdx), %rax
397# endif
398 movdqu %xmm5, 16(%rdi)
399 add $47, %r8
400 sub %rdx, %r8
401 lea 17(%rdi, %rdx), %rdi
402 jmp L(StrncpyFillTailWithZero)
403# else
404 add $16, %rsi
405 add $16, %rdi
406 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
407# endif
408
409 .p2align 4
410L(CopyFrom1To16BytesUnaligned_32):
411 bsf %rdx, %rdx
412 movdqu %xmm4, (%rdi)
413 movdqu %xmm5, 16(%rdi)
414# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
415# ifdef USE_AS_STPCPY
416 lea 32(%rdi, %rdx), %rax
417# endif
418 movdqu %xmm6, 32(%rdi)
419 add $31, %r8
420 sub %rdx, %r8
421 lea 33(%rdi, %rdx), %rdi
422 jmp L(StrncpyFillTailWithZero)
423# else
424 add $32, %rsi
425 add $32, %rdi
426 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
427# endif
428
/* strncpy-only dispatch code (compiled only with USE_AS_STRNCPY).
   The L(CopyFrom1To16BytesUnalignedXmmN) stubs store the pending block
   held in %xmmN at (%rdi, %rcx) and continue at
   L(CopyFrom1To16BytesXmmExit), which is outside this part of the file.
   The Case2/Case3 labels are reached once the length limit falls inside
   the bytes just examined:
     Case2 - a null byte was also found (%rdx != 0),
     Case3 - no null byte was found (%rdx == 0).
   NOTE(review): L(ExitStrncpyTable) is not visible here; presumably entry n
   is L(StrncpyExit<n>) -- confirm.  */
429# ifdef USE_AS_STRNCPY
430# ifndef USE_AS_STRCAT
431 .p2align 4
432L(CopyFrom1To16BytesUnalignedXmm6):
433 movdqu %xmm6, (%rdi, %rcx)
434 jmp L(CopyFrom1To16BytesXmmExit)
435
436 .p2align 4
437L(CopyFrom1To16BytesUnalignedXmm5):
438 movdqu %xmm5, (%rdi, %rcx)
439 jmp L(CopyFrom1To16BytesXmmExit)
440
441 .p2align 4
442L(CopyFrom1To16BytesUnalignedXmm4):
443 movdqu %xmm4, (%rdi, %rcx)
444 jmp L(CopyFrom1To16BytesXmmExit)
445
446 .p2align 4
447L(CopyFrom1To16BytesUnalignedXmm3):
448 movdqu %xmm3, (%rdi, %rcx)
449 jmp L(CopyFrom1To16BytesXmmExit)
450
451 .p2align 4
452L(CopyFrom1To16BytesUnalignedXmm1):
453 movdqu %xmm1, (%rdi, %rcx)
454 jmp L(CopyFrom1To16BytesXmmExit)
455# endif
456
457 .p2align 4
/* Null byte comes before the length limit: dispatch on its index in %rdx.  */
458L(CopyFrom1To16BytesExit):
459 BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
460
461/* Case2: each label computes the null-byte index in %rdx and compares it (unsigned) with %r8; a smaller index takes the ordinary exit table, otherwise the copy is cut at %r8 bytes via L(ExitStrncpyTable).  */
462
463 .p2align 4
/* Adds 16 back to %r8 and %rcx to both pointers before the comparison.  */
464L(CopyFrom1To16BytesCase2):
465 add $16, %r8
466 add %rcx, %rdi
467 add %rcx, %rsi
468 bsf %rdx, %rdx
469 cmp %r8, %rdx
470 jb L(CopyFrom1To16BytesExit)
471 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
472
473 .p2align 4
/* Index = bit position + 16 - %rcx; %rsi gets %rcx added.  */
474L(CopyFrom1To32BytesCase2):
475 add %rcx, %rsi
476 bsf %rdx, %rdx
477 add $16, %rdx
478 sub %rcx, %rdx
479 cmp %r8, %rdx
480 jb L(CopyFrom1To16BytesExit)
481 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
482
483L(CopyFrom1To16BytesTailCase2):
484 add %rcx, %rsi
485 bsf %rdx, %rdx
486 cmp %r8, %rdx
487 jb L(CopyFrom1To16BytesExit)
488 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
489
490L(CopyFrom1To16BytesTail1Case2):
491 bsf %rdx, %rdx
492 cmp %r8, %rdx
493 jb L(CopyFrom1To16BytesExit)
494 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
495
496/* Case2 or Case3: test the mask; nonzero goes to the matching Case2 label, zero (Case3) copies %r8 bytes via L(ExitStrncpyTable) after the same pointer adjustment as the Case2 variant.  */
497
498 .p2align 4
499L(CopyFrom1To16BytesCase2OrCase3):
500 test %rdx, %rdx
501 jnz L(CopyFrom1To16BytesCase2)
502L(CopyFrom1To16BytesCase3):
503 add $16, %r8
504 add %rcx, %rdi
505 add %rcx, %rsi
506 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
507
508 .p2align 4
509L(CopyFrom1To32BytesCase2OrCase3):
510 test %rdx, %rdx
511 jnz L(CopyFrom1To32BytesCase2)
512 add %rcx, %rsi
513 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
514
515 .p2align 4
516L(CopyFrom1To16BytesTailCase2OrCase3):
517 test %rdx, %rdx
518 jnz L(CopyFrom1To16BytesTailCase2)
519 add %rcx, %rsi
520 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
521
522 .p2align 4
/* Step pointers and length past the first 16 bytes, then share the
   16-byte code below.  */
523L(CopyFrom1To32Bytes1Case2OrCase3):
524 add $16, %rdi
525 add $16, %rsi
526 sub $16, %r8
527L(CopyFrom1To16BytesTail1Case2OrCase3):
528 test %rdx, %rdx
529 jnz L(CopyFrom1To16BytesTail1Case2)
530 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
531
532# endif
533
534/*------End of labels for copying 1-16 bytes and 1-32 bytes----*/
535
/* L(Exit1) .. L(Exit8): copy exactly N bytes from (%rsi) to (%rdi), using
   overlapping loads/stores where a single access does not fit.
   Routines that store %dh write the last byte from that register instead
   of loading it.  NOTE(review): presumably %rdx still holds the small
   jump-table index on entry, so %dh is zero (the terminator) -- confirm at
   the dispatch sites.
   With USE_AS_STPCPY, %rax = %rdi + N - 1 (address of the last byte stored).
   With USE_AS_STRNCPY and not USE_AS_STRCAT, %r8 is reduced by N and %rdi
   advanced by N; if %r8 is still nonzero control continues at
   L(StrncpyFillTailWithZero).  lea does not modify flags, so jnz tests the
   result of the sub.  */
536 .p2align 4
/* 1 byte, stored from %dh.  */
537L(Exit1):
538 mov %dh, (%rdi)
539# ifdef USE_AS_STPCPY
540 lea (%rdi), %rax
541# endif
542# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
543 sub $1, %r8
544 lea 1(%rdi), %rdi
545 jnz L(StrncpyFillTailWithZero)
546# endif
547 ret
548
549 .p2align 4
/* 2 bytes: one word.  */
550L(Exit2):
551 mov (%rsi), %dx
552 mov %dx, (%rdi)
553# ifdef USE_AS_STPCPY
554 lea 1(%rdi), %rax
555# endif
556# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
557 sub $2, %r8
558 lea 2(%rdi), %rdi
559 jnz L(StrncpyFillTailWithZero)
560# endif
561 ret
562
563 .p2align 4
/* 3 bytes: word, then %dh at offset 2.  */
564L(Exit3):
565 mov (%rsi), %cx
566 mov %cx, (%rdi)
567 mov %dh, 2(%rdi)
568# ifdef USE_AS_STPCPY
569 lea 2(%rdi), %rax
570# endif
571# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
572 sub $3, %r8
573 lea 3(%rdi), %rdi
574 jnz L(StrncpyFillTailWithZero)
575# endif
576 ret
577
578 .p2align 4
/* 4 bytes: one dword.  */
579L(Exit4):
580 mov (%rsi), %edx
581 mov %edx, (%rdi)
582# ifdef USE_AS_STPCPY
583 lea 3(%rdi), %rax
584# endif
585# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
586 sub $4, %r8
587 lea 4(%rdi), %rdi
588 jnz L(StrncpyFillTailWithZero)
589# endif
590 ret
591
592 .p2align 4
/* 5 bytes: dword, plus %dh at offset 4.  */
593L(Exit5):
594 mov (%rsi), %ecx
595 mov %dh, 4(%rdi)
596 mov %ecx, (%rdi)
597# ifdef USE_AS_STPCPY
598 lea 4(%rdi), %rax
599# endif
600# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
601 sub $5, %r8
602 lea 5(%rdi), %rdi
603 jnz L(StrncpyFillTailWithZero)
604# endif
605 ret
606
607 .p2align 4
/* 6 bytes: dword, plus word at offset 4.  */
608L(Exit6):
609 mov (%rsi), %ecx
610 mov 4(%rsi), %dx
611 mov %ecx, (%rdi)
612 mov %dx, 4(%rdi)
613# ifdef USE_AS_STPCPY
614 lea 5(%rdi), %rax
615# endif
616# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
617 sub $6, %r8
618 lea 6(%rdi), %rdi
619 jnz L(StrncpyFillTailWithZero)
620# endif
621 ret
622
623 .p2align 4
/* 7 bytes: dwords at offsets 0 and 3 (overlapping by one byte).  */
624L(Exit7):
625 mov (%rsi), %ecx
626 mov 3(%rsi), %edx
627 mov %ecx, (%rdi)
628 mov %edx, 3(%rdi)
629# ifdef USE_AS_STPCPY
630 lea 6(%rdi), %rax
631# endif
632# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
633 sub $7, %r8
634 lea 7(%rdi), %rdi
635 jnz L(StrncpyFillTailWithZero)
636# endif
637 ret
638
639 .p2align 4
/* 8 bytes: one qword.  */
640L(Exit8):
641 mov (%rsi), %rdx
642 mov %rdx, (%rdi)
643# ifdef USE_AS_STPCPY
644 lea 7(%rdi), %rax
645# endif
646# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
647 sub $8, %r8
648 lea 8(%rdi), %rdi
649 jnz L(StrncpyFillTailWithZero)
650# endif
651 ret
652
/* L(Exit9) .. L(Exit16): copy exactly N bytes from (%rsi) to (%rdi) with
   an 8-byte access plus a second (possibly overlapping) access, or one
   16-byte vector for N = 16.
   L(Exit9) stores its last byte from %dh.  NOTE(review): presumably %rdx
   holds the small jump-table index on entry, so %dh is zero -- confirm.
   With USE_AS_STPCPY, %rax = %rdi + N - 1.
   With USE_AS_STRNCPY and not USE_AS_STRCAT, %r8 -= N, %rdi += N, and a
   nonzero %r8 continues at L(StrncpyFillTailWithZero) (lea keeps the flags
   set by sub).  */
653 .p2align 4
/* 9 bytes: qword, plus %dh at offset 8.  */
654L(Exit9):
655 mov (%rsi), %rcx
656 mov %dh, 8(%rdi)
657 mov %rcx, (%rdi)
658# ifdef USE_AS_STPCPY
659 lea 8(%rdi), %rax
660# endif
661# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
662 sub $9, %r8
663 lea 9(%rdi), %rdi
664 jnz L(StrncpyFillTailWithZero)
665# endif
666 ret
667
668 .p2align 4
/* 10 bytes: qword, plus word at offset 8.  */
669L(Exit10):
670 mov (%rsi), %rcx
671 mov 8(%rsi), %dx
672 mov %rcx, (%rdi)
673 mov %dx, 8(%rdi)
674# ifdef USE_AS_STPCPY
675 lea 9(%rdi), %rax
676# endif
677# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
678 sub $10, %r8
679 lea 10(%rdi), %rdi
680 jnz L(StrncpyFillTailWithZero)
681# endif
682 ret
683
684 .p2align 4
/* 11 bytes: qword, plus dword at offset 7 (overlapping).  */
685L(Exit11):
686 mov (%rsi), %rcx
687 mov 7(%rsi), %edx
688 mov %rcx, (%rdi)
689 mov %edx, 7(%rdi)
690# ifdef USE_AS_STPCPY
691 lea 10(%rdi), %rax
692# endif
693# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
694 sub $11, %r8
695 lea 11(%rdi), %rdi
696 jnz L(StrncpyFillTailWithZero)
697# endif
698 ret
699
700 .p2align 4
/* 12 bytes: qword, plus dword at offset 8.  */
701L(Exit12):
702 mov (%rsi), %rcx
703 mov 8(%rsi), %edx
704 mov %rcx, (%rdi)
705 mov %edx, 8(%rdi)
706# ifdef USE_AS_STPCPY
707 lea 11(%rdi), %rax
708# endif
709# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
710 sub $12, %r8
711 lea 12(%rdi), %rdi
712 jnz L(StrncpyFillTailWithZero)
713# endif
714 ret
715
716 .p2align 4
/* 13 bytes: qwords at offsets 0 and 5 (overlapping).  */
717L(Exit13):
718 mov (%rsi), %rcx
719 mov 5(%rsi), %rdx
720 mov %rcx, (%rdi)
721 mov %rdx, 5(%rdi)
722# ifdef USE_AS_STPCPY
723 lea 12(%rdi), %rax
724# endif
725# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
726 sub $13, %r8
727 lea 13(%rdi), %rdi
728 jnz L(StrncpyFillTailWithZero)
729# endif
730 ret
731
732 .p2align 4
/* 14 bytes: qwords at offsets 0 and 6 (overlapping).  */
733L(Exit14):
734 mov (%rsi), %rcx
735 mov 6(%rsi), %rdx
736 mov %rcx, (%rdi)
737 mov %rdx, 6(%rdi)
738# ifdef USE_AS_STPCPY
739 lea 13(%rdi), %rax
740# endif
741# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
742 sub $14, %r8
743 lea 14(%rdi), %rdi
744 jnz L(StrncpyFillTailWithZero)
745# endif
746 ret
747
748 .p2align 4
/* 15 bytes: qwords at offsets 0 and 7 (overlapping).  */
749L(Exit15):
750 mov (%rsi), %rcx
751 mov 7(%rsi), %rdx
752 mov %rcx, (%rdi)
753 mov %rdx, 7(%rdi)
754# ifdef USE_AS_STPCPY
755 lea 14(%rdi), %rax
756# endif
757# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
758 sub $15, %r8
759 lea 15(%rdi), %rdi
760 jnz L(StrncpyFillTailWithZero)
761# endif
762 ret
763
764 .p2align 4
/* 16 bytes: one unaligned vector.  */
765L(Exit16):
766 movdqu (%rsi), %xmm0
767 movdqu %xmm0, (%rdi)
768# ifdef USE_AS_STPCPY
769 lea 15(%rdi), %rax
770# endif
771# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
772 sub $16, %r8
773 lea 16(%rdi), %rdi
774 jnz L(StrncpyFillTailWithZero)
775# endif
776 ret
777
/* L(Exit17) .. L(Exit24): copy exactly N bytes from (%rsi) to (%rdi): one
   16-byte vector for bytes 0..15 plus a scalar access (possibly
   overlapping the vector) for the rest.
   L(Exit17) and L(Exit21) store their last byte from %dh.
   NOTE(review): presumably %rdx holds the small jump-table index on entry,
   so %dh is zero -- confirm.
   With USE_AS_STPCPY, %rax = %rdi + N - 1.
   With USE_AS_STRNCPY and not USE_AS_STRCAT, %r8 -= N, %rdi += N, and a
   nonzero %r8 continues at L(StrncpyFillTailWithZero) (lea keeps the flags
   set by sub).  */
778 .p2align 4
/* 17 bytes: vector, plus %dh at offset 16.  */
779L(Exit17):
780 movdqu (%rsi), %xmm0
781 movdqu %xmm0, (%rdi)
782 mov %dh, 16(%rdi)
783# ifdef USE_AS_STPCPY
784 lea 16(%rdi), %rax
785# endif
786# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
787 sub $17, %r8
788 lea 17(%rdi), %rdi
789 jnz L(StrncpyFillTailWithZero)
790# endif
791 ret
792
793 .p2align 4
/* 18 bytes: vector, plus word at offset 16.  */
794L(Exit18):
795 movdqu (%rsi), %xmm0
796 mov 16(%rsi), %cx
797 movdqu %xmm0, (%rdi)
798 mov %cx, 16(%rdi)
799# ifdef USE_AS_STPCPY
800 lea 17(%rdi), %rax
801# endif
802# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
803 sub $18, %r8
804 lea 18(%rdi), %rdi
805 jnz L(StrncpyFillTailWithZero)
806# endif
807 ret
808
809 .p2align 4
/* 19 bytes: vector, plus dword at offset 15 (overlapping).  */
810L(Exit19):
811 movdqu (%rsi), %xmm0
812 mov 15(%rsi), %ecx
813 movdqu %xmm0, (%rdi)
814 mov %ecx, 15(%rdi)
815# ifdef USE_AS_STPCPY
816 lea 18(%rdi), %rax
817# endif
818# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
819 sub $19, %r8
820 lea 19(%rdi), %rdi
821 jnz L(StrncpyFillTailWithZero)
822# endif
823 ret
824
825 .p2align 4
/* 20 bytes: vector, plus dword at offset 16.  */
826L(Exit20):
827 movdqu (%rsi), %xmm0
828 mov 16(%rsi), %ecx
829 movdqu %xmm0, (%rdi)
830 mov %ecx, 16(%rdi)
831# ifdef USE_AS_STPCPY
832 lea 19(%rdi), %rax
833# endif
834# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
835 sub $20, %r8
836 lea 20(%rdi), %rdi
837 jnz L(StrncpyFillTailWithZero)
838# endif
839 ret
840
841 .p2align 4
/* 21 bytes: vector, dword at offset 16, plus %dh at offset 20.  */
842L(Exit21):
843 movdqu (%rsi), %xmm0
844 mov 16(%rsi), %ecx
845 movdqu %xmm0, (%rdi)
846 mov %ecx, 16(%rdi)
847 mov %dh, 20(%rdi)
848# ifdef USE_AS_STPCPY
849 lea 20(%rdi), %rax
850# endif
851# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
852 sub $21, %r8
853 lea 21(%rdi), %rdi
854 jnz L(StrncpyFillTailWithZero)
855# endif
856 ret
857
858 .p2align 4
/* 22 bytes: vector, plus qword at offset 14 (overlapping).  */
859L(Exit22):
860 movdqu (%rsi), %xmm0
861 mov 14(%rsi), %rcx
862 movdqu %xmm0, (%rdi)
863 mov %rcx, 14(%rdi)
864# ifdef USE_AS_STPCPY
865 lea 21(%rdi), %rax
866# endif
867# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
868 sub $22, %r8
869 lea 22(%rdi), %rdi
870 jnz L(StrncpyFillTailWithZero)
871# endif
872 ret
873
874 .p2align 4
/* 23 bytes: vector, plus qword at offset 15 (overlapping).  */
875L(Exit23):
876 movdqu (%rsi), %xmm0
877 mov 15(%rsi), %rcx
878 movdqu %xmm0, (%rdi)
879 mov %rcx, 15(%rdi)
880# ifdef USE_AS_STPCPY
881 lea 22(%rdi), %rax
882# endif
883# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
884 sub $23, %r8
885 lea 23(%rdi), %rdi
886 jnz L(StrncpyFillTailWithZero)
887# endif
888 ret
889
890 .p2align 4
/* 24 bytes: vector, plus qword at offset 16.  */
891L(Exit24):
892 movdqu (%rsi), %xmm0
893 mov 16(%rsi), %rcx
894 movdqu %xmm0, (%rdi)
895 mov %rcx, 16(%rdi)
896# ifdef USE_AS_STPCPY
897 lea 23(%rdi), %rax
898# endif
899# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
900 sub $24, %r8
901 lea 24(%rdi), %rdi
902 jnz L(StrncpyFillTailWithZero)
903# endif
904 ret
905
/* L(Exit25) .. L(Exit32): copy exactly N bytes from (%rsi) to (%rdi): a
   16-byte vector for bytes 0..15 plus either scalar accesses or a second
   (possibly overlapping) 16-byte vector for the rest.
   L(Exit25) stores its last byte from %dh.  NOTE(review): presumably %rdx
   holds the small jump-table index on entry, so %dh is zero -- confirm.
   With USE_AS_STPCPY, %rax = %rdi + N - 1.
   With USE_AS_STRNCPY and not USE_AS_STRCAT, %r8 -= N, %rdi += N, and a
   nonzero %r8 continues at L(StrncpyFillTailWithZero) (lea keeps the flags
   set by sub).  */
906 .p2align 4
/* 25 bytes: vector, qword at offset 16, plus %dh at offset 24.  */
907L(Exit25):
908 movdqu (%rsi), %xmm0
909 mov 16(%rsi), %rcx
910 movdqu %xmm0, (%rdi)
911 mov %rcx, 16(%rdi)
912 mov %dh, 24(%rdi)
913# ifdef USE_AS_STPCPY
914 lea 24(%rdi), %rax
915# endif
916# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
917 sub $25, %r8
918 lea 25(%rdi), %rdi
919 jnz L(StrncpyFillTailWithZero)
920# endif
921 ret
922
923 .p2align 4
/* 26 bytes: vector, qword at offset 16, word at offset 24.  */
924L(Exit26):
925 movdqu (%rsi), %xmm0
926 mov 16(%rsi), %rdx
927 mov 24(%rsi), %cx
928 movdqu %xmm0, (%rdi)
929 mov %rdx, 16(%rdi)
930 mov %cx, 24(%rdi)
931# ifdef USE_AS_STPCPY
932 lea 25(%rdi), %rax
933# endif
934# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
935 sub $26, %r8
936 lea 26(%rdi), %rdi
937 jnz L(StrncpyFillTailWithZero)
938# endif
939 ret
940
941 .p2align 4
/* 27 bytes: vector, qword at offset 16, dword at offset 23 (overlapping).  */
942L(Exit27):
943 movdqu (%rsi), %xmm0
944 mov 16(%rsi), %rdx
945 mov 23(%rsi), %ecx
946 movdqu %xmm0, (%rdi)
947 mov %rdx, 16(%rdi)
948 mov %ecx, 23(%rdi)
949# ifdef USE_AS_STPCPY
950 lea 26(%rdi), %rax
951# endif
952# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
953 sub $27, %r8
954 lea 27(%rdi), %rdi
955 jnz L(StrncpyFillTailWithZero)
956# endif
957 ret
958
959 .p2align 4
/* 28 bytes: vector, qword at offset 16, dword at offset 24.  */
960L(Exit28):
961 movdqu (%rsi), %xmm0
962 mov 16(%rsi), %rdx
963 mov 24(%rsi), %ecx
964 movdqu %xmm0, (%rdi)
965 mov %rdx, 16(%rdi)
966 mov %ecx, 24(%rdi)
967# ifdef USE_AS_STPCPY
968 lea 27(%rdi), %rax
969# endif
970# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
971 sub $28, %r8
972 lea 28(%rdi), %rdi
973 jnz L(StrncpyFillTailWithZero)
974# endif
975 ret
976
977 .p2align 4
/* 29 bytes: vectors at offsets 0 and 13 (overlapping).  */
978L(Exit29):
979 movdqu (%rsi), %xmm0
980 movdqu 13(%rsi), %xmm2
981 movdqu %xmm0, (%rdi)
982 movdqu %xmm2, 13(%rdi)
983# ifdef USE_AS_STPCPY
984 lea 28(%rdi), %rax
985# endif
986# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
987 sub $29, %r8
988 lea 29(%rdi), %rdi
989 jnz L(StrncpyFillTailWithZero)
990# endif
991 ret
992
993 .p2align 4
/* 30 bytes: vectors at offsets 0 and 14 (overlapping).  */
994L(Exit30):
995 movdqu (%rsi), %xmm0
996 movdqu 14(%rsi), %xmm2
997 movdqu %xmm0, (%rdi)
998 movdqu %xmm2, 14(%rdi)
999# ifdef USE_AS_STPCPY
1000 lea 29(%rdi), %rax
1001# endif
1002# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1003 sub $30, %r8
1004 lea 30(%rdi), %rdi
1005 jnz L(StrncpyFillTailWithZero)
1006# endif
1007 ret
1008
1009 .p2align 4
/* 31 bytes: vectors at offsets 0 and 15 (overlapping).  */
1010L(Exit31):
1011 movdqu (%rsi), %xmm0
1012 movdqu 15(%rsi), %xmm2
1013 movdqu %xmm0, (%rdi)
1014 movdqu %xmm2, 15(%rdi)
1015# ifdef USE_AS_STPCPY
1016 lea 30(%rdi), %rax
1017# endif
1018# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1019 sub $31, %r8
1020 lea 31(%rdi), %rdi
1021 jnz L(StrncpyFillTailWithZero)
1022# endif
1023 ret
1024
1025 .p2align 4
/* 32 bytes: vectors at offsets 0 and 16.  */
1026L(Exit32):
1027 movdqu (%rsi), %xmm0
1028 movdqu 16(%rsi), %xmm2
1029 movdqu %xmm0, (%rdi)
1030 movdqu %xmm2, 16(%rdi)
1031# ifdef USE_AS_STPCPY
1032 lea 31(%rdi), %rax
1033# endif
1034# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
1035 sub $32, %r8
1036 lea 32(%rdi), %rdi
1037 jnz L(StrncpyFillTailWithZero)
1038# endif
1039 ret
1040
/* L(StrncpyExit0) .. L(StrncpyExit8) (USE_AS_STRNCPY only): copy exactly
   N bytes from (%rsi) to (%rdi) and return; every byte is loaded from the
   source.
   With USE_AS_STPCPY, %rax = %rdi + N.
   With USE_AS_STRCAT, a zero byte is additionally stored at N(%rdi); it is
   taken from %ch, which is cleared with xor after the copy.  */
1041# ifdef USE_AS_STRNCPY
1042
1043 .p2align 4
/* 0 bytes: nothing is copied.  */
1044L(StrncpyExit0):
1045# ifdef USE_AS_STPCPY
1046 mov %rdi, %rax
1047# endif
1048# ifdef USE_AS_STRCAT
1049 xor %ch, %ch
1050 movb %ch, (%rdi)
1051# endif
1052 ret
1053
1054 .p2align 4
/* 1 byte.  */
1055L(StrncpyExit1):
1056 mov (%rsi), %dl
1057 mov %dl, (%rdi)
1058# ifdef USE_AS_STPCPY
1059 lea 1(%rdi), %rax
1060# endif
1061# ifdef USE_AS_STRCAT
1062 xor %ch, %ch
1063 movb %ch, 1(%rdi)
1064# endif
1065 ret
1066
1067 .p2align 4
/* 2 bytes: one word.  */
1068L(StrncpyExit2):
1069 mov (%rsi), %dx
1070 mov %dx, (%rdi)
1071# ifdef USE_AS_STPCPY
1072 lea 2(%rdi), %rax
1073# endif
1074# ifdef USE_AS_STRCAT
1075 xor %ch, %ch
1076 movb %ch, 2(%rdi)
1077# endif
1078 ret
1079
1080 .p2align 4
/* 3 bytes: word, plus byte at offset 2.  */
1081L(StrncpyExit3):
1082 mov (%rsi), %cx
1083 mov 2(%rsi), %dl
1084 mov %cx, (%rdi)
1085 mov %dl, 2(%rdi)
1086# ifdef USE_AS_STPCPY
1087 lea 3(%rdi), %rax
1088# endif
1089# ifdef USE_AS_STRCAT
1090 xor %ch, %ch
1091 movb %ch, 3(%rdi)
1092# endif
1093 ret
1094
1095 .p2align 4
/* 4 bytes: one dword.  */
1096L(StrncpyExit4):
1097 mov (%rsi), %edx
1098 mov %edx, (%rdi)
1099# ifdef USE_AS_STPCPY
1100 lea 4(%rdi), %rax
1101# endif
1102# ifdef USE_AS_STRCAT
1103 xor %ch, %ch
1104 movb %ch, 4(%rdi)
1105# endif
1106 ret
1107
1108 .p2align 4
/* 5 bytes: dword, plus byte at offset 4.  */
1109L(StrncpyExit5):
1110 mov (%rsi), %ecx
1111 mov 4(%rsi), %dl
1112 mov %ecx, (%rdi)
1113 mov %dl, 4(%rdi)
1114# ifdef USE_AS_STPCPY
1115 lea 5(%rdi), %rax
1116# endif
1117# ifdef USE_AS_STRCAT
1118 xor %ch, %ch
1119 movb %ch, 5(%rdi)
1120# endif
1121 ret
1122
1123 .p2align 4
/* 6 bytes: dword, plus word at offset 4.  */
1124L(StrncpyExit6):
1125 mov (%rsi), %ecx
1126 mov 4(%rsi), %dx
1127 mov %ecx, (%rdi)
1128 mov %dx, 4(%rdi)
1129# ifdef USE_AS_STPCPY
1130 lea 6(%rdi), %rax
1131# endif
1132# ifdef USE_AS_STRCAT
1133 xor %ch, %ch
1134 movb %ch, 6(%rdi)
1135# endif
1136 ret
1137
1138 .p2align 4
/* 7 bytes: dwords at offsets 0 and 3 (overlapping).  */
1139L(StrncpyExit7):
1140 mov (%rsi), %ecx
1141 mov 3(%rsi), %edx
1142 mov %ecx, (%rdi)
1143 mov %edx, 3(%rdi)
1144# ifdef USE_AS_STPCPY
1145 lea 7(%rdi), %rax
1146# endif
1147# ifdef USE_AS_STRCAT
1148 xor %ch, %ch
1149 movb %ch, 7(%rdi)
1150# endif
1151 ret
1152
1153 .p2align 4
/* 8 bytes: one qword.  */
1154L(StrncpyExit8):
1155 mov (%rsi), %rdx
1156 mov %rdx, (%rdi)
1157# ifdef USE_AS_STPCPY
1158 lea 8(%rdi), %rax
1159# endif
1160# ifdef USE_AS_STRCAT
1161 xor %ch, %ch
1162 movb %ch, 8(%rdi)
1163# endif
1164 ret
1165
/* L(StrncpyExit9) .. L(StrncpyExit16): copy exactly N bytes from (%rsi) to
   (%rdi) and return, using a qword plus a second (possibly overlapping)
   access, or one 16-byte vector for N = 16.
   With USE_AS_STPCPY, %rax = %rdi + N.
   With USE_AS_STRCAT, a zero byte is additionally stored at N(%rdi); it is
   taken from %ch, which is cleared with xor after the copy.  */
1166 .p2align 4
/* 9 bytes: qword, plus byte at offset 8.  */
1167L(StrncpyExit9):
1168 mov (%rsi), %rcx
1169 mov 8(%rsi), %dl
1170 mov %rcx, (%rdi)
1171 mov %dl, 8(%rdi)
1172# ifdef USE_AS_STPCPY
1173 lea 9(%rdi), %rax
1174# endif
1175# ifdef USE_AS_STRCAT
1176 xor %ch, %ch
1177 movb %ch, 9(%rdi)
1178# endif
1179 ret
1180
1181 .p2align 4
/* 10 bytes: qword, plus word at offset 8.  */
1182L(StrncpyExit10):
1183 mov (%rsi), %rcx
1184 mov 8(%rsi), %dx
1185 mov %rcx, (%rdi)
1186 mov %dx, 8(%rdi)
1187# ifdef USE_AS_STPCPY
1188 lea 10(%rdi), %rax
1189# endif
1190# ifdef USE_AS_STRCAT
1191 xor %ch, %ch
1192 movb %ch, 10(%rdi)
1193# endif
1194 ret
1195
1196 .p2align 4
/* 11 bytes: qword, plus dword at offset 7 (overlapping).  */
1197L(StrncpyExit11):
1198 mov (%rsi), %rcx
1199 mov 7(%rsi), %edx
1200 mov %rcx, (%rdi)
1201 mov %edx, 7(%rdi)
1202# ifdef USE_AS_STPCPY
1203 lea 11(%rdi), %rax
1204# endif
1205# ifdef USE_AS_STRCAT
1206 xor %ch, %ch
1207 movb %ch, 11(%rdi)
1208# endif
1209 ret
1210
1211 .p2align 4
/* 12 bytes: qword, plus dword at offset 8.  */
1212L(StrncpyExit12):
1213 mov (%rsi), %rcx
1214 mov 8(%rsi), %edx
1215 mov %rcx, (%rdi)
1216 mov %edx, 8(%rdi)
1217# ifdef USE_AS_STPCPY
1218 lea 12(%rdi), %rax
1219# endif
1220# ifdef USE_AS_STRCAT
1221 xor %ch, %ch
1222 movb %ch, 12(%rdi)
1223# endif
1224 ret
1225
1226 .p2align 4
/* 13 bytes: qwords at offsets 0 and 5 (overlapping).  */
1227L(StrncpyExit13):
1228 mov (%rsi), %rcx
1229 mov 5(%rsi), %rdx
1230 mov %rcx, (%rdi)
1231 mov %rdx, 5(%rdi)
1232# ifdef USE_AS_STPCPY
1233 lea 13(%rdi), %rax
1234# endif
1235# ifdef USE_AS_STRCAT
1236 xor %ch, %ch
1237 movb %ch, 13(%rdi)
1238# endif
1239 ret
1240
1241 .p2align 4
/* 14 bytes: qwords at offsets 0 and 6 (overlapping).  */
1242L(StrncpyExit14):
1243 mov (%rsi), %rcx
1244 mov 6(%rsi), %rdx
1245 mov %rcx, (%rdi)
1246 mov %rdx, 6(%rdi)
1247# ifdef USE_AS_STPCPY
1248 lea 14(%rdi), %rax
1249# endif
1250# ifdef USE_AS_STRCAT
1251 xor %ch, %ch
1252 movb %ch, 14(%rdi)
1253# endif
1254 ret
1255
1256 .p2align 4
/* 15 bytes: qwords at offsets 0 and 7 (overlapping).  */
1257L(StrncpyExit15):
1258 mov (%rsi), %rcx
1259 mov 7(%rsi), %rdx
1260 mov %rcx, (%rdi)
1261 mov %rdx, 7(%rdi)
1262# ifdef USE_AS_STPCPY
1263 lea 15(%rdi), %rax
1264# endif
1265# ifdef USE_AS_STRCAT
1266 xor %ch, %ch
1267 movb %ch, 15(%rdi)
1268# endif
1269 ret
1270
1271 .p2align 4
/* 16 bytes: one unaligned vector.  */
1272L(StrncpyExit16):
1273 movdqu (%rsi), %xmm0
1274 movdqu %xmm0, (%rdi)
1275# ifdef USE_AS_STPCPY
1276 lea 16(%rdi), %rax
1277# endif
1278# ifdef USE_AS_STRCAT
1279 xor %ch, %ch
1280 movb %ch, 16(%rdi)
1281# endif
1282 ret
1283
/* L(StrncpyExit17) .. L(StrncpyExit24): copy exactly N bytes from (%rsi)
   to (%rdi) and return: a 16-byte vector for bytes 0..15 plus scalar
   accesses (possibly overlapping the vector) for the rest.
   With USE_AS_STPCPY, %rax = %rdi + N.
   With USE_AS_STRCAT, a zero byte is additionally stored at N(%rdi); it is
   taken from %ch, which is cleared with xor after the copy.  */
1284 .p2align 4
/* 17 bytes: vector, plus byte at offset 16.  */
1285L(StrncpyExit17):
1286 movdqu (%rsi), %xmm0
1287 mov 16(%rsi), %cl
1288 movdqu %xmm0, (%rdi)
1289 mov %cl, 16(%rdi)
1290# ifdef USE_AS_STPCPY
1291 lea 17(%rdi), %rax
1292# endif
1293# ifdef USE_AS_STRCAT
1294 xor %ch, %ch
1295 movb %ch, 17(%rdi)
1296# endif
1297 ret
1298
1299 .p2align 4
/* 18 bytes: vector, plus word at offset 16.  */
1300L(StrncpyExit18):
1301 movdqu (%rsi), %xmm0
1302 mov 16(%rsi), %cx
1303 movdqu %xmm0, (%rdi)
1304 mov %cx, 16(%rdi)
1305# ifdef USE_AS_STPCPY
1306 lea 18(%rdi), %rax
1307# endif
1308# ifdef USE_AS_STRCAT
1309 xor %ch, %ch
1310 movb %ch, 18(%rdi)
1311# endif
1312 ret
1313
1314 .p2align 4
/* 19 bytes: vector, plus dword at offset 15 (overlapping).  */
1315L(StrncpyExit19):
1316 movdqu (%rsi), %xmm0
1317 mov 15(%rsi), %ecx
1318 movdqu %xmm0, (%rdi)
1319 mov %ecx, 15(%rdi)
1320# ifdef USE_AS_STPCPY
1321 lea 19(%rdi), %rax
1322# endif
1323# ifdef USE_AS_STRCAT
1324 xor %ch, %ch
1325 movb %ch, 19(%rdi)
1326# endif
1327 ret
1328
1329 .p2align 4
/* 20 bytes: vector, plus dword at offset 16.  */
1330L(StrncpyExit20):
1331 movdqu (%rsi), %xmm0
1332 mov 16(%rsi), %ecx
1333 movdqu %xmm0, (%rdi)
1334 mov %ecx, 16(%rdi)
1335# ifdef USE_AS_STPCPY
1336 lea 20(%rdi), %rax
1337# endif
1338# ifdef USE_AS_STRCAT
1339 xor %ch, %ch
1340 movb %ch, 20(%rdi)
1341# endif
1342 ret
1343
1344 .p2align 4
/* 21 bytes: vector, dword at offset 16, byte at offset 20.  */
1345L(StrncpyExit21):
1346 movdqu (%rsi), %xmm0
1347 mov 16(%rsi), %ecx
1348 mov 20(%rsi), %dl
1349 movdqu %xmm0, (%rdi)
1350 mov %ecx, 16(%rdi)
1351 mov %dl, 20(%rdi)
1352# ifdef USE_AS_STPCPY
1353 lea 21(%rdi), %rax
1354# endif
1355# ifdef USE_AS_STRCAT
1356 xor %ch, %ch
1357 movb %ch, 21(%rdi)
1358# endif
1359 ret
1360
1361 .p2align 4
/* 22 bytes: vector, plus qword at offset 14 (overlapping).  */
1362L(StrncpyExit22):
1363 movdqu (%rsi), %xmm0
1364 mov 14(%rsi), %rcx
1365 movdqu %xmm0, (%rdi)
1366 mov %rcx, 14(%rdi)
1367# ifdef USE_AS_STPCPY
1368 lea 22(%rdi), %rax
1369# endif
1370# ifdef USE_AS_STRCAT
1371 xor %ch, %ch
1372 movb %ch, 22(%rdi)
1373# endif
1374 ret
1375
1376 .p2align 4
/* 23 bytes: vector, plus qword at offset 15 (overlapping).  */
1377L(StrncpyExit23):
1378 movdqu (%rsi), %xmm0
1379 mov 15(%rsi), %rcx
1380 movdqu %xmm0, (%rdi)
1381 mov %rcx, 15(%rdi)
1382# ifdef USE_AS_STPCPY
1383 lea 23(%rdi), %rax
1384# endif
1385# ifdef USE_AS_STRCAT
1386 xor %ch, %ch
1387 movb %ch, 23(%rdi)
1388# endif
1389 ret
1390
1391 .p2align 4
/* 24 bytes: vector, plus qword at offset 16.  */
1392L(StrncpyExit24):
1393 movdqu (%rsi), %xmm0
1394 mov 16(%rsi), %rcx
1395 movdqu %xmm0, (%rdi)
1396 mov %rcx, 16(%rdi)
1397# ifdef USE_AS_STPCPY
1398 lea 24(%rdi), %rax
1399# endif
1400# ifdef USE_AS_STRCAT
1401 xor %ch, %ch
1402 movb %ch, 24(%rdi)
1403# endif
1404 ret
1405
1406 .p2align 4
1407L(StrncpyExit25):
1408 movdqu (%rsi), %xmm0
1409 mov 16(%rsi), %rdx
1410 mov 24(%rsi), %cl
1411 movdqu %xmm0, (%rdi)
1412 mov %rdx, 16(%rdi)
1413 mov %cl, 24(%rdi)
1414# ifdef USE_AS_STPCPY
1415 lea 25(%rdi), %rax
1416# endif
1417# ifdef USE_AS_STRCAT
1418 xor %ch, %ch
1419 movb %ch, 25(%rdi)
1420# endif
1421 ret
1422
1423 .p2align 4
1424L(StrncpyExit26):
1425 movdqu (%rsi), %xmm0
1426 mov 16(%rsi), %rdx
1427 mov 24(%rsi), %cx
1428 movdqu %xmm0, (%rdi)
1429 mov %rdx, 16(%rdi)
1430 mov %cx, 24(%rdi)
1431# ifdef USE_AS_STPCPY
1432 lea 26(%rdi), %rax
1433# endif
1434# ifdef USE_AS_STRCAT
1435 xor %ch, %ch
1436 movb %ch, 26(%rdi)
1437# endif
1438 ret
1439
1440 .p2align 4
1441L(StrncpyExit27):
1442 movdqu (%rsi), %xmm0
1443 mov 16(%rsi), %rdx
1444 mov 23(%rsi), %ecx
1445 movdqu %xmm0, (%rdi)
1446 mov %rdx, 16(%rdi)
1447 mov %ecx, 23(%rdi)
1448# ifdef USE_AS_STPCPY
1449 lea 27(%rdi), %rax
1450# endif
1451# ifdef USE_AS_STRCAT
1452 xor %ch, %ch
1453 movb %ch, 27(%rdi)
1454# endif
1455 ret
1456
1457 .p2align 4
1458L(StrncpyExit28):
1459 movdqu (%rsi), %xmm0
1460 mov 16(%rsi), %rdx
1461 mov 24(%rsi), %ecx
1462 movdqu %xmm0, (%rdi)
1463 mov %rdx, 16(%rdi)
1464 mov %ecx, 24(%rdi)
1465# ifdef USE_AS_STPCPY
1466 lea 28(%rdi), %rax
1467# endif
1468# ifdef USE_AS_STRCAT
1469 xor %ch, %ch
1470 movb %ch, 28(%rdi)
1471# endif
1472 ret
1473
1474 .p2align 4
1475L(StrncpyExit29): /* L(ExitStrncpyTable) entry 29: copy exactly 29 bytes from (%rsi) to (%rdi) and return. */
1476 movdqu (%rsi), %xmm0 /* load bytes 0..15 (unaligned) */
1477 movdqu 13(%rsi), %xmm2 /* load bytes 13..28; overlaps the first load by 3 bytes */
1478 movdqu %xmm0, (%rdi) /* store bytes 0..15 */
1479 movdqu %xmm2, 13(%rdi) /* store bytes 13..28 */
1480# ifdef USE_AS_STPCPY
1481 lea 29(%rdi), %rax /* return value = dst + 29 */
1482# endif
1483# ifdef USE_AS_STRCAT
1484 xor %ch, %ch /* %ch = 0, used only as the source of a zero byte */
1485 movb %ch, 29(%rdi) /* zero byte right after the 29 copied bytes */
1486# endif
1487 ret
1488
1489 .p2align 4
1490L(StrncpyExit30): /* L(ExitStrncpyTable) entry 30: copy exactly 30 bytes from (%rsi) to (%rdi) and return. */
1491 movdqu (%rsi), %xmm0 /* load bytes 0..15 (unaligned) */
1492 movdqu 14(%rsi), %xmm2 /* load bytes 14..29; overlaps the first load by 2 bytes */
1493 movdqu %xmm0, (%rdi) /* store bytes 0..15 */
1494 movdqu %xmm2, 14(%rdi) /* store bytes 14..29 */
1495# ifdef USE_AS_STPCPY
1496 lea 30(%rdi), %rax /* return value = dst + 30 */
1497# endif
1498# ifdef USE_AS_STRCAT
1499 xor %ch, %ch /* %ch = 0, used only as the source of a zero byte */
1500 movb %ch, 30(%rdi) /* zero byte right after the 30 copied bytes */
1501# endif
1502 ret
1503
1504 .p2align 4
1505L(StrncpyExit31): /* L(ExitStrncpyTable) entry 31: copy exactly 31 bytes from (%rsi) to (%rdi) and return. */
1506 movdqu (%rsi), %xmm0 /* load bytes 0..15 (unaligned) */
1507 movdqu 15(%rsi), %xmm2 /* load bytes 15..30; overlaps the first load by 1 byte */
1508 movdqu %xmm0, (%rdi) /* store bytes 0..15 */
1509 movdqu %xmm2, 15(%rdi) /* store bytes 15..30 */
1510# ifdef USE_AS_STPCPY
1511 lea 31(%rdi), %rax /* return value = dst + 31 */
1512# endif
1513# ifdef USE_AS_STRCAT
1514 xor %ch, %ch /* %ch = 0, used only as the source of a zero byte */
1515 movb %ch, 31(%rdi) /* zero byte right after the 31 copied bytes */
1516# endif
1517 ret
1518
1519 .p2align 4
1520L(StrncpyExit32): /* L(ExitStrncpyTable) entry 32: copy exactly 32 bytes from (%rsi) to (%rdi) and return. */
1521 movdqu (%rsi), %xmm0 /* load bytes 0..15 (unaligned) */
1522 movdqu 16(%rsi), %xmm2 /* load bytes 16..31 */
1523 movdqu %xmm0, (%rdi) /* store bytes 0..15 */
1524 movdqu %xmm2, 16(%rdi) /* store bytes 16..31 */
1525# ifdef USE_AS_STPCPY
1526 lea 32(%rdi), %rax /* return value = dst + 32 */
1527# endif
1528# ifdef USE_AS_STRCAT
1529 xor %ch, %ch /* %ch = 0, used only as the source of a zero byte */
1530 movb %ch, 32(%rdi) /* zero byte right after the 32 copied bytes */
1531# endif
1532 ret
1533
1534 .p2align 4
1535L(StrncpyExit33): /* L(ExitStrncpyTable) entry 33: copy exactly 33 bytes from (%rsi) to (%rdi) and return. */
1536 movdqu (%rsi), %xmm0 /* load bytes 0..15 (unaligned) */
1537 movdqu 16(%rsi), %xmm2 /* load bytes 16..31 */
1538 mov 32(%rsi), %cl /* load byte 32 */
1539 movdqu %xmm0, (%rdi) /* store bytes 0..15 */
1540 movdqu %xmm2, 16(%rdi) /* store bytes 16..31 */
1541 mov %cl, 32(%rdi) /* store byte 32; NOTE(review): unlike StrncpyExit24..32 there is no USE_AS_STPCPY update of %rax in this stub - presumably entry 33 is never selected in stpncpy builds; confirm */
1542# ifdef USE_AS_STRCAT
1543 xor %ch, %ch /* %ch = 0; only %cl carried data and it is already stored */
1544 movb %ch, 33(%rdi) /* zero byte right after the 33 copied bytes */
1545# endif
1546 ret
1547
1548# ifndef USE_AS_STRCAT
1549
1550 .p2align 4
1551L(Fill0): /* L(FillTable) entry 0: nothing left to zero-fill. */
1552 ret
1553
1554 .p2align 4
1555L(Fill1): /* L(FillTable) entry 1: write 1 zero byte at (%rdi) and return. */
1556 mov %dl, (%rdi) /* relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching through L(FillTable) */
1557 ret
1558
1559 .p2align 4
1560L(Fill2): /* L(FillTable) entry 2: write 2 zero bytes at (%rdi) and return. */
1561 mov %dx, (%rdi) /* relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching through L(FillTable) */
1562 ret
1563
1564 .p2align 4
1565L(Fill3): /* L(FillTable) entry 3: zero dst[0..2] with a single 4-byte store and return. */
1566 mov %edx, -1(%rdi) /* %rdx == 0 (set by L(StrncpyFillTailWithZero)); the store starts one byte early and so also rewrites dst[-1] with zero - assumes that byte is already meant to be zero, TODO confirm for every dispatch site */
1567 ret
1568
1569 .p2align 4
1570L(Fill4): /* L(FillTable) entry 4: write 4 zero bytes at (%rdi) and return. */
1571 mov %edx, (%rdi) /* relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching through L(FillTable) */
1572 ret
1573
1574 .p2align 4
1575L(Fill5): /* L(FillTable) entry 5: write 5 zero bytes at (%rdi) and return; relies on %rdx == 0 as set by L(StrncpyFillTailWithZero). */
1576 mov %edx, (%rdi) /* dst[0..3] = 0 */
1577 mov %dl, 4(%rdi) /* dst[4] = 0 */
1578 ret
1579
1580 .p2align 4
1581L(Fill6): /* L(FillTable) entry 6: write 6 zero bytes at (%rdi) and return; relies on %rdx == 0 as set by L(StrncpyFillTailWithZero). */
1582 mov %edx, (%rdi) /* dst[0..3] = 0 */
1583 mov %dx, 4(%rdi) /* dst[4..5] = 0 */
1584 ret
1585
1586 .p2align 4
1587L(Fill7): /* L(FillTable) entry 7: zero dst[0..6] with a single 8-byte store and return. */
1588 mov %rdx, -1(%rdi) /* %rdx == 0 (set by L(StrncpyFillTailWithZero)); the store starts one byte early and so also rewrites dst[-1] with zero - assumes that byte is already meant to be zero, TODO confirm for every dispatch site */
1589 ret
1590
1591 .p2align 4
1592L(Fill8): /* L(FillTable) entry 8: write 8 zero bytes at (%rdi) and return. */
1593 mov %rdx, (%rdi) /* relies on %rdx == 0, which L(StrncpyFillTailWithZero) sets before dispatching through L(FillTable) */
1594 ret
1595
1596 .p2align 4
1597L(Fill9): /* L(FillTable) entry 9: write 9 zero bytes at (%rdi) and return; relies on %rdx == 0 as set by L(StrncpyFillTailWithZero). */
1598 mov %rdx, (%rdi) /* dst[0..7] = 0 */
1599 mov %dl, 8(%rdi) /* dst[8] = 0 */
1600 ret
1601
1602 .p2align 4
1603L(Fill10): /* L(FillTable) entry 10: write 10 zero bytes at (%rdi) and return; relies on %rdx == 0 as set by L(StrncpyFillTailWithZero). */
1604 mov %rdx, (%rdi) /* dst[0..7] = 0 */
1605 mov %dx, 8(%rdi) /* dst[8..9] = 0 */
1606 ret
1607
1608 .p2align 4
1609L(Fill11): /* L(FillTable) entry 11: write 11 zero bytes at (%rdi) and return; relies on %rdx == 0 as set by L(StrncpyFillTailWithZero). */
1610 mov %rdx, (%rdi) /* dst[0..7] = 0 */
1611 mov %edx, 7(%rdi) /* dst[7..10] = 0; overlaps byte 7 so no 3-byte store is needed */
1612 ret
1613
1614 .p2align 4
1615L(Fill12): /* L(FillTable) entry 12: write 12 zero bytes at (%rdi) and return; relies on %rdx == 0 as set by L(StrncpyFillTailWithZero). */
1616 mov %rdx, (%rdi) /* dst[0..7] = 0 */
1617 mov %edx, 8(%rdi) /* dst[8..11] = 0 */
1618 ret
1619
1620 .p2align 4
1621L(Fill13): /* L(FillTable) entry 13: write 13 zero bytes at (%rdi) and return; relies on %rdx == 0 as set by L(StrncpyFillTailWithZero). */
1622 mov %rdx, (%rdi) /* dst[0..7] = 0 */
1623 mov %rdx, 5(%rdi) /* dst[5..12] = 0; overlaps the first store by 3 bytes */
1624 ret
1625
1626 .p2align 4
1627L(Fill14): /* L(FillTable) entry 14: write 14 zero bytes at (%rdi) and return; relies on %rdx == 0 as set by L(StrncpyFillTailWithZero). */
1628 mov %rdx, (%rdi) /* dst[0..7] = 0 */
1629 mov %rdx, 6(%rdi) /* dst[6..13] = 0; overlaps the first store by 2 bytes */
1630 ret
1631
1632 .p2align 4
1633L(Fill15): /* L(FillTable) entry 15: zero dst[0..14] with a single unaligned 16-byte store and return. */
1634 movdqu %xmm0, -1(%rdi) /* %xmm0 == 0 (set by L(StrncpyFillTailWithZero)); the store starts one byte early and so also rewrites dst[-1] with zero - assumes that byte is already meant to be zero, TODO confirm for every dispatch site */
1635 ret
1636
1637 .p2align 4
1638L(Fill16): /* L(FillTable) entry 16: write 16 zero bytes at (%rdi) and return. */
1639 movdqu %xmm0, (%rdi) /* relies on %xmm0 == 0, which L(StrncpyFillTailWithZero) sets before dispatching through L(FillTable) */
1640 ret
1641
1642 .p2align 4
1643L(CopyFrom1To16BytesUnalignedXmm2): /* Store the 16 bytes in %xmm2 at dst + %rcx, then fall through into L(CopyFrom1To16BytesXmmExit). */
1644 movdqu %xmm2, (%rdi, %rcx) /* unaligned store; there is no ret or jump after it, execution continues at the next label */
1645
1646 .p2align 4
1647L(CopyFrom1To16BytesXmmExit): /* Convert the bit mask in %rdx to a byte index, step dst just past that byte, adjust the count in %r8, then fall through into L(StrncpyFillTailWithZero). */
1648 bsf %rdx, %rdx /* %rdx = index of the lowest set bit (presumably the offset of the first NUL found by pcmpeqb/pmovmskb - confirm at the jump sites) */
1649 add $15, %r8 /* together with the sub below: %r8 = %r8 + 15 - index */
1650 add %rcx, %rdi /* dst += %rcx, the offset of the chunk just stored */
1651# ifdef USE_AS_STPCPY
1652 lea (%rdi, %rdx), %rax /* return value = address of the byte at that index */
1653# endif
1654 sub %rdx, %r8
1655 lea 1(%rdi, %rdx), %rdi /* dst = one byte past that index: where zero filling starts */
1656
1657 .p2align 4
1658L(StrncpyFillTailWithZero): /* Zero-fill %r8 bytes starting at (%rdi). Every path ends by jumping through L(FillTable) to a L(FillN) stub, which writes the last 0..16 bytes and returns. */
1659 pxor %xmm0, %xmm0 /* %xmm0 = 0: source for the 16-byte zero stores */
1660 xor %rdx, %rdx /* %rdx = 0: source for the narrower stores in the L(FillN) stubs */
1661 sub $16, %r8
1662 jbe L(StrncpyFillExit) /* count was <= 16: one L(FillN) stub handles all of it */
1663
1664 movdqu %xmm0, (%rdi) /* first 16 zero bytes; dst may be unaligned here */
1665 add $16, %rdi
1666
1667 mov %rdi, %rsi /* %rsi is reused as scratch from here on */
1668 and $0xf, %rsi /* %rsi = dst & 15 */
1669 sub %rsi, %rdi /* round dst down to a 16-byte boundary so the movdqa stores below are legal */
1670 add %rsi, %r8 /* grow the count by the bytes just stepped back over (they get zeroed again) */
1671 sub $64, %r8 /* from here on %r8 = (bytes left from %rdi) - 64 */
1672 jb L(StrncpyFillLess64) /* borrow: fewer than 64 bytes left */
1673
1674L(StrncpyFillLoopMovdqa): /* Main loop: 64 zero bytes per iteration using aligned stores. */
1675 movdqa %xmm0, (%rdi)
1676 movdqa %xmm0, 16(%rdi)
1677 movdqa %xmm0, 32(%rdi)
1678 movdqa %xmm0, 48(%rdi)
1679 add $64, %rdi
1680 sub $64, %r8
1681 jae L(StrncpyFillLoopMovdqa) /* repeat while the subtraction did not borrow, i.e. at least 64 bytes were left */
1682
1683L(StrncpyFillLess64): /* 0..63 bytes left; on entry %r8 = (bytes left) - 64, which is negative. */
1684 add $32, %r8 /* %r8 = (bytes left) - 32 */
1685 jl L(StrncpyFillLess32) /* signed test: fewer than 32 bytes left */
1686 movdqa %xmm0, (%rdi) /* at least 32 left: zero 32 bytes */
1687 movdqa %xmm0, 16(%rdi)
1688 add $32, %rdi
1689 sub $16, %r8 /* %r8 = (bytes still left) - 16 */
1690 jl L(StrncpyFillExit) /* fewer than 16 left: L(StrncpyFillExit) adds the 16 back */
1691 movdqa %xmm0, (%rdi) /* one more full 16-byte chunk */
1692 add $16, %rdi
1693 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4) /* %r8 = 0..15 bytes left -> L(Fill<%r8>); the macro clobbers %r11 and %rcx */
1694
1695L(StrncpyFillLess32): /* Fewer than 32 bytes left; on entry %r8 = (bytes left) - 32. */
1696 add $16, %r8 /* %r8 = (bytes left) - 16 */
1697 jl L(StrncpyFillExit) /* fewer than 16 left */
1698 movdqa %xmm0, (%rdi) /* one full 16-byte chunk */
1699 add $16, %rdi
1700 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4) /* %r8 = 0..15 bytes left -> L(Fill<%r8>) */
1701
1702L(StrncpyFillExit): /* On entry %r8 = (bytes left) - 16; undo the bias and dispatch. */
1703 add $16, %r8 /* %r8 = bytes left, used directly as the L(FillTable) index */
1704 BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4) /* tail jump to L(Fill<%r8>), which returns */
1705
1706/* end of ifndef USE_AS_STRCAT */
1707# endif
1708
1709 .p2align 4
1710L(UnalignedLeaveCase2OrCase3): /* Leave path that works on the four 16-byte chunks in %xmm4..%xmm7: choose Case2 when the mask in %rdx is non-zero, Case3 otherwise. */
1711 test %rdx, %rdx
1712 jnz L(Unaligned64LeaveCase2) /* mask non-zero (presumably a NUL was detected somewhere in the 64 bytes - confirm at the jump site) */
1713L(Unaligned64LeaveCase3): /* Mask is zero: store whole chunks to dst for as long as the count in %r8 allows. */
1714 lea 64(%r8), %rcx
1715 and $-16, %rcx /* %rcx = (%r8 + 64) rounded down to a multiple of 16; handed to L(CopyFrom1To16BytesCase3), presumably as the offset of the chunk where the count runs out - confirm */
1716 add $48, %r8
1717 jl L(CopyFrom1To16BytesCase3) /* signed test: %r8 + 48 < 0, taken before anything is stored */
1718 movdqu %xmm4, (%rdi) /* chunk 0 -> dst[0..15] */
1719 sub $16, %r8
1720 jb L(CopyFrom1To16BytesCase3) /* borrow: the count ends inside the next chunk */
1721 movdqu %xmm5, 16(%rdi) /* chunk 1 -> dst[16..31] */
1722 sub $16, %r8
1723 jb L(CopyFrom1To16BytesCase3)
1724 movdqu %xmm6, 32(%rdi) /* chunk 2 -> dst[32..47] */
1725 sub $16, %r8
1726 jb L(CopyFrom1To16BytesCase3)
1727 movdqu %xmm7, 48(%rdi) /* chunk 3 -> dst[48..63]: all 64 bytes stored */
1728# ifdef USE_AS_STPCPY
1729 lea 64(%rdi), %rax /* return value = dst + 64 */
1730# endif
1731# ifdef USE_AS_STRCAT
1732 xor %ch, %ch /* %ch = 0, used only as the source of a zero byte */
1733 movb %ch, 64(%rdi) /* zero byte right after the 64 copied bytes */
1734# endif
1735 ret
1736
1737 .p2align 4
1738L(Unaligned64LeaveCase2): /* Reached when the mask in %rdx was non-zero: walk %xmm4..%xmm7 one chunk at a time, checking each chunk's match mask and the remaining count in %r8. */
1739 xor %rcx, %rcx /* %rcx = byte offset of the chunk under test (0, then 16, then 32) */
1740 pcmpeqb %xmm4, %xmm0 /* byte-wise equality against %xmm0 (assumed all-zero on entry, making this a NUL test - confirm at the jump site) */
1741 pmovmskb %xmm0, %rdx /* %rdx = one bit per matching byte of chunk 0 */
1742 add $48, %r8
1743 jle L(CopyFrom1To16BytesCase2OrCase3) /* signed test: %r8 + 48 <= 0 */
1744 test %rdx, %rdx
1745# ifndef USE_AS_STRCAT
1746 jnz L(CopyFrom1To16BytesUnalignedXmm4) /* match inside chunk 0 */
1747# else
1748 jnz L(CopyFrom1To16Bytes)
1749# endif
1750 pcmpeqb %xmm5, %xmm0 /* the previous mask was 0, so %xmm0 is all-zero again for this compare */
1751 pmovmskb %xmm0, %rdx /* mask for chunk 1 */
1752 movdqu %xmm4, (%rdi) /* chunk 0 had no match and fits the count: store it */
1753 add $16, %rcx
1754 sub $16, %r8
1755 jbe L(CopyFrom1To16BytesCase2OrCase3) /* remaining count reached zero or borrowed */
1756 test %rdx, %rdx
1757# ifndef USE_AS_STRCAT
1758 jnz L(CopyFrom1To16BytesUnalignedXmm5) /* match inside chunk 1 */
1759# else
1760 jnz L(CopyFrom1To16Bytes)
1761# endif
1762
1763 pcmpeqb %xmm6, %xmm0 /* %xmm0 is all-zero again for the same reason as above */
1764 pmovmskb %xmm0, %rdx /* mask for chunk 2 */
1765 movdqu %xmm5, 16(%rdi) /* chunk 1 had no match: store it */
1766 add $16, %rcx
1767 sub $16, %r8
1768 jbe L(CopyFrom1To16BytesCase2OrCase3)
1769 test %rdx, %rdx
1770# ifndef USE_AS_STRCAT
1771 jnz L(CopyFrom1To16BytesUnalignedXmm6) /* match inside chunk 2 */
1772# else
1773 jnz L(CopyFrom1To16Bytes)
1774# endif
1775
1776 pcmpeqb %xmm7, %xmm0
1777 pmovmskb %xmm0, %rdx /* mask for chunk 3 */
1778 movdqu %xmm6, 32(%rdi) /* chunk 2 had no match: store it */
1779 lea 16(%rdi, %rcx), %rdi /* %rcx is 32 here, so dst += 48: start of chunk 3 */
1780 lea 16(%rsi, %rcx), %rsi /* src += 48 likewise */
1781 bsf %rdx, %rdx /* index of the first match in chunk 3; NOTE(review): the result is undefined if the mask is zero - presumably the non-zero entry mask guarantees a match here, confirm */
1782 cmp %r8, %rdx
1783 jb L(CopyFrom1To16BytesExit) /* unsigned: match index < remaining count */
1784 BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4) /* otherwise the count ends first: tail jump to L(StrncpyExit<%r8>) to copy exactly %r8 bytes */
1785
1786 .p2align 4
1787L(ExitZero): /* Early exit used by the zero-length check at the function entry (test %R8_LP / jz near the top of the file). */
1788# ifndef USE_AS_STRCAT
1789 mov %rdi, %rax /* return dst; the entry-point jz reaches here before %rax has been loaded */
1790# endif
1791 ret
1792
1793# endif
1794
1795# ifndef USE_AS_STRCAT
1796END (STRCPY)
1797# else
1798END (STRCAT)
1799# endif
1800 .p2align 4 /* NOTE(review): emitted before the .section switch below, so it pads the section that is current at this point rather than aligning the table - confirm this is intended */
1801 .section .rodata /* the jump tables are placed in read-only data */
1802L(ExitTable): /* 32 entries of 32-bit offsets; JMPTBL(I, B) expands to I - B, so each entry is a label's distance from the table base, and BRANCH_TO_JMPTBL_ENTRY sign-extends it and adds the base back. */
1803 .int JMPTBL(L(Exit1), L(ExitTable)) /* index 0 -> L(Exit1); in general index i -> L(Exit<i+1>) */
1804 .int JMPTBL(L(Exit2), L(ExitTable))
1805 .int JMPTBL(L(Exit3), L(ExitTable))
1806 .int JMPTBL(L(Exit4), L(ExitTable))
1807 .int JMPTBL(L(Exit5), L(ExitTable))
1808 .int JMPTBL(L(Exit6), L(ExitTable))
1809 .int JMPTBL(L(Exit7), L(ExitTable))
1810 .int JMPTBL(L(Exit8), L(ExitTable))
1811 .int JMPTBL(L(Exit9), L(ExitTable))
1812 .int JMPTBL(L(Exit10), L(ExitTable))
1813 .int JMPTBL(L(Exit11), L(ExitTable))
1814 .int JMPTBL(L(Exit12), L(ExitTable))
1815 .int JMPTBL(L(Exit13), L(ExitTable))
1816 .int JMPTBL(L(Exit14), L(ExitTable))
1817 .int JMPTBL(L(Exit15), L(ExitTable))
1818 .int JMPTBL(L(Exit16), L(ExitTable))
1819 .int JMPTBL(L(Exit17), L(ExitTable))
1820 .int JMPTBL(L(Exit18), L(ExitTable))
1821 .int JMPTBL(L(Exit19), L(ExitTable))
1822 .int JMPTBL(L(Exit20), L(ExitTable))
1823 .int JMPTBL(L(Exit21), L(ExitTable))
1824 .int JMPTBL(L(Exit22), L(ExitTable))
1825 .int JMPTBL(L(Exit23), L(ExitTable))
1826 .int JMPTBL(L(Exit24), L(ExitTable))
1827 .int JMPTBL(L(Exit25), L(ExitTable))
1828 .int JMPTBL(L(Exit26), L(ExitTable))
1829 .int JMPTBL(L(Exit27), L(ExitTable))
1830 .int JMPTBL(L(Exit28), L(ExitTable))
1831 .int JMPTBL(L(Exit29), L(ExitTable))
1832 .int JMPTBL(L(Exit30), L(ExitTable))
1833 .int JMPTBL(L(Exit31), L(ExitTable))
1834 .int JMPTBL(L(Exit32), L(ExitTable)) /* index 31 -> L(Exit32), last entry */
1835# ifdef USE_AS_STRNCPY
1836L(ExitStrncpyTable): /* 34 entries of 32-bit table-relative offsets (JMPTBL(I, B) = I - B); index i selects L(StrncpyExit<i>), the stub that copies exactly i bytes. */
1837 .int JMPTBL(L(StrncpyExit0), L(ExitStrncpyTable)) /* index 0 */
1838 .int JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
1839 .int JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
1840 .int JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
1841 .int JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
1842 .int JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
1843 .int JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
1844 .int JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
1845 .int JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
1846 .int JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
1847 .int JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
1848 .int JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
1849 .int JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
1850 .int JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
1851 .int JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
1852 .int JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
1853 .int JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
1854 .int JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
1855 .int JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
1856 .int JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
1857 .int JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
1858 .int JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
1859 .int JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
1860 .int JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
1861 .int JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
1862 .int JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
1863 .int JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
1864 .int JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
1865 .int JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
1866 .int JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
1867 .int JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
1868 .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
1869 .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
1870 .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable)) /* index 33, last entry */
1871# ifndef USE_AS_STRCAT
1872 .p2align 4 /* align the table start to 16 bytes */
1873L(FillTable): /* 17 entries of 32-bit table-relative offsets (JMPTBL(I, B) = I - B); index i selects L(Fill<i>), the stub that writes the final i zero bytes. Indexed by %r8 in L(StrncpyFillTailWithZero). */
1874 .int JMPTBL(L(Fill0), L(FillTable)) /* index 0: nothing to write */
1875 .int JMPTBL(L(Fill1), L(FillTable))
1876 .int JMPTBL(L(Fill2), L(FillTable))
1877 .int JMPTBL(L(Fill3), L(FillTable))
1878 .int JMPTBL(L(Fill4), L(FillTable))
1879 .int JMPTBL(L(Fill5), L(FillTable))
1880 .int JMPTBL(L(Fill6), L(FillTable))
1881 .int JMPTBL(L(Fill7), L(FillTable))
1882 .int JMPTBL(L(Fill8), L(FillTable))
1883 .int JMPTBL(L(Fill9), L(FillTable))
1884 .int JMPTBL(L(Fill10), L(FillTable))
1885 .int JMPTBL(L(Fill11), L(FillTable))
1886 .int JMPTBL(L(Fill12), L(FillTable))
1887 .int JMPTBL(L(Fill13), L(FillTable))
1888 .int JMPTBL(L(Fill14), L(FillTable))
1889 .int JMPTBL(L(Fill15), L(FillTable))
1890 .int JMPTBL(L(Fill16), L(FillTable)) /* index 16, last entry */
1891# endif
1892# endif
1893#endif
1894

source code of glibc/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S