/* strlen used for beginning of str{n}cat using EVEX 256/512.
   Copyright (C) 2011-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */


/* NOTE: This file is meant to be included by strcat-evex or
   strncat-evex and does not stand alone.  Before including it, %rdi
   must be saved in %rax.  */


/* Simple strlen implementation that ends at
   L(strcat_strlen_done).

   This is an include fragment, not a function: it is entered by
   falling in from the including file and left by falling out at
   L(strcat_strlen_done).  VEC_SIZE, CHAR_SIZE, VZERO, VZERO_128,
   VMM, VRCX, VRDI, VPCMPEQ, VPMIN, VPTESTN, VMOVA, KMOV, KORTEST and
   L() must all be defined by the includer.

   In:	%rdi = start of the string to measure.
	%rax = copy of %rdi.  Only read here (to restore %rdi in the
	       USE_AS_WCSCPY build); never written.
   Out:	%rdi = address of the first null CHAR of the string.
   Clobbers: %rcx, %r8, %k0, %k1, %k3, VMM(0)-VMM(3), VZERO, flags.  */

	/* Zero the comparison operand used by every VPCMPEQ below.
	   NOTE(review): assumes VZERO_128 is the 128-bit alias of VZERO
	   and that writing it clears the full register -- confirm in
	   the includer.  */
	vpxorq	%VZERO_128, %VZERO_128, %VZERO_128

	/* %r8 = %rdi rounded down to a VEC_SIZE boundary.  The first
	   compare reads the whole aligned vector, so it may also see
	   CHARs that precede the start of the string.  */
	movq	%rdi, %r8
	andq	$(VEC_SIZE * -1), %r8
	VPCMPEQ	(%r8), %VZERO, %k0
	KMOV	%k0, %VRCX
#ifdef USE_AS_WCSCPY
	/* Turn %edi into the CHAR index of the string start inside the
	   aligned vector: byte offset from %r8, divided by 4.  This
	   destroys the pointer in %rdi; it is reloaded from %rax
	   below.  */
	subl	%r8d, %edi
	shrl	$2, %edi
#endif
	/* Shift out the mask bits that belong to CHARs before the start
	   of the string.  shrx only honours the low bits of its count,
	   so in the byte build the pointer itself can serve as the
	   count.  NOTE(review): that relies on VRDI/VRCX being VEC_SIZE
	   bits wide (one mask bit per byte) -- confirm in the
	   includer.  */
	shrx	%VRDI, %VRCX, %VRCX
#ifdef USE_AS_WCSCPY
	/* Restore the string pointer clobbered above.  */
	movq	%rax, %rdi
#endif
	/* After the shift, bit N of the mask corresponds to CHAR N
	   counted from %rdi, so the v0 tail can index from %rdi
	   directly.  */
	test	%VRCX, %VRCX
	jnz	L(bsf_and_done_v0)


	/* No terminator in the first (partial) vector.  Check the next
	   four aligned vectors one at a time.  From here on %rdi is
	   %r8 + VEC_SIZE, and the tail label chosen for each check
	   supplies the remaining multiple of VEC_SIZE.  */
	VPCMPEQ	VEC_SIZE(%r8), %VZERO, %k0
	KMOV	%k0, %VRCX
	leaq	(VEC_SIZE)(%r8), %rdi
	test	%VRCX, %VRCX
	jnz	L(bsf_and_done_v0)

	VPCMPEQ	(VEC_SIZE * 2)(%r8), %VZERO, %k0
	KMOV	%k0, %VRCX
	test	%VRCX, %VRCX
	jnz	L(bsf_and_done_v1)

	VPCMPEQ	(VEC_SIZE * 3)(%r8), %VZERO, %k0
	KMOV	%k0, %VRCX
	test	%VRCX, %VRCX
	jnz	L(bsf_and_done_v2)

	VPCMPEQ	(VEC_SIZE * 4)(%r8), %VZERO, %k0
	KMOV	%k0, %VRCX
	test	%VRCX, %VRCX
	jnz	L(bsf_and_done_v3)

	/* Round %rdi (= %r8 + VEC_SIZE) down to a 4 * VEC_SIZE boundary
	   for the main loop.  The loop's first load is at
	   %rdi + 4 * VEC_SIZE, which lies between %r8 + 2 * VEC_SIZE
	   and %r8 + 5 * VEC_SIZE: it may re-examine vectors already
	   checked above but never skips one.  */
	andq	$-(VEC_SIZE * 4), %rdi
	.p2align 4,, 8
L(loop_2x_vec):
	/* Examine four vectors per iteration.  VMM(1) and VMM(3) each
	   fold a pair of vectors together with VPMIN, so one VPTESTN
	   per pair is enough to detect a zero CHAR in either member.
	   NOTE(review): assumes VPMIN is an element-wise unsigned
	   minimum and VMOVA an aligned load -- confirm in the
	   includer.  */
	VMOVA	(VEC_SIZE * 4)(%rdi), %VMM(0)
	VPMIN	(VEC_SIZE * 5)(%rdi), %VMM(0), %VMM(1)
	VMOVA	(VEC_SIZE * 6)(%rdi), %VMM(2)
	VPMIN	(VEC_SIZE * 7)(%rdi), %VMM(2), %VMM(3)
	VPTESTN	%VMM(1), %VMM(1), %k1
	VPTESTN	%VMM(3), %VMM(3), %k3
	/* Advance by 4 * VEC_SIZE, written as subtracting the negated
	   value.  NOTE(review): presumably so the immediate fits in a
	   signed byte when VEC_SIZE is 32 -- confirm.  After this the
	   four vectors just examined sit at offsets 0 .. 3 * VEC_SIZE
	   from %rdi.  */
	subq	$(VEC_SIZE * -4), %rdi
	/* Loop while neither pair contained a zero CHAR.  */
	KORTEST	%k1, %k3
	jz	L(loop_2x_vec)

	/* A terminator is in one of the four vectors; find the first
	   vector that holds it.  Vector 0 is tested on its own.  */
	VPTESTN	%VMM(0), %VMM(0), %k0
	KMOV	%k0, %VRCX
	test	%VRCX, %VRCX
	jnz	L(bsf_and_done_v0)

	/* %k1 covers min (vector 0, vector 1).  Vector 0 is known to be
	   free of zeros here, so any set bit marks a zero in vector
	   1.  */
	KMOV	%k1, %VRCX
	test	%VRCX, %VRCX
	jnz	L(bsf_and_done_v1)

	VPTESTN	%VMM(2), %VMM(2), %k0
	KMOV	%k0, %VRCX
	test	%VRCX, %VRCX
	jnz	L(bsf_and_done_v2)

	/* Only vector 3 is left.  %k3 covers min (vector 2, vector 3)
	   and vector 2 has just been ruled out.  */
	KMOV	%k3, %VRCX
	/* Tail for a match 3 (entry v3) or 2 (entry v2) vectors past
	   %rdi.  v3 adds one VEC_SIZE and falls into v2, which supplies
	   the other two in the lea displacement.  */
L(bsf_and_done_v3):
	addq	$VEC_SIZE, %rdi
L(bsf_and_done_v2):
	/* %rcx = index of the first zero CHAR within the vector.  */
	bsf	%VRCX, %VRCX
	leaq	(VEC_SIZE * 2)(%rdi, %rcx, CHAR_SIZE), %rdi
	jmp	L(strcat_strlen_done)

	/* Tail for a match 1 (entry v1) or 0 (entry v0) vectors past
	   %rdi.  */
	.p2align 4,, 4
L(bsf_and_done_v1):
	addq	$VEC_SIZE, %rdi
L(bsf_and_done_v0):
	/* %rcx = index of the first zero CHAR within the vector.  */
	bsf	%VRCX, %VRCX
#ifdef USE_AS_WCSCPY
	leaq	(%rdi, %rcx, CHAR_SIZE), %rdi
#else
	/* NOTE(review): the unscaled add presumes CHAR_SIZE is 1 in the
	   non-wide build -- confirm in the includer.  */
	addq	%rcx, %rdi
#endif
	/* %rdi now addresses the string's null terminator.  */
L(strcat_strlen_done):

/* Source: glibc/sysdeps/x86_64/multiarch/strcat-strlen-evex.h.S  */