/* strcat(dest, src) -- Append SRC on the end of DEST.
   For Intel 80x86, x>=4.
   Copyright (C) 1994-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"

/* Stack offsets of the arguments, valid while exactly one register
   (4 bytes) is pushed on top of the 4-byte return address.  */
#define PARMS	4+4	/* space for 1 saved reg */
#define RTN	PARMS
#define DEST	RTN
#define SRC	DEST+4

	.text
/* char *strcat (char *dest, const char *src)

   Appends the NUL-terminated string SRC (including its terminator) to
   the end of the NUL-terminated string DEST and returns DEST.

   Arguments are read from the stack at DEST(%esp) and SRC(%esp); the
   result is returned in %eax.  %edi is saved on entry and restored on
   exit; %eax, %ecx, %edx and the flags are overwritten.

   Register roles:
     %edx  phase 1: cursor walking DEST looking for its terminating NUL.
	   phase 2: (address of DEST's NUL) - (current SRC cursor), so
		    that (%ecx,%edx) is the destination byte that
		    corresponds to the source byte at (%ecx).
     %ecx  SRC cursor.
     %eax  word (or byte, in %al/%ah) currently examined or copied.
     %edi  scratch for the NUL-detection arithmetic.

   NUL detection on a 4-byte word W:
     T = W + 0xfefefeff
   Adding 0xff to the low byte, and 0xfe plus the incoming carry to each
   higher byte, produces a carry out of a byte exactly when that byte is
   non-zero.  The carry out of the top byte is CF; the other three
   carries show up in bits 8, 16 and 24 of (T ^ W).  OR-ing with
   0xfefefeff sets every other bit, so adding 1 yields 0 only when all
   three carries are present.  Hence: CF clear, or a non-zero result
   after the increment, means W contains a NUL byte.  */
ENTRY (strcat)

	pushl	%edi			/* Save callee-saved scratch register.  */
	cfi_adjust_cfa_offset (4)

	movl	DEST(%esp), %edx	/* %edx = dest */
	movl	SRC(%esp), %ecx		/* %ecx = src */

	testb	$0xff, (%ecx)		/* Is source string empty?  */
	jz	L(8)			/* yes => nothing to append, return */

	/* Phase 1: find the end of DEST.
	   Test up to three leading bytes one at a time until the
	   destination cursor is 4-byte aligned.  */
	testl	$3, %edx		/* destination cursor aligned?  */
	jz	L(1)			/* yes => begin scan loop */
	testb	$0xff, (%edx)		/* is this the end of DEST?  */
	jz	L(2)			/* yes => start appending */
	incl	%edx			/* advance destination cursor */

	testl	$3, %edx		/* destination cursor aligned?  */
	jz	L(1)			/* yes => begin scan loop */
	testb	$0xff, (%edx)		/* is this the end of DEST?  */
	jz	L(2)			/* yes => start appending */
	incl	%edx			/* advance destination cursor */

	testl	$3, %edx		/* destination cursor aligned?  */
	jz	L(1)			/* yes => begin scan loop */
	testb	$0xff, (%edx)		/* is this the end of DEST?  */
	jz	L(2)			/* yes => start appending */
	incl	%edx			/* advance destination cursor */

	/* Three bytes consumed from an unaligned start: the cursor is now
	   aligned.  Begin scan loop.  */
	jmp	L(1)

	cfi_rel_offset (edi, 0)		/* unwind info: %edi saved at 0(%esp) */
	ALIGN(4)

	/* Scan loop, unrolled four times: examines the words at offsets
	   0, 4, 8 and 12 from %edx per iteration.  */
L(4):	addl	$16, %edx		/* no NUL in the last 16 bytes: next round */

L(1):	movl	(%edx), %eax		/* get word (= 4 bytes) of DEST */
	movl	$0xfefefeff, %edi	/* magic value */

	/* If you compare this with the algorithm in memchr.S you will
	   notice that here is an `xorl' statement missing.  But you must
	   not forget that we are looking for C == 0 and `xorl $0, %eax'
	   is a no-op.  */

	addl	%eax, %edi		/* add the magic value to the word.  We
					   get carry bits reported for each byte
					   which is *not* 0 */

	/* According to the algorithm we had to reverse the effect of the
	   XOR first and then test the overflow bits.  But because the
	   following XOR would destroy the carry flag and it would (in a
	   representation with more than 32 bits) not alter the last
	   overflow, we can test this condition now.  If no carry is
	   signaled the word contains a NUL byte.  */
	jnc	L(3)

	/* We are only interested in carry bits that change due to the
	   previous add, so remove original bits.  */
	xorl	%eax, %edi		/* (word+magic)^word */

	/* Now test for the other three overflow bits.  */
	orl	$0xfefefeff, %edi	/* set all non-carry bits */
	incl	%edi			/* add 1: if one carry bit was *not* set
					   the addition will not result in 0.  */

	/* If at least one byte of the word is NUL we don't get 0 in %edi.  */
	jnz	L(3)

	movl	4(%edx), %eax		/* get second word of DEST */
	movl	$0xfefefeff, %edi	/* magic value */
	addl	%eax, %edi		/* carry out of each non-zero byte */
	jnc	L(5)			/* word contains NUL => stop scanning */
	xorl	%eax, %edi		/* (word+magic)^word */
	orl	$0xfefefeff, %edi	/* set all non-carry bits */
	incl	%edi			/* 0 only if all three carries were set */
	jnz	L(5)			/* one byte is NUL => stop scanning */

	movl	8(%edx), %eax		/* get third word of DEST */
	movl	$0xfefefeff, %edi	/* magic value */
	addl	%eax, %edi		/* carry out of each non-zero byte */
	jnc	L(6)			/* word contains NUL => stop scanning */
	xorl	%eax, %edi		/* (word+magic)^word */
	orl	$0xfefefeff, %edi	/* set all non-carry bits */
	incl	%edi			/* 0 only if all three carries were set */
	jnz	L(6)			/* one byte is NUL => stop scanning */

	movl	12(%edx), %eax		/* get fourth word of DEST */
	movl	$0xfefefeff, %edi	/* magic value */
	addl	%eax, %edi		/* carry out of each non-zero byte */
	jnc	L(7)			/* word contains NUL => stop scanning */
	xorl	%eax, %edi		/* (word+magic)^word */
	orl	$0xfefefeff, %edi	/* set all non-carry bits */
	incl	%edi			/* 0 only if all three carries were set */
	jz	L(4)			/* no byte is NUL => keep scanning */

	/* Fall-through chain: advance the destination cursor to the word
	   in which the NUL was found (+12, +8 or +4).  */
L(7):	addl	$4, %edx
L(6):	addl	$4, %edx
L(5):	addl	$4, %edx

	/* %eax holds the word containing the NUL and %edx points at its
	   first byte.  Locate the exact byte; if none of the first three
	   is NUL it must be the fourth.  */
L(3):	testb	%al, %al		/* is first byte NUL?  */
	jz	L(2)			/* yes => start appending */
	incl	%edx			/* advance destination cursor */

	testb	%ah, %ah		/* is second byte NUL?  */
	jz	L(2)			/* yes => start appending */
	incl	%edx			/* advance destination cursor */

	testl	$0xff0000, %eax		/* is third byte NUL?  */
	jz	L(2)			/* yes => start appending */
	incl	%edx			/* NUL is the fourth byte */

	/* Phase 2: copy SRC to the end of DEST.
	   Turn %edx into the distance from the source cursor to the
	   destination cursor so only %ecx has to be advanced.  */
L(2):	subl	%ecx, %edx		/* reduce number of loop variants */

	/* Copy up to three bytes one at a time until the source cursor
	   is 4-byte aligned.  */
	testl	$3, %ecx		/* source cursor aligned?  */
	jz	L(29)			/* yes => start copy loop */
	movb	(%ecx), %al		/* get byte from SRC */
	movb	%al, (%ecx,%edx)	/* and store it in DEST */
	andb	%al, %al		/* was it the terminating NUL?  */
	jz	L(8)			/* yes => return */
	incl	%ecx			/* advance source cursor */

	testl	$3, %ecx		/* source cursor aligned?  */
	jz	L(29)			/* yes => start copy loop */
	movb	(%ecx), %al		/* get byte from SRC */
	movb	%al, (%ecx,%edx)	/* and store it in DEST */
	andb	%al, %al		/* was it the terminating NUL?  */
	jz	L(8)			/* yes => return */
	incl	%ecx			/* advance source cursor */

	testl	$3, %ecx		/* source cursor aligned?  */
	jz	L(29)			/* yes => start copy loop */
	movb	(%ecx), %al		/* get byte from SRC */
	movb	%al, (%ecx,%edx)	/* and store it in DEST */
	andb	%al, %al		/* was it the terminating NUL?  */
	jz	L(8)			/* yes => return */
	incl	%ecx			/* advance source cursor */

	/* Now the source cursor is aligned.  */
	jmp	L(29)			/* start copy loop */

	ALIGN(4)

	/* Copy loop, unrolled four times.  Each word is checked for a NUL
	   before it is stored; a word containing NUL is handed to the
	   byte-wise tail at L(9) instead.  */
L(28):	movl	%eax, 12(%ecx,%edx)	/* store fourth word of previous round */
	addl	$16, %ecx		/* advance source cursor a full round */

L(29):	movl	(%ecx), %eax		/* get word from source */
	movl	$0xfefefeff, %edi	/* magic value */
	addl	%eax, %edi		/* carry out of each non-zero byte */
	jnc	L(9)			/* word contains NUL => stop copying */
	xorl	%eax, %edi		/* (word+magic)^word */
	orl	$0xfefefeff, %edi	/* set all non-carry bits */
	incl	%edi			/* 0 only if all three carries were set */
	jnz	L(9)			/* one byte is NUL => stop copying */
	movl	%eax, (%ecx,%edx)	/* store word to destination */

	movl	4(%ecx), %eax		/* get word from source */
	movl	$0xfefefeff, %edi	/* magic value */
	addl	%eax, %edi		/* carry out of each non-zero byte */
	jnc	L(91)			/* word contains NUL => stop copying */
	xorl	%eax, %edi		/* (word+magic)^word */
	orl	$0xfefefeff, %edi	/* set all non-carry bits */
	incl	%edi			/* 0 only if all three carries were set */
	jnz	L(91)			/* one byte is NUL => stop copying */
	movl	%eax, 4(%ecx,%edx)	/* store word to destination */

	movl	8(%ecx), %eax		/* get word from source */
	movl	$0xfefefeff, %edi	/* magic value */
	addl	%eax, %edi		/* carry out of each non-zero byte */
	jnc	L(92)			/* word contains NUL => stop copying */
	xorl	%eax, %edi		/* (word+magic)^word */
	orl	$0xfefefeff, %edi	/* set all non-carry bits */
	incl	%edi			/* 0 only if all three carries were set */
	jnz	L(92)			/* one byte is NUL => stop copying */
	movl	%eax, 8(%ecx,%edx)	/* store word to destination */

	movl	12(%ecx), %eax		/* get word from source */
	movl	$0xfefefeff, %edi	/* magic value */
	addl	%eax, %edi		/* carry out of each non-zero byte */
	jnc	L(93)			/* word contains NUL => stop copying */
	xorl	%eax, %edi		/* (word+magic)^word */
	orl	$0xfefefeff, %edi	/* set all non-carry bits */
	incl	%edi			/* 0 only if all three carries were set */
	jz	L(28)			/* no byte is NUL => carry on copying */

	/* Fall-through chain: advance the source cursor to the word that
	   contains the NUL (+12, +8 or +4).  */
L(93):	addl	$4, %ecx
L(92):	addl	$4, %ecx
L(91):	addl	$4, %ecx

	/* Tail: %eax holds the last source word, which contains the NUL.
	   Copy it byte by byte up to and including the NUL.  */
L(9):	movb	%al, (%ecx,%edx)	/* store first byte of last word */
	orb	%al, %al		/* is it NUL?  */
	jz	L(8)			/* yes => return */

	movb	%ah, 1(%ecx,%edx)	/* store second byte of last word */
	orb	%ah, %ah		/* is it NUL?  */
	jz	L(8)			/* yes => return */

	shrl	$16, %eax		/* make upper bytes accessible */
	movb	%al, 2(%ecx,%edx)	/* store third byte of last word */
	orb	%al, %al		/* is it NUL?  */
	jz	L(8)			/* yes => return */

	movb	%ah, 3(%ecx,%edx)	/* fourth byte must be the NUL: store it */

L(8):	movl	DEST(%esp), %eax	/* start address of destination is result */
	popl	%edi			/* restore saved register */
	cfi_adjust_cfa_offset (-4)
	cfi_restore (edi)

	ret
END (strcat)
libc_hidden_builtin_def (strcat)

/* Source: glibc/sysdeps/i386/strcat.S */