1 | /* strcat(dest, src) -- Append SRC on the end of DEST. |
2 | For Intel 80x86, x>=4. |
3 | Copyright (C) 1994-2024 Free Software Foundation, Inc. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #include <sysdep.h> |
21 | #include "asm-syntax.h" |
22 | |
23 | #define PARMS 4+4 /* bytes between %esp and the first argument once %edi has been pushed: 4 for the return address + 4 for the 1 saved reg */ |
24 | #define RTN PARMS /* alias of PARMS, used here only to define DEST */ |
25 | #define DEST RTN /* %esp-relative offset of the DEST argument */ |
26 | #define SRC DEST+4 /* %esp-relative offset of the SRC argument, one 4-byte slot above DEST */ |
27 | |
28 | .text |
29 | ENTRY (strcat) |
30 | /* strcat (dest, src): locate the NUL that ends DEST, copy SRC (including its NUL) there, and return DEST in %eax. Both arguments are read from the stack. */ |
31 | pushl %edi /* Save callee-safe register. It is the scratch register of the zero-byte test below. */ |
32 | cfi_adjust_cfa_offset (4) |
33 | /* Register roles: %edx = destination cursor, %ecx = source cursor, %eax = word or byte under test, %edi = scratch. */ |
34 | movl DEST(%esp), %edx /* %edx = dest */ |
35 | movl SRC(%esp), %ecx /* %ecx = src */ |
36 | |
37 | testb $0xff, (%ecx) /* Is source string empty? */ |
38 | jz L(8) /* yes => nothing to append, return */ |
39 | |
40 | /* Test the first bytes separately until destination is aligned. */ |
41 | testl $3, %edx /* destination pointer aligned? */ |
42 | jz L(1) /* yes => begin scan loop */ |
43 | testb $0xff, (%edx) /* is end of string? */ |
44 | jz L(2) /* yes => start appending */ |
45 | incl %edx /* advance destination pointer */ |
46 | |
47 | testl $3, %edx /* destination pointer aligned? */ |
48 | jz L(1) /* yes => begin scan loop */ |
49 | testb $0xff, (%edx) /* is end of string? */ |
50 | jz L(2) /* yes => start appending */ |
51 | incl %edx /* advance destination pointer */ |
52 | |
53 | testl $3, %edx /* destination pointer aligned? */ |
54 | jz L(1) /* yes => begin scan loop */ |
55 | testb $0xff, (%edx) /* is end of string? */ |
56 | jz L(2) /* yes => start appending */ |
57 | incl %edx /* advance destination pointer */ |
58 | |
59 | /* Now we are aligned (three single-byte steps at most are needed). Begin scan loop. */ |
60 | jmp L(1) |
61 | |
62 | cfi_rel_offset (edi, 0) /* NOTE(review): macro from the included headers, presumably records that the caller's %edi is saved at offset 0 from %esp. No instruction above this point writes %edi. */ |
63 | ALIGN(4) |
64 | /* Scan loop: check DEST for a NUL one aligned 4-byte word at a time, four words (16 bytes) per round. */ |
65 | L(4): addl $16,%edx /* increment destination pointer for round */ |
66 | |
67 | L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */ |
68 | movl $0xfefefeff, %edi /* magic value */ |
69 | |
70 | /* If you compare this with the algorithm in memchr.S you will |
71 | notice that here is an `xorl' statement missing. But you must |
72 | not forget that we are looking for C == 0 and `xorl $0, %eax' |
73 | is a no-op. */ |
74 | |
75 | addl %eax, %edi /* add the magic value to the word. We get |
76 | carry bits reported for each byte which |
77 | is *not* 0 */ |
78 | |
79 | /* According to the algorithm we had to reverse the effect of the |
80 | XOR first and then test the overflow bits. But because the |
81 | following XOR would destroy the carry flag and it would (in a |
82 | representation with more than 32 bits) not alter the last |
83 | overflow, we can now test this condition. If no carry is signaled |
84 | no overflow must have occurred in the last byte => it was 0. */ |
85 | jnc L(3) /* no carry out of the top byte => this word contains a NUL */ |
86 | |
87 | /* We are only interested in carry bits that change due to the |
88 | previous add, so remove original bits */ |
89 | xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
90 | |
91 | /* Now test for the other three overflow bits. */ |
92 | orl $0xfefefeff, %edi /* set all non-carry bits */ |
93 | incl %edi /* add 1: if one carry bit was *not* set |
94 | the addition will not result in 0. */ |
95 | |
96 | /* If at least one byte of the word is C (here NUL) we don't get 0 in %edi. */ |
97 | jnz L(3) |
98 | |
99 | movl 4(%edx), %eax /* get second word of the round from destination */ |
100 | movl $0xfefefeff, %edi /* magic value */ |
101 | addl %eax, %edi /* add the magic value to the word. We get |
102 | carry bits reported for each byte which |
103 | is *not* 0 */ |
104 | jnc L(5) /* no carry => NUL in word => stop scanning */ |
105 | xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
106 | orl $0xfefefeff, %edi /* set all non-carry bits */ |
107 | incl %edi /* add 1: if one carry bit was *not* set |
108 | the addition will not result in 0. */ |
109 | jnz L(5) /* one byte is NUL => stop scanning */ |
110 | |
111 | movl 8(%edx), %eax /* get third word of the round from destination */ |
112 | movl $0xfefefeff, %edi /* magic value */ |
113 | addl %eax, %edi /* add the magic value to the word. We get |
114 | carry bits reported for each byte which |
115 | is *not* 0 */ |
116 | jnc L(6) /* no carry => NUL in word => stop scanning */ |
117 | xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
118 | orl $0xfefefeff, %edi /* set all non-carry bits */ |
119 | incl %edi /* add 1: if one carry bit was *not* set |
120 | the addition will not result in 0. */ |
121 | jnz L(6) /* one byte is NUL => stop scanning */ |
122 | |
123 | movl 12(%edx), %eax /* get fourth word of the round from destination */ |
124 | movl $0xfefefeff, %edi /* magic value */ |
125 | addl %eax, %edi /* add the magic value to the word. We get |
126 | carry bits reported for each byte which |
127 | is *not* 0 */ |
128 | jnc L(7) /* no carry => NUL in word => stop scanning */ |
129 | xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
130 | orl $0xfefefeff, %edi /* set all non-carry bits */ |
131 | incl %edi /* add 1: if one carry bit was *not* set |
132 | the addition will not result in 0. */ |
133 | jz L(4) /* no byte is NUL => carry on scanning */ |
134 | |
135 | L(7): addl $4, %edx /* adjust destination pointer: the three labels fall through, adding 12, 8 or 4 */ |
136 | L(6): addl $4, %edx |
137 | L(5): addl $4, %edx /* %edx now addresses the word (still in %eax) that contains the NUL */ |
138 | |
139 | L(3): testb %al, %al /* is first byte NUL? */ |
140 | jz L(2) /* yes => start copying */ |
141 | incl %edx /* advance destination pointer */ |
142 | |
143 | testb %ah, %ah /* is second byte NUL? */ |
144 | jz L(2) /* yes => start copying */ |
145 | incl %edx /* advance destination pointer */ |
146 | |
147 | testl $0xff0000, %eax /* is third byte NUL? */ |
148 | jz L(2) /* yes => start copying */ |
149 | incl %edx /* advance destination pointer */ |
150 | /* Fall through: bytes 0-2 of the word are not NUL, so the NUL is byte 3 and %edx now addresses it. */ |
151 | L(2): subl %ecx, %edx /* reduce number of loop variants: %edx = end of DEST - source cursor, so (%ecx,%edx) is the destination byte for source byte (%ecx) and only %ecx has to advance */ |
152 | |
153 | /* Now we have to align the source pointer. */ |
154 | testl $3, %ecx /* pointer correctly aligned? */ |
155 | jz L(29) /* yes => start copy loop */ |
156 | movb (%ecx), %al /* get first byte */ |
157 | movb %al, (%ecx,%edx) /* and store it */ |
158 | andb %al, %al /* is byte NUL? */ |
159 | jz L(8) /* yes => return */ |
160 | incl %ecx /* increment pointer */ |
161 | |
162 | testl $3, %ecx /* pointer correctly aligned? */ |
163 | jz L(29) /* yes => start copy loop */ |
164 | movb (%ecx), %al /* get next byte */ |
165 | movb %al, (%ecx,%edx) /* and store it */ |
166 | andb %al, %al /* is byte NUL? */ |
167 | jz L(8) /* yes => return */ |
168 | incl %ecx /* increment pointer */ |
169 | |
170 | testl $3, %ecx /* pointer correctly aligned? */ |
171 | jz L(29) /* yes => start copy loop */ |
172 | movb (%ecx), %al /* get next byte */ |
173 | movb %al, (%ecx,%edx) /* and store it */ |
174 | andb %al, %al /* is byte NUL? */ |
175 | jz L(8) /* yes => return */ |
176 | incl %ecx /* increment pointer */ |
177 | |
178 | /* Now we are aligned. */ |
179 | jmp L(29) /* start copy loop */ |
180 | |
181 | ALIGN(4) |
182 | /* Copy loop: move SRC to the end of DEST one 4-byte word at a time, four words per round, until a source word contains a NUL. */ |
183 | L(28): movl %eax, 12(%ecx,%edx)/* store fourth word of the round at destination */ |
184 | addl $16, %ecx /* adjust pointer for full round */ |
185 | |
186 | L(29): movl (%ecx), %eax /* get word from source */ |
187 | movl $0xfefefeff, %edi /* magic value */ |
188 | addl %eax, %edi /* add the magic value to the word. We get |
189 | carry bits reported for each byte which |
190 | is *not* 0 */ |
191 | jnc L(9) /* no carry => NUL in word => stop copying */ |
192 | xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
193 | orl $0xfefefeff, %edi /* set all non-carry bits */ |
194 | incl %edi /* add 1: if one carry bit was *not* set |
195 | the addition will not result in 0. */ |
196 | jnz L(9) /* one byte is NUL => stop copying */ |
197 | movl %eax, (%ecx,%edx) /* store word to destination */ |
198 | |
199 | movl 4(%ecx), %eax /* get word from source */ |
200 | movl $0xfefefeff, %edi /* magic value */ |
201 | addl %eax, %edi /* add the magic value to the word. We get |
202 | carry bits reported for each byte which |
203 | is *not* 0 */ |
204 | jnc L(91) /* no carry => NUL in word => stop copying */ |
205 | xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
206 | orl $0xfefefeff, %edi /* set all non-carry bits */ |
207 | incl %edi /* add 1: if one carry bit was *not* set |
208 | the addition will not result in 0. */ |
209 | jnz L(91) /* one byte is NUL => stop copying */ |
210 | movl %eax, 4(%ecx,%edx) /* store word to destination */ |
211 | |
212 | movl 8(%ecx), %eax /* get word from source */ |
213 | movl $0xfefefeff, %edi /* magic value */ |
214 | addl %eax, %edi /* add the magic value to the word. We get |
215 | carry bits reported for each byte which |
216 | is *not* 0 */ |
217 | jnc L(92) /* no carry => NUL in word => stop copying */ |
218 | xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
219 | orl $0xfefefeff, %edi /* set all non-carry bits */ |
220 | incl %edi /* add 1: if one carry bit was *not* set |
221 | the addition will not result in 0. */ |
222 | jnz L(92) /* one byte is NUL => stop copying */ |
223 | movl %eax, 8(%ecx,%edx) /* store word to destination */ |
224 | |
225 | movl 12(%ecx), %eax /* get word from source */ |
226 | movl $0xfefefeff, %edi /* magic value */ |
227 | addl %eax, %edi /* add the magic value to the word. We get |
228 | carry bits reported for each byte which |
229 | is *not* 0 */ |
230 | jnc L(93) /* no carry => NUL in word => stop copying */ |
231 | xorl %eax, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
232 | orl $0xfefefeff, %edi /* set all non-carry bits */ |
233 | incl %edi /* add 1: if one carry bit was *not* set |
234 | the addition will not result in 0. */ |
235 | jz L(28) /* no byte is NUL => carry on copying */ |
236 | |
237 | L(93): addl $4, %ecx /* adjust pointer: the three labels fall through, adding 12, 8 or 4 */ |
238 | L(92): addl $4, %ecx |
239 | L(91): addl $4, %ecx /* %ecx now addresses the source word (still in %eax) that contains the NUL */ |
240 | /* Tail: the word in %eax contains the NUL. Store it byte by byte, up to and including the NUL. */ |
241 | L(9): movb %al, (%ecx,%edx) /* store first byte of last word */ |
242 | orb %al, %al /* is it NUL? */ |
243 | jz L(8) /* yes => return */ |
244 | |
245 | movb %ah, 1(%ecx,%edx) /* store second byte of last word */ |
246 | orb %ah, %ah /* is it NUL? */ |
247 | jz L(8) /* yes => return */ |
248 | |
249 | shrl $16, %eax /* make upper bytes accessible */ |
250 | movb %al, 2(%ecx,%edx) /* store third byte of last word */ |
251 | orb %al, %al /* is it NUL? */ |
252 | jz L(8) /* yes => return */ |
253 | |
254 | movb %ah, 3(%ecx,%edx) /* store fourth byte of last word (the NUL, since the first three bytes were not) */ |
255 | |
256 | L(8): movl DEST(%esp), %eax /* start address of destination is result */ |
257 | popl %edi /* restore saved register */ |
258 | cfi_adjust_cfa_offset (-4) |
259 | cfi_restore (edi) |
260 | |
261 | ret |
262 | END (strcat) |
263 | libc_hidden_builtin_def (strcat) |
264 | |