1 | /* rawmemchr (str, ch) -- Return pointer to first occurrence of CH in STR. |
2 | For Intel 80x86, x>=3. |
3 | Copyright (C) 1994-2024 Free Software Foundation, Inc. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #include <sysdep.h> |
21 | #include "asm-syntax.h" |
22 | |
23 | #define PARMS 4+4 /* space for 1 saved reg */ |
24 | #define RTN PARMS |
25 | #define STR RTN |
26 | #define CHR STR+4 |
27 | |
28 | .text |
29 | ENTRY (__rawmemchr) |
30 | |
31 | /* Save callee-safe register used in this function. */ |
32 | pushl %edi |
33 | cfi_adjust_cfa_offset (4) |
34 | cfi_rel_offset (edi, 0) |
35 | |
36 | /* Load parameters into registers. */ |
37 | movl STR(%esp), %eax |
38 | movl CHR(%esp), %edx |
39 | |
40 | /* At the moment %edx contains C. What we need for the |
41 | algorithm is C in all bytes of the dword. Avoid |
42 | operations on 16 bit words because these require an |
43 | prefix byte (and one more cycle). */ |
44 | movb %dl, %dh /* Now it is 0|0|c|c */ |
45 | movl %edx, %ecx |
46 | shll $16, %edx /* Now c|c|0|0 */ |
47 | movw %cx, %dx /* And finally c|c|c|c */ |
48 | |
49 | /* Better performance can be achieved if the word (32 |
50 | bit) memory access is aligned on a four-byte-boundary. |
51 | So process first bytes one by one until boundary is |
52 | reached. Don't use a loop for better performance. */ |
53 | |
54 | testb $3, %al /* correctly aligned ? */ |
55 | je L(1) /* yes => begin loop */ |
56 | cmpb %dl, (%eax) /* compare byte */ |
57 | je L(9) /* target found => return */ |
58 | incl %eax /* increment source pointer */ |
59 | |
60 | testb $3, %al /* correctly aligned ? */ |
61 | je L(1) /* yes => begin loop */ |
62 | cmpb %dl, (%eax) /* compare byte */ |
63 | je L(9) /* target found => return */ |
64 | incl %eax /* increment source pointer */ |
65 | |
66 | testb $3, %al /* correctly aligned ? */ |
67 | je L(1) /* yes => begin loop */ |
68 | cmpb %dl, (%eax) /* compare byte */ |
69 | je L(9) /* target found => return */ |
70 | incl %eax /* increment source pointer */ |
71 | |
72 | /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to |
73 | change any of the hole bits of LONGWORD. |
74 | |
75 | 1) Is this safe? Will it catch all the zero bytes? |
76 | Suppose there is a byte with all zeros. Any carry bits |
77 | propagating from its left will fall into the hole at its |
78 | least significant bit and stop. Since there will be no |
79 | carry from its most significant bit, the LSB of the |
80 | byte to the left will be unchanged, and the zero will be |
81 | detected. |
82 | |
83 | 2) Is this worthwhile? Will it ignore everything except |
84 | zero bytes? Suppose every byte of LONGWORD has a bit set |
85 | somewhere. There will be a carry into bit 8. If bit 8 |
86 | is set, this will carry into bit 16. If bit 8 is clear, |
87 | one of bits 9-15 must be set, so there will be a carry |
88 | into bit 16. Similarly, there will be a carry into bit |
89 | 24. If one of bits 24-31 is set, there will be a carry |
90 | into bit 32 (=carry flag), so all of the hole bits will |
91 | be changed. |
92 | |
93 | 3) But wait! Aren't we looking for C, not zero? |
94 | Good point. So what we do is XOR LONGWORD with a longword, |
95 | each of whose bytes is C. This turns each byte that is C |
96 | into a zero. */ |
97 | |
98 | |
99 | /* Each round the main loop processes 16 bytes. */ |
100 | ALIGN (4) |
101 | |
102 | L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */ |
103 | movl $0xfefefeff, %edi /* magic value */ |
104 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c |
105 | are now 0 */ |
106 | addl %ecx, %edi /* add the magic value to the word. We get |
107 | carry bits reported for each byte which |
108 | is *not* 0 */ |
109 | |
110 | /* According to the algorithm we had to reverse the effect of the |
111 | XOR first and then test the overflow bits. But because the |
112 | following XOR would destroy the carry flag and it would (in a |
113 | representation with more than 32 bits) not alter then last |
114 | overflow, we can now test this condition. If no carry is signaled |
115 | no overflow must have occurred in the last byte => it was 0. */ |
116 | jnc L(8) |
117 | |
118 | /* We are only interested in carry bits that change due to the |
119 | previous add, so remove original bits */ |
120 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
121 | |
122 | /* Now test for the other three overflow bits. */ |
123 | orl $0xfefefeff, %edi /* set all non-carry bits */ |
124 | incl %edi /* add 1: if one carry bit was *not* set |
125 | the addition will not result in 0. */ |
126 | |
127 | /* If at least one byte of the word is C we don't get 0 in %edi. */ |
128 | jnz L(8) /* found it => return pointer */ |
129 | |
130 | /* This process is unfolded four times for better performance. |
131 | we don't increment the source pointer each time. Instead we |
132 | use offsets and increment by 16 in each run of the loop. But |
133 | before probing for the matching byte we need some extra code |
134 | (following LL(13) below). Even the len can be compared with |
135 | constants instead of decrementing each time. */ |
136 | |
137 | movl 4(%eax), %ecx /* get word (= 4 bytes) in question */ |
138 | movl $0xfefefeff, %edi /* magic value */ |
139 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c |
140 | are now 0 */ |
141 | addl %ecx, %edi /* add the magic value to the word. We get |
142 | carry bits reported for each byte which |
143 | is *not* 0 */ |
144 | jnc L(7) /* highest byte is C => return pointer */ |
145 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
146 | orl $0xfefefeff, %edi /* set all non-carry bits */ |
147 | incl %edi /* add 1: if one carry bit was *not* set |
148 | the addition will not result in 0. */ |
149 | jnz L(7) /* found it => return pointer */ |
150 | |
151 | movl 8(%eax), %ecx /* get word (= 4 bytes) in question */ |
152 | movl $0xfefefeff, %edi /* magic value */ |
153 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c |
154 | are now 0 */ |
155 | addl %ecx, %edi /* add the magic value to the word. We get |
156 | carry bits reported for each byte which |
157 | is *not* 0 */ |
158 | jnc L(6) /* highest byte is C => return pointer */ |
159 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
160 | orl $0xfefefeff, %edi /* set all non-carry bits */ |
161 | incl %edi /* add 1: if one carry bit was *not* set |
162 | the addition will not result in 0. */ |
163 | jnz L(6) /* found it => return pointer */ |
164 | |
165 | movl 12(%eax), %ecx /* get word (= 4 bytes) in question */ |
166 | movl $0xfefefeff, %edi /* magic value */ |
167 | xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c |
168 | are now 0 */ |
169 | addl %ecx, %edi /* add the magic value to the word. We get |
170 | carry bits reported for each byte which |
171 | is *not* 0 */ |
172 | jnc L(5) /* highest byte is C => return pointer */ |
173 | xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ |
174 | orl $0xfefefeff, %edi /* set all non-carry bits */ |
175 | incl %edi /* add 1: if one carry bit was *not* set |
176 | the addition will not result in 0. */ |
177 | jnz L(5) /* found it => return pointer */ |
178 | |
179 | /* Adjust both counters for a full round, i.e. 16 bytes. */ |
180 | addl $16, %eax |
181 | jmp L(1) |
182 | /* add missing source pointer increments */ |
183 | L(5): addl $4, %eax |
184 | L(6): addl $4, %eax |
185 | L(7): addl $4, %eax |
186 | |
187 | /* Test for the matching byte in the word. %ecx contains a NUL |
188 | char in the byte which originally was the byte we are looking |
189 | at. */ |
190 | L(8): testb %cl, %cl /* test first byte in dword */ |
191 | jz L(9) /* if zero => return pointer */ |
192 | incl %eax /* increment source pointer */ |
193 | |
194 | testb %ch, %ch /* test second byte in dword */ |
195 | jz L(9) /* if zero => return pointer */ |
196 | incl %eax /* increment source pointer */ |
197 | |
198 | testl $0xff0000, %ecx /* test third byte in dword */ |
199 | jz L(9) /* if zero => return pointer */ |
200 | incl %eax /* increment source pointer */ |
201 | |
202 | /* No further test needed we we know it is one of the four bytes. */ |
203 | |
204 | L(9): |
205 | popl %edi /* pop saved register */ |
206 | cfi_adjust_cfa_offset (-4) |
207 | cfi_restore (edi) |
208 | |
209 | ret |
210 | END (__rawmemchr) |
211 | |
212 | libc_hidden_def (__rawmemchr) |
213 | weak_alias (__rawmemchr, rawmemchr) |
214 | |