// This module defines pure Rust platform independent implementations of all
// the memchr routines. We do our best to make them fast. Some of them may even
// get auto-vectorized.

use core::{cmp, usize};

#[cfg(target_pointer_width = "16")]
const USIZE_BYTES: usize = 2;

#[cfg(target_pointer_width = "32")]
const USIZE_BYTES: usize = 4;

#[cfg(target_pointer_width = "64")]
const USIZE_BYTES: usize = 8;

// The number of bytes to loop at in one iteration of memchr/memrchr.
const LOOP_SIZE: usize = 2 * USIZE_BYTES;

/// Return `true` if `x` contains any zero byte.
///
/// From *Matters Computational*, J. Arndt
///
/// "The idea is to subtract one from each of the bytes and then look for
/// bytes where the borrow propagated all the way to the most significant
/// bit."
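///
/// Concretely, for a byte lane of `x` equal to `0x00`, subtracting `0x01`
/// wraps to `0xFF`, and `0xFF & !0x00 & 0x80` leaves that lane's high bit
/// set. For a nonzero lane, either `x - 1` has its high bit clear (lane at
/// most `0x80`) or `!x` clears it (lane at least `0x80`). A borrow out of a
/// zero lane may also flag the lane above it, but only when a zero byte is
/// genuinely present, so the test remains exact.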
#[inline(always)]
fn contains_zero_byte(x: usize) -> bool {
    const LO_U64: u64 = 0x0101010101010101;
    const HI_U64: u64 = 0x8080808080808080;

    const LO_USIZE: usize = LO_U64 as usize;
    const HI_USIZE: usize = HI_U64 as usize;

    x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
}

/// Repeat the given byte into a word size number. That is, every 8 bits
/// is equivalent to the given byte. For example, if `b` is `\x4E` or
/// `01001110` in binary, then the returned value on a 32-bit system would be:
/// `01001110_01001110_01001110_01001110`.
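///
/// Concretely, `usize::MAX / 255` is `0x01010101` on a 32-bit system and
/// `0x0101010101010101` on a 64-bit system, so multiplying it by `b`
/// broadcasts `b` into every byte lane of the word.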
#[inline(always)]
fn repeat_byte(b: u8) -> usize {
    (b as usize) * (usize::MAX / 255)
}

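/// Return the first index matching the byte `x` in `text`.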
pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
    let vn1 = repeat_byte(n1);
    let confirm = |byte| byte == n1;
    let loop_size = cmp::min(LOOP_SIZE, haystack.len());
    let align = USIZE_BYTES - 1;
    let start_ptr = haystack.as_ptr();
    let mut ptr = start_ptr;

    unsafe {
        let end_ptr = start_ptr.add(haystack.len());
        if haystack.len() < USIZE_BYTES {
            return forward_search(start_ptr, end_ptr, ptr, confirm);
        }

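        // Probe the first word with an unaligned load. If the needle occurs
        // anywhere in it, a byte-at-a-time scan pins down the exact index.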
        let chunk = (ptr as *const usize).read_unaligned();
        if contains_zero_byte(chunk ^ vn1) {
            return forward_search(start_ptr, end_ptr, ptr, confirm);
        }

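        // Bump `ptr` up to the next word boundary; the bytes skipped over
        // were already covered by the unaligned load above. The main loop
        // then examines two aligned words per iteration.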
        ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
        debug_assert!(ptr > start_ptr);
        debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
        while loop_size == LOOP_SIZE && ptr <= end_ptr.sub(loop_size) {
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);

            let a = *(ptr as *const usize);
            let b = *(ptr.add(USIZE_BYTES) as *const usize);
            let eqa = contains_zero_byte(a ^ vn1);
            let eqb = contains_zero_byte(b ^ vn1);
            if eqa || eqb {
                break;
            }
            ptr = ptr.add(LOOP_SIZE);
        }
        forward_search(start_ptr, end_ptr, ptr, confirm)
    }
}

/// Like `memchr`, but searches for two bytes instead of one.
pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
    let vn1 = repeat_byte(n1);
    let vn2 = repeat_byte(n2);
    let confirm = |byte| byte == n1 || byte == n2;
    let align = USIZE_BYTES - 1;
    let start_ptr = haystack.as_ptr();
    let mut ptr = start_ptr;

    unsafe {
        let end_ptr = start_ptr.add(haystack.len());
        if haystack.len() < USIZE_BYTES {
            return forward_search(start_ptr, end_ptr, ptr, confirm);
        }

        let chunk = (ptr as *const usize).read_unaligned();
        let eq1 = contains_zero_byte(chunk ^ vn1);
        let eq2 = contains_zero_byte(chunk ^ vn2);
        if eq1 || eq2 {
            return forward_search(start_ptr, end_ptr, ptr, confirm);
        }

        ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
        debug_assert!(ptr > start_ptr);
        debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
        while ptr <= end_ptr.sub(USIZE_BYTES) {
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);

            let chunk = *(ptr as *const usize);
            let eq1 = contains_zero_byte(chunk ^ vn1);
            let eq2 = contains_zero_byte(chunk ^ vn2);
            if eq1 || eq2 {
                break;
            }
            ptr = ptr.add(USIZE_BYTES);
        }
        forward_search(start_ptr, end_ptr, ptr, confirm)
    }
}

/// Like `memchr`, but searches for three bytes instead of one.
pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
    let vn1 = repeat_byte(n1);
    let vn2 = repeat_byte(n2);
    let vn3 = repeat_byte(n3);
    let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
    let align = USIZE_BYTES - 1;
    let start_ptr = haystack.as_ptr();
    let mut ptr = start_ptr;

    unsafe {
        let end_ptr = start_ptr.add(haystack.len());
        if haystack.len() < USIZE_BYTES {
            return forward_search(start_ptr, end_ptr, ptr, confirm);
        }

        let chunk = (ptr as *const usize).read_unaligned();
        let eq1 = contains_zero_byte(chunk ^ vn1);
        let eq2 = contains_zero_byte(chunk ^ vn2);
        let eq3 = contains_zero_byte(chunk ^ vn3);
        if eq1 || eq2 || eq3 {
            return forward_search(start_ptr, end_ptr, ptr, confirm);
        }

        ptr = ptr.add(USIZE_BYTES - (start_ptr as usize & align));
        debug_assert!(ptr > start_ptr);
        debug_assert!(end_ptr.sub(USIZE_BYTES) >= start_ptr);
        while ptr <= end_ptr.sub(USIZE_BYTES) {
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);

            let chunk = *(ptr as *const usize);
            let eq1 = contains_zero_byte(chunk ^ vn1);
            let eq2 = contains_zero_byte(chunk ^ vn2);
            let eq3 = contains_zero_byte(chunk ^ vn3);
            if eq1 || eq2 || eq3 {
                break;
            }
            ptr = ptr.add(USIZE_BYTES);
        }
        forward_search(start_ptr, end_ptr, ptr, confirm)
    }
}

/// Return the last index matching the byte `x` in `text`.
pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
    let vn1 = repeat_byte(n1);
    let confirm = |byte| byte == n1;
    let loop_size = cmp::min(LOOP_SIZE, haystack.len());
    let align = USIZE_BYTES - 1;
    let start_ptr = haystack.as_ptr();

    unsafe {
        let end_ptr = start_ptr.add(haystack.len());
        let mut ptr = end_ptr;
        if haystack.len() < USIZE_BYTES {
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
        }

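        // Probe the last word with an unaligned load. If the needle occurs
        // anywhere in it, a byte-at-a-time scan from the end pins down the
        // exact index.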
        let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
        if contains_zero_byte(chunk ^ vn1) {
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
        }

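        // Round `ptr` down to a word boundary; the trailing bytes skipped
        // over were already covered by the unaligned load above. The main
        // loop then examines two aligned words per iteration, moving toward
        // the start.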
        ptr = (end_ptr as usize & !align) as *const u8;
        debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
        while loop_size == LOOP_SIZE && ptr >= start_ptr.add(loop_size) {
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);

            let a = *(ptr.sub(2 * USIZE_BYTES) as *const usize);
            let b = *(ptr.sub(1 * USIZE_BYTES) as *const usize);
            let eqa = contains_zero_byte(a ^ vn1);
            let eqb = contains_zero_byte(b ^ vn1);
            if eqa || eqb {
                break;
            }
            ptr = ptr.sub(loop_size);
        }
        reverse_search(start_ptr, end_ptr, ptr, confirm)
    }
}

/// Like `memrchr`, but searches for two bytes instead of one.
pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
    let vn1 = repeat_byte(n1);
    let vn2 = repeat_byte(n2);
    let confirm = |byte| byte == n1 || byte == n2;
    let align = USIZE_BYTES - 1;
    let start_ptr = haystack.as_ptr();

    unsafe {
        let end_ptr = start_ptr.add(haystack.len());
        let mut ptr = end_ptr;
        if haystack.len() < USIZE_BYTES {
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
        }

        let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
        let eq1 = contains_zero_byte(chunk ^ vn1);
        let eq2 = contains_zero_byte(chunk ^ vn2);
        if eq1 || eq2 {
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
        }

        ptr = (end_ptr as usize & !align) as *const u8;
        debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
        while ptr >= start_ptr.add(USIZE_BYTES) {
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);

            let chunk = *(ptr.sub(USIZE_BYTES) as *const usize);
            let eq1 = contains_zero_byte(chunk ^ vn1);
            let eq2 = contains_zero_byte(chunk ^ vn2);
            if eq1 || eq2 {
                break;
            }
            ptr = ptr.sub(USIZE_BYTES);
        }
        reverse_search(start_ptr, end_ptr, ptr, confirm)
    }
}

/// Like `memrchr`, but searches for three bytes instead of one.
pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
    let vn1 = repeat_byte(n1);
    let vn2 = repeat_byte(n2);
    let vn3 = repeat_byte(n3);
    let confirm = |byte| byte == n1 || byte == n2 || byte == n3;
    let align = USIZE_BYTES - 1;
    let start_ptr = haystack.as_ptr();

    unsafe {
        let end_ptr = start_ptr.add(haystack.len());
        let mut ptr = end_ptr;
        if haystack.len() < USIZE_BYTES {
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
        }

        let chunk = (ptr.sub(USIZE_BYTES) as *const usize).read_unaligned();
        let eq1 = contains_zero_byte(chunk ^ vn1);
        let eq2 = contains_zero_byte(chunk ^ vn2);
        let eq3 = contains_zero_byte(chunk ^ vn3);
        if eq1 || eq2 || eq3 {
            return reverse_search(start_ptr, end_ptr, ptr, confirm);
        }

        ptr = (end_ptr as usize & !align) as *const u8;
        debug_assert!(start_ptr <= ptr && ptr <= end_ptr);
        while ptr >= start_ptr.add(USIZE_BYTES) {
            debug_assert_eq!(0, (ptr as usize) % USIZE_BYTES);

            let chunk = *(ptr.sub(USIZE_BYTES) as *const usize);
            let eq1 = contains_zero_byte(chunk ^ vn1);
            let eq2 = contains_zero_byte(chunk ^ vn2);
            let eq3 = contains_zero_byte(chunk ^ vn3);
            if eq1 || eq2 || eq3 {
                break;
            }
            ptr = ptr.sub(USIZE_BYTES);
        }
        reverse_search(start_ptr, end_ptr, ptr, confirm)
    }
}

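/// Scan forward byte-by-byte from `ptr` (inclusive) up to `end_ptr`
/// (exclusive), returning the offset from `start_ptr` of the first byte for
/// which `confirm` returns `true`, or `None` if there is no such byte.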
#[inline(always)]
unsafe fn forward_search<F: Fn(u8) -> bool>(
    start_ptr: *const u8,
    end_ptr: *const u8,
    mut ptr: *const u8,
    confirm: F,
) -> Option<usize> {
    debug_assert!(start_ptr <= ptr);
    debug_assert!(ptr <= end_ptr);

    while ptr < end_ptr {
        if confirm(*ptr) {
            return Some(sub(ptr, start_ptr));
        }
        ptr = ptr.offset(1);
    }
    None
}

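/// Scan backward byte-by-byte from `ptr` (exclusive) down to `start_ptr`
/// (inclusive), returning the offset from `start_ptr` of the last byte for
/// which `confirm` returns `true`, or `None` if there is no such byte.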
#[inline(always)]
unsafe fn reverse_search<F: Fn(u8) -> bool>(
    start_ptr: *const u8,
    end_ptr: *const u8,
    mut ptr: *const u8,
    confirm: F,
) -> Option<usize> {
    debug_assert!(start_ptr <= ptr);
    debug_assert!(ptr <= end_ptr);

    while ptr > start_ptr {
        ptr = ptr.offset(-1);
        if confirm(*ptr) {
            return Some(sub(ptr, start_ptr));
        }
    }
    None
}

/// Subtract `b` from `a` and return the difference. `a` should be greater than
/// or equal to `b`.
fn sub(a: *const u8, b: *const u8) -> usize {
    debug_assert!(a >= b);
    (a as usize) - (b as usize)
}
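
// A minimal sanity-check sketch of the routines above (not from the upstream
// source). The haystack length and needle positions are arbitrary choices,
// picked so the searches exercise the unaligned probe, the aligned word loop,
// and the byte-wise tail.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn forward_and_reverse_find_expected_indices() {
        // 100 bytes of filler with two needle pairs planted inside.
        let mut haystack = [b'x'; 100];
        haystack[21] = b'z';
        haystack[22] = b'a';
        haystack[77] = b'z';
        haystack[78] = b'a';

        assert_eq!(memchr(b'z', &haystack), Some(21));
        assert_eq!(memchr2(b'a', b'z', &haystack), Some(21));
        assert_eq!(memchr3(b'q', b'a', b'z', &haystack), Some(21));
        assert_eq!(memchr(b'q', &haystack), None);

        assert_eq!(memrchr(b'a', &haystack), Some(78));
        assert_eq!(memrchr2(b'a', b'z', &haystack), Some(78));
        assert_eq!(memrchr3(b'q', b'a', b'z', &haystack), Some(78));
        assert_eq!(memrchr(b'q', &haystack), None);
    }
}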