1 | #[cfg (not(feature = "runtime-dispatch-simd" ))] |
2 | use core::{mem, ptr, usize}; |
3 | #[cfg (feature = "runtime-dispatch-simd" )] |
4 | use std::{mem, ptr, usize}; |
5 | |
/// Broadcast `byte` into every byte lane of a `usize`.
///
/// E.g. `splat(0x2A)` on a 64-bit target yields `0x2A2A_2A2A_2A2A_2A2A`.
fn splat(byte: u8) -> usize {
    // 0x01 repeated in every byte: multiplying by it copies `byte`
    // into each lane.
    let ones_per_byte = usize::MAX / 0xFF;
    (byte as usize) * ones_per_byte
}
10 | |
/// Load a native-endian `usize` from `bytes` starting at `offset`,
/// without bounds checking.
///
/// # Safety
///
/// The caller must guarantee that
/// `offset + mem::size_of::<usize>() <= bytes.len()`; otherwise the
/// copy reads past the end of the slice.
unsafe fn usize_load_unchecked(bytes: &[u8], offset: usize) -> usize {
    let mut output: usize = 0;
    // Byte-wise copy into a local avoids any alignment requirement
    // on `bytes`.
    // NOTE(fix): the argument-name inlay hints (`src:`, `count:`) that
    // had leaked into the source were removed — they are not Rust syntax.
    ptr::copy_nonoverlapping(
        bytes.as_ptr().add(offset),
        &mut output as *mut usize as *mut u8,
        mem::size_of::<usize>(),
    );
    output
}
20 | |
/// SWAR byte comparison: returns a `usize` with `1` in every byte lane
/// where `lhs` and `rhs` hold equal bytes, and `0` in every lane where
/// they differ.
fn bytewise_equal(lhs: usize, rhs: usize) -> usize {
    let ones = usize::MAX / 0xFF; // 0x01 in every byte lane
    let highs = ones << 7; // 0x80 in every byte lane

    // A byte of `diff` is zero exactly where the inputs agree; the
    // carry/borrow trick below maps "lane is zero" to a set bit at
    // position 7 of that lane, which is then normalized to 0x01.
    let diff = lhs ^ rhs;
    !((((diff & !highs) + !highs) | diff) >> 7) & ones
}
28 | |
/// Sum the individual byte lanes of `values`.
///
/// Each lane is expected to hold a small per-byte count (at most 255,
/// as produced by accumulating up to 255 `bytewise_equal` results), so
/// the grand total fits in 16 bits.
fn sum_usize(values: usize) -> usize {
    let pair_lo = usize::MAX / 0xFFFF; // 0x0001 in every 16-bit lane
    let pair_mask = pair_lo * 0xFF; // 0x00FF in every 16-bit lane

    // Fold adjacent bytes into 16-bit lanes first so the multiply
    // below cannot overflow a lane.
    let pairs = (values & pair_mask) + ((values >> 8) & pair_mask);

    // Multiplying by 0x...0001_0001 accumulates all 16-bit lanes into
    // the top two bytes; shift them down to extract the total.
    pairs.wrapping_mul(pair_lo) >> ((mem::size_of::<usize>() - 2) * 8)
}
39 | |
40 | fn is_leading_utf8_byte(values: usize) -> usize { |
41 | // a leading UTF-8 byte is one which does not start with the bits 10. |
42 | ((!values >> 7) | (values >> 6)) & splat(byte:1) |
43 | } |
44 | |
45 | pub fn chunk_count(haystack: &[u8], needle: u8) -> usize { |
46 | let chunksize = mem::size_of::<usize>(); |
47 | assert!(haystack.len() >= chunksize); |
48 | |
49 | unsafe { |
50 | let mut offset = 0; |
51 | let mut count = 0; |
52 | |
53 | let needles = splat(needle); |
54 | |
55 | // 2040 |
56 | while haystack.len() >= offset + chunksize * 255 { |
57 | let mut counts = 0; |
58 | for _ in 0..255 { |
59 | counts += bytewise_equal(usize_load_unchecked(haystack, offset), needles); |
60 | offset += chunksize; |
61 | } |
62 | count += sum_usize(counts); |
63 | } |
64 | |
65 | // 8 |
66 | let mut counts = 0; |
67 | for i in 0..(haystack.len() - offset) / chunksize { |
68 | counts += bytewise_equal( |
69 | usize_load_unchecked(haystack, offset + i * chunksize), |
70 | needles, |
71 | ); |
72 | } |
73 | if haystack.len() % 8 != 0 { |
74 | let mask = usize::from_le(!(!0 >> ((haystack.len() % chunksize) * 8))); |
75 | counts += bytewise_equal( |
76 | usize_load_unchecked(haystack, haystack.len() - chunksize), |
77 | needles, |
78 | ) & mask; |
79 | } |
80 | count += sum_usize(counts); |
81 | |
82 | count |
83 | } |
84 | } |
85 | |
86 | pub fn chunk_num_chars(utf8_chars: &[u8]) -> usize { |
87 | let chunksize = mem::size_of::<usize>(); |
88 | assert!(utf8_chars.len() >= chunksize); |
89 | |
90 | unsafe { |
91 | let mut offset = 0; |
92 | let mut count = 0; |
93 | |
94 | // 2040 |
95 | while utf8_chars.len() >= offset + chunksize * 255 { |
96 | let mut counts = 0; |
97 | for _ in 0..255 { |
98 | counts += is_leading_utf8_byte(usize_load_unchecked(utf8_chars, offset)); |
99 | offset += chunksize; |
100 | } |
101 | count += sum_usize(counts); |
102 | } |
103 | |
104 | // 8 |
105 | let mut counts = 0; |
106 | for i in 0..(utf8_chars.len() - offset) / chunksize { |
107 | counts += |
108 | is_leading_utf8_byte(usize_load_unchecked(utf8_chars, offset + i * chunksize)); |
109 | } |
110 | if utf8_chars.len() % 8 != 0 { |
111 | let mask = usize::from_le(!(!0 >> ((utf8_chars.len() % chunksize) * 8))); |
112 | counts += is_leading_utf8_byte(usize_load_unchecked( |
113 | utf8_chars, |
114 | utf8_chars.len() - chunksize, |
115 | )) & mask; |
116 | } |
117 | count += sum_usize(counts); |
118 | |
119 | count |
120 | } |
121 | } |
122 | |