1use super::Adler32Imp;
2
3/// Resolves update implementation if CPU supports simd128 instructions.
4pub fn get_imp() -> Option<Adler32Imp> {
5 get_imp_inner()
6}
7
/// Variant compiled when the `simd128` target feature is enabled: hands back
/// the SIMD routine `imp::update`.
#[cfg(target_feature = "simd128")]
#[inline]
fn get_imp_inner() -> Option<Adler32Imp> {
    let simd_update: Adler32Imp = imp::update;
    Some(simd_update)
}
13
14#[inline]
15#[cfg(not(target_feature = "simd128"))]
16fn get_imp_inner() -> Option<Adler32Imp> {
17 None
18}
19
/// Adler-32 update routine built on WebAssembly 128-bit SIMD (`simd128`).
///
/// Only compiled when the `simd128` target feature is enabled at build time.
#[cfg(target_feature = "simd128")]
mod imp {
    /// Modulus applied to both running sums.
    const MOD: u32 = 65521;
    /// Upper bound on the number of bytes accumulated between two reductions
    /// modulo `MOD` (the conventional Adler-32 `NMAX` value, as used by zlib).
    const NMAX: usize = 5552;
    /// Bytes consumed per SIMD loop iteration (two 16-byte `v128` loads).
    const BLOCK_SIZE: usize = 32;
    /// `NMAX` rounded down to a whole number of blocks.
    const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;

    #[cfg(target_arch = "wasm32")]
    use core::arch::wasm32::*;
    #[cfg(target_arch = "wasm64")]
    use core::arch::wasm64::*;

    /// Feeds `data` into the running sums `a` and `b` and returns the updated
    /// `(a, b)` pair, each reduced modulo `MOD`.
    pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
        update_imp(a, b, data)
    }

    /// Splits `data` into `CHUNK_SIZE` pieces so the `u32` accumulators are
    /// reduced modulo `MOD` after every piece, then handles the tail.
    #[inline]
    #[target_feature(enable = "simd128")]
    fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
        let mut a = a as u32;
        let mut b = b as u32;

        let chunks = data.chunks_exact(CHUNK_SIZE);
        let remainder = chunks.remainder();
        for chunk in chunks {
            update_chunk_block(&mut a, &mut b, chunk);
        }

        update_block(&mut a, &mut b, remainder);

        // Both sums were just reduced modulo `MOD` (< 2^16), so the casts are lossless.
        (a as u16, b as u16)
    }

    /// Processes exactly `CHUNK_SIZE` bytes and reduces both sums modulo `MOD`.
    fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
        debug_assert_eq!(
            chunk.len(),
            CHUNK_SIZE,
            "Unexpected chunk size (expected {}, got {})",
            CHUNK_SIZE,
            chunk.len()
        );

        // `CHUNK_SIZE` is a multiple of `BLOCK_SIZE`, so the returned remainder
        // is empty and can be ignored.
        reduce_add_blocks(a, b, chunk);

        *a %= MOD;
        *b %= MOD;
    }

    /// Processes at most `CHUNK_SIZE` bytes: whole blocks via SIMD, the
    /// leftover bytes one at a time, then reduces both sums modulo `MOD`.
    fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
        debug_assert!(
            chunk.len() <= CHUNK_SIZE,
            "Unexpected chunk size (expected <= {}, got {})",
            CHUNK_SIZE,
            chunk.len()
        );

        for byte in reduce_add_blocks(a, b, chunk) {
            *a += *byte as u32;
            *b += *a;
        }

        *a %= MOD;
        *b %= MOD;
    }

    /// Accumulates every whole `BLOCK_SIZE` block of `chunk` into `a` and `b`
    /// (without any modulo reduction) and returns the unprocessed tail.
    ///
    /// If `chunk` is shorter than one block, `a` and `b` are left untouched and
    /// the whole slice is returned.
    ///
    /// For one block `x[0..32]` the scalar recurrence collapses to
    /// `b += 32 * a + sum((32 - i) * x[i])` and `a += sum(x[i])`. The vector
    /// loop keeps the byte sums in `a_v`, the weighted sums in `b_v`, and the
    /// "value of `a` before each block" sums in `p_v`; the `32 *` factor is
    /// applied once at the end as a shift by 5.
    #[inline(always)]
    fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
        if chunk.len() < BLOCK_SIZE {
            return chunk;
        }

        let blocks = chunk.chunks_exact(BLOCK_SIZE);
        let blocks_remainder = blocks.remainder();

        let weight_hi_v = get_weight_hi();
        let weight_lo_v = get_weight_lo();

        // The incoming `a` contributes `32 * a` to `b` once per block.
        let mut p_v = u32x4(*a * blocks.len() as u32, 0, 0, 0);
        let mut a_v = u32x4(0, 0, 0, 0);
        // Seed lane 0 with the incoming `b`, so the final result is assigned, not added.
        let mut b_v = u32x4(*b, 0, 0, 0);

        for block in blocks {
            let block_ptr = block.as_ptr() as *const v128;
            // SAFETY: `block` comes from `chunks_exact(BLOCK_SIZE)` and is therefore
            // exactly 32 bytes long, so bytes 0..16 are in bounds; `read_unaligned`
            // imposes no alignment requirement.
            let v_lo = unsafe { block_ptr.read_unaligned() };
            // SAFETY: same 32-byte block; `add(1)` advances 16 bytes, so bytes
            // 16..32 are read, which are still in bounds.
            let v_hi = unsafe { block_ptr.add(1).read_unaligned() };

            // Byte sums of all previous blocks, i.e. `a` before this block.
            p_v = u32x4_add(p_v, a_v);

            a_v = u32x4_add(a_v, u32x4_extadd_quarters_u8x16(v_lo));
            let mad = i32x4_dot_i8x16(v_lo, weight_lo_v);
            b_v = u32x4_add(b_v, mad);

            a_v = u32x4_add(a_v, u32x4_extadd_quarters_u8x16(v_hi));
            let mad = i32x4_dot_i8x16(v_hi, weight_hi_v);
            b_v = u32x4_add(b_v, mad);
        }

        // Multiply the accumulated "a before block" sums by BLOCK_SIZE (32 == 1 << 5).
        b_v = u32x4_add(b_v, u32x4_shl(p_v, 5));

        *a += reduce_add(a_v);
        *b = reduce_add(b_v);

        blocks_remainder
    }

    /// Dot product of two vectors of 16 unsigned bytes, folded into four 32-bit lanes.
    ///
    /// Each operand is zero-extended to 16-bit lanes first, so with byte inputs the
    /// signed 16-bit multiply used by `i32x4_dot_i16x8` never sees a negative value.
    /// Only the total across the four result lanes is meaningful to the caller.
    #[inline(always)]
    fn i32x4_dot_i8x16(a: v128, b: v128) -> v128 {
        let a_lo = u16x8_extend_low_u8x16(a);
        let a_hi = u16x8_extend_high_u8x16(a);

        let b_lo = u16x8_extend_low_u8x16(b);
        let b_hi = u16x8_extend_high_u8x16(b);

        let lo = i32x4_dot_i16x8(a_lo, b_lo);
        let hi = i32x4_dot_i16x8(a_hi, b_hi);

        i32x4_add(lo, hi)
    }

    /// Sums each group of four adjacent unsigned bytes into one 32-bit lane.
    #[inline(always)]
    fn u32x4_extadd_quarters_u8x16(a: v128) -> v128 {
        u32x4_extadd_pairwise_u16x8(u16x8_extadd_pairwise_u8x16(a))
    }

    /// Horizontal (wrapping) sum of the four 32-bit lanes of `v`.
    ///
    /// Uses the safe lane-extraction intrinsic rather than a transmute, so the
    /// function needs neither `unsafe` nor anything from `std`.
    #[inline(always)]
    fn reduce_add(v: v128) -> u32 {
        u32x4_extract_lane::<0>(v)
            .wrapping_add(u32x4_extract_lane::<1>(v))
            .wrapping_add(u32x4_extract_lane::<2>(v))
            .wrapping_add(u32x4_extract_lane::<3>(v))
    }

    /// Weights 32..=17 for the first 16 bytes of a block.
    #[inline(always)]
    fn get_weight_lo() -> v128 {
        u8x16(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        )
    }

    /// Weights 16..=1 for the last 16 bytes of a block.
    #[inline(always)]
    fn get_weight_hi() -> v128 {
        u8x16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
    }
}
166
#[cfg(test)]
mod tests {
    use rand::Rng;

    /// Length of the largest input exercised by these tests.
    const MAX_LEN: usize = 512 * 1024;

    /// All-zero inputs of several lengths, including the empty input.
    #[test]
    fn zeroes() {
        let buffer = [0u8; MAX_LEN];
        for &len in &[0, 1, 2, 100, 1024, MAX_LEN] {
            assert_sum_eq(&buffer[..len]);
        }
    }

    /// All-one inputs of several lengths, including the empty input.
    #[test]
    fn ones() {
        let buffer = [1u8; MAX_LEN];
        for &len in &[0, 1, 2, 100, 1024, MAX_LEN] {
            assert_sum_eq(&buffer[..len]);
        }
    }

    /// Randomly filled input, checked at several prefix lengths.
    #[test]
    fn random() {
        let mut buffer = [0u8; MAX_LEN];
        rand::thread_rng().fill(&mut buffer[..]);

        for &len in &[1, 100, 1024, MAX_LEN] {
            assert_sum_eq(&buffer[..len]);
        }
    }

    /// Example calculation from https://en.wikipedia.org/wiki/Adler-32.
    #[test]
    fn wiki() {
        assert_sum_eq(b"Wikipedia");
    }

    /// Compares the SIMD routine against `adler::adler32_slice` for `data`.
    ///
    /// Does nothing when `super::get_imp()` yields no implementation.
    fn assert_sum_eq(data: &[u8]) {
        let update = match super::get_imp() {
            Some(update) => update,
            None => return,
        };

        let (a, b) = update(1, 0, data);
        let left = (u32::from(b) << 16) | u32::from(a);
        let right = adler::adler32_slice(data);

        assert_eq!(left, right, "len({})", data.len());
    }
}
218