use crate::convert::*;
use zerocopy::transmute;

/// This constant comes from Knuth's PRNG. (Empirically it works better than those from splitmix32.)
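///
/// For illustration, a Knuth-style LCG step built around such a multiplier would look roughly like
/// `state = state.wrapping_mul(MULTIPLE).wrapping_add(increment);` (a hedged sketch only; `state`
/// and `increment` are hypothetical names, not items defined in this crate).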
pub(crate) const MULTIPLE: u64 = 6364136223846793005;

/// This is a constant with a lot of special properties found by automated search.
/// See the unit tests below. (The commented-out constants below are alternative values.)
#[cfg(all(target_feature = "ssse3", not(miri)))]
const SHUFFLE_MASK: u128 = 0x020a0700_0c01030e_050f0d08_06090b04_u128;
//const SHUFFLE_MASK: u128 = 0x000d0702_0a040301_05080f0c_0e0b0609_u128;
//const SHUFFLE_MASK: u128 = 0x040A0700_030E0106_0D050F08_020B0C09_u128;

#[inline(always)]
#[cfg(feature = "folded_multiply")]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
    let result: u128 = (s as u128).wrapping_mul(by as u128);
    ((result & 0xffff_ffff_ffff_ffff) as u64) ^ ((result >> 64) as u64)
}

#[inline(always)]
#[cfg(not(feature = "folded_multiply"))]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
    let b1 = s.wrapping_mul(by.swap_bytes());
    let b2 = s.swap_bytes().wrapping_mul(!by);
    b1 ^ b2.swap_bytes()
}
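// For illustration, a hasher built on this primitive typically folds one 64-bit word into its
// state per step, e.g. `state = folded_multiply(state ^ word, MULTIPLE);` (a hedged sketch only;
// `state` and `word` are hypothetical names, not identifiers used elsewhere in this file).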

/// Given a small slice (at most 8 bytes), returns the same data stored in two u64s.
/// (The order and non-duplication of bytes are NOT guaranteed.)
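///
/// For example, an empty slice yields `[0, 0]` and a one-byte slice `[b]` yields `[b, b]`;
/// longer inputs are read as overlapping prefix/suffix words, so bytes may appear twice.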
#[inline(always)]
pub(crate) fn read_small(data: &[u8]) -> [u64; 2] {
    debug_assert!(data.len() <= 8);
    if data.len() >= 2 {
        if data.len() >= 4 {
            //len 4-8
            [data.read_u32().0 as u64, data.read_last_u32() as u64]
        } else {
            //len 2-3
            [data.read_u16().0 as u64, data[data.len() - 1] as u64]
        }
    } else if !data.is_empty() {
        //len 1
        [data[0] as u64, data[0] as u64]
    } else {
        //len 0
        [0, 0]
    }
}
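/// Permutes the bytes of a 128-bit value: a single `pshufb` with `SHUFFLE_MASK` when SSSE3 is
/// available (and not under Miri), and a plain byte swap otherwise.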

#[inline(always)]
pub(crate) fn shuffle(a: u128) -> u128 {
    #[cfg(all(target_feature = "ssse3", not(miri)))]
    {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;
        unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(SHUFFLE_MASK))) }
    }
    #[cfg(not(all(target_feature = "ssse3", not(miri))))]
    {
        a.swap_bytes()
    }
}
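// The two helpers below pair the byte shuffle with a lane-wise 64-bit addition; they differ only
// in whether the shuffle is applied before or after the add.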

#[allow(unused)] //not used by fallback
#[inline(always)]
pub(crate) fn add_and_shuffle(a: u128, b: u128) -> u128 {
    let sum: [u64; 2] = add_by_64s(a.convert(), b.convert());
    shuffle(sum.convert())
}

#[allow(unused)] //not used by fallback
#[inline(always)]
pub(crate) fn shuffle_and_add(base: u128, to_add: u128) -> u128 {
    let shuffled: [u64; 2] = shuffle(base).convert();
    add_by_64s(shuffled, to_add.convert()).convert()
}

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri)))]
#[inline(always)]
pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
    unsafe {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;
        transmute!(_mm_add_epi64(transmute!(a), transmute!(b)))
    }
}

#[cfg(not(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri))))]
#[inline(always)]
pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
    [a[0].wrapping_add(b[0]), a[1].wrapping_add(b[1])]
}

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;
    unsafe {
        let value = transmute!(value);
        transmute!(_mm_aesenc_si128(value, transmute!(xor)))
    }
}
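// On ARM/AArch64 the x86 AES round is emulated: AESE/AESMC run with an all-zero round key (AESE
// XORs the key in *before* SubBytes, unlike x86), and the round key is XORed in afterwards. The
// same approach is used for the inverse round in `aesdec` below.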

#[cfg(all(
    any(target_arch = "arm", target_arch = "aarch64"),
    any(target_feature = "aes", target_feature = "crypto"),
    not(miri),
    feature = "stdsimd"
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "aarch64")]
    use core::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use core::arch::arm::*;
    let res = unsafe { vaesmcq_u8(vaeseq_u8(transmute!(value), transmute!(0u128))) };
    let value: u128 = transmute!(res);
    xor ^ value
}

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;
    unsafe {
        let value = transmute!(value);
        transmute!(_mm_aesdec_si128(value, transmute!(xor)))
    }
}

#[cfg(all(
    any(target_arch = "arm", target_arch = "aarch64"),
    any(target_feature = "aes", target_feature = "crypto"),
    not(miri),
    feature = "stdsimd"
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "aarch64")]
    use core::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use core::arch::arm::*;
    let res = unsafe { vaesimcq_u8(vaesdq_u8(transmute!(value), transmute!(0u128))) };
    let value: u128 = transmute!(res);
    xor ^ value
}

#[allow(unused)]
#[inline(always)]
pub(crate) fn add_in_length(enc: &mut u128, len: u64) {
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
    {
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        unsafe {
            let enc = enc as *mut u128;
            let len = _mm_cvtsi64_si128(len as i64);
            let data = _mm_loadu_si128(enc.cast());
            let sum = _mm_add_epi64(data, len);
            _mm_storeu_si128(enc.cast(), sum);
        }
    }
    #[cfg(not(all(target_arch = "x86_64", target_feature = "sse2", not(miri))))]
    {
        let mut t: [u64; 2] = enc.convert();
        t[0] = t[0].wrapping_add(len);
        *enc = t.convert();
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::convert::Convert;

    // This is code to search for the shuffle constant
    //
    //thread_local! { static MASK: Cell<u128> = Cell::new(0); }
    //
    // fn shuffle(a: u128) -> u128 {
    //     use std::intrinsics::transmute;
    //     #[cfg(target_arch = "x86")]
    //     use core::arch::x86::*;
    //     #[cfg(target_arch = "x86_64")]
    //     use core::arch::x86_64::*;
    //     MASK.with(|mask| {
    //         unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(mask.get()))) }
    //     })
    // }
    //
    // #[test]
    // fn find_shuffle() {
    //     use rand::prelude::*;
    //     use SliceRandom;
    //     use std::panic;
    //     use std::io::Write;
    //
    //     let mut value: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
    //     let mut rand = thread_rng();
    //     let mut successful_list = HashMap::new();
    //     for _attempt in 0..10000000 {
    //         rand.shuffle(&mut value);
    //         let test_val = value.convert();
    //         MASK.with(|mask| {
    //             mask.set(test_val);
    //         });
    //         if let Ok(successful) = panic::catch_unwind(|| {
    //             test_shuffle_does_not_collide_with_aes();
    //             test_shuffle_moves_high_bits();
    //             test_shuffle_moves_every_value();
    //             //test_shuffle_does_not_loop();
    //             value
    //         }) {
    //             let successful: u128 = successful.convert();
    //             successful_list.insert(successful, iters_before_loop());
    //         }
    //     }
    //     let write_file = File::create("/tmp/output").unwrap();
    //     let mut writer = BufWriter::new(&write_file);
    //
    //     for success in successful_list {
    //         writeln!(writer, "Found successful: {:x?} - {:?}", success.0, success.1);
    //     }
    // }
    //
    // fn iters_before_loop() -> u32 {
    //     let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF;
    //     let mut shuffled = shuffle(numbered);
    //     let mut count = 0;
    //     loop {
    //         // println!("{:>16x}", shuffled);
    //         if numbered == shuffled {
    //             break;
    //         }
    //         count += 1;
    //         shuffled = shuffle(shuffled);
    //     }
    //     count
    // }

    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "ssse3",
        target_feature = "aes",
        not(miri)
    ))]
    #[test]
    fn test_shuffle_does_not_collide_with_aes() {
        let mut value: [u8; 16] = [0; 16];
        let zero_mask_enc = aesenc(0, 0);
        let zero_mask_dec = aesdec(0, 0);
        for index in 0..16 {
            value[index] = 1;
            let excluded_positions_enc: [u8; 16] = aesenc(value.convert(), zero_mask_enc).convert();
            let excluded_positions_dec: [u8; 16] = aesdec(value.convert(), zero_mask_dec).convert();
            let actual_location: [u8; 16] = shuffle(value.convert()).convert();
            for pos in 0..16 {
                if actual_location[pos] != 0 {
                    assert_eq!(
                        0, excluded_positions_enc[pos],
                        "Forward Overlap between {:?} and {:?} at {}",
                        excluded_positions_enc, actual_location, index
                    );
                    assert_eq!(
                        0, excluded_positions_dec[pos],
                        "Reverse Overlap between {:?} and {:?} at {}",
                        excluded_positions_dec, actual_location, index
                    );
                }
            }
            value[index] = 0;
        }
    }

    #[test]
    fn test_shuffle_contains_each_value() {
        let value: [u8; 16] = 0x00010203_04050607_08090A0B_0C0D0E0F_u128.convert();
        let shuffled: [u8; 16] = shuffle(value.convert()).convert();
        for index in 0..16_u8 {
            assert!(shuffled.contains(&index), "Value is missing {}", index);
        }
    }

    #[test]
    fn test_shuffle_moves_every_value() {
        let mut value: [u8; 16] = [0; 16];
        for index in 0..16 {
            value[index] = 1;
            let shuffled: [u8; 16] = shuffle(value.convert()).convert();
            assert_eq!(0, shuffled[index], "Value is not moved {}", index);
            value[index] = 0;
        }
    }

    #[test]
    fn test_shuffle_moves_high_bits() {
        assert!(
            shuffle(1) > (1_u128 << 80),
            "Low bits must be moved to other half {:?} -> {:?}",
            0,
            shuffle(1)
        );

        assert!(
            shuffle(1_u128 << 58) >= (1_u128 << 64),
            "High bits must be moved to other half {:?} -> {:?}",
            7,
            shuffle(1_u128 << 58)
        );
        assert!(
            shuffle(1_u128 << 58) < (1_u128 << 112),
            "High bits must not remain high {:?} -> {:?}",
            7,
            shuffle(1_u128 << 58)
        );
        assert!(
            shuffle(1_u128 << 64) < (1_u128 << 64),
            "Low bits must be moved to other half {:?} -> {:?}",
            8,
            shuffle(1_u128 << 64)
        );
        assert!(
            shuffle(1_u128 << 64) >= (1_u128 << 16),
            "Low bits must not remain low {:?} -> {:?}",
            8,
            shuffle(1_u128 << 64)
        );

        assert!(
            shuffle(1_u128 << 120) < (1_u128 << 50),
            "High bits must be moved to low half {:?} -> {:?}",
            15,
            shuffle(1_u128 << 120)
        );
    }

    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "ssse3",
        not(miri)
    ))]
    #[test]
    fn test_shuffle_does_not_loop() {
        let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF;
        let mut shuffled = shuffle(numbered);
        for count in 0..100 {
            // println!("{:>16x}", shuffled);
            assert_ne!(numbered, shuffled, "Equal after {} vs {:x}", count, shuffled);
            shuffled = shuffle(shuffled);
        }
    }

    #[test]
    fn test_add_length() {
        let mut enc = (u64::MAX as u128) << 64 | 50;
        add_in_length(&mut enc, u64::MAX);
        assert_eq!(enc >> 64, u64::MAX as u128);
        assert_eq!(enc as u64, 49);
    }
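
    // Additional sanity checks, added as an illustrative (hedged) sketch of the helpers' basic
    // behavior rather than as part of the original search-derived test suite.
    #[test]
    fn test_add_by_64s_and_read_small_basics() {
        // Lane-wise 64-bit addition wraps independently in each lane on both the SIMD and
        // fallback code paths.
        assert_eq!(add_by_64s([1, 2], [3, 4]), [4, 6]);
        assert_eq!(add_by_64s([u64::MAX, 5], [1, 1]), [0, 6]);
        // Tiny inputs to read_small are zero-filled or duplicated (only the endianness-independent
        // cases are checked here).
        assert_eq!(read_small(&[]), [0, 0]);
        assert_eq!(read_small(&[7]), [7, 7]);
    }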
}