use crate::convert::*;
use zerocopy::transmute;

/// This constant comes from Knuth's PRNG. (Empirically it works better than those from splitmix32.)
pub(crate) const MULTIPLE: u64 = 6364136223846793005;

/// This is a constant with a lot of special properties found by automated search.
/// See the unit tests below. (The commented-out values below are alternatives.)
#[cfg(all(target_feature = "ssse3", not(miri)))]
const SHUFFLE_MASK: u128 = 0x020a0700_0c01030e_050f0d08_06090b04_u128;
//const SHUFFLE_MASK: u128 = 0x000d0702_0a040301_05080f0c_0e0b0609_u128;
//const SHUFFLE_MASK: u128 = 0x040A0700_030E0106_0D050F08_020B0C09_u128;

/// Multiplies `s` and `by` into a 128-bit product and folds it back to 64 bits by
/// XORing the high half into the low half.
#[inline(always)]
#[cfg(feature = "folded_multiply")]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
    let result: u128 = (s as u128).wrapping_mul(by as u128);
    ((result & 0xffff_ffff_ffff_ffff) as u64) ^ ((result >> 64) as u64)
}

/// Fallback used when the `folded_multiply` feature is disabled: mixes the inputs
/// with two 64-bit wrapping multiplies of byte-swapped operands instead of a full
/// 128-bit product.
#[inline(always)]
#[cfg(not(feature = "folded_multiply"))]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
    let b1 = s.wrapping_mul(by.swap_bytes());
    let b2 = s.swap_bytes().wrapping_mul(!by);
    b1 ^ b2.swap_bytes()
}

/// Given a small slice (8 bytes or fewer), returns the same data stored in two u64s.
/// (The order and non-duplication of bytes are NOT guaranteed.)
#[inline(always)]
pub(crate) fn read_small(data: &[u8]) -> [u64; 2] {
    debug_assert!(data.len() <= 8);
    if data.len() >= 2 {
        if data.len() >= 4 {
            //len 4-8
            [data.read_u32().0 as u64, data.read_last_u32() as u64]
        } else {
            //len 2-3
            [data.read_u16().0 as u64, data[data.len() - 1] as u64]
        }
    } else {
        if data.len() > 0 {
            [data[0] as u64, data[0] as u64]
        } else {
            [0, 0]
        }
    }
}

#[inline(always)]
pub(crate) fn shuffle(a: u128) -> u128 {
    #[cfg(all(target_feature = "ssse3", not(miri)))]
    {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;
        unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(SHUFFLE_MASK))) }
    }
    #[cfg(not(all(target_feature = "ssse3", not(miri))))]
    {
        a.swap_bytes()
    }
}

#[allow(unused)] //not used by fallback
#[inline(always)]
pub(crate) fn add_and_shuffle(a: u128, b: u128) -> u128 {
    let sum: [u64; 2] = add_by_64s(a.convert(), b.convert());
    shuffle(sum.convert())
}

#[allow(unused)] //not used by fallback
#[inline(always)]
pub(crate) fn shuffle_and_add(base: u128, to_add: u128) -> u128 {
    let shuffled: [u64; 2] = shuffle(base).convert();
    add_by_64s(shuffled, to_add.convert()).convert()
}

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri)))]
#[inline(always)]
pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
    unsafe {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;
        transmute!(_mm_add_epi64(transmute!(a), transmute!(b)))
    }
}

#[cfg(not(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri))))]
#[inline(always)]
pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
    [a[0].wrapping_add(b[0]), a[1].wrapping_add(b[1])]
}

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;
    unsafe {
        let value = transmute!(value);
        transmute!(_mm_aesenc_si128(value, transmute!(xor)))
    }
}

#[cfg(all(
    any(target_arch = "arm", target_arch = "aarch64"),
    any(target_feature = "aes", target_feature = "crypto"),
    not(miri),
    feature = "stdsimd"
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "aarch64")]
    use core::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use core::arch::arm::*;
    let res = unsafe { vaesmcq_u8(vaeseq_u8(transmute!(value), transmute!(0u128))) };
    let value: u128 = transmute!(res);
    xor ^ value
}

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;
    unsafe {
        let value = transmute!(value);
        transmute!(_mm_aesdec_si128(value, transmute!(xor)))
    }
}

#[cfg(all(
    any(target_arch = "arm", target_arch = "aarch64"),
    any(target_feature = "aes", target_feature = "crypto"),
    not(miri),
    feature = "stdsimd"
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "aarch64")]
    use core::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use core::arch::arm::*;
    let res = unsafe { vaesimcq_u8(vaesdq_u8(transmute!(value), transmute!(0u128))) };
    let value: u128 = transmute!(res);
    xor ^ value
}

#[allow(unused)]
#[inline(always)]
pub(crate) fn add_in_length(enc: &mut u128, len: u64) {
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
    {
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        unsafe {
            let enc = enc as *mut u128;
            let len = _mm_cvtsi64_si128(len as i64);
            let data = _mm_loadu_si128(enc.cast());
            let sum = _mm_add_epi64(data, len);
            _mm_storeu_si128(enc.cast(), sum);
        }
    }
    #[cfg(not(all(target_arch = "x86_64", target_feature = "sse2", not(miri))))]
    {
        let mut t: [u64; 2] = enc.convert();
        t[0] = t[0].wrapping_add(len);
        *enc = t.convert();
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::convert::Convert;

    // This is code to search for the shuffle constant
    //
    //thread_local! { static MASK: Cell<u128> = Cell::new(0); }
    //
    // fn shuffle(a: u128) -> u128 {
    //     use std::intrinsics::transmute;
    //     #[cfg(target_arch = "x86")]
    //     use core::arch::x86::*;
    //     #[cfg(target_arch = "x86_64")]
    //     use core::arch::x86_64::*;
    //     MASK.with(|mask| {
    //         unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(mask.get()))) }
    //     })
    // }
    //
    // #[test]
    // fn find_shuffle() {
    //     use rand::prelude::*;
    //     use SliceRandom;
    //     use std::panic;
    //     use std::io::Write;
    //
    //     let mut value: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
    //     let mut rand = thread_rng();
    //     let mut successful_list = HashMap::new();
    //     for _attempt in 0..10000000 {
    //         rand.shuffle(&mut value);
    //         let test_val = value.convert();
    //         MASK.with(|mask| {
    //             mask.set(test_val);
    //         });
    //         if let Ok(successful) = panic::catch_unwind(|| {
    //             test_shuffle_does_not_collide_with_aes();
    //             test_shuffle_moves_high_bits();
    //             test_shuffle_moves_every_value();
    //             //test_shuffle_does_not_loop();
    //             value
    //         }) {
    //             let successful: u128 = successful.convert();
    //             successful_list.insert(successful, iters_before_loop());
    //         }
    //     }
    //     let write_file = File::create("/tmp/output").unwrap();
    //     let mut writer = BufWriter::new(&write_file);
    //
    //     for success in successful_list {
    //         writeln!(writer, "Found successful: {:x?} - {:?}", success.0, success.1);
    //     }
    // }
    //
    // fn iters_before_loop() -> u32 {
    //     let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF;
    //     let mut shuffled = shuffle(numbered);
    //     let mut count = 0;
    //     loop {
    //         // println!("{:>16x}", shuffled);
    //         if numbered == shuffled {
    //             break;
    //         }
    //         count += 1;
    //         shuffled = shuffle(shuffled);
    //     }
    //     count
    // }

    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "ssse3",
        target_feature = "aes",
        not(miri)
    ))]
    #[test]
    fn test_shuffle_does_not_collide_with_aes() {
        let mut value: [u8; 16] = [0; 16];
        let zero_mask_enc = aesenc(0, 0);
        let zero_mask_dec = aesdec(0, 0);
        for index in 0..16 {
            value[index] = 1;
            let excluded_positions_enc: [u8; 16] = aesenc(value.convert(), zero_mask_enc).convert();
            let excluded_positions_dec: [u8; 16] = aesdec(value.convert(), zero_mask_dec).convert();
            let actual_location: [u8; 16] = shuffle(value.convert()).convert();
            for pos in 0..16 {
                if actual_location[pos] != 0 {
                    assert_eq!(
                        0, excluded_positions_enc[pos],
                        "Forward Overlap between {:?} and {:?} at {}",
                        excluded_positions_enc, actual_location, index
                    );
                    assert_eq!(
                        0, excluded_positions_dec[pos],
                        "Reverse Overlap between {:?} and {:?} at {}",
                        excluded_positions_dec, actual_location, index
                    );
                }
            }
            value[index] = 0;
        }
    }

    #[test]
    fn test_shuffle_contains_each_value() {
        let value: [u8; 16] = 0x00010203_04050607_08090A0B_0C0D0E0F_u128.convert();
        let shuffled: [u8; 16] = shuffle(value.convert()).convert();
        for index in 0..16_u8 {
            assert!(shuffled.contains(&index), "Value is missing {}", index);
        }
    }

    #[test]
    fn test_shuffle_moves_every_value() {
        let mut value: [u8; 16] = [0; 16];
        for index in 0..16 {
            value[index] = 1;
            let shuffled: [u8; 16] = shuffle(value.convert()).convert();
            assert_eq!(0, shuffled[index], "Value is not moved {}", index);
            value[index] = 0;
        }
    }

    #[test]
    fn test_shuffle_moves_high_bits() {
        assert!(
            shuffle(1) > (1_u128 << 80),
            "Low bits must be moved to other half {:?} -> {:?}",
            0,
            shuffle(1)
        );

        assert!(
            shuffle(1_u128 << 58) >= (1_u128 << 64),
            "High bits must be moved to other half {:?} -> {:?}",
            7,
            shuffle(1_u128 << 58)
        );
        assert!(
            shuffle(1_u128 << 58) < (1_u128 << 112),
            "High bits must not remain high {:?} -> {:?}",
            7,
            shuffle(1_u128 << 58)
        );
        assert!(
            shuffle(1_u128 << 64) < (1_u128 << 64),
            "Low bits must be moved to other half {:?} -> {:?}",
            8,
            shuffle(1_u128 << 64)
        );
        assert!(
            shuffle(1_u128 << 64) >= (1_u128 << 16),
            "Low bits must not remain low {:?} -> {:?}",
            8,
            shuffle(1_u128 << 64)
        );

        assert!(
            shuffle(1_u128 << 120) < (1_u128 << 50),
            "High bits must be moved to low half {:?} -> {:?}",
            15,
            shuffle(1_u128 << 120)
        );
    }
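
    // Illustrative sketch, not part of the original suite: with a zero addend the
    // `add_by_64s` step is a no-op, so both `shuffle_and_add` and `add_and_shuffle`
    // should reduce to a plain `shuffle`. The input value is arbitrary.
    #[test]
    fn test_shuffle_and_add_zero_addend_is_shuffle() {
        let value = 0x00112233_44556677_8899AABB_CCDDEEFF_u128;
        assert_eq!(shuffle(value), shuffle_and_add(value, 0));
        assert_eq!(shuffle(value), add_and_shuffle(value, 0));
    }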

    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "ssse3",
        not(miri)
    ))]
    #[test]
    fn test_shuffle_does_not_loop() {
        let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF;
        let mut shuffled = shuffle(numbered);
        for count in 0..100 {
            // println!("{:>16x}", shuffled);
            assert_ne!(numbered, shuffled, "Equal after {} vs {:x}", count, shuffled);
            shuffled = shuffle(shuffled);
        }
    }
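
    // Illustrative sketch, not part of the original suite: both the SSE2 and the
    // fallback versions of `add_by_64s` add each 64-bit lane independently with
    // wrapping semantics, so a carry out of one lane must not spill into the other.
    #[test]
    fn test_add_by_64s_wraps_per_lane() {
        assert_eq!([0u64, 3], add_by_64s([u64::MAX, 1], [1, 2]));
    }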

    #[test]
    fn test_add_length() {
        let mut enc = (u64::MAX as u128) << 64 | 50;
        add_in_length(&mut enc, u64::MAX);
        assert_eq!(enc >> 64, u64::MAX as u128);
        assert_eq!(enc as u64, 49);
    }
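
    // Illustrative sketch, not part of the original suite: whichever variant of
    // `folded_multiply` is compiled, a zero left operand multiplies through to
    // zero. Under the `folded_multiply` feature the result is also checked against
    // a direct fold (XOR of the low and high halves) of the 128-bit product.
    #[test]
    fn test_folded_multiply_zero_operand() {
        assert_eq!(0, folded_multiply(0, MULTIPLE));
        assert_eq!(0, folded_multiply(0, 0));
    }

    #[cfg(feature = "folded_multiply")]
    #[test]
    fn test_folded_multiply_folds_wide_product() {
        let s = 0x0123_4567_89AB_CDEF_u64;
        let wide = (s as u128).wrapping_mul(MULTIPLE as u128);
        assert_eq!((wide as u64) ^ ((wide >> 64) as u64), folded_multiply(s, MULTIPLE));
    }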
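
    // Illustrative sketch, not part of the original suite, of `read_small`'s edge
    // cases: an empty slice maps to two zeros and a single byte is duplicated into
    // both halves. Larger inputs depend on the platform byte order used by the
    // convert helpers, so their exact layout is not asserted here.
    #[test]
    fn test_read_small_edge_cases() {
        assert_eq!([0u64, 0], read_small(&[]));
        assert_eq!([7u64, 7], read_small(&[7]));
    }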
}