| 1 | use crate::convert::*; |
| 2 | #[allow (unused)] |
| 3 | use zerocopy::transmute; |
| 4 | |
/// This constant comes from Knuth's prng (Empirically it works better than those from splitmix32).
pub(crate) const MULTIPLE: u64 = 6364136223846793005;
| 7 | |
/// This is a constant with a lot of special properties found by automated search.
/// See the unit tests below. (Below are alternative values)
/// It is the byte-permutation mask fed to `_mm_shuffle_epi8` in the SSSE3
/// `shuffle` path; only compiled in when that path is active.
#[cfg(all(target_feature = "ssse3", not(miri)))]
const SHUFFLE_MASK: u128 = 0x020a0700_0c01030e_050f0d08_06090b04_u128;
//const SHUFFLE_MASK: u128 = 0x000d0702_0a040301_05080f0c_0e0b0609_u128;
//const SHUFFLE_MASK: u128 = 0x040A0700_030E0106_0D050F08_020B0C09_u128;
| 14 | |
/// Multiplies `s` by `by` at 128-bit width and folds the high half back
/// onto the low half with xor, so every input bit influences the output.
#[inline(always)]
#[cfg(feature = "folded_multiply")]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
    let wide = (s as u128).wrapping_mul(by as u128);
    // Truncating cast keeps the low 64 bits; the shift brings down the high 64.
    (wide as u64) ^ ((wide >> 64) as u64)
}
| 21 | |
/// Fallback mixing used when the `folded_multiply` feature is disabled:
/// two byte-swapped wrapping multiplications are combined so that both the
/// high and low bytes of each input affect the result.
#[inline(always)]
#[cfg(not(feature = "folded_multiply"))]
pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
    let forward = s.wrapping_mul(by.swap_bytes());
    let reversed = s.swap_bytes().wrapping_mul(!by);
    forward ^ reversed.swap_bytes()
}
| 29 | |
| 30 | /// Given a small (less than 8 byte slice) returns the same data stored in two u32s. |
| 31 | /// (order of and non-duplication of bytes is NOT guaranteed) |
| 32 | #[inline (always)] |
| 33 | pub(crate) fn read_small(data: &[u8]) -> [u64; 2] { |
| 34 | debug_assert!(data.len() <= 8); |
| 35 | if data.len() >= 2 { |
| 36 | if data.len() >= 4 { |
| 37 | //len 4-8 |
| 38 | [data.read_u32().0 as u64, data.read_last_u32() as u64] |
| 39 | } else { |
| 40 | //len 2-3 |
| 41 | [data.read_u16().0 as u64, data[data.len() - 1] as u64] |
| 42 | } |
| 43 | } else { |
| 44 | if data.len() > 0 { |
| 45 | [data[0] as u64, data[0] as u64] |
| 46 | } else { |
| 47 | [0, 0] |
| 48 | } |
| 49 | } |
| 50 | } |
| 51 | |
/// Permutes the bytes of `a` to spread entropy across the full 128 bits.
/// With SSSE3 this is a single `pshufb` driven by `SHUFFLE_MASK`; other
/// targets (and miri) fall back to a plain byte reversal.
#[inline(always)]
pub(crate) fn shuffle(a: u128) -> u128 {
    #[cfg(all(target_feature = "ssse3", not(miri)))]
    {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;
        // SAFETY: guarded by the ssse3 target_feature cfg above.
        unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(SHUFFLE_MASK))) }
    }
    #[cfg(not(all(target_feature = "ssse3", not(miri))))]
    {
        a.swap_bytes()
    }
}
| 67 | |
| 68 | #[allow (unused)] //not used by fallback |
| 69 | #[inline (always)] |
| 70 | pub(crate) fn add_and_shuffle(a: u128, b: u128) -> u128 { |
| 71 | let sum: [u64; 2] = add_by_64s(a.convert(), b.convert()); |
| 72 | shuffle(sum.convert()) |
| 73 | } |
| 74 | |
| 75 | #[allow (unused)] //not used by fallback |
| 76 | #[inline (always)] |
| 77 | pub(crate) fn shuffle_and_add(base: u128, to_add: u128) -> u128 { |
| 78 | let shuffled: [u64; 2] = shuffle(base).convert(); |
| 79 | add_by_64s(a:shuffled, b:to_add.convert()).convert() |
| 80 | } |
| 81 | |
/// Lane-wise wrapping addition of two u64 pairs via a single SSE2 `paddq`.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri)))]
#[inline(always)]
pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
    // SAFETY: guarded by the sse2 target_feature cfg above.
    unsafe {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;
        transmute!(_mm_add_epi64(transmute!(a), transmute!(b)))
    }
}
| 93 | |
/// Portable fallback: lane-wise wrapping addition of two u64 pairs.
#[cfg(not(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri))))]
#[inline(always)]
pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
    let low = a[0].wrapping_add(b[0]);
    let high = a[1].wrapping_add(b[1]);
    [low, high]
}
| 99 | |
/// One AES encryption round of `value` using `xor` as the round key (x86 `aesenc`).
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;
    // SAFETY: guarded by the aes target_feature cfg above.
    unsafe {
        let value = transmute!(value);
        transmute!(_mm_aesenc_si128(value, transmute!(xor)))
    }
}
| 113 | |
/// One AES encryption round on ARM. NOTE(review): `vaeseq_u8` xors its key
/// *before* the SubBytes/ShiftRows steps (x86's `aesenc` xors *after*), so a
/// zero key is passed and `xor` is applied afterwards to mirror the x86
/// variant's semantics.
#[cfg(any(
    all(feature = "nightly-arm-aes", target_arch = "aarch64", target_feature = "aes", not(miri)),
    all(feature = "nightly-arm-aes", target_arch = "arm", target_feature = "aes", not(miri)),
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "aarch64")]
    use core::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use core::arch::arm::*;
    // SAFETY: guarded by the aes target_feature cfg above.
    let res = unsafe { vaesmcq_u8(vaeseq_u8(transmute!(value), transmute!(0u128))) };
    let value: u128 = transmute!(res);
    xor ^ value
}
| 129 | |
/// One AES decryption round of `value` using `xor` as the round key (x86 `aesdec`).
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;
    // SAFETY: guarded by the aes target_feature cfg above.
    unsafe {
        let value = transmute!(value);
        transmute!(_mm_aesdec_si128(value, transmute!(xor)))
    }
}
| 143 | |
/// One AES decryption round on ARM, mirroring the x86 `aesdec` variant:
/// a zero key goes into `vaesdq_u8` and `xor` is applied afterwards
/// (the ARM intrinsic xors the key first, x86 xors it last).
#[cfg(any(
    all(feature = "nightly-arm-aes", target_arch = "aarch64", target_feature = "aes", not(miri)),
    all(feature = "nightly-arm-aes", target_arch = "arm", target_feature = "aes", not(miri)),
))]
#[allow(unused)]
#[inline(always)]
pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
    #[cfg(target_arch = "aarch64")]
    use core::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use core::arch::arm::*;
    // SAFETY: guarded by the aes target_feature cfg above.
    let res = unsafe { vaesimcq_u8(vaesdq_u8(transmute!(value), transmute!(0u128))) };
    let value: u128 = transmute!(res);
    xor ^ value
}
| 159 | |
/// Adds `len` into the low 64 bits of `enc` with wrapping arithmetic; the
/// high 64 bits are untouched (`_mm_cvtsi64_si128` zero-extends, so the SIMD
/// lane-add contributes zero to the high lane).
#[allow(unused)]
#[inline(always)]
pub(crate) fn add_in_length(enc: &mut u128, len: u64) {
    #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
    {
        // The enclosing cfg already guarantees x86_64, so the import needs no
        // further gating.
        use core::arch::x86_64::*;

        // SAFETY: guarded by the sse2 target_feature cfg above; `enc` points
        // at a valid, writable u128, and unaligned load/store intrinsics are
        // used so alignment does not matter.
        unsafe {
            let enc: *mut u128 = enc as *mut u128;
            let len: __m128i = _mm_cvtsi64_si128(len as i64);
            // Fixed: removed IDE parameter-name inlay hints (`mem_addr:`,
            // `a:`, `b:`) that had leaked into the source text — Rust has no
            // named arguments, so these calls did not compile.
            let data: __m128i = _mm_loadu_si128(enc.cast());
            let sum: __m128i = _mm_add_epi64(data, len);
            _mm_storeu_si128(enc.cast(), sum);
        }
    }
    #[cfg(not(all(target_arch = "x86_64", target_feature = "sse2", not(miri))))]
    {
        let mut t: [u64; 2] = enc.convert();
        t[0] = t[0].wrapping_add(len);
        *enc = t.convert();
    }
}
| 183 | |
#[cfg(test)]
mod test {
    use super::*;

    // This is code to search for the shuffle constant
    //
    //thread_local! { static MASK: Cell<u128> = Cell::new(0); }
    //
    // fn shuffle(a: u128) -> u128 {
    //     use std::intrinsics::transmute;
    //     #[cfg(target_arch = "x86")]
    //     use core::arch::x86::*;
    //     #[cfg(target_arch = "x86_64")]
    //     use core::arch::x86_64::*;
    //     MASK.with(|mask| {
    //         unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(mask.get()))) }
    //     })
    // }
    //
    // #[test]
    // fn find_shuffle() {
    //     use rand::prelude::*;
    //     use SliceRandom;
    //     use std::panic;
    //     use std::io::Write;
    //
    //     let mut value: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ,13, 14, 15];
    //     let mut rand = thread_rng();
    //     let mut successful_list = HashMap::new();
    //     for _attempt in 0..10000000 {
    //         rand.shuffle(&mut value);
    //         let test_val = value.convert();
    //         MASK.with(|mask| {
    //             mask.set(test_val);
    //         });
    //         if let Ok(successful) = panic::catch_unwind(|| {
    //             test_shuffle_does_not_collide_with_aes();
    //             test_shuffle_moves_high_bits();
    //             test_shuffle_moves_every_value();
    //             //test_shuffle_does_not_loop();
    //             value
    //         }) {
    //             let successful: u128 = successful.convert();
    //             successful_list.insert(successful, iters_before_loop());
    //         }
    //     }
    //     let write_file = File::create("/tmp/output").unwrap();
    //     let mut writer = BufWriter::new(&write_file);
    //
    //     for success in successful_list {
    //         writeln!(writer, "Found successful: {:x?} - {:?}", success.0, success.1);
    //     }
    // }
    //
    // fn iters_before_loop() -> u32 {
    //     let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF;
    //     let mut shuffled = shuffle(numbered);
    //     let mut count = 0;
    //     loop {
    //         // println!("{:>16x}", shuffled);
    //         if numbered == shuffled {
    //             break;
    //         }
    //         count += 1;
    //         shuffled = shuffle(shuffled);
    //     }
    //     count
    // }

    /// Verifies that for each single set byte, the positions the shuffle moves
    /// it to are disjoint from the positions an AES round (enc or dec) of the
    /// same input would occupy — so combining shuffle with AES mixes rather
    /// than overlaps.
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "ssse3",
        target_feature = "aes",
        not(miri)
    ))]
    #[test]
    fn test_shuffle_does_not_collide_with_aes() {
        let mut value: [u8; 16] = [0; 16];
        // Baselines: AES rounds of all-zero input, xored out below via compare.
        let zero_mask_enc = aesenc(0, 0);
        let zero_mask_dec = aesdec(0, 0);
        for index in 0..16 {
            value[index] = 1;
            let excluded_positions_enc: [u8; 16] = aesenc(value.convert(), zero_mask_enc).convert();
            let excluded_positions_dec: [u8; 16] = aesdec(value.convert(), zero_mask_dec).convert();
            let actual_location: [u8; 16] = shuffle(value.convert()).convert();
            for pos in 0..16 {
                if actual_location[pos] != 0 {
                    assert_eq!(
                        0, excluded_positions_enc[pos],
                        "Forward Overlap between {:?} and {:?} at {}",
                        excluded_positions_enc, actual_location, index
                    );
                    assert_eq!(
                        0, excluded_positions_dec[pos],
                        "Reverse Overlap between {:?} and {:?} at {}",
                        excluded_positions_dec, actual_location, index
                    );
                }
            }
            value[index] = 0;
        }
    }

    /// The shuffle must be a permutation: every byte value 0..16 survives.
    #[test]
    fn test_shuffle_contains_each_value() {
        let value: [u8; 16] = 0x00010203_04050607_08090A0B_0C0D0E0F_u128.convert();
        let shuffled: [u8; 16] = shuffle(value.convert()).convert();
        for index in 0..16_u8 {
            assert!(shuffled.contains(&index), "Value is missing {}", index);
        }
    }

    /// No byte may map to its own position (no fixed points in the permutation).
    #[test]
    fn test_shuffle_moves_every_value() {
        let mut value: [u8; 16] = [0; 16];
        for index in 0..16 {
            value[index] = 1;
            let shuffled: [u8; 16] = shuffle(value.convert()).convert();
            assert_eq!(0, shuffled[index], "Value is not moved {}", index);
            value[index] = 0;
        }
    }

    /// Spot-checks that bits cross between the high and low 64-bit halves
    /// (checked for bytes 0, 7, 8 and 15 of the 128-bit value).
    #[test]
    fn test_shuffle_moves_high_bits() {
        assert!(
            shuffle(1) > (1_u128 << 80),
            "Low bits must be moved to other half {:?} -> {:?}",
            0,
            shuffle(1)
        );

        assert!(
            shuffle(1_u128 << 58) >= (1_u128 << 64),
            "High bits must be moved to other half {:?} -> {:?}",
            7,
            shuffle(1_u128 << 58)
        );
        assert!(
            shuffle(1_u128 << 58) < (1_u128 << 112),
            "High bits must not remain high {:?} -> {:?}",
            7,
            shuffle(1_u128 << 58)
        );
        assert!(
            shuffle(1_u128 << 64) < (1_u128 << 64),
            "Low bits must be moved to other half {:?} -> {:?}",
            8,
            shuffle(1_u128 << 64)
        );
        assert!(
            shuffle(1_u128 << 64) >= (1_u128 << 16),
            "Low bits must not remain low {:?} -> {:?}",
            8,
            shuffle(1_u128 << 64)
        );

        assert!(
            shuffle(1_u128 << 120) < (1_u128 << 50),
            "High bits must be moved to low half {:?} -> {:?}",
            15,
            shuffle(1_u128 << 120)
        );
    }

    /// The permutation's cycle length must exceed 100 iterations, so repeated
    /// shuffling keeps producing new states rather than cycling quickly.
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "ssse3",
        not(miri)
    ))]
    #[test]
    fn test_shuffle_does_not_loop() {
        let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF;
        let mut shuffled = shuffle(numbered);
        for count in 0..100 {
            // println!("{:>16x}", shuffled);
            assert_ne!(numbered, shuffled, "Equal after {} vs {:x}", count, shuffled);
            shuffled = shuffle(shuffled);
        }
    }

    /// `add_in_length` must add into the low half with wrapping arithmetic
    /// (50 + u64::MAX wraps to 49) and leave the high half untouched.
    #[test]
    fn test_add_length() {
        let mut enc = (u64::MAX as u128) << 64 | 50;
        add_in_length(&mut enc, u64::MAX);
        assert_eq!(enc >> 64, u64::MAX as u128);
        assert_eq!(enc as u64, 49);
    }
}
| 373 | |