//! Utility functions

// TODO(tarcieri): check performance impact / generated assembly changes
#![allow(clippy::needless_range_loop)]

use super::arch::*;
use crate::{Block, Block8};

/// Eight 128-bit SIMD registers, i.e. eight 16-byte blocks processed at once.
pub type U128x8 = [__m128i; 8];

/// Test helper: asserts that each 128-bit vector in `a` equals the
/// corresponding `[u64; 2]` pair in `b`, with the `u64` words of `b`
/// converted to big-endian byte order before comparison.
///
/// Panics (via `assert_eq!`) on the first mismatching pair. Extra elements
/// in the longer slice are ignored (`zip` stops at the shorter one).
#[cfg(test)]
pub(crate) fn check(a: &[__m128i], b: &[[u64; 2]]) {
    for (v1, v2) in a.iter().zip(b) {
        // SAFETY: `__m128i` is a plain 128-bit value with no invalid bit
        // patterns; reinterpreting it as two `u64` words is always sound.
        let t1: [u64; 2] = unsafe { core::mem::transmute(*v1) };
        let t2 = [v2[0].to_be(), v2[1].to_be()];
        assert_eq!(t1, t2);
    }
}

| 20 | #[inline (always)] |
| 21 | pub(crate) fn load8(blocks: *const Block8) -> U128x8 { |
| 22 | unsafe { |
| 23 | let p: *const GenericArray> = blocks as *const Block; |
| 24 | [ |
| 25 | _mm_loadu_si128(mem_addr:p.add(count:0) as *const __m128i), |
| 26 | _mm_loadu_si128(mem_addr:p.add(count:1) as *const __m128i), |
| 27 | _mm_loadu_si128(mem_addr:p.add(count:2) as *const __m128i), |
| 28 | _mm_loadu_si128(mem_addr:p.add(count:3) as *const __m128i), |
| 29 | _mm_loadu_si128(mem_addr:p.add(count:4) as *const __m128i), |
| 30 | _mm_loadu_si128(mem_addr:p.add(count:5) as *const __m128i), |
| 31 | _mm_loadu_si128(mem_addr:p.add(count:6) as *const __m128i), |
| 32 | _mm_loadu_si128(mem_addr:p.add(count:7) as *const __m128i), |
| 33 | ] |
| 34 | } |
| 35 | } |
| 36 | |
| 37 | #[inline (always)] |
| 38 | pub(crate) fn store8(blocks: *mut Block8, b: U128x8) { |
| 39 | unsafe { |
| 40 | let p: *mut GenericArray> = blocks as *mut Block; |
| 41 | _mm_storeu_si128(mem_addr:p.add(0) as *mut __m128i, a:b[0]); |
| 42 | _mm_storeu_si128(mem_addr:p.add(1) as *mut __m128i, a:b[1]); |
| 43 | _mm_storeu_si128(mem_addr:p.add(2) as *mut __m128i, a:b[2]); |
| 44 | _mm_storeu_si128(mem_addr:p.add(3) as *mut __m128i, a:b[3]); |
| 45 | _mm_storeu_si128(mem_addr:p.add(4) as *mut __m128i, a:b[4]); |
| 46 | _mm_storeu_si128(mem_addr:p.add(5) as *mut __m128i, a:b[5]); |
| 47 | _mm_storeu_si128(mem_addr:p.add(6) as *mut __m128i, a:b[6]); |
| 48 | _mm_storeu_si128(mem_addr:p.add(7) as *mut __m128i, a:b[7]); |
| 49 | } |
| 50 | } |
| 51 | |
| 52 | #[inline (always)] |
| 53 | pub(crate) fn xor8(b: &mut U128x8, key: __m128i) { |
| 54 | unsafe { |
| 55 | b[0] = _mm_xor_si128(a:b[0], b:key); |
| 56 | b[1] = _mm_xor_si128(a:b[1], b:key); |
| 57 | b[2] = _mm_xor_si128(a:b[2], b:key); |
| 58 | b[3] = _mm_xor_si128(a:b[3], b:key); |
| 59 | b[4] = _mm_xor_si128(a:b[4], b:key); |
| 60 | b[5] = _mm_xor_si128(a:b[5], b:key); |
| 61 | b[6] = _mm_xor_si128(a:b[6], b:key); |
| 62 | b[7] = _mm_xor_si128(a:b[7], b:key); |
| 63 | } |
| 64 | } |
| 65 | |
| 66 | #[inline (always)] |
| 67 | pub(crate) fn aesenc8(buffer: &mut U128x8, key: __m128i) { |
| 68 | for i: usize in 0..8 { |
| 69 | buffer[i] = unsafe { _mm_aesenc_si128(a:buffer[i], key) }; |
| 70 | } |
| 71 | } |
| 72 | |
| 73 | #[inline (always)] |
| 74 | pub(crate) fn aesenclast8(buffer: &mut U128x8, key: __m128i) { |
| 75 | for i: usize in 0..8 { |
| 76 | buffer[i] = unsafe { _mm_aesenclast_si128(a:buffer[i], key) }; |
| 77 | } |
| 78 | } |
| 79 | |
| 80 | #[inline (always)] |
| 81 | pub(crate) fn aesdec8(buffer: &mut U128x8, key: __m128i) { |
| 82 | for i: usize in 0..8 { |
| 83 | buffer[i] = unsafe { _mm_aesdec_si128(a:buffer[i], key) }; |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | #[inline (always)] |
| 88 | pub(crate) fn aesdeclast8(buffer: &mut U128x8, key: __m128i) { |
| 89 | for i: usize in 0..8 { |
| 90 | buffer[i] = unsafe { _mm_aesdeclast_si128(a:buffer[i], key) }; |
| 91 | } |
| 92 | } |
| 93 | |