//! Utility functions

// TODO(tarcieri): check performance impact / generated assembly changes
4 | #![allow (clippy::needless_range_loop)] |
5 | |
6 | use super::arch::*; |
7 | use crate::{Block, Block8}; |
8 | |
/// Eight SSE registers, each holding one 16-byte block (8-way parallel state).
pub type U128x8 = [__m128i; 8];
10 | |
/// Test helper: asserts that each `__m128i` in `a` equals the corresponding
/// pair of big-endian-encoded `u64` words in `b`.
///
/// The `to_be()` conversion means `b` is written in natural (big-endian)
/// notation while the registers hold the bytes in memory order.
#[cfg(test)]
pub(crate) fn check(a: &[__m128i], b: &[[u64; 2]]) {
    for (v1, v2) in a.iter().zip(b) {
        // SAFETY: `__m128i` and `[u64; 2]` are both 16 bytes with no
        // invalid bit patterns, so the transmute is sound.
        let t1: [u64; 2] = unsafe { core::mem::transmute(*v1) };
        let t2 = [v2[0].to_be(), v2[1].to_be()];
        assert_eq!(t1, t2);
    }
}
19 | |
20 | #[inline (always)] |
21 | pub(crate) fn load8(blocks: *const Block8) -> U128x8 { |
22 | unsafe { |
23 | let p: *const GenericArray> = blocks as *const Block; |
24 | [ |
25 | _mm_loadu_si128(mem_addr:p.add(count:0) as *const __m128i), |
26 | _mm_loadu_si128(mem_addr:p.add(count:1) as *const __m128i), |
27 | _mm_loadu_si128(mem_addr:p.add(count:2) as *const __m128i), |
28 | _mm_loadu_si128(mem_addr:p.add(count:3) as *const __m128i), |
29 | _mm_loadu_si128(mem_addr:p.add(count:4) as *const __m128i), |
30 | _mm_loadu_si128(mem_addr:p.add(count:5) as *const __m128i), |
31 | _mm_loadu_si128(mem_addr:p.add(count:6) as *const __m128i), |
32 | _mm_loadu_si128(mem_addr:p.add(count:7) as *const __m128i), |
33 | ] |
34 | } |
35 | } |
36 | |
37 | #[inline (always)] |
38 | pub(crate) fn store8(blocks: *mut Block8, b: U128x8) { |
39 | unsafe { |
40 | let p: *mut GenericArray> = blocks as *mut Block; |
41 | _mm_storeu_si128(mem_addr:p.add(0) as *mut __m128i, a:b[0]); |
42 | _mm_storeu_si128(mem_addr:p.add(1) as *mut __m128i, a:b[1]); |
43 | _mm_storeu_si128(mem_addr:p.add(2) as *mut __m128i, a:b[2]); |
44 | _mm_storeu_si128(mem_addr:p.add(3) as *mut __m128i, a:b[3]); |
45 | _mm_storeu_si128(mem_addr:p.add(4) as *mut __m128i, a:b[4]); |
46 | _mm_storeu_si128(mem_addr:p.add(5) as *mut __m128i, a:b[5]); |
47 | _mm_storeu_si128(mem_addr:p.add(6) as *mut __m128i, a:b[6]); |
48 | _mm_storeu_si128(mem_addr:p.add(7) as *mut __m128i, a:b[7]); |
49 | } |
50 | } |
51 | |
52 | #[inline (always)] |
53 | pub(crate) fn xor8(b: &mut U128x8, key: __m128i) { |
54 | unsafe { |
55 | b[0] = _mm_xor_si128(a:b[0], b:key); |
56 | b[1] = _mm_xor_si128(a:b[1], b:key); |
57 | b[2] = _mm_xor_si128(a:b[2], b:key); |
58 | b[3] = _mm_xor_si128(a:b[3], b:key); |
59 | b[4] = _mm_xor_si128(a:b[4], b:key); |
60 | b[5] = _mm_xor_si128(a:b[5], b:key); |
61 | b[6] = _mm_xor_si128(a:b[6], b:key); |
62 | b[7] = _mm_xor_si128(a:b[7], b:key); |
63 | } |
64 | } |
65 | |
66 | #[inline (always)] |
67 | pub(crate) fn aesenc8(buffer: &mut U128x8, key: __m128i) { |
68 | for i: usize in 0..8 { |
69 | buffer[i] = unsafe { _mm_aesenc_si128(a:buffer[i], round_key:key) }; |
70 | } |
71 | } |
72 | |
73 | #[inline (always)] |
74 | pub(crate) fn aesenclast8(buffer: &mut U128x8, key: __m128i) { |
75 | for i: usize in 0..8 { |
76 | buffer[i] = unsafe { _mm_aesenclast_si128(a:buffer[i], round_key:key) }; |
77 | } |
78 | } |
79 | |
80 | #[inline (always)] |
81 | pub(crate) fn aesdec8(buffer: &mut U128x8, key: __m128i) { |
82 | for i: usize in 0..8 { |
83 | buffer[i] = unsafe { _mm_aesdec_si128(a:buffer[i], round_key:key) }; |
84 | } |
85 | } |
86 | |
87 | #[inline (always)] |
88 | pub(crate) fn aesdeclast8(buffer: &mut U128x8, key: __m128i) { |
89 | for i: usize in 0..8 { |
90 | buffer[i] = unsafe { _mm_aesdeclast_si128(a:buffer[i], round_key:key) }; |
91 | } |
92 | } |
93 | |