//! Utility functions for loading, storing and transforming batches of eight 128-bit blocks.
2
// TODO(tarcieri): check performance impact / generated assembly changes
4#![allow(clippy::needless_range_loop)]
5
6use super::arch::*;
7use crate::{Block, Block8};
8
/// Eight 128-bit SIMD values; the helpers in this module operate on one such batch at a time.
pub type U128x8 = [__m128i; 8];
10
/// Test helper: asserts that every vector in `a` matches the corresponding entry of `b`.
///
/// Each `__m128i` is reinterpreted as `[u64; 2]` and compared against
/// `[expected[0].to_be(), expected[1].to_be()]` for the entry at the same index.
///
/// # Panics
///
/// Panics if the two slices have different lengths, or if any pair differs.
#[cfg(test)]
pub(crate) fn check(a: &[__m128i], b: &[[u64; 2]]) {
    // `zip` stops at the shorter slice, so without this check a missing or
    // surplus entry on either side would go unnoticed.
    assert_eq!(
        a.len(),
        b.len(),
        "number of vectors differs from number of expected values"
    );
    for (v1, v2) in a.iter().zip(b) {
        // SAFETY: `transmute` enforces that `__m128i` and `[u64; 2]` have the
        // same size, and every bit pattern is a valid `[u64; 2]`.
        let t1: [u64; 2] = unsafe { core::mem::transmute(*v1) };
        let t2 = [v2[0].to_be(), v2[1].to_be()];
        assert_eq!(t1, t2);
    }
}
19
20#[inline(always)]
21pub(crate) fn load8(blocks: *const Block8) -> U128x8 {
22 unsafe {
23 let p: *const GenericArray> = blocks as *const Block;
24 [
25 _mm_loadu_si128(mem_addr:p.add(count:0) as *const __m128i),
26 _mm_loadu_si128(mem_addr:p.add(count:1) as *const __m128i),
27 _mm_loadu_si128(mem_addr:p.add(count:2) as *const __m128i),
28 _mm_loadu_si128(mem_addr:p.add(count:3) as *const __m128i),
29 _mm_loadu_si128(mem_addr:p.add(count:4) as *const __m128i),
30 _mm_loadu_si128(mem_addr:p.add(count:5) as *const __m128i),
31 _mm_loadu_si128(mem_addr:p.add(count:6) as *const __m128i),
32 _mm_loadu_si128(mem_addr:p.add(count:7) as *const __m128i),
33 ]
34 }
35}
36
37#[inline(always)]
38pub(crate) fn store8(blocks: *mut Block8, b: U128x8) {
39 unsafe {
40 let p: *mut GenericArray> = blocks as *mut Block;
41 _mm_storeu_si128(mem_addr:p.add(0) as *mut __m128i, a:b[0]);
42 _mm_storeu_si128(mem_addr:p.add(1) as *mut __m128i, a:b[1]);
43 _mm_storeu_si128(mem_addr:p.add(2) as *mut __m128i, a:b[2]);
44 _mm_storeu_si128(mem_addr:p.add(3) as *mut __m128i, a:b[3]);
45 _mm_storeu_si128(mem_addr:p.add(4) as *mut __m128i, a:b[4]);
46 _mm_storeu_si128(mem_addr:p.add(5) as *mut __m128i, a:b[5]);
47 _mm_storeu_si128(mem_addr:p.add(6) as *mut __m128i, a:b[6]);
48 _mm_storeu_si128(mem_addr:p.add(7) as *mut __m128i, a:b[7]);
49 }
50}
51
52#[inline(always)]
53pub(crate) fn xor8(b: &mut U128x8, key: __m128i) {
54 unsafe {
55 b[0] = _mm_xor_si128(a:b[0], b:key);
56 b[1] = _mm_xor_si128(a:b[1], b:key);
57 b[2] = _mm_xor_si128(a:b[2], b:key);
58 b[3] = _mm_xor_si128(a:b[3], b:key);
59 b[4] = _mm_xor_si128(a:b[4], b:key);
60 b[5] = _mm_xor_si128(a:b[5], b:key);
61 b[6] = _mm_xor_si128(a:b[6], b:key);
62 b[7] = _mm_xor_si128(a:b[7], b:key);
63 }
64}
65
66#[inline(always)]
67pub(crate) fn aesenc8(buffer: &mut U128x8, key: __m128i) {
68 for i: usize in 0..8 {
69 buffer[i] = unsafe { _mm_aesenc_si128(a:buffer[i], round_key:key) };
70 }
71}
72
73#[inline(always)]
74pub(crate) fn aesenclast8(buffer: &mut U128x8, key: __m128i) {
75 for i: usize in 0..8 {
76 buffer[i] = unsafe { _mm_aesenclast_si128(a:buffer[i], round_key:key) };
77 }
78}
79
80#[inline(always)]
81pub(crate) fn aesdec8(buffer: &mut U128x8, key: __m128i) {
82 for i: usize in 0..8 {
83 buffer[i] = unsafe { _mm_aesdec_si128(a:buffer[i], round_key:key) };
84 }
85}
86
87#[inline(always)]
88pub(crate) fn aesdeclast8(buffer: &mut U128x8, key: __m128i) {
89 for i: usize in 0..8 {
90 buffer[i] = unsafe { _mm_aesdeclast_si128(a:buffer[i], round_key:key) };
91 }
92}
93