1 | //! Implement 256- and 512- bit in terms of 128-bit, for machines without native wide SIMD. |
2 | |
3 | use crate::types::*; |
4 | use crate::{vec128_storage, vec256_storage, vec512_storage}; |
5 | use core::marker::PhantomData; |
6 | use core::ops::*; |
7 | |
// Generic two-lane wrapper: emulates a 256-bit SIMD vector as a pair of `W`
// lanes (each typically a 128-bit vector type). `G` is a phantom type
// parameter carried only via `PhantomData` (stores no data); presumably a
// machine/feature marker — confirm against the crate's machine types.
// The zerocopy helper macro wraps the declaration and derives its traits.
zerocopy::cryptocorrosion_derive_traits! {
    #[repr(transparent)]
    #[derive (Copy, Clone, Default)]
    #[allow (non_camel_case_types)]
    pub struct x2<W, G>(pub [W; 2], PhantomData<G>);
}
14 | |
15 | impl<W, G> x2<W, G> { |
16 | #[inline (always)] |
17 | pub fn new(xs: [W; 2]) -> Self { |
18 | x2(xs, PhantomData) |
19 | } |
20 | } |
// Implements a binary-operator trait ($trait, with method $fn) for `x2<W, G>`
// by forwarding the operation to each of the two lanes and collecting the
// lane-level outputs into a new `x2`.
macro_rules! fwd_binop_x2 {
    ($trait:ident, $fn:ident) => {
        impl<W: $trait + Copy, G> $trait for x2<W, G> {
            type Output = x2<W::Output, G>;
            #[inline(always)]
            fn $fn(self, rhs: Self) -> Self::Output {
                x2::new([self.0[0].$fn(rhs.0[0]), self.0[1].$fn(rhs.0[1])])
            }
        }
    };
}
// Implements a compound-assignment trait ($trait, with method $fn_assign) for
// `x2<W, G>` by applying the assignment to each lane in place.
macro_rules! fwd_binop_assign_x2 {
    ($trait:ident, $fn_assign:ident) => {
        impl<W: $trait + Copy, G> $trait for x2<W, G> {
            #[inline(always)]
            fn $fn_assign(&mut self, rhs: Self) {
                (self.0[0]).$fn_assign(rhs.0[0]);
                (self.0[1]).$fn_assign(rhs.0[1]);
            }
        }
    };
}
// Expands to a single `self -> Self` method that forwards the named unary
// operation to both lanes; invoked inside trait impl blocks below.
macro_rules! fwd_unop_x2 {
    ($fn:ident) => {
        #[inline(always)]
        fn $fn(self) -> Self {
            x2::new([self.0[0].$fn(), self.0[1].$fn()])
        }
    };
}
// Fixed-amount 32-bit word rotations, forwarded lane-wise via `fwd_unop_x2!`.
impl<W, G> RotateEachWord32 for x2<W, G>
where
    W: Copy + RotateEachWord32,
{
    fwd_unop_x2!(rotate_each_word_right7);
    fwd_unop_x2!(rotate_each_word_right8);
    fwd_unop_x2!(rotate_each_word_right11);
    fwd_unop_x2!(rotate_each_word_right12);
    fwd_unop_x2!(rotate_each_word_right16);
    fwd_unop_x2!(rotate_each_word_right20);
    fwd_unop_x2!(rotate_each_word_right24);
    fwd_unop_x2!(rotate_each_word_right25);
}
// 64-bit word rotation (right by 32), forwarded lane-wise.
impl<W, G> RotateEachWord64 for x2<W, G>
where
    W: Copy + RotateEachWord64,
{
    fwd_unop_x2!(rotate_each_word_right32);
}
70 | impl<W, G> RotateEachWord128 for x2<W, G> where W: RotateEachWord128 {} |
71 | impl<W, G> BitOps0 for x2<W, G> |
72 | where |
73 | W: BitOps0, |
74 | G: Copy, |
75 | { |
76 | } |
77 | impl<W, G> BitOps32 for x2<W, G> |
78 | where |
79 | W: BitOps32 + BitOps0, |
80 | G: Copy, |
81 | { |
82 | } |
83 | impl<W, G> BitOps64 for x2<W, G> |
84 | where |
85 | W: BitOps64 + BitOps0, |
86 | G: Copy, |
87 | { |
88 | } |
89 | impl<W, G> BitOps128 for x2<W, G> |
90 | where |
91 | W: BitOps128 + BitOps0, |
92 | G: Copy, |
93 | { |
94 | } |
// Lane-wise bitwise operators: the std ops plus the crate's `AndNot`,
// and their compound-assignment counterparts.
fwd_binop_x2!(BitAnd, bitand);
fwd_binop_x2!(BitOr, bitor);
fwd_binop_x2!(BitXor, bitxor);
fwd_binop_x2!(AndNot, andnot);
fwd_binop_assign_x2!(BitAndAssign, bitand_assign);
fwd_binop_assign_x2!(BitOrAssign, bitor_assign);
fwd_binop_assign_x2!(BitXorAssign, bitxor_assign);
102 | impl<W, G> ArithOps for x2<W, G> |
103 | where |
104 | W: ArithOps, |
105 | G: Copy, |
106 | { |
107 | } |
// Lane-wise addition and in-place addition.
fwd_binop_x2!(Add, add);
fwd_binop_assign_x2!(AddAssign, add_assign);
110 | impl<W: Not + Copy, G> Not for x2<W, G> { |
111 | type Output = x2<W::Output, G>; |
112 | #[inline (always)] |
113 | fn not(self) -> Self::Output { |
114 | x2::new([self.0[0].not(), self.0[1].not()]) |
115 | } |
116 | } |
117 | impl<W, G> UnsafeFrom<[W; 2]> for x2<W, G> { |
118 | #[inline (always)] |
119 | unsafe fn unsafe_from(xs: [W; 2]) -> Self { |
120 | x2::new(xs) |
121 | } |
122 | } |
123 | impl<W: Copy, G> Vec2<W> for x2<W, G> { |
124 | #[inline (always)] |
125 | fn extract(self, i: u32) -> W { |
126 | self.0[i as usize] |
127 | } |
128 | #[inline (always)] |
129 | fn insert(mut self, w: W, i: u32) -> Self { |
130 | self.0[i as usize] = w; |
131 | self |
132 | } |
133 | } |
134 | impl<W: Copy + Store<vec128_storage>, G> Store<vec256_storage> for x2<W, G> { |
135 | #[inline (always)] |
136 | unsafe fn unpack(p: vec256_storage) -> Self { |
137 | let p: [vec128_storage; 2] = p.split128(); |
138 | x2::new([W::unpack(p[0]), W::unpack(p[1])]) |
139 | } |
140 | } |
141 | impl<W, G> From<x2<W, G>> for vec256_storage |
142 | where |
143 | W: Copy, |
144 | vec128_storage: From<W>, |
145 | { |
146 | #[inline (always)] |
147 | fn from(x: x2<W, G>) -> Self { |
148 | vec256_storage::new128([x.0[0].into(), x.0[1].into()]) |
149 | } |
150 | } |
// Intra-lane swap operations at various granularities, forwarded to each lane.
impl<W, G> Swap64 for x2<W, G>
where
    W: Swap64 + Copy,
{
    fwd_unop_x2!(swap1);
    fwd_unop_x2!(swap2);
    fwd_unop_x2!(swap4);
    fwd_unop_x2!(swap8);
    fwd_unop_x2!(swap16);
    fwd_unop_x2!(swap32);
    fwd_unop_x2!(swap64);
}
163 | impl<W: Copy, G> MultiLane<[W; 2]> for x2<W, G> { |
164 | #[inline (always)] |
165 | fn to_lanes(self) -> [W; 2] { |
166 | self.0 |
167 | } |
168 | #[inline (always)] |
169 | fn from_lanes(lanes: [W; 2]) -> Self { |
170 | x2::new(xs:lanes) |
171 | } |
172 | } |
173 | impl<W: BSwap + Copy, G> BSwap for x2<W, G> { |
174 | #[inline (always)] |
175 | fn bswap(self) -> Self { |
176 | x2::new([self.0[0].bswap(), self.0[1].bswap()]) |
177 | } |
178 | } |
179 | impl<W: StoreBytes + BSwap + Copy, G> StoreBytes for x2<W, G> { |
180 | #[inline (always)] |
181 | unsafe fn unsafe_read_le(input: &[u8]) -> Self { |
182 | let input: (&[u8], &[u8]) = input.split_at(mid:input.len() / 2); |
183 | x2::new([W::unsafe_read_le(input.0), W::unsafe_read_le(input.1)]) |
184 | } |
185 | #[inline (always)] |
186 | unsafe fn unsafe_read_be(input: &[u8]) -> Self { |
187 | let input: (&[u8], &[u8]) = input.split_at(mid:input.len() / 2); |
188 | x2::new([W::unsafe_read_be(input.0), W::unsafe_read_be(input.1)]) |
189 | } |
190 | #[inline (always)] |
191 | fn write_le(self, out: &mut [u8]) { |
192 | let out: (&mut [u8], &mut [u8]) = out.split_at_mut(mid:out.len() / 2); |
193 | self.0[0].write_le(out.0); |
194 | self.0[1].write_le(out.1); |
195 | } |
196 | #[inline (always)] |
197 | fn write_be(self, out: &mut [u8]) { |
198 | let out: (&mut [u8], &mut [u8]) = out.split_at_mut(mid:out.len() / 2); |
199 | self.0[0].write_be(out.0); |
200 | self.0[1].write_be(out.1); |
201 | } |
202 | } |
203 | impl<W: Copy + LaneWords4, G: Copy> LaneWords4 for x2<W, G> { |
204 | #[inline (always)] |
205 | fn shuffle_lane_words2301(self) -> Self { |
206 | Self::new([ |
207 | self.0[0].shuffle_lane_words2301(), |
208 | self.0[1].shuffle_lane_words2301(), |
209 | ]) |
210 | } |
211 | #[inline (always)] |
212 | fn shuffle_lane_words1230(self) -> Self { |
213 | Self::new([ |
214 | self.0[0].shuffle_lane_words1230(), |
215 | self.0[1].shuffle_lane_words1230(), |
216 | ]) |
217 | } |
218 | #[inline (always)] |
219 | fn shuffle_lane_words3012(self) -> Self { |
220 | Self::new([ |
221 | self.0[0].shuffle_lane_words3012(), |
222 | self.0[1].shuffle_lane_words3012(), |
223 | ]) |
224 | } |
225 | } |
226 | |
// Generic four-lane wrapper: emulates a 512-bit SIMD vector as four `W`
// lanes. Unlike `x2`, no phantom marker parameter is carried.
zerocopy::cryptocorrosion_derive_traits! {
    #[repr(transparent)]
    #[derive (Copy, Clone, Default)]
    #[allow (non_camel_case_types)]
    pub struct x4<W>(pub [W; 4]);
}
233 | |
234 | impl<W> x4<W> { |
235 | #[inline (always)] |
236 | pub fn new(xs: [W; 4]) -> Self { |
237 | x4(xs) |
238 | } |
239 | } |
// Implements a binary-operator trait ($trait, with method $fn) for `x4<W>`
// by forwarding the operation to each of the four lanes.
macro_rules! fwd_binop_x4 {
    ($trait:ident, $fn:ident) => {
        impl<W: $trait + Copy> $trait for x4<W> {
            type Output = x4<W::Output>;
            #[inline(always)]
            fn $fn(self, rhs: Self) -> Self::Output {
                x4([
                    self.0[0].$fn(rhs.0[0]),
                    self.0[1].$fn(rhs.0[1]),
                    self.0[2].$fn(rhs.0[2]),
                    self.0[3].$fn(rhs.0[3]),
                ])
            }
        }
    };
}
// Implements a compound-assignment trait ($trait, with method $fn_assign) for
// `x4<W>` by applying the assignment to each lane in place.
macro_rules! fwd_binop_assign_x4 {
    ($trait:ident, $fn_assign:ident) => {
        impl<W: $trait + Copy> $trait for x4<W> {
            #[inline(always)]
            fn $fn_assign(&mut self, rhs: Self) {
                self.0[0].$fn_assign(rhs.0[0]);
                self.0[1].$fn_assign(rhs.0[1]);
                self.0[2].$fn_assign(rhs.0[2]);
                self.0[3].$fn_assign(rhs.0[3]);
            }
        }
    };
}
// Expands to a single `self -> Self` method that forwards the named unary
// operation to all four lanes; invoked inside trait impl blocks below.
macro_rules! fwd_unop_x4 {
    ($fn:ident) => {
        #[inline(always)]
        fn $fn(self) -> Self {
            x4([
                self.0[0].$fn(),
                self.0[1].$fn(),
                self.0[2].$fn(),
                self.0[3].$fn(),
            ])
        }
    };
}
// Fixed-amount 32-bit word rotations, forwarded lane-wise via `fwd_unop_x4!`.
impl<W> RotateEachWord32 for x4<W>
where
    W: Copy + RotateEachWord32,
{
    fwd_unop_x4!(rotate_each_word_right7);
    fwd_unop_x4!(rotate_each_word_right8);
    fwd_unop_x4!(rotate_each_word_right11);
    fwd_unop_x4!(rotate_each_word_right12);
    fwd_unop_x4!(rotate_each_word_right16);
    fwd_unop_x4!(rotate_each_word_right20);
    fwd_unop_x4!(rotate_each_word_right24);
    fwd_unop_x4!(rotate_each_word_right25);
}
// 64-bit word rotation (right by 32), forwarded lane-wise.
impl<W> RotateEachWord64 for x4<W>
where
    W: Copy + RotateEachWord64,
{
    fwd_unop_x4!(rotate_each_word_right32);
}
301 | impl<W> RotateEachWord128 for x4<W> where W: RotateEachWord128 {} |
302 | impl<W> BitOps0 for x4<W> where W: BitOps0 {} |
303 | impl<W> BitOps32 for x4<W> where W: BitOps32 + BitOps0 {} |
304 | impl<W> BitOps64 for x4<W> where W: BitOps64 + BitOps0 {} |
305 | impl<W> BitOps128 for x4<W> where W: BitOps128 + BitOps0 {} |
// Lane-wise bitwise and arithmetic operator forwarding for `x4`, plus the
// `ArithOps` marker (empty body; capability follows the lane type).
fwd_binop_x4!(BitAnd, bitand);
fwd_binop_x4!(BitOr, bitor);
fwd_binop_x4!(BitXor, bitxor);
fwd_binop_x4!(AndNot, andnot);
fwd_binop_assign_x4!(BitAndAssign, bitand_assign);
fwd_binop_assign_x4!(BitOrAssign, bitor_assign);
fwd_binop_assign_x4!(BitXorAssign, bitxor_assign);
impl<W> ArithOps for x4<W> where W: ArithOps {}
fwd_binop_x4!(Add, add);
fwd_binop_assign_x4!(AddAssign, add_assign);
316 | impl<W: Not + Copy> Not for x4<W> { |
317 | type Output = x4<W::Output>; |
318 | #[inline (always)] |
319 | fn not(self) -> Self::Output { |
320 | x4([ |
321 | self.0[0].not(), |
322 | self.0[1].not(), |
323 | self.0[2].not(), |
324 | self.0[3].not(), |
325 | ]) |
326 | } |
327 | } |
328 | impl<W> UnsafeFrom<[W; 4]> for x4<W> { |
329 | #[inline (always)] |
330 | unsafe fn unsafe_from(xs: [W; 4]) -> Self { |
331 | x4(xs) |
332 | } |
333 | } |
334 | impl<W: Copy> Vec4<W> for x4<W> { |
335 | #[inline (always)] |
336 | fn extract(self, i: u32) -> W { |
337 | self.0[i as usize] |
338 | } |
339 | #[inline (always)] |
340 | fn insert(mut self, w: W, i: u32) -> Self { |
341 | self.0[i as usize] = w; |
342 | self |
343 | } |
344 | } |
345 | impl<W: Copy> Vec4Ext<W> for x4<W> { |
346 | #[inline (always)] |
347 | fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self) |
348 | where |
349 | Self: Sized, |
350 | { |
351 | ( |
352 | x4([a.0[0], b.0[0], c.0[0], d.0[0]]), |
353 | x4([a.0[1], b.0[1], c.0[1], d.0[1]]), |
354 | x4([a.0[2], b.0[2], c.0[2], d.0[2]]), |
355 | x4([a.0[3], b.0[3], c.0[3], d.0[3]]), |
356 | ) |
357 | } |
358 | } |
359 | impl<W: Copy + Store<vec128_storage>> Store<vec512_storage> for x4<W> { |
360 | #[inline (always)] |
361 | unsafe fn unpack(p: vec512_storage) -> Self { |
362 | let p: [vec128_storage; 4] = p.split128(); |
363 | x4([ |
364 | W::unpack(p[0]), |
365 | W::unpack(p[1]), |
366 | W::unpack(p[2]), |
367 | W::unpack(p[3]), |
368 | ]) |
369 | } |
370 | } |
371 | impl<W> From<x4<W>> for vec512_storage |
372 | where |
373 | W: Copy, |
374 | vec128_storage: From<W>, |
375 | { |
376 | #[inline (always)] |
377 | fn from(x: x4<W>) -> Self { |
378 | vec512_storage::new128([x.0[0].into(), x.0[1].into(), x.0[2].into(), x.0[3].into()]) |
379 | } |
380 | } |
// Intra-lane swap operations at various granularities, forwarded to each lane.
impl<W> Swap64 for x4<W>
where
    W: Swap64 + Copy,
{
    fwd_unop_x4!(swap1);
    fwd_unop_x4!(swap2);
    fwd_unop_x4!(swap4);
    fwd_unop_x4!(swap8);
    fwd_unop_x4!(swap16);
    fwd_unop_x4!(swap32);
    fwd_unop_x4!(swap64);
}
393 | impl<W: Copy> MultiLane<[W; 4]> for x4<W> { |
394 | #[inline (always)] |
395 | fn to_lanes(self) -> [W; 4] { |
396 | self.0 |
397 | } |
398 | #[inline (always)] |
399 | fn from_lanes(lanes: [W; 4]) -> Self { |
400 | x4(lanes) |
401 | } |
402 | } |
403 | impl<W: BSwap + Copy> BSwap for x4<W> { |
404 | #[inline (always)] |
405 | fn bswap(self) -> Self { |
406 | x4([ |
407 | self.0[0].bswap(), |
408 | self.0[1].bswap(), |
409 | self.0[2].bswap(), |
410 | self.0[3].bswap(), |
411 | ]) |
412 | } |
413 | } |
414 | impl<W: StoreBytes + BSwap + Copy> StoreBytes for x4<W> { |
415 | #[inline (always)] |
416 | unsafe fn unsafe_read_le(input: &[u8]) -> Self { |
417 | let n = input.len() / 4; |
418 | x4([ |
419 | W::unsafe_read_le(&input[..n]), |
420 | W::unsafe_read_le(&input[n..n * 2]), |
421 | W::unsafe_read_le(&input[n * 2..n * 3]), |
422 | W::unsafe_read_le(&input[n * 3..]), |
423 | ]) |
424 | } |
425 | #[inline (always)] |
426 | unsafe fn unsafe_read_be(input: &[u8]) -> Self { |
427 | let n = input.len() / 4; |
428 | x4([ |
429 | W::unsafe_read_be(&input[..n]), |
430 | W::unsafe_read_be(&input[n..n * 2]), |
431 | W::unsafe_read_be(&input[n * 2..n * 3]), |
432 | W::unsafe_read_be(&input[n * 3..]), |
433 | ]) |
434 | } |
435 | #[inline (always)] |
436 | fn write_le(self, out: &mut [u8]) { |
437 | let n = out.len() / 4; |
438 | self.0[0].write_le(&mut out[..n]); |
439 | self.0[1].write_le(&mut out[n..n * 2]); |
440 | self.0[2].write_le(&mut out[n * 2..n * 3]); |
441 | self.0[3].write_le(&mut out[n * 3..]); |
442 | } |
443 | #[inline (always)] |
444 | fn write_be(self, out: &mut [u8]) { |
445 | let n = out.len() / 4; |
446 | self.0[0].write_be(&mut out[..n]); |
447 | self.0[1].write_be(&mut out[n..n * 2]); |
448 | self.0[2].write_be(&mut out[n * 2..n * 3]); |
449 | self.0[3].write_be(&mut out[n * 3..]); |
450 | } |
451 | } |
452 | impl<W: Copy + LaneWords4> LaneWords4 for x4<W> { |
453 | #[inline (always)] |
454 | fn shuffle_lane_words2301(self) -> Self { |
455 | x4([ |
456 | self.0[0].shuffle_lane_words2301(), |
457 | self.0[1].shuffle_lane_words2301(), |
458 | self.0[2].shuffle_lane_words2301(), |
459 | self.0[3].shuffle_lane_words2301(), |
460 | ]) |
461 | } |
462 | #[inline (always)] |
463 | fn shuffle_lane_words1230(self) -> Self { |
464 | x4([ |
465 | self.0[0].shuffle_lane_words1230(), |
466 | self.0[1].shuffle_lane_words1230(), |
467 | self.0[2].shuffle_lane_words1230(), |
468 | self.0[3].shuffle_lane_words1230(), |
469 | ]) |
470 | } |
471 | #[inline (always)] |
472 | fn shuffle_lane_words3012(self) -> Self { |
473 | x4([ |
474 | self.0[0].shuffle_lane_words3012(), |
475 | self.0[1].shuffle_lane_words3012(), |
476 | self.0[2].shuffle_lane_words3012(), |
477 | self.0[3].shuffle_lane_words3012(), |
478 | ]) |
479 | } |
480 | } |
481 | |