1//! Implement 256- and 512- bit in terms of 128-bit, for machines without native wide SIMD.
2
3use crate::types::*;
4use crate::{vec128_storage, vec256_storage, vec512_storage};
5use core::marker::PhantomData;
6use core::ops::*;
7
/// A pair of lanes of type `W`, emulating a 256-bit vector as two
/// 128-bit halves. `G` is a zero-sized machine/feature marker carried in
/// `PhantomData` so distinct machine families get distinct types.
#[derive(Copy, Clone, Default)]
#[allow(non_camel_case_types)]
pub struct x2<W, G>(pub [W; 2], PhantomData<G>);
impl<W, G> x2<W, G> {
    /// Wrap two lanes into an `x2`.
    #[inline(always)]
    pub fn new(xs: [W; 2]) -> Self {
        x2(xs, PhantomData)
    }
}
// Implements a lane-wise binary operator trait (e.g. `BitAnd`) for `x2<W, G>`
// by forwarding the operation to each of the two halves independently.
macro_rules! fwd_binop_x2 {
    ($trait:ident, $fn:ident) => {
        impl<W: $trait + Copy, G> $trait for x2<W, G> {
            type Output = x2<W::Output, G>;
            #[inline(always)]
            fn $fn(self, rhs: Self) -> Self::Output {
                x2::new([self.0[0].$fn(rhs.0[0]), self.0[1].$fn(rhs.0[1])])
            }
        }
    };
}
// Implements a compound-assignment trait (e.g. `BitAndAssign`) for `x2<W, G>`
// by applying the assignment to each half in place.
macro_rules! fwd_binop_assign_x2 {
    ($trait:ident, $fn_assign:ident) => {
        impl<W: $trait + Copy, G> $trait for x2<W, G> {
            #[inline(always)]
            fn $fn_assign(&mut self, rhs: Self) {
                (self.0[0]).$fn_assign(rhs.0[0]);
                (self.0[1]).$fn_assign(rhs.0[1]);
            }
        }
    };
}
// Emits the body of a unary trait method for `x2`: forwards the named method
// to both halves. Intended for use *inside* an `impl` block (it expands to a
// `fn` item, not a full `impl`).
macro_rules! fwd_unop_x2 {
    ($fn:ident) => {
        #[inline(always)]
        fn $fn(self) -> Self {
            x2::new([self.0[0].$fn(), self.0[1].$fn()])
        }
    };
}
// Fixed-amount word rotations: each is forwarded to both halves.
impl<W, G> RotateEachWord32 for x2<W, G>
where
    W: Copy + RotateEachWord32,
{
    fwd_unop_x2!(rotate_each_word_right7);
    fwd_unop_x2!(rotate_each_word_right8);
    fwd_unop_x2!(rotate_each_word_right11);
    fwd_unop_x2!(rotate_each_word_right12);
    fwd_unop_x2!(rotate_each_word_right16);
    fwd_unop_x2!(rotate_each_word_right20);
    fwd_unop_x2!(rotate_each_word_right24);
    fwd_unop_x2!(rotate_each_word_right25);
}
impl<W, G> RotateEachWord64 for x2<W, G>
where
    W: Copy + RotateEachWord64,
{
    fwd_unop_x2!(rotate_each_word_right32);
}
// Marker trait: no methods of its own to forward.
impl<W, G> RotateEachWord128 for x2<W, G> where W: RotateEachWord128 {}
// Empty marker impls: the BitOps* traits bundle the operator traits supplied
// by the fwd_binop_* macro invocations below; nothing to implement here.
impl<W, G> BitOps0 for x2<W, G>
where
    W: BitOps0,
    G: Copy,
{
}
impl<W, G> BitOps32 for x2<W, G>
where
    W: BitOps32 + BitOps0,
    G: Copy,
{
}
impl<W, G> BitOps64 for x2<W, G>
where
    W: BitOps64 + BitOps0,
    G: Copy,
{
}
impl<W, G> BitOps128 for x2<W, G>
where
    W: BitOps128 + BitOps0,
    G: Copy,
{
}
// Lane-wise bitwise operators and their compound-assignment forms for x2.
fwd_binop_x2!(BitAnd, bitand);
fwd_binop_x2!(BitOr, bitor);
fwd_binop_x2!(BitXor, bitxor);
fwd_binop_x2!(AndNot, andnot);
fwd_binop_assign_x2!(BitAndAssign, bitand_assign);
fwd_binop_assign_x2!(BitOrAssign, bitor_assign);
fwd_binop_assign_x2!(BitXorAssign, bitxor_assign);
// Arithmetic: marker impl plus lane-wise addition for x2.
impl<W, G> ArithOps for x2<W, G>
where
    W: ArithOps,
    G: Copy,
{
}
fwd_binop_x2!(Add, add);
fwd_binop_assign_x2!(AddAssign, add_assign);
106impl<W: Not + Copy, G> Not for x2<W, G> {
107 type Output = x2<W::Output, G>;
108 #[inline(always)]
109 fn not(self) -> Self::Output {
110 x2::new([self.0[0].not(), self.0[1].not()])
111 }
112}
113impl<W, G> UnsafeFrom<[W; 2]> for x2<W, G> {
114 #[inline(always)]
115 unsafe fn unsafe_from(xs: [W; 2]) -> Self {
116 x2::new(xs)
117 }
118}
119impl<W: Copy, G> Vec2<W> for x2<W, G> {
120 #[inline(always)]
121 fn extract(self, i: u32) -> W {
122 self.0[i as usize]
123 }
124 #[inline(always)]
125 fn insert(mut self, w: W, i: u32) -> Self {
126 self.0[i as usize] = w;
127 self
128 }
129}
impl<W: Copy + Store<vec128_storage>, G> Store<vec256_storage> for x2<W, G> {
    /// Split a 256-bit storage value into two 128-bit halves and unpack each
    /// into a lane.
    ///
    /// # Safety
    /// Inherits `W::unpack`'s contract: each 128-bit half of `p` must be a
    /// valid bit pattern for `W`.
    #[inline(always)]
    unsafe fn unpack(p: vec256_storage) -> Self {
        let p: [vec128_storage; 2] = p.split128();
        x2::new([W::unpack(p[0]), W::unpack(p[1])])
    }
}
137impl<W, G> From<x2<W, G>> for vec256_storage
138where
139 W: Copy,
140 vec128_storage: From<W>,
141{
142 #[inline(always)]
143 fn from(x: x2<W, G>) -> Self {
144 vec256_storage::new128([x.0[0].into(), x.0[1].into()])
145 }
146}
// All swap granularities are forwarded to both halves.
impl<W, G> Swap64 for x2<W, G>
where
    W: Swap64 + Copy,
{
    fwd_unop_x2!(swap1);
    fwd_unop_x2!(swap2);
    fwd_unop_x2!(swap4);
    fwd_unop_x2!(swap8);
    fwd_unop_x2!(swap16);
    fwd_unop_x2!(swap32);
    fwd_unop_x2!(swap64);
}
159impl<W: Copy, G> MultiLane<[W; 2]> for x2<W, G> {
160 #[inline(always)]
161 fn to_lanes(self) -> [W; 2] {
162 self.0
163 }
164 #[inline(always)]
165 fn from_lanes(lanes: [W; 2]) -> Self {
166 x2::new(xs:lanes)
167 }
168}
169impl<W: BSwap + Copy, G> BSwap for x2<W, G> {
170 #[inline(always)]
171 fn bswap(self) -> Self {
172 x2::new([self.0[0].bswap(), self.0[1].bswap()])
173 }
174}
175impl<W: StoreBytes + BSwap + Copy, G> StoreBytes for x2<W, G> {
176 #[inline(always)]
177 unsafe fn unsafe_read_le(input: &[u8]) -> Self {
178 let input: (&[u8], &[u8]) = input.split_at(mid:input.len() / 2);
179 x2::new([W::unsafe_read_le(input:input.0), W::unsafe_read_le(input:input.1)])
180 }
181 #[inline(always)]
182 unsafe fn unsafe_read_be(input: &[u8]) -> Self {
183 let input: (&[u8], &[u8]) = input.split_at(mid:input.len() / 2);
184 x2::new([W::unsafe_read_be(input:input.0), W::unsafe_read_be(input:input.1)])
185 }
186 #[inline(always)]
187 fn write_le(self, out: &mut [u8]) {
188 let out: (&mut [u8], &mut [u8]) = out.split_at_mut(mid:out.len() / 2);
189 self.0[0].write_le(out:out.0);
190 self.0[1].write_le(out:out.1);
191 }
192 #[inline(always)]
193 fn write_be(self, out: &mut [u8]) {
194 let out: (&mut [u8], &mut [u8]) = out.split_at_mut(mid:out.len() / 2);
195 self.0[0].write_be(out:out.0);
196 self.0[1].write_be(out:out.1);
197 }
198}
199impl<W: Copy + LaneWords4, G: Copy> LaneWords4 for x2<W, G> {
200 #[inline(always)]
201 fn shuffle_lane_words2301(self) -> Self {
202 Self::new([
203 self.0[0].shuffle_lane_words2301(),
204 self.0[1].shuffle_lane_words2301(),
205 ])
206 }
207 #[inline(always)]
208 fn shuffle_lane_words1230(self) -> Self {
209 Self::new([
210 self.0[0].shuffle_lane_words1230(),
211 self.0[1].shuffle_lane_words1230(),
212 ])
213 }
214 #[inline(always)]
215 fn shuffle_lane_words3012(self) -> Self {
216 Self::new([
217 self.0[0].shuffle_lane_words3012(),
218 self.0[1].shuffle_lane_words3012(),
219 ])
220 }
221}
222
/// Four lanes of type `W`, emulating a 512-bit vector as four 128-bit
/// quarters. Unlike `x2`, no machine marker is carried here.
#[derive(Copy, Clone, Default)]
#[allow(non_camel_case_types)]
pub struct x4<W>(pub [W; 4]);
impl<W> x4<W> {
    /// Wrap four lanes into an `x4`.
    #[inline(always)]
    pub fn new(xs: [W; 4]) -> Self {
        x4(xs)
    }
}
// Implements a lane-wise binary operator trait for `x4<W>` by forwarding the
// operation to all four lanes independently.
macro_rules! fwd_binop_x4 {
    ($trait:ident, $fn:ident) => {
        impl<W: $trait + Copy> $trait for x4<W> {
            type Output = x4<W::Output>;
            #[inline(always)]
            fn $fn(self, rhs: Self) -> Self::Output {
                x4([
                    self.0[0].$fn(rhs.0[0]),
                    self.0[1].$fn(rhs.0[1]),
                    self.0[2].$fn(rhs.0[2]),
                    self.0[3].$fn(rhs.0[3]),
                ])
            }
        }
    };
}
// Implements a compound-assignment trait for `x4<W>` by applying the
// assignment to all four lanes in place.
macro_rules! fwd_binop_assign_x4 {
    ($trait:ident, $fn_assign:ident) => {
        impl<W: $trait + Copy> $trait for x4<W> {
            #[inline(always)]
            fn $fn_assign(&mut self, rhs: Self) {
                self.0[0].$fn_assign(rhs.0[0]);
                self.0[1].$fn_assign(rhs.0[1]);
                self.0[2].$fn_assign(rhs.0[2]);
                self.0[3].$fn_assign(rhs.0[3]);
            }
        }
    };
}
// Emits the body of a unary trait method for `x4`: forwards the named method
// to all four lanes. Intended for use *inside* an `impl` block.
macro_rules! fwd_unop_x4 {
    ($fn:ident) => {
        #[inline(always)]
        fn $fn(self) -> Self {
            x4([
                self.0[0].$fn(),
                self.0[1].$fn(),
                self.0[2].$fn(),
                self.0[3].$fn(),
            ])
        }
    };
}
// Fixed-amount word rotations: each is forwarded to all four lanes.
impl<W> RotateEachWord32 for x4<W>
where
    W: Copy + RotateEachWord32,
{
    fwd_unop_x4!(rotate_each_word_right7);
    fwd_unop_x4!(rotate_each_word_right8);
    fwd_unop_x4!(rotate_each_word_right11);
    fwd_unop_x4!(rotate_each_word_right12);
    fwd_unop_x4!(rotate_each_word_right16);
    fwd_unop_x4!(rotate_each_word_right20);
    fwd_unop_x4!(rotate_each_word_right24);
    fwd_unop_x4!(rotate_each_word_right25);
}
impl<W> RotateEachWord64 for x4<W>
where
    W: Copy + RotateEachWord64,
{
    fwd_unop_x4!(rotate_each_word_right32);
}
// Marker trait: no methods of its own to forward.
impl<W> RotateEachWord128 for x4<W> where W: RotateEachWord128 {}
// Empty marker impls: the BitOps* traits bundle the operator traits supplied
// by the fwd_binop_* macro invocations below.
impl<W> BitOps0 for x4<W> where W: BitOps0 {}
impl<W> BitOps32 for x4<W> where W: BitOps32 + BitOps0 {}
impl<W> BitOps64 for x4<W> where W: BitOps64 + BitOps0 {}
impl<W> BitOps128 for x4<W> where W: BitOps128 + BitOps0 {}
// Lane-wise bitwise and arithmetic operators for x4.
fwd_binop_x4!(BitAnd, bitand);
fwd_binop_x4!(BitOr, bitor);
fwd_binop_x4!(BitXor, bitxor);
fwd_binop_x4!(AndNot, andnot);
fwd_binop_assign_x4!(BitAndAssign, bitand_assign);
fwd_binop_assign_x4!(BitOrAssign, bitor_assign);
fwd_binop_assign_x4!(BitXorAssign, bitxor_assign);
impl<W> ArithOps for x4<W> where W: ArithOps {}
fwd_binop_x4!(Add, add);
fwd_binop_assign_x4!(AddAssign, add_assign);
308impl<W: Not + Copy> Not for x4<W> {
309 type Output = x4<W::Output>;
310 #[inline(always)]
311 fn not(self) -> Self::Output {
312 x4([
313 self.0[0].not(),
314 self.0[1].not(),
315 self.0[2].not(),
316 self.0[3].not(),
317 ])
318 }
319}
320impl<W> UnsafeFrom<[W; 4]> for x4<W> {
321 #[inline(always)]
322 unsafe fn unsafe_from(xs: [W; 4]) -> Self {
323 x4(xs)
324 }
325}
326impl<W: Copy> Vec4<W> for x4<W> {
327 #[inline(always)]
328 fn extract(self, i: u32) -> W {
329 self.0[i as usize]
330 }
331 #[inline(always)]
332 fn insert(mut self, w: W, i: u32) -> Self {
333 self.0[i as usize] = w;
334 self
335 }
336}
337impl<W: Copy> Vec4Ext<W> for x4<W> {
338 #[inline(always)]
339 fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self)
340 where
341 Self: Sized,
342 {
343 (
344 x4([a.0[0], b.0[0], c.0[0], d.0[0]]),
345 x4([a.0[1], b.0[1], c.0[1], d.0[1]]),
346 x4([a.0[2], b.0[2], c.0[2], d.0[2]]),
347 x4([a.0[3], b.0[3], c.0[3], d.0[3]]),
348 )
349 }
350}
impl<W: Copy + Store<vec128_storage>> Store<vec512_storage> for x4<W> {
    /// Split a 512-bit storage value into four 128-bit quarters and unpack
    /// each into a lane.
    ///
    /// # Safety
    /// Inherits `W::unpack`'s contract: each 128-bit quarter of `p` must be
    /// a valid bit pattern for `W`.
    #[inline(always)]
    unsafe fn unpack(p: vec512_storage) -> Self {
        let p: [vec128_storage; 4] = p.split128();
        x4([
            W::unpack(p[0]),
            W::unpack(p[1]),
            W::unpack(p[2]),
            W::unpack(p[3]),
        ])
    }
}
363impl<W> From<x4<W>> for vec512_storage
364where
365 W: Copy,
366 vec128_storage: From<W>,
367{
368 #[inline(always)]
369 fn from(x: x4<W>) -> Self {
370 vec512_storage::new128([x.0[0].into(), x.0[1].into(), x.0[2].into(), x.0[3].into()])
371 }
372}
// All swap granularities are forwarded to all four lanes.
impl<W> Swap64 for x4<W>
where
    W: Swap64 + Copy,
{
    fwd_unop_x4!(swap1);
    fwd_unop_x4!(swap2);
    fwd_unop_x4!(swap4);
    fwd_unop_x4!(swap8);
    fwd_unop_x4!(swap16);
    fwd_unop_x4!(swap32);
    fwd_unop_x4!(swap64);
}
385impl<W: Copy> MultiLane<[W; 4]> for x4<W> {
386 #[inline(always)]
387 fn to_lanes(self) -> [W; 4] {
388 self.0
389 }
390 #[inline(always)]
391 fn from_lanes(lanes: [W; 4]) -> Self {
392 x4(lanes)
393 }
394}
395impl<W: BSwap + Copy> BSwap for x4<W> {
396 #[inline(always)]
397 fn bswap(self) -> Self {
398 x4([
399 self.0[0].bswap(),
400 self.0[1].bswap(),
401 self.0[2].bswap(),
402 self.0[3].bswap(),
403 ])
404 }
405}
impl<W: StoreBytes + BSwap + Copy> StoreBytes for x4<W> {
    /// Read little-endian bytes: `input` is cut into four quarters of
    /// `input.len() / 4` bytes; the last quarter also absorbs any remainder.
    ///
    /// # Safety
    /// Inherits `W::unsafe_read_le`'s contract for each quarter-slice.
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        // Quarter size; truncating division, so the tail slice `n * 3..`
        // may be slightly longer than the others.
        let n = input.len() / 4;
        x4([
            W::unsafe_read_le(&input[..n]),
            W::unsafe_read_le(&input[n..n * 2]),
            W::unsafe_read_le(&input[n * 2..n * 3]),
            W::unsafe_read_le(&input[n * 3..]),
        ])
    }
    /// Big-endian counterpart of `unsafe_read_le`.
    ///
    /// # Safety
    /// Inherits `W::unsafe_read_be`'s contract for each quarter-slice.
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        let n = input.len() / 4;
        x4([
            W::unsafe_read_be(&input[..n]),
            W::unsafe_read_be(&input[n..n * 2]),
            W::unsafe_read_be(&input[n * 2..n * 3]),
            W::unsafe_read_be(&input[n * 3..]),
        ])
    }
    /// Write all lanes little-endian, lane `k` into quarter `k` of `out`.
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        let n = out.len() / 4;
        self.0[0].write_le(&mut out[..n]);
        self.0[1].write_le(&mut out[n..n * 2]);
        self.0[2].write_le(&mut out[n * 2..n * 3]);
        self.0[3].write_le(&mut out[n * 3..]);
    }
    /// Big-endian counterpart of `write_le`.
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        let n = out.len() / 4;
        self.0[0].write_be(&mut out[..n]);
        self.0[1].write_be(&mut out[n..n * 2]);
        self.0[2].write_be(&mut out[n * 2..n * 3]);
        self.0[3].write_be(&mut out[n * 3..]);
    }
}
444impl<W: Copy + LaneWords4> LaneWords4 for x4<W> {
445 #[inline(always)]
446 fn shuffle_lane_words2301(self) -> Self {
447 x4([
448 self.0[0].shuffle_lane_words2301(),
449 self.0[1].shuffle_lane_words2301(),
450 self.0[2].shuffle_lane_words2301(),
451 self.0[3].shuffle_lane_words2301(),
452 ])
453 }
454 #[inline(always)]
455 fn shuffle_lane_words1230(self) -> Self {
456 x4([
457 self.0[0].shuffle_lane_words1230(),
458 self.0[1].shuffle_lane_words1230(),
459 self.0[2].shuffle_lane_words1230(),
460 self.0[3].shuffle_lane_words1230(),
461 ])
462 }
463 #[inline(always)]
464 fn shuffle_lane_words3012(self) -> Self {
465 x4([
466 self.0[0].shuffle_lane_words3012(),
467 self.0[1].shuffle_lane_words3012(),
468 self.0[2].shuffle_lane_words3012(),
469 self.0[3].shuffle_lane_words3012(),
470 ])
471 }
472}
473