1#![allow(non_camel_case_types)]
2
3use crate::soft::{x2, x4};
4use crate::types::*;
5use core::ops::*;
6
/// 128-bit opaque SIMD storage for the generic (pure-software) backend.
///
/// Both fields alias the same 16 bytes (`repr(C)` union), so the helpers in
/// this module may write one view and read the other to reinterpret the bits.
#[repr(C)]
#[derive(Clone, Copy)]
pub union vec128_storage {
    d: [u32; 4], // four 32-bit words
    q: [u64; 2], // two 64-bit words
}
13impl From<[u32; 4]> for vec128_storage {
14 #[inline(always)]
15 fn from(d: [u32; 4]) -> Self {
16 Self { d }
17 }
18}
19impl From<vec128_storage> for [u32; 4] {
20 #[inline(always)]
21 fn from(d: vec128_storage) -> Self {
22 unsafe { d.d }
23 }
24}
25impl From<[u64; 2]> for vec128_storage {
26 #[inline(always)]
27 fn from(q: [u64; 2]) -> Self {
28 Self { q }
29 }
30}
31impl From<vec128_storage> for [u64; 2] {
32 #[inline(always)]
33 fn from(q: vec128_storage) -> Self {
34 unsafe { q.q }
35 }
36}
37impl Default for vec128_storage {
38 #[inline(always)]
39 fn default() -> Self {
40 Self { q: [0, 0] }
41 }
42}
43impl Eq for vec128_storage {}
44impl PartialEq<vec128_storage> for vec128_storage {
45 #[inline(always)]
46 fn eq(&self, rhs: &Self) -> bool {
47 unsafe { self.q == rhs.q }
48 }
49}
/// 256-bit storage, represented as two adjacent 128-bit halves (low first).
#[derive(Clone, Copy, PartialEq, Eq, Default)]
pub struct vec256_storage {
    v128: [vec128_storage; 2],
}
54impl vec256_storage {
55 #[inline(always)]
56 pub fn new128(v128: [vec128_storage; 2]) -> Self {
57 Self { v128 }
58 }
59 #[inline(always)]
60 pub fn split128(self) -> [vec128_storage; 2] {
61 self.v128
62 }
63}
64impl From<vec256_storage> for [u64; 4] {
65 #[inline(always)]
66 fn from(q: vec256_storage) -> Self {
67 let [a: u64, b: u64]: [u64; 2] = q.v128[0].into();
68 let [c: u64, d: u64]: [u64; 2] = q.v128[1].into();
69 [a, b, c, d]
70 }
71}
72impl From<[u64; 4]> for vec256_storage {
73 #[inline(always)]
74 fn from([a: u64, b: u64, c: u64, d: u64]: [u64; 4]) -> Self {
75 Self {
76 v128: [[a, b].into(), [c, d].into()],
77 }
78 }
79}
/// 512-bit storage, represented as four adjacent 128-bit quarters (low first).
#[derive(Clone, Copy, PartialEq, Eq, Default)]
pub struct vec512_storage {
    v128: [vec128_storage; 4],
}
84impl vec512_storage {
85 #[inline(always)]
86 pub fn new128(v128: [vec128_storage; 4]) -> Self {
87 Self { v128 }
88 }
89 #[inline(always)]
90 pub fn split128(self) -> [vec128_storage; 4] {
91 self.v128
92 }
93}
94
95#[inline(always)]
96fn dmap<T, F>(t: T, f: F) -> T
97where
98 T: Store<vec128_storage> + Into<vec128_storage>,
99 F: Fn(u32) -> u32,
100{
101 let t: vec128_storage = t.into();
102 let d: [u32; 4] = unsafe { t.d };
103 let d: vec128_storage = vec128_storage {
104 d: [f(d[0]), f(d[1]), f(d[2]), f(d[3])],
105 };
106 unsafe { T::unpack(d) }
107}
108
109fn dmap2<T, F>(a: T, b: T, f: F) -> T
110where
111 T: Store<vec128_storage> + Into<vec128_storage>,
112 F: Fn(u32, u32) -> u32,
113{
114 let a: vec128_storage = a.into();
115 let b: vec128_storage = b.into();
116 let ao: [u32; 4] = unsafe { a.d };
117 let bo: [u32; 4] = unsafe { b.d };
118 let d: vec128_storage = vec128_storage {
119 d: [
120 f(ao[0], bo[0]),
121 f(ao[1], bo[1]),
122 f(ao[2], bo[2]),
123 f(ao[3], bo[3]),
124 ],
125 };
126 unsafe { T::unpack(d) }
127}
128
129#[inline(always)]
130fn qmap<T, F>(t: T, f: F) -> T
131where
132 T: Store<vec128_storage> + Into<vec128_storage>,
133 F: Fn(u64) -> u64,
134{
135 let t: vec128_storage = t.into();
136 let q: [u64; 2] = unsafe { t.q };
137 let q: vec128_storage = vec128_storage {
138 q: [f(q[0]), f(q[1])],
139 };
140 unsafe { T::unpack(q) }
141}
142
143#[inline(always)]
144fn qmap2<T, F>(a: T, b: T, f: F) -> T
145where
146 T: Store<vec128_storage> + Into<vec128_storage>,
147 F: Fn(u64, u64) -> u64,
148{
149 let a: vec128_storage = a.into();
150 let b: vec128_storage = b.into();
151 let ao: [u64; 2] = unsafe { a.q };
152 let bo: [u64; 2] = unsafe { b.q };
153 let q: vec128_storage = vec128_storage {
154 q: [f(ao[0], bo[0]), f(ao[1], bo[1])],
155 };
156 unsafe { T::unpack(q) }
157}
158
/// Combine two 64-bit words (low word first) into one 128-bit integer.
#[inline(always)]
fn o_of_q(q: [u64; 2]) -> u128 {
    (u128::from(q[1]) << 64) | u128::from(q[0])
}
163
/// Split a 128-bit integer into two 64-bit words, low word first.
#[inline(always)]
fn q_of_o(o: u128) -> [u64; 2] {
    let low = o as u64;
    let high = (o >> 64) as u64;
    [low, high]
}
168
169#[inline(always)]
170fn omap<T, F>(a: T, f: F) -> T
171where
172 T: Store<vec128_storage> + Into<vec128_storage>,
173 F: Fn(u128) -> u128,
174{
175 let a: vec128_storage = a.into();
176 let ao: u128 = o_of_q(unsafe { a.q });
177 let o: vec128_storage = vec128_storage { q: q_of_o(f(ao)) };
178 unsafe { T::unpack(o) }
179}
180
181#[inline(always)]
182fn omap2<T, F>(a: T, b: T, f: F) -> T
183where
184 T: Store<vec128_storage> + Into<vec128_storage>,
185 F: Fn(u128, u128) -> u128,
186{
187 let a: vec128_storage = a.into();
188 let b: vec128_storage = b.into();
189 let ao: u128 = o_of_q(unsafe { a.q });
190 let bo: u128 = o_of_q(unsafe { b.q });
191 let o: vec128_storage = vec128_storage {
192 q: q_of_o(f(ao, bo)),
193 };
194 unsafe { T::unpack(o) }
195}
196
// Marker-trait impls: the generic vector types provide the bit-operation
// capabilities at each width. The operator implementations themselves come
// from the `impl_bitops!` macro below.
impl RotateEachWord128 for u128x1_generic {}
impl BitOps128 for u128x1_generic {}
impl BitOps64 for u128x1_generic {}
impl BitOps64 for u64x2_generic {}
impl BitOps32 for u128x1_generic {}
impl BitOps32 for u64x2_generic {}
impl BitOps32 for u32x4_generic {}
impl BitOps0 for u128x1_generic {}
impl BitOps0 for u64x2_generic {}
impl BitOps0 for u32x4_generic {}
207
/// Implements the bitwise operators (`Not`, `BitAnd`, `BitOr`, `BitXor`,
/// `AndNot`, the `*Assign` forms) and the `Swap64` bit-group swaps for a
/// generic vector type. All operators treat the vector as one 128-bit value
/// (via `omap`/`omap2`), which is equivalent lane-wise for pure bit ops.
macro_rules! impl_bitops {
    ($vec:ident) => {
        impl Not for $vec {
            type Output = Self;
            #[inline(always)]
            fn not(self) -> Self::Output {
                omap(self, |x| !x)
            }
        }
        impl BitAnd for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitand(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x & y)
            }
        }
        impl BitOr for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitor(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x | y)
            }
        }
        impl BitXor for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitxor(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x ^ y)
            }
        }
        impl AndNot for $vec {
            type Output = Self;
            #[inline(always)]
            fn andnot(self, rhs: Self) -> Self::Output {
                // Note the operand order: (!self) & rhs.
                omap2(self, rhs, |x, y| !x & y)
            }
        }
        impl BitAndAssign for $vec {
            #[inline(always)]
            fn bitand_assign(&mut self, rhs: Self) {
                *self = *self & rhs
            }
        }
        impl BitOrAssign for $vec {
            #[inline(always)]
            fn bitor_assign(&mut self, rhs: Self) {
                *self = *self | rhs
            }
        }
        impl BitXorAssign for $vec {
            #[inline(always)]
            fn bitxor_assign(&mut self, rhs: Self) {
                *self = *self ^ rhs
            }
        }

        // Swap adjacent bit groups of the given size, via the classic
        // mask-shift-or technique on 64-bit words (or a rotate where the
        // group swap is equivalent to one).
        impl Swap64 for $vec {
            #[inline(always)]
            fn swap1(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x5555555555555555) << 1) | ((x & 0xaaaaaaaaaaaaaaaa) >> 1)
                })
            }
            #[inline(always)]
            fn swap2(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x3333333333333333) << 2) | ((x & 0xcccccccccccccccc) >> 2)
                })
            }
            #[inline(always)]
            fn swap4(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x0f0f0f0f0f0f0f0f) << 4) | ((x & 0xf0f0f0f0f0f0f0f0) >> 4)
                })
            }
            #[inline(always)]
            fn swap8(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8)
                })
            }
            #[inline(always)]
            fn swap16(self) -> Self {
                // Swapping 16-bit halves of each u32 is a 16-bit rotate.
                dmap(self, |x| x.rotate_left(16))
            }
            #[inline(always)]
            fn swap32(self) -> Self {
                // Swapping 32-bit halves of each u64 is a 32-bit rotate.
                qmap(self, |x| x.rotate_left(32))
            }
            #[inline(always)]
            fn swap64(self) -> Self {
                // Swapping the two 64-bit halves is a 64-bit rotate of the u128.
                omap(self, |x| (x << 64) | (x >> 64))
            }
        }
    };
}
impl_bitops!(u32x4_generic);
impl_bitops!(u64x2_generic);
impl_bitops!(u128x1_generic);
307
// Per-word right rotations for the rotation counts used by the supported
// ciphers; each applies `u32::rotate_right` to every 32-bit word.
impl RotateEachWord32 for u32x4_generic {
    #[inline(always)]
    fn rotate_each_word_right7(self) -> Self {
        dmap(self, |x| x.rotate_right(7))
    }
    #[inline(always)]
    fn rotate_each_word_right8(self) -> Self {
        dmap(self, |x| x.rotate_right(8))
    }
    #[inline(always)]
    fn rotate_each_word_right11(self) -> Self {
        dmap(self, |x| x.rotate_right(11))
    }
    #[inline(always)]
    fn rotate_each_word_right12(self) -> Self {
        dmap(self, |x| x.rotate_right(12))
    }
    #[inline(always)]
    fn rotate_each_word_right16(self) -> Self {
        dmap(self, |x| x.rotate_right(16))
    }
    #[inline(always)]
    fn rotate_each_word_right20(self) -> Self {
        dmap(self, |x| x.rotate_right(20))
    }
    #[inline(always)]
    fn rotate_each_word_right24(self) -> Self {
        dmap(self, |x| x.rotate_right(24))
    }
    #[inline(always)]
    fn rotate_each_word_right25(self) -> Self {
        dmap(self, |x| x.rotate_right(25))
    }
}
342
// Same rotation set for the two-word 64-bit vector; each applies
// `u64::rotate_right` to every 64-bit word.
impl RotateEachWord32 for u64x2_generic {
    #[inline(always)]
    fn rotate_each_word_right7(self) -> Self {
        qmap(self, |x| x.rotate_right(7))
    }
    #[inline(always)]
    fn rotate_each_word_right8(self) -> Self {
        qmap(self, |x| x.rotate_right(8))
    }
    #[inline(always)]
    fn rotate_each_word_right11(self) -> Self {
        qmap(self, |x| x.rotate_right(11))
    }
    #[inline(always)]
    fn rotate_each_word_right12(self) -> Self {
        qmap(self, |x| x.rotate_right(12))
    }
    #[inline(always)]
    fn rotate_each_word_right16(self) -> Self {
        qmap(self, |x| x.rotate_right(16))
    }
    #[inline(always)]
    fn rotate_each_word_right20(self) -> Self {
        qmap(self, |x| x.rotate_right(20))
    }
    #[inline(always)]
    fn rotate_each_word_right24(self) -> Self {
        qmap(self, |x| x.rotate_right(24))
    }
    #[inline(always)]
    fn rotate_each_word_right25(self) -> Self {
        qmap(self, |x| x.rotate_right(25))
    }
}
377impl RotateEachWord64 for u64x2_generic {
378 #[inline(always)]
379 fn rotate_each_word_right32(self) -> Self {
380 qmap(self, |x: u64| x.rotate_right(32))
381 }
382}
383
// workaround for koute/cargo-web#52 (u128::rotate_* broken with cargo web)
/// Rotate `x` right by `i` bits without using `u128::rotate_right`.
///
/// `i` must be in `0..=127`. Fix: the original `x << (128 - i)` overflows the
/// shift amount when `i == 0` (panic in debug builds); masking the left-shift
/// count with 127 handles `i == 0` correctly and is identical for `1..=127`.
#[inline(always)]
fn rotate_u128_right(x: u128, i: u32) -> u128 {
    (x >> i) | (x << ((128 - i) & 127))
}
/// Sanity-check the hand-rolled rotate against the standard-library one.
#[test]
fn test_rotate_u128() {
    const X: u128 = 0x0001_0203_0405_0607_0809_0a0b_0c0d_0e0f;
    assert_eq!(rotate_u128_right(X, 17), X.rotate_right(17));
}
394
// Rotations for the single-word 128-bit vector, using the cargo-web-safe
// helper above instead of `u128::rotate_right`.
impl RotateEachWord32 for u128x1_generic {
    #[inline(always)]
    fn rotate_each_word_right7(self) -> Self {
        Self([rotate_u128_right(self.0[0], 7)])
    }
    #[inline(always)]
    fn rotate_each_word_right8(self) -> Self {
        Self([rotate_u128_right(self.0[0], 8)])
    }
    #[inline(always)]
    fn rotate_each_word_right11(self) -> Self {
        Self([rotate_u128_right(self.0[0], 11)])
    }
    #[inline(always)]
    fn rotate_each_word_right12(self) -> Self {
        Self([rotate_u128_right(self.0[0], 12)])
    }
    #[inline(always)]
    fn rotate_each_word_right16(self) -> Self {
        Self([rotate_u128_right(self.0[0], 16)])
    }
    #[inline(always)]
    fn rotate_each_word_right20(self) -> Self {
        Self([rotate_u128_right(self.0[0], 20)])
    }
    #[inline(always)]
    fn rotate_each_word_right24(self) -> Self {
        Self([rotate_u128_right(self.0[0], 24)])
    }
    #[inline(always)]
    fn rotate_each_word_right25(self) -> Self {
        Self([rotate_u128_right(self.0[0], 25)])
    }
}
429impl RotateEachWord64 for u128x1_generic {
430 #[inline(always)]
431 fn rotate_each_word_right32(self) -> Self {
432 Self([rotate_u128_right(self.0[0], i:32)])
433 }
434}
435
/// The pure-software fallback machine: every vector type resolves to the
/// scalar-emulated `*_generic` implementation in this module.
#[derive(Copy, Clone)]
pub struct GenericMachine;
impl Machine for GenericMachine {
    type u32x4 = u32x4_generic;
    type u64x2 = u64x2_generic;
    type u128x1 = u128x1_generic;
    type u32x4x2 = u32x4x2_generic;
    type u64x2x2 = u64x2x2_generic;
    type u64x4 = u64x4_generic;
    type u128x2 = u128x2_generic;
    type u32x4x4 = u32x4x4_generic;
    type u64x2x4 = u64x2x4_generic;
    type u128x4 = u128x4_generic;
    #[inline(always)]
    unsafe fn instance() -> Self {
        // The software fallback needs no CPU-feature precondition.
        Self
    }
}
454
// Newtype wrappers over plain arrays; every operation on them is ordinary
// scalar code.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u32x4_generic([u32; 4]);
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u64x2_generic([u64; 2]);
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u128x1_generic([u128; 1]);
461
// Pack each generic vector into opaque storage through the matching union view.
impl From<u32x4_generic> for vec128_storage {
    #[inline(always)]
    fn from(d: u32x4_generic) -> Self {
        Self { d: d.0 }
    }
}
impl From<u64x2_generic> for vec128_storage {
    #[inline(always)]
    fn from(q: u64x2_generic) -> Self {
        Self { q: q.0 }
    }
}
impl From<u128x1_generic> for vec128_storage {
    #[inline(always)]
    fn from(o: u128x1_generic) -> Self {
        // The u128 is stored as two u64 words, low word first.
        Self { q: q_of_o(o.0[0]) }
    }
}
480
// Unpack opaque storage into each generic vector type by reading the
// matching union view; every 16-byte bit pattern is valid for all of them.
impl Store<vec128_storage> for u32x4_generic {
    #[inline(always)]
    unsafe fn unpack(s: vec128_storage) -> Self {
        Self(s.d)
    }
}
impl Store<vec128_storage> for u64x2_generic {
    #[inline(always)]
    unsafe fn unpack(s: vec128_storage) -> Self {
        Self(s.q)
    }
}
impl Store<vec128_storage> for u128x1_generic {
    #[inline(always)]
    unsafe fn unpack(s: vec128_storage) -> Self {
        // Recombine the two u64 words (low word first) into one u128.
        Self([o_of_q(s.q); 1])
    }
}
499
// Marker impls: arithmetic capability, backed by the wrapping Add impls below.
impl ArithOps for u32x4_generic {}
impl ArithOps for u64x2_generic {}
impl ArithOps for u128x1_generic {}
503
504impl Add for u32x4_generic {
505 type Output = Self;
506 #[inline(always)]
507 fn add(self, rhs: Self) -> Self::Output {
508 dmap2(self, b:rhs, |x: u32, y: u32| x.wrapping_add(y))
509 }
510}
511impl Add for u64x2_generic {
512 type Output = Self;
513 #[inline(always)]
514 fn add(self, rhs: Self) -> Self::Output {
515 qmap2(self, b:rhs, |x: u64, y: u64| x.wrapping_add(y))
516 }
517}
518impl Add for u128x1_generic {
519 type Output = Self;
520 #[inline(always)]
521 fn add(self, rhs: Self) -> Self::Output {
522 omap2(self, b:rhs, |x: u128, y: u128| x.wrapping_add(y))
523 }
524}
// `+=` delegates to the wrapping `Add` impls above.
impl AddAssign for u32x4_generic {
    #[inline(always)]
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs
    }
}
impl AddAssign for u64x2_generic {
    #[inline(always)]
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs
    }
}
impl AddAssign for u128x1_generic {
    #[inline(always)]
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs
    }
}
// Byte-swap every word of the vector (word size depends on the type).
impl BSwap for u32x4_generic {
    #[inline(always)]
    fn bswap(self) -> Self {
        dmap(self, |x: u32| x.swap_bytes())
    }
}
impl BSwap for u64x2_generic {
    #[inline(always)]
    fn bswap(self) -> Self {
        qmap(self, |x: u64| x.swap_bytes())
    }
}
impl BSwap for u128x1_generic {
    #[inline(always)]
    fn bswap(self) -> Self {
        omap(self, |x: u128| x.swap_bytes())
    }
}
// Byte-level loads and stores with explicit endianness for the u32x4 vector.
// All four paths require exactly 16 bytes (enforced by the asserts).
impl StoreBytes for u32x4_generic {
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        // SAFETY: the assert guarantees 16 readable bytes; the transmute
        // reinterprets the 16-byte array as the 16-byte vector type.
        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
        // Convert each word from little-endian storage order.
        dmap(x, |x| x.to_le())
    }
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        // SAFETY: as above — 16 bytes guaranteed by the assert.
        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
        // Convert each word from big-endian storage order.
        dmap(x, |x| x.to_be())
    }
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        let x = dmap(self, |x| x.to_le());
        // SAFETY: the assert guarantees 16 writable bytes.
        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
    }
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        let x = dmap(self, |x| x.to_be());
        // SAFETY: the assert guarantees 16 writable bytes.
        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
    }
}
// Byte-level loads and stores with explicit endianness for the u64x2 vector;
// mirrors the u32x4 impl above but operates on 64-bit words via `qmap`.
impl StoreBytes for u64x2_generic {
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        // SAFETY: the assert guarantees 16 readable bytes; the transmute
        // reinterprets the 16-byte array as the 16-byte vector type.
        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
        qmap(x, |x| x.to_le())
    }
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        assert_eq!(input.len(), 16);
        // SAFETY: as above — 16 bytes guaranteed by the assert.
        let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
        qmap(x, |x| x.to_be())
    }
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        let x = qmap(self, |x| x.to_le());
        // SAFETY: the assert guarantees 16 writable bytes.
        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
    }
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        assert_eq!(out.len(), 16);
        let x = qmap(self, |x| x.to_be());
        // SAFETY: the assert guarantees 16 writable bytes.
        unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
    }
}
613
// Tag types that distinguish otherwise-identical `x2` compositions, so the
// wider aliases below are distinct types to the compiler.
#[derive(Copy, Clone)]
pub struct G0;
#[derive(Copy, Clone)]
pub struct G1;
// Wider generic vectors are built by composing the 128-bit ones with the
// soft `x2`/`x4` wrappers.
pub type u32x4x2_generic = x2<u32x4_generic, G0>;
pub type u64x2x2_generic = x2<u64x2_generic, G0>;
pub type u64x4_generic = x2<u64x2_generic, G1>;
pub type u128x2_generic = x2<u128x1_generic, G0>;
pub type u32x4x4_generic = x4<u32x4_generic>;
pub type u64x2x4_generic = x4<u64x2_generic>;
pub type u128x4_generic = x4<u128x1_generic>;
625
626impl Vector<[u32; 16]> for u32x4x4_generic {
627 fn to_scalars(self) -> [u32; 16] {
628 let [a: u32x4_generic, b: u32x4_generic, c: u32x4_generic, d: u32x4_generic] = self.0;
629 let a: [u32; 4] = a.0;
630 let b: [u32; 4] = b.0;
631 let c: [u32; 4] = c.0;
632 let d: [u32; 4] = d.0;
633 [
634 a[0], a[1], a[2], a[3], //
635 b[0], b[1], b[2], b[3], //
636 c[0], c[1], c[2], c[3], //
637 d[0], d[1], d[2], d[3], //
638 ]
639 }
640}
641
// Lane access for the single-register types is just the inner array.
impl MultiLane<[u32; 4]> for u32x4_generic {
    #[inline(always)]
    fn to_lanes(self) -> [u32; 4] {
        self.0
    }
    #[inline(always)]
    fn from_lanes(xs: [u32; 4]) -> Self {
        Self(xs)
    }
}
impl MultiLane<[u64; 2]> for u64x2_generic {
    #[inline(always)]
    fn to_lanes(self) -> [u64; 2] {
        self.0
    }
    #[inline(always)]
    fn from_lanes(xs: [u64; 2]) -> Self {
        Self(xs)
    }
}
662impl MultiLane<[u64; 4]> for u64x4_generic {
663 #[inline(always)]
664 fn to_lanes(self) -> [u64; 4] {
665 let (a: [u64; 2], b: [u64; 2]) = (self.0[0].to_lanes(), self.0[1].to_lanes());
666 [a[0], a[1], b[0], b[1]]
667 }
668 #[inline(always)]
669 fn from_lanes(xs: [u64; 4]) -> Self {
670 let (a: u64x2_generic, b: u64x2_generic) = (
671 u64x2_generic::from_lanes([xs[0], xs[1]]),
672 u64x2_generic::from_lanes([xs[2], xs[3]]),
673 );
674 x2::new([a, b])
675 }
676}
// Single-lane type: lane access is the inner one-element array.
impl MultiLane<[u128; 1]> for u128x1_generic {
    #[inline(always)]
    fn to_lanes(self) -> [u128; 1] {
        self.0
    }
    #[inline(always)]
    fn from_lanes(xs: [u128; 1]) -> Self {
        Self(xs)
    }
}
impl Vec4<u32> for u32x4_generic {
    /// Read lane `i` (panics if `i >= 4`, via the array index).
    #[inline(always)]
    fn extract(self, i: u32) -> u32 {
        self.0[i as usize]
    }
    /// Return a copy with lane `i` replaced by `v` (panics if `i >= 4`).
    #[inline(always)]
    fn insert(mut self, v: u32, i: u32) -> Self {
        self.0[i as usize] = v;
        self
    }
}
698impl Vec4<u64> for u64x4_generic {
699 #[inline(always)]
700 fn extract(self, i: u32) -> u64 {
701 let d: [u64; 4] = self.to_lanes();
702 d[i as usize]
703 }
704 #[inline(always)]
705 fn insert(self, v: u64, i: u32) -> Self {
706 self.0[(i / 2) as usize].insert(w:v, i:i % 2);
707 self
708 }
709}
impl Vec2<u64> for u64x2_generic {
    /// Read lane `i` (panics if `i >= 2`, via the array index).
    #[inline(always)]
    fn extract(self, i: u32) -> u64 {
        self.0[i as usize]
    }
    /// Return a copy with lane `i` replaced by `v` (panics if `i >= 2`).
    #[inline(always)]
    fn insert(mut self, v: u64, i: u32) -> Self {
        self.0[i as usize] = v;
        self
    }
}
721
impl Words4 for u32x4_generic {
    /// [x0,x1,x2,x3] -> [x2,x3,x1,x0]-pattern swap of 64-bit halves:
    /// delegates to `swap64`, producing [x2, x3, x0, x1].
    #[inline(always)]
    fn shuffle2301(self) -> Self {
        self.swap64()
    }
    /// [x0,x1,x2,x3] -> [x3,x0,x1,x2] (rotate words right by one).
    #[inline(always)]
    fn shuffle1230(self) -> Self {
        let x: [u32; 4] = self.0;
        Self([x[3], x[0], x[1], x[2]])
    }
    /// [x0,x1,x2,x3] -> [x1,x2,x3,x0] (rotate words left by one).
    #[inline(always)]
    fn shuffle3012(self) -> Self {
        let x: [u32; 4] = self.0;
        Self([x[1], x[2], x[3], x[0]])
    }
}
// With a single 128-bit lane, the per-lane word shuffles coincide with the
// whole-vector word shuffles, so these simply delegate.
impl LaneWords4 for u32x4_generic {
    #[inline(always)]
    fn shuffle_lane_words2301(self) -> Self {
        self.shuffle2301()
    }
    #[inline(always)]
    fn shuffle_lane_words1230(self) -> Self {
        self.shuffle1230()
    }
    #[inline(always)]
    fn shuffle_lane_words3012(self) -> Self {
        self.shuffle3012()
    }
}
752
impl Words4 for u64x4_generic {
    /// [x0,x1,x2,x3] -> [x2,x3,x0,x1]: swap the two u64x2 halves.
    #[inline(always)]
    fn shuffle2301(self) -> Self {
        x2::new([self.0[1], self.0[0]])
    }
    // The remaining shuffles are not provided for this composed type; calling
    // them panics via `unimplemented!`.
    #[inline(always)]
    fn shuffle1230(self) -> Self {
        unimplemented!()
    }
    #[inline(always)]
    fn shuffle3012(self) -> Self {
        unimplemented!()
    }
}
767
// Marker impls tying each generic vector type to the GenericMachine, so the
// Machine associated types above satisfy their trait bounds.
impl u32x4<GenericMachine> for u32x4_generic {}
impl u64x2<GenericMachine> for u64x2_generic {}
impl u128x1<GenericMachine> for u128x1_generic {}
impl u32x4x2<GenericMachine> for u32x4x2_generic {}
impl u64x2x2<GenericMachine> for u64x2x2_generic {}
impl u64x4<GenericMachine> for u64x4_generic {}
impl u128x2<GenericMachine> for u128x2_generic {}
impl u32x4x4<GenericMachine> for u32x4x4_generic {}
impl u64x2x4<GenericMachine> for u64x2x4_generic {}
impl u128x4<GenericMachine> for u128x4_generic {}
778
/// Wrap a function body so it runs with a `Machine` in scope. On the generic
/// backend there is only one machine, so dispatch unconditionally instantiates
/// `GenericMachine`; the inner `fn_impl` is generic over `Machine` so the same
/// body compiles against SIMD backends too. The second rule forwards
/// no-return-type functions to the first with `-> ()`.
#[macro_export]
macro_rules! dispatch {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Identical to `dispatch!` on the generic backend (there is no lighter
/// 128-bit machine to select).
#[macro_export]
macro_rules! dispatch_light128 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Identical to `dispatch!` on the generic backend.
#[macro_export]
macro_rules! dispatch_light256 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Identical to `dispatch!` on the generic backend.
#[macro_export]
macro_rules! dispatch_light512 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
847
#[cfg(test)]
mod test {
    use super::*;

    /// `bswap` on a u32x4 should reverse the bytes within each 32-bit word,
    /// leaving the word order unchanged.
    #[test]
    fn test_bswap32() {
        let xs = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
        let ys = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];

        let m = unsafe { GenericMachine::instance() };

        let x: <GenericMachine as Machine>::u32x4 = m.vec(xs);
        let x = x.bswap();

        let y = m.vec(ys);
        assert_eq!(x, y);
    }
}
866