1//! Internal `#[repr(simd)]` types
2
3#![allow(non_camel_case_types)]
4
// Declares a crate-internal `#[repr(simd)]` vector type `$id` wrapping
// `[$elem_type; $len]`, together with its constructors and helpers.
//
// The identifiers listed after the colon become the parameter names of the
// generated `new` function, one per lane, in lane order.
macro_rules! simd_ty {
    ($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone, Debug, PartialEq)]
        pub(crate) struct $id([$elem_type; $len]);

        #[allow(clippy::use_self)]
        impl $id {
            /// The vector in which every lane has the all-zero bit pattern.
            // SAFETY (assumption): every invocation in this file passes a
            // primitive integer or float element type, for which all-zero
            // bits are a valid value.
            pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };

            /// Builds a vector from one argument per lane.
            #[inline(always)]
            pub(crate) const fn new($($param_name: $elem_type),*) -> Self {
                Self::from_array([$($param_name),*])
            }

            /// Wraps an array of lanes without modifying it.
            #[inline(always)]
            pub(crate) const fn from_array(elements: [$elem_type; $len]) -> Self {
                $id(elements)
            }

            /// Builds a vector whose lanes all hold `value`.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn splat(value: $elem_type) -> Self {
                #[derive(Copy, Clone)]
                #[repr(simd)]
                struct SingleLane([$elem_type; 1]);

                let source = SingleLane([value]);
                // SAFETY: 0 is always in-bounds because we're shuffling
                // a simd type with exactly one element.
                unsafe { simd_shuffle!(source, source, [0; $len]) }
            }

            /// Returns the lane at position `index`.
            ///
            /// `index` is a runtime value, so this is not efficient;
            /// it is meant for tests only.
            ///
            /// # Panics
            ///
            /// Panics when `index` is not smaller than the lane count.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn extract(self, index: usize) -> $elem_type {
                assert!(index < $len);
                // View the vector as a pointer to its first lane.
                let first_lane = (&self as *const Self).cast::<$elem_type>();
                // SAFETY: `index` was just checked to be below `$len`, so the
                // offset stays inside the `$len` lanes held by `self`. This
                // relies on the lanes being laid out contiguously, like the
                // wrapped array.
                unsafe { first_lane.add(index).read() }
            }
        }
    }
}
52
// Declares a crate-internal `#[repr(simd)]` mask type `$id` wrapping
// `[$elem_type; $len]`.
//
// Each lane is built from a `bool`: `false` is stored as `0` and `true` as
// the bitwise complement of `0` (all bits set).
macro_rules! simd_m_ty {
    ($id:ident [$elem_type:ident ; $len:literal]: $($param_name:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone, Debug, PartialEq)]
        pub(crate) struct $id([$elem_type; $len]);

        #[allow(clippy::use_self)]
        impl $id {
            /// Converts a `bool` into the lane representation of the mask.
            #[inline(always)]
            const fn bool_to_internal(x: bool) -> $elem_type {
                if x {
                    !(0 as $elem_type)
                } else {
                    0 as $elem_type
                }
            }

            /// Builds a mask from one `bool` per lane.
            #[inline(always)]
            pub(crate) const fn new($($param_name: bool),*) -> Self {
                let lanes = [$(Self::bool_to_internal($param_name)),*];
                $id(lanes)
            }

            /// Builds a mask whose lanes all represent `value`.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn splat(value: bool) -> Self {
                #[derive(Copy, Clone)]
                #[repr(simd)]
                struct SingleLane([$elem_type; 1]);

                let source = SingleLane([Self::bool_to_internal(value)]);
                // SAFETY: 0 is always in-bounds because we're shuffling
                // a simd type with exactly one element.
                unsafe { simd_shuffle!(source, source, [0; $len]) }
            }
        }
    }
}
85
86// 16-bit wide types:
87
88simd_ty!(u8x2[u8;2]: x0, x1);
89simd_ty!(i8x2[i8;2]: x0, x1);
90
91// 32-bit wide types:
92
93simd_ty!(u8x4[u8;4]: x0, x1, x2, x3);
94simd_ty!(u16x2[u16;2]: x0, x1);
95
96simd_ty!(i8x4[i8;4]: x0, x1, x2, x3);
97simd_ty!(i16x2[i16;2]: x0, x1);
98
99// 64-bit wide types:
100
101simd_ty!(
102 u8x8[u8;8]:
103 x0,
104 x1,
105 x2,
106 x3,
107 x4,
108 x5,
109 x6,
110 x7
111);
112simd_ty!(u16x4[u16;4]: x0, x1, x2, x3);
113simd_ty!(u32x2[u32;2]: x0, x1);
114simd_ty!(u64x1[u64;1]: x1);
115
116simd_ty!(
117 i8x8[i8;8]:
118 x0,
119 x1,
120 x2,
121 x3,
122 x4,
123 x5,
124 x6,
125 x7
126);
127simd_ty!(i16x4[i16;4]: x0, x1, x2, x3);
128simd_ty!(i32x2[i32;2]: x0, x1);
129simd_ty!(i64x1[i64;1]: x1);
130
131simd_ty!(f32x2[f32;2]: x0, x1);
132simd_ty!(f64x1[f64;1]: x1);
133
134// 128-bit wide types:
135
136simd_ty!(
137 u8x16[u8;16]:
138 x0,
139 x1,
140 x2,
141 x3,
142 x4,
143 x5,
144 x6,
145 x7,
146 x8,
147 x9,
148 x10,
149 x11,
150 x12,
151 x13,
152 x14,
153 x15
154);
155simd_ty!(
156 u16x8[u16;8]:
157 x0,
158 x1,
159 x2,
160 x3,
161 x4,
162 x5,
163 x6,
164 x7
165);
166simd_ty!(u32x4[u32;4]: x0, x1, x2, x3);
167simd_ty!(u64x2[u64;2]: x0, x1);
168
169simd_ty!(
170 i8x16[i8;16]:
171 x0,
172 x1,
173 x2,
174 x3,
175 x4,
176 x5,
177 x6,
178 x7,
179 x8,
180 x9,
181 x10,
182 x11,
183 x12,
184 x13,
185 x14,
186 x15
187);
188simd_ty!(
189 i16x8[i16;8]:
190 x0,
191 x1,
192 x2,
193 x3,
194 x4,
195 x5,
196 x6,
197 x7
198);
199simd_ty!(i32x4[i32;4]: x0, x1, x2, x3);
200simd_ty!(i64x2[i64;2]: x0, x1);
201
202simd_ty!(f16x4[f16;4]: x0, x1, x2, x3);
203
204simd_ty!(
205 f16x8[f16;8]:
206 x0,
207 x1,
208 x2,
209 x3,
210 x4,
211 x5,
212 x6,
213 x7
214);
215simd_ty!(f32x4[f32;4]: x0, x1, x2, x3);
216simd_ty!(f64x2[f64;2]: x0, x1);
217
218simd_m_ty!(
219 m8x16[i8;16]:
220 x0,
221 x1,
222 x2,
223 x3,
224 x4,
225 x5,
226 x6,
227 x7,
228 x8,
229 x9,
230 x10,
231 x11,
232 x12,
233 x13,
234 x14,
235 x15
236);
237simd_m_ty!(
238 m16x8[i16;8]:
239 x0,
240 x1,
241 x2,
242 x3,
243 x4,
244 x5,
245 x6,
246 x7
247);
248simd_m_ty!(m32x4[i32;4]: x0, x1, x2, x3);
249simd_m_ty!(m64x2[i64;2]: x0, x1);
250
251// 256-bit wide types:
252
253simd_ty!(
254 u8x32[u8;32]:
255 x0,
256 x1,
257 x2,
258 x3,
259 x4,
260 x5,
261 x6,
262 x7,
263 x8,
264 x9,
265 x10,
266 x11,
267 x12,
268 x13,
269 x14,
270 x15,
271 x16,
272 x17,
273 x18,
274 x19,
275 x20,
276 x21,
277 x22,
278 x23,
279 x24,
280 x25,
281 x26,
282 x27,
283 x28,
284 x29,
285 x30,
286 x31
287);
288simd_ty!(
289 u16x16[u16;16]:
290 x0,
291 x1,
292 x2,
293 x3,
294 x4,
295 x5,
296 x6,
297 x7,
298 x8,
299 x9,
300 x10,
301 x11,
302 x12,
303 x13,
304 x14,
305 x15
306);
307simd_ty!(
308 u32x8[u32;8]:
309 x0,
310 x1,
311 x2,
312 x3,
313 x4,
314 x5,
315 x6,
316 x7
317);
318simd_ty!(u64x4[u64;4]: x0, x1, x2, x3);
319
320simd_ty!(
321 i8x32[i8;32]:
322 x0,
323 x1,
324 x2,
325 x3,
326 x4,
327 x5,
328 x6,
329 x7,
330 x8,
331 x9,
332 x10,
333 x11,
334 x12,
335 x13,
336 x14,
337 x15,
338 x16,
339 x17,
340 x18,
341 x19,
342 x20,
343 x21,
344 x22,
345 x23,
346 x24,
347 x25,
348 x26,
349 x27,
350 x28,
351 x29,
352 x30,
353 x31
354);
355simd_ty!(
356 i16x16[i16;16]:
357 x0,
358 x1,
359 x2,
360 x3,
361 x4,
362 x5,
363 x6,
364 x7,
365 x8,
366 x9,
367 x10,
368 x11,
369 x12,
370 x13,
371 x14,
372 x15
373);
374simd_ty!(
375 i32x8[i32;8]:
376 x0,
377 x1,
378 x2,
379 x3,
380 x4,
381 x5,
382 x6,
383 x7
384);
385simd_ty!(i64x4[i64;4]: x0, x1, x2, x3);
386
387simd_ty!(
388 f16x16[f16;16]:
389 x0,
390 x1,
391 x2,
392 x3,
393 x4,
394 x5,
395 x6,
396 x7,
397 x8,
398 x9,
399 x10,
400 x11,
401 x12,
402 x13,
403 x14,
404 x15
405);
406simd_ty!(
407 f32x8[f32;8]:
408 x0,
409 x1,
410 x2,
411 x3,
412 x4,
413 x5,
414 x6,
415 x7
416);
417simd_ty!(f64x4[f64;4]: x0, x1, x2, x3);
418
419simd_m_ty!(
420 m8x32[i8;32]:
421 x0,
422 x1,
423 x2,
424 x3,
425 x4,
426 x5,
427 x6,
428 x7,
429 x8,
430 x9,
431 x10,
432 x11,
433 x12,
434 x13,
435 x14,
436 x15,
437 x16,
438 x17,
439 x18,
440 x19,
441 x20,
442 x21,
443 x22,
444 x23,
445 x24,
446 x25,
447 x26,
448 x27,
449 x28,
450 x29,
451 x30,
452 x31
453);
454simd_m_ty!(
455 m16x16[i16;16]:
456 x0,
457 x1,
458 x2,
459 x3,
460 x4,
461 x5,
462 x6,
463 x7,
464 x8,
465 x9,
466 x10,
467 x11,
468 x12,
469 x13,
470 x14,
471 x15
472);
473simd_m_ty!(
474 m32x8[i32;8]:
475 x0,
476 x1,
477 x2,
478 x3,
479 x4,
480 x5,
481 x6,
482 x7
483);
484
485// 512-bit wide types:
486
487simd_ty!(
488 i8x64[i8;64]:
489 x0,
490 x1,
491 x2,
492 x3,
493 x4,
494 x5,
495 x6,
496 x7,
497 x8,
498 x9,
499 x10,
500 x11,
501 x12,
502 x13,
503 x14,
504 x15,
505 x16,
506 x17,
507 x18,
508 x19,
509 x20,
510 x21,
511 x22,
512 x23,
513 x24,
514 x25,
515 x26,
516 x27,
517 x28,
518 x29,
519 x30,
520 x31,
521 x32,
522 x33,
523 x34,
524 x35,
525 x36,
526 x37,
527 x38,
528 x39,
529 x40,
530 x41,
531 x42,
532 x43,
533 x44,
534 x45,
535 x46,
536 x47,
537 x48,
538 x49,
539 x50,
540 x51,
541 x52,
542 x53,
543 x54,
544 x55,
545 x56,
546 x57,
547 x58,
548 x59,
549 x60,
550 x61,
551 x62,
552 x63
553);
554
555simd_ty!(
556 u8x64[u8;64]:
557 x0,
558 x1,
559 x2,
560 x3,
561 x4,
562 x5,
563 x6,
564 x7,
565 x8,
566 x9,
567 x10,
568 x11,
569 x12,
570 x13,
571 x14,
572 x15,
573 x16,
574 x17,
575 x18,
576 x19,
577 x20,
578 x21,
579 x22,
580 x23,
581 x24,
582 x25,
583 x26,
584 x27,
585 x28,
586 x29,
587 x30,
588 x31,
589 x32,
590 x33,
591 x34,
592 x35,
593 x36,
594 x37,
595 x38,
596 x39,
597 x40,
598 x41,
599 x42,
600 x43,
601 x44,
602 x45,
603 x46,
604 x47,
605 x48,
606 x49,
607 x50,
608 x51,
609 x52,
610 x53,
611 x54,
612 x55,
613 x56,
614 x57,
615 x58,
616 x59,
617 x60,
618 x61,
619 x62,
620 x63
621);
622
623simd_ty!(
624 i16x32[i16;32]:
625 x0,
626 x1,
627 x2,
628 x3,
629 x4,
630 x5,
631 x6,
632 x7,
633 x8,
634 x9,
635 x10,
636 x11,
637 x12,
638 x13,
639 x14,
640 x15,
641 x16,
642 x17,
643 x18,
644 x19,
645 x20,
646 x21,
647 x22,
648 x23,
649 x24,
650 x25,
651 x26,
652 x27,
653 x28,
654 x29,
655 x30,
656 x31
657);
658
659simd_ty!(
660 u16x32[u16;32]:
661 x0,
662 x1,
663 x2,
664 x3,
665 x4,
666 x5,
667 x6,
668 x7,
669 x8,
670 x9,
671 x10,
672 x11,
673 x12,
674 x13,
675 x14,
676 x15,
677 x16,
678 x17,
679 x18,
680 x19,
681 x20,
682 x21,
683 x22,
684 x23,
685 x24,
686 x25,
687 x26,
688 x27,
689 x28,
690 x29,
691 x30,
692 x31
693);
694
695simd_ty!(
696 i32x16[i32;16]:
697 x0,
698 x1,
699 x2,
700 x3,
701 x4,
702 x5,
703 x6,
704 x7,
705 x8,
706 x9,
707 x10,
708 x11,
709 x12,
710 x13,
711 x14,
712 x15
713);
714
715simd_ty!(
716 u32x16[u32;16]:
717 x0,
718 x1,
719 x2,
720 x3,
721 x4,
722 x5,
723 x6,
724 x7,
725 x8,
726 x9,
727 x10,
728 x11,
729 x12,
730 x13,
731 x14,
732 x15
733);
734
735simd_ty!(
736 f16x32[f16;32]:
737 x0,
738 x1,
739 x2,
740 x3,
741 x4,
742 x5,
743 x6,
744 x7,
745 x8,
746 x9,
747 x10,
748 x11,
749 x12,
750 x13,
751 x14,
752 x15,
753 x16,
754 x17,
755 x18,
756 x19,
757 x20,
758 x21,
759 x22,
760 x23,
761 x24,
762 x25,
763 x26,
764 x27,
765 x28,
766 x29,
767 x30,
768 x31
769);
770simd_ty!(
771 f32x16[f32;16]:
772 x0,
773 x1,
774 x2,
775 x3,
776 x4,
777 x5,
778 x6,
779 x7,
780 x8,
781 x9,
782 x10,
783 x11,
784 x12,
785 x13,
786 x14,
787 x15
788);
789
790simd_ty!(
791 i64x8[i64;8]:
792 x0,
793 x1,
794 x2,
795 x3,
796 x4,
797 x5,
798 x6,
799 x7
800);
801
802simd_ty!(
803 u64x8[u64;8]:
804 x0,
805 x1,
806 x2,
807 x3,
808 x4,
809 x5,
810 x6,
811 x7
812);
813
814simd_ty!(
815 f64x8[f64;8]:
816 x0,
817 x1,
818 x2,
819 x3,
820 x4,
821 x5,
822 x6,
823 x7
824);
825
826// 1024-bit wide types:
827simd_ty!(
828 u16x64[u16;64]:
829 x0,
830 x1,
831 x2,
832 x3,
833 x4,
834 x5,
835 x6,
836 x7,
837 x8,
838 x9,
839 x10,
840 x11,
841 x12,
842 x13,
843 x14,
844 x15,
845 x16,
846 x17,
847 x18,
848 x19,
849 x20,
850 x21,
851 x22,
852 x23,
853 x24,
854 x25,
855 x26,
856 x27,
857 x28,
858 x29,
859 x30,
860 x31,
861 x32,
862 x33,
863 x34,
864 x35,
865 x36,
866 x37,
867 x38,
868 x39,
869 x40,
870 x41,
871 x42,
872 x43,
873 x44,
874 x45,
875 x46,
876 x47,
877 x48,
878 x49,
879 x50,
880 x51,
881 x52,
882 x53,
883 x54,
884 x55,
885 x56,
886 x57,
887 x58,
888 x59,
889 x60,
890 x61,
891 x62,
892 x63
893);
894simd_ty!(
895 i32x32[i32;32]:
896 x0,
897 x1,
898 x2,
899 x3,
900 x4,
901 x5,
902 x6,
903 x7,
904 x8,
905 x9,
906 x10,
907 x11,
908 x12,
909 x13,
910 x14,
911 x15,
912 x16,
913 x17,
914 x18,
915 x19,
916 x20,
917 x21,
918 x22,
919 x23,
920 x24,
921 x25,
922 x26,
923 x27,
924 x28,
925 x29,
926 x30,
927 x31
928);
929simd_ty!(
930 u32x32[u32;32]:
931 x0,
932 x1,
933 x2,
934 x3,
935 x4,
936 x5,
937 x6,
938 x7,
939 x8,
940 x9,
941 x10,
942 x11,
943 x12,
944 x13,
945 x14,
946 x15,
947 x16,
948 x17,
949 x18,
950 x19,
951 x20,
952 x21,
953 x22,
954 x23,
955 x24,
956 x25,
957 x26,
958 x27,
959 x28,
960 x29,
961 x30,
962 x31
963);
964
/// Used to continue `Debug`ging SIMD types as `MySimd(1, 2, 3, 4)`, as they
/// were before moving to array-based simd.
///
/// Writes `type_name` followed by every element of `array`, in order, using
/// the tuple-struct `Debug` layout. An empty `array` prints `type_name` alone.
///
/// # Errors
///
/// Returns whatever error the underlying `formatter` reports.
#[inline]
pub(crate) fn debug_simd_finish<T: crate::fmt::Debug, const N: usize>(
    formatter: &mut crate::fmt::Formatter<'_>,
    type_name: &str,
    array: [T; N],
) -> crate::fmt::Result {
    // The public tuple builder yields the tuple-struct layout directly, with
    // no temporary array of trait objects needed.
    let mut tuple = formatter.debug_tuple(type_name);
    for element in &array {
        tuple.field(element);
    }
    tuple.finish()
}
979

Provided by KDAB

Privacy Policy