1//! Internal `#[repr(simd)]` types
2
3#![allow(non_camel_case_types)]
4
/// Declares a crate-internal `#[repr(simd)]` vector type `$id` wrapping
/// `[$elem_type; $len]`, together with constructors, lane access, and
/// `PartialEq`/`Debug` implementations.
///
/// The identifiers listed after the colon become the parameter names of the
/// generated `new` constructor (one per lane), so exactly `$len` of them must
/// be supplied for the expansion to type-check.
macro_rules! simd_ty {
    ($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone)]
        pub(crate) struct $id([$elem_type; $len]);

        #[allow(clippy::use_self)]
        impl $id {
            /// A value of this type where all elements are zeroed out.
            // SAFETY: assumes `$elem_type` is valid when all of its bytes are
            // zero, which holds for the integer and float element types this
            // macro is instantiated with in this file.
            pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };

            /// Builds a vector from one argument per lane, in lane order.
            #[inline(always)]
            pub(crate) const fn new($($param_name: $elem_type),*) -> Self {
                Self([$($param_name),*])
            }

            /// Builds a vector whose lanes are the elements of `elements`.
            #[inline(always)]
            pub(crate) const fn from_array(elements: [$elem_type; $len]) -> Self {
                Self(elements)
            }

            /// Builds a vector with every lane set to `value`.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn splat(value: $elem_type) -> Self {
                #[repr(simd)]
                #[derive(Copy, Clone)]
                struct JustOne([$elem_type; 1]);
                let single = JustOne([value]);
                // SAFETY: 0 is always in-bounds because we're shuffling
                // a simd type with exactly one element.
                unsafe { simd_shuffle!(single, single, [0; $len]) }
            }

            /// Extract the element at position `index`.
            /// `index` is not a constant so this is not efficient!
            /// Use for testing only.
            ///
            /// Panics if `index` is out of bounds (ordinary array indexing).
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn extract(&self, index: usize) -> $elem_type {
                let lanes = self.as_array();
                lanes[index]
            }

            /// Borrows the lanes of this vector as a plain array.
            #[inline]
            pub(crate) fn as_array(&self) -> &[$elem_type; $len] {
                let lanes = (self as *const Self).cast::<[$elem_type; $len]>();
                // SAFETY: We can always read the prefix of a simd type as an array.
                // There might be more padding afterwards for some widths, but
                // that's not a problem for reading less than that.
                unsafe { &*lanes }
            }
        }

        // Equality is lane-wise, delegated to the array comparison.
        impl core::cmp::PartialEq for $id {
            #[inline]
            fn eq(&self, other: &Self) -> bool {
                let (lhs, rhs) = (self.as_array(), other.as_array());
                lhs == rhs
            }
        }

        // Formats as the type name followed by the lanes, via `debug_simd_finish`.
        impl core::fmt::Debug for $id {
            #[inline]
            fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                debug_simd_finish(formatter, stringify!($id), self.as_array())
            }
        }
    }
}
71
/// Declares a crate-internal `#[repr(simd)]` mask type `$id` wrapping
/// `[$elem_type; $len]`.
///
/// Lanes are constructed from `bool`s: `false` is stored as `0` and `true`
/// as `!0` (all bits set) of the element type. The identifiers after the
/// colon become the parameter names of `new`, one per lane.
macro_rules! simd_m_ty {
    ($id:ident [$elem_type:ident ; $len:literal]: $($param_name:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone)]
        pub(crate) struct $id([$elem_type; $len]);

        #[allow(clippy::use_self)]
        impl $id {
            /// Maps `false` to `0` and `true` to `!0` of the element type.
            #[inline(always)]
            const fn bool_to_internal(x: bool) -> $elem_type {
                if x {
                    !(0 as $elem_type)
                } else {
                    0 as $elem_type
                }
            }

            /// Builds a mask from one `bool` per lane, in lane order.
            #[inline(always)]
            pub(crate) const fn new($($param_name: bool),*) -> Self {
                Self([$(Self::bool_to_internal($param_name)),*])
            }

            /// Builds a mask with every lane set to the encoding of `value`.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn splat(value: bool) -> Self {
                #[repr(simd)]
                #[derive(Copy, Clone)]
                struct JustOne([$elem_type; 1]);
                let single = JustOne([Self::bool_to_internal(value)]);
                // SAFETY: 0 is always in-bounds because we're shuffling
                // a simd type with exactly one element.
                unsafe { simd_shuffle!(single, single, [0; $len]) }
            }

            /// Borrows the raw lanes of this mask as a plain array.
            #[inline]
            pub(crate) fn as_array(&self) -> &[$elem_type; $len] {
                let lanes = (self as *const Self).cast::<[$elem_type; $len]>();
                // SAFETY: We can always read the prefix of a simd type as an array.
                // There might be more padding afterwards for some widths, but
                // that's not a problem for reading less than that.
                unsafe { &*lanes }
            }
        }

        // Equality is lane-wise, delegated to the array comparison.
        impl core::cmp::PartialEq for $id {
            #[inline]
            fn eq(&self, other: &Self) -> bool {
                let (lhs, rhs) = (self.as_array(), other.as_array());
                lhs == rhs
            }
        }

        // Formats as the type name followed by the raw lanes, via `debug_simd_finish`.
        impl core::fmt::Debug for $id {
            #[inline]
            fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                debug_simd_finish(formatter, stringify!($id), self.as_array())
            }
        }
    }
}
128
129// 16-bit wide types:
130
131simd_ty!(u8x2[u8;2]: x0, x1);
132simd_ty!(i8x2[i8;2]: x0, x1);
133
134// 32-bit wide types:
135
136simd_ty!(u8x4[u8;4]: x0, x1, x2, x3);
137simd_ty!(u16x2[u16;2]: x0, x1);
138
139simd_ty!(i8x4[i8;4]: x0, x1, x2, x3);
140simd_ty!(i16x2[i16;2]: x0, x1);
141
142// 64-bit wide types:
143
144simd_ty!(
145 u8x8[u8;8]:
146 x0,
147 x1,
148 x2,
149 x3,
150 x4,
151 x5,
152 x6,
153 x7
154);
155simd_ty!(u16x4[u16;4]: x0, x1, x2, x3);
156simd_ty!(u32x2[u32;2]: x0, x1);
157simd_ty!(u64x1[u64;1]: x1);
158
159simd_ty!(
160 i8x8[i8;8]:
161 x0,
162 x1,
163 x2,
164 x3,
165 x4,
166 x5,
167 x6,
168 x7
169);
170simd_ty!(i16x4[i16;4]: x0, x1, x2, x3);
171simd_ty!(i32x2[i32;2]: x0, x1);
172simd_ty!(i64x1[i64;1]: x1);
173
174simd_ty!(f32x2[f32;2]: x0, x1);
175simd_ty!(f64x1[f64;1]: x1);
176
177// 128-bit wide types:
178
179simd_ty!(
180 u8x16[u8;16]:
181 x0,
182 x1,
183 x2,
184 x3,
185 x4,
186 x5,
187 x6,
188 x7,
189 x8,
190 x9,
191 x10,
192 x11,
193 x12,
194 x13,
195 x14,
196 x15
197);
198simd_ty!(
199 u16x8[u16;8]:
200 x0,
201 x1,
202 x2,
203 x3,
204 x4,
205 x5,
206 x6,
207 x7
208);
209simd_ty!(u32x4[u32;4]: x0, x1, x2, x3);
210simd_ty!(u64x2[u64;2]: x0, x1);
211
212simd_ty!(
213 i8x16[i8;16]:
214 x0,
215 x1,
216 x2,
217 x3,
218 x4,
219 x5,
220 x6,
221 x7,
222 x8,
223 x9,
224 x10,
225 x11,
226 x12,
227 x13,
228 x14,
229 x15
230);
231simd_ty!(
232 i16x8[i16;8]:
233 x0,
234 x1,
235 x2,
236 x3,
237 x4,
238 x5,
239 x6,
240 x7
241);
242simd_ty!(i32x4[i32;4]: x0, x1, x2, x3);
243simd_ty!(i64x2[i64;2]: x0, x1);
244
245simd_ty!(f16x4[f16;4]: x0, x1, x2, x3);
246
247simd_ty!(
248 f16x8[f16;8]:
249 x0,
250 x1,
251 x2,
252 x3,
253 x4,
254 x5,
255 x6,
256 x7
257);
258simd_ty!(f32x4[f32;4]: x0, x1, x2, x3);
259simd_ty!(f64x2[f64;2]: x0, x1);
260
261simd_m_ty!(
262 m8x16[i8;16]:
263 x0,
264 x1,
265 x2,
266 x3,
267 x4,
268 x5,
269 x6,
270 x7,
271 x8,
272 x9,
273 x10,
274 x11,
275 x12,
276 x13,
277 x14,
278 x15
279);
280simd_m_ty!(
281 m16x8[i16;8]:
282 x0,
283 x1,
284 x2,
285 x3,
286 x4,
287 x5,
288 x6,
289 x7
290);
291simd_m_ty!(m32x4[i32;4]: x0, x1, x2, x3);
292simd_m_ty!(m64x2[i64;2]: x0, x1);
293
294// 256-bit wide types:
295
296simd_ty!(
297 u8x32[u8;32]:
298 x0,
299 x1,
300 x2,
301 x3,
302 x4,
303 x5,
304 x6,
305 x7,
306 x8,
307 x9,
308 x10,
309 x11,
310 x12,
311 x13,
312 x14,
313 x15,
314 x16,
315 x17,
316 x18,
317 x19,
318 x20,
319 x21,
320 x22,
321 x23,
322 x24,
323 x25,
324 x26,
325 x27,
326 x28,
327 x29,
328 x30,
329 x31
330);
331simd_ty!(
332 u16x16[u16;16]:
333 x0,
334 x1,
335 x2,
336 x3,
337 x4,
338 x5,
339 x6,
340 x7,
341 x8,
342 x9,
343 x10,
344 x11,
345 x12,
346 x13,
347 x14,
348 x15
349);
350simd_ty!(
351 u32x8[u32;8]:
352 x0,
353 x1,
354 x2,
355 x3,
356 x4,
357 x5,
358 x6,
359 x7
360);
361simd_ty!(u64x4[u64;4]: x0, x1, x2, x3);
362
363simd_ty!(
364 i8x32[i8;32]:
365 x0,
366 x1,
367 x2,
368 x3,
369 x4,
370 x5,
371 x6,
372 x7,
373 x8,
374 x9,
375 x10,
376 x11,
377 x12,
378 x13,
379 x14,
380 x15,
381 x16,
382 x17,
383 x18,
384 x19,
385 x20,
386 x21,
387 x22,
388 x23,
389 x24,
390 x25,
391 x26,
392 x27,
393 x28,
394 x29,
395 x30,
396 x31
397);
398simd_ty!(
399 i16x16[i16;16]:
400 x0,
401 x1,
402 x2,
403 x3,
404 x4,
405 x5,
406 x6,
407 x7,
408 x8,
409 x9,
410 x10,
411 x11,
412 x12,
413 x13,
414 x14,
415 x15
416);
417simd_ty!(
418 i32x8[i32;8]:
419 x0,
420 x1,
421 x2,
422 x3,
423 x4,
424 x5,
425 x6,
426 x7
427);
428simd_ty!(i64x4[i64;4]: x0, x1, x2, x3);
429
430simd_ty!(
431 f16x16[f16;16]:
432 x0,
433 x1,
434 x2,
435 x3,
436 x4,
437 x5,
438 x6,
439 x7,
440 x8,
441 x9,
442 x10,
443 x11,
444 x12,
445 x13,
446 x14,
447 x15
448);
449simd_ty!(
450 f32x8[f32;8]:
451 x0,
452 x1,
453 x2,
454 x3,
455 x4,
456 x5,
457 x6,
458 x7
459);
460simd_ty!(f64x4[f64;4]: x0, x1, x2, x3);
461
462simd_m_ty!(
463 m8x32[i8;32]:
464 x0,
465 x1,
466 x2,
467 x3,
468 x4,
469 x5,
470 x6,
471 x7,
472 x8,
473 x9,
474 x10,
475 x11,
476 x12,
477 x13,
478 x14,
479 x15,
480 x16,
481 x17,
482 x18,
483 x19,
484 x20,
485 x21,
486 x22,
487 x23,
488 x24,
489 x25,
490 x26,
491 x27,
492 x28,
493 x29,
494 x30,
495 x31
496);
497simd_m_ty!(
498 m16x16[i16;16]:
499 x0,
500 x1,
501 x2,
502 x3,
503 x4,
504 x5,
505 x6,
506 x7,
507 x8,
508 x9,
509 x10,
510 x11,
511 x12,
512 x13,
513 x14,
514 x15
515);
516simd_m_ty!(
517 m32x8[i32;8]:
518 x0,
519 x1,
520 x2,
521 x3,
522 x4,
523 x5,
524 x6,
525 x7
526);
527
528// 512-bit wide types:
529
530simd_ty!(
531 i8x64[i8;64]:
532 x0,
533 x1,
534 x2,
535 x3,
536 x4,
537 x5,
538 x6,
539 x7,
540 x8,
541 x9,
542 x10,
543 x11,
544 x12,
545 x13,
546 x14,
547 x15,
548 x16,
549 x17,
550 x18,
551 x19,
552 x20,
553 x21,
554 x22,
555 x23,
556 x24,
557 x25,
558 x26,
559 x27,
560 x28,
561 x29,
562 x30,
563 x31,
564 x32,
565 x33,
566 x34,
567 x35,
568 x36,
569 x37,
570 x38,
571 x39,
572 x40,
573 x41,
574 x42,
575 x43,
576 x44,
577 x45,
578 x46,
579 x47,
580 x48,
581 x49,
582 x50,
583 x51,
584 x52,
585 x53,
586 x54,
587 x55,
588 x56,
589 x57,
590 x58,
591 x59,
592 x60,
593 x61,
594 x62,
595 x63
596);
597
598simd_ty!(
599 u8x64[u8;64]:
600 x0,
601 x1,
602 x2,
603 x3,
604 x4,
605 x5,
606 x6,
607 x7,
608 x8,
609 x9,
610 x10,
611 x11,
612 x12,
613 x13,
614 x14,
615 x15,
616 x16,
617 x17,
618 x18,
619 x19,
620 x20,
621 x21,
622 x22,
623 x23,
624 x24,
625 x25,
626 x26,
627 x27,
628 x28,
629 x29,
630 x30,
631 x31,
632 x32,
633 x33,
634 x34,
635 x35,
636 x36,
637 x37,
638 x38,
639 x39,
640 x40,
641 x41,
642 x42,
643 x43,
644 x44,
645 x45,
646 x46,
647 x47,
648 x48,
649 x49,
650 x50,
651 x51,
652 x52,
653 x53,
654 x54,
655 x55,
656 x56,
657 x57,
658 x58,
659 x59,
660 x60,
661 x61,
662 x62,
663 x63
664);
665
666simd_ty!(
667 i16x32[i16;32]:
668 x0,
669 x1,
670 x2,
671 x3,
672 x4,
673 x5,
674 x6,
675 x7,
676 x8,
677 x9,
678 x10,
679 x11,
680 x12,
681 x13,
682 x14,
683 x15,
684 x16,
685 x17,
686 x18,
687 x19,
688 x20,
689 x21,
690 x22,
691 x23,
692 x24,
693 x25,
694 x26,
695 x27,
696 x28,
697 x29,
698 x30,
699 x31
700);
701
702simd_ty!(
703 u16x32[u16;32]:
704 x0,
705 x1,
706 x2,
707 x3,
708 x4,
709 x5,
710 x6,
711 x7,
712 x8,
713 x9,
714 x10,
715 x11,
716 x12,
717 x13,
718 x14,
719 x15,
720 x16,
721 x17,
722 x18,
723 x19,
724 x20,
725 x21,
726 x22,
727 x23,
728 x24,
729 x25,
730 x26,
731 x27,
732 x28,
733 x29,
734 x30,
735 x31
736);
737
738simd_ty!(
739 i32x16[i32;16]:
740 x0,
741 x1,
742 x2,
743 x3,
744 x4,
745 x5,
746 x6,
747 x7,
748 x8,
749 x9,
750 x10,
751 x11,
752 x12,
753 x13,
754 x14,
755 x15
756);
757
758simd_ty!(
759 u32x16[u32;16]:
760 x0,
761 x1,
762 x2,
763 x3,
764 x4,
765 x5,
766 x6,
767 x7,
768 x8,
769 x9,
770 x10,
771 x11,
772 x12,
773 x13,
774 x14,
775 x15
776);
777
778simd_ty!(
779 f16x32[f16;32]:
780 x0,
781 x1,
782 x2,
783 x3,
784 x4,
785 x5,
786 x6,
787 x7,
788 x8,
789 x9,
790 x10,
791 x11,
792 x12,
793 x13,
794 x14,
795 x15,
796 x16,
797 x17,
798 x18,
799 x19,
800 x20,
801 x21,
802 x22,
803 x23,
804 x24,
805 x25,
806 x26,
807 x27,
808 x28,
809 x29,
810 x30,
811 x31
812);
813simd_ty!(
814 f32x16[f32;16]:
815 x0,
816 x1,
817 x2,
818 x3,
819 x4,
820 x5,
821 x6,
822 x7,
823 x8,
824 x9,
825 x10,
826 x11,
827 x12,
828 x13,
829 x14,
830 x15
831);
832
833simd_ty!(
834 i64x8[i64;8]:
835 x0,
836 x1,
837 x2,
838 x3,
839 x4,
840 x5,
841 x6,
842 x7
843);
844
845simd_ty!(
846 u64x8[u64;8]:
847 x0,
848 x1,
849 x2,
850 x3,
851 x4,
852 x5,
853 x6,
854 x7
855);
856
857simd_ty!(
858 f64x8[f64;8]:
859 x0,
860 x1,
861 x2,
862 x3,
863 x4,
864 x5,
865 x6,
866 x7
867);
868
869// 1024-bit wide types:
870simd_ty!(
871 u16x64[u16;64]:
872 x0,
873 x1,
874 x2,
875 x3,
876 x4,
877 x5,
878 x6,
879 x7,
880 x8,
881 x9,
882 x10,
883 x11,
884 x12,
885 x13,
886 x14,
887 x15,
888 x16,
889 x17,
890 x18,
891 x19,
892 x20,
893 x21,
894 x22,
895 x23,
896 x24,
897 x25,
898 x26,
899 x27,
900 x28,
901 x29,
902 x30,
903 x31,
904 x32,
905 x33,
906 x34,
907 x35,
908 x36,
909 x37,
910 x38,
911 x39,
912 x40,
913 x41,
914 x42,
915 x43,
916 x44,
917 x45,
918 x46,
919 x47,
920 x48,
921 x49,
922 x50,
923 x51,
924 x52,
925 x53,
926 x54,
927 x55,
928 x56,
929 x57,
930 x58,
931 x59,
932 x60,
933 x61,
934 x62,
935 x63
936);
937simd_ty!(
938 i32x32[i32;32]:
939 x0,
940 x1,
941 x2,
942 x3,
943 x4,
944 x5,
945 x6,
946 x7,
947 x8,
948 x9,
949 x10,
950 x11,
951 x12,
952 x13,
953 x14,
954 x15,
955 x16,
956 x17,
957 x18,
958 x19,
959 x20,
960 x21,
961 x22,
962 x23,
963 x24,
964 x25,
965 x26,
966 x27,
967 x28,
968 x29,
969 x30,
970 x31
971);
972simd_ty!(
973 u32x32[u32;32]:
974 x0,
975 x1,
976 x2,
977 x3,
978 x4,
979 x5,
980 x6,
981 x7,
982 x8,
983 x9,
984 x10,
985 x11,
986 x12,
987 x13,
988 x14,
989 x15,
990 x16,
991 x17,
992 x18,
993 x19,
994 x20,
995 x21,
996 x22,
997 x23,
998 x24,
999 x25,
1000 x26,
1001 x27,
1002 x28,
1003 x29,
1004 x30,
1005 x31
1006);
1007
/// Used to continue `Debug`ging SIMD types as `MySimd(1, 2, 3, 4)`, as they
/// were before moving to array-based simd.
///
/// Writes `type_name` followed by every element of `array`, in order, as the
/// fields of a tuple struct. An empty array prints just `type_name`.
///
/// Returns whatever the underlying formatter reports, so write errors are
/// propagated to the caller.
#[inline]
pub(crate) fn debug_simd_finish<T: crate::fmt::Debug, const N: usize>(
    formatter: &mut crate::fmt::Formatter<'_>,
    type_name: &str,
    array: &[T; N],
) -> crate::fmt::Result {
    // The public tuple builder emits one field per lane directly, so no
    // temporary array of `&dyn Debug` references has to be built first.
    let mut tuple = formatter.debug_tuple(type_name);
    for element in array {
        tuple.field(element);
    }
    tuple.finish()
}
1022