1 | //! Internal `#[repr(simd)]` types |
2 | |
3 | #![allow (non_camel_case_types)] |
4 | |
/// Defines a crate-internal `#[repr(simd)]` vector type named `$id` that wraps
/// `[$elem_type; $len]`, plus its constructors and test helpers.
///
/// The identifiers after the `:` become the parameter names of the generated
/// `new` constructor, one per lane, in lane order.
macro_rules! simd_ty {
    ($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone, Debug, PartialEq)]
        pub(crate) struct $id([$elem_type; $len]);

        #[allow(clippy::use_self)]
        impl $id {
            /// The vector in which every lane has an all-zero bit pattern.
            // SAFETY: within this file the macro is only instantiated with
            // primitive integer and float lanes, for which all-zero bytes are
            // a valid value.
            pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };

            /// Builds a vector from one argument per lane, in lane order.
            #[inline(always)]
            pub(crate) const fn new($($param_name: $elem_type),*) -> Self {
                Self([$($param_name),*])
            }

            /// Builds a vector directly from an array of lanes.
            #[inline(always)]
            pub(crate) const fn from_array(elements: [$elem_type; $len]) -> Self {
                Self(elements)
            }

            /// Builds a vector with `value` copied into every lane.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn splat(value: $elem_type) -> Self {
                // One-lane helper vector used as both shuffle inputs.
                #[derive(Copy, Clone)]
                #[repr(simd)]
                struct Single([$elem_type; 1]);

                let lane = Single([value]);
                // SAFETY: 0 is always in-bounds because we're shuffling
                // a simd type with exactly one element.
                unsafe { simd_shuffle!(lane, lane, [0; $len]) }
            }

            /// Returns the lane stored at position `index`.
            ///
            /// `index` is a runtime value rather than a constant, so this is
            /// not efficient. Use for testing only.
            ///
            /// # Panics
            ///
            /// Panics if `index` is not smaller than the number of lanes.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn extract(self, index: usize) -> $elem_type {
                assert!(index < $len);
                // View the vector as a pointer to its first lane.
                let first_lane = (&self as *const Self).cast::<$elem_type>();
                // SAFETY: `index < $len` was asserted above, and the type wraps
                // `[$elem_type; $len]`; assuming the lanes are laid out like
                // that array, the read stays inside `self`.
                unsafe { first_lane.add(index).read() }
            }
        }
    }
}
52 | |
/// Defines a crate-internal `#[repr(simd)]` mask type named `$id` whose lanes
/// are stored as `$elem_type` but constructed from `bool`s.
///
/// A `true` lane is stored with every bit set, a `false` lane as zero. The
/// identifiers after the `:` become the parameter names of the generated
/// `new` constructor, one per lane, in lane order.
macro_rules! simd_m_ty {
    ($id:ident [$elem_type:ident ; $len:literal]: $($param_name:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone, Debug, PartialEq)]
        pub(crate) struct $id([$elem_type; $len]);

        #[allow(clippy::use_self)]
        impl $id {
            /// Maps `false` to `0` and `true` to the all-ones bit pattern.
            #[inline(always)]
            const fn bool_to_internal(x: bool) -> $elem_type {
                if x {
                    !(0 as $elem_type)
                } else {
                    0 as $elem_type
                }
            }

            /// Builds a mask from one `bool` per lane, in lane order.
            #[inline(always)]
            pub(crate) const fn new($($param_name: bool),*) -> Self {
                Self([$(Self::bool_to_internal($param_name)),*])
            }

            /// Builds a mask with `value` copied into every lane.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn splat(value: bool) -> Self {
                // One-lane helper vector used as both shuffle inputs.
                #[derive(Copy, Clone)]
                #[repr(simd)]
                struct Single([$elem_type; 1]);

                let lane = Single([Self::bool_to_internal(value)]);
                // SAFETY: 0 is always in-bounds because we're shuffling
                // a simd type with exactly one element.
                unsafe { simd_shuffle!(lane, lane, [0; $len]) }
            }
        }
    }
}
85 | |
86 | // 16-bit wide types: |
87 | |
88 | simd_ty!(u8x2[u8;2]: x0, x1); |
89 | simd_ty!(i8x2[i8;2]: x0, x1); |
90 | |
91 | // 32-bit wide types: |
92 | |
93 | simd_ty!(u8x4[u8;4]: x0, x1, x2, x3); |
94 | simd_ty!(u16x2[u16;2]: x0, x1); |
95 | |
96 | simd_ty!(i8x4[i8;4]: x0, x1, x2, x3); |
97 | simd_ty!(i16x2[i16;2]: x0, x1); |
98 | |
99 | // 64-bit wide types: |
100 | |
101 | simd_ty!( |
102 | u8x8[u8;8]: |
103 | x0, |
104 | x1, |
105 | x2, |
106 | x3, |
107 | x4, |
108 | x5, |
109 | x6, |
110 | x7 |
111 | ); |
112 | simd_ty!(u16x4[u16;4]: x0, x1, x2, x3); |
113 | simd_ty!(u32x2[u32;2]: x0, x1); |
114 | simd_ty!(u64x1[u64;1]: x1); |
115 | |
116 | simd_ty!( |
117 | i8x8[i8;8]: |
118 | x0, |
119 | x1, |
120 | x2, |
121 | x3, |
122 | x4, |
123 | x5, |
124 | x6, |
125 | x7 |
126 | ); |
127 | simd_ty!(i16x4[i16;4]: x0, x1, x2, x3); |
128 | simd_ty!(i32x2[i32;2]: x0, x1); |
129 | simd_ty!(i64x1[i64;1]: x1); |
130 | |
131 | simd_ty!(f32x2[f32;2]: x0, x1); |
132 | simd_ty!(f64x1[f64;1]: x1); |
133 | |
134 | // 128-bit wide types: |
135 | |
136 | simd_ty!( |
137 | u8x16[u8;16]: |
138 | x0, |
139 | x1, |
140 | x2, |
141 | x3, |
142 | x4, |
143 | x5, |
144 | x6, |
145 | x7, |
146 | x8, |
147 | x9, |
148 | x10, |
149 | x11, |
150 | x12, |
151 | x13, |
152 | x14, |
153 | x15 |
154 | ); |
155 | simd_ty!( |
156 | u16x8[u16;8]: |
157 | x0, |
158 | x1, |
159 | x2, |
160 | x3, |
161 | x4, |
162 | x5, |
163 | x6, |
164 | x7 |
165 | ); |
166 | simd_ty!(u32x4[u32;4]: x0, x1, x2, x3); |
167 | simd_ty!(u64x2[u64;2]: x0, x1); |
168 | |
169 | simd_ty!( |
170 | i8x16[i8;16]: |
171 | x0, |
172 | x1, |
173 | x2, |
174 | x3, |
175 | x4, |
176 | x5, |
177 | x6, |
178 | x7, |
179 | x8, |
180 | x9, |
181 | x10, |
182 | x11, |
183 | x12, |
184 | x13, |
185 | x14, |
186 | x15 |
187 | ); |
188 | simd_ty!( |
189 | i16x8[i16;8]: |
190 | x0, |
191 | x1, |
192 | x2, |
193 | x3, |
194 | x4, |
195 | x5, |
196 | x6, |
197 | x7 |
198 | ); |
199 | simd_ty!(i32x4[i32;4]: x0, x1, x2, x3); |
200 | simd_ty!(i64x2[i64;2]: x0, x1); |
201 | |
202 | simd_ty!(f16x4[f16;4]: x0, x1, x2, x3); |
203 | |
204 | simd_ty!( |
205 | f16x8[f16;8]: |
206 | x0, |
207 | x1, |
208 | x2, |
209 | x3, |
210 | x4, |
211 | x5, |
212 | x6, |
213 | x7 |
214 | ); |
215 | simd_ty!(f32x4[f32;4]: x0, x1, x2, x3); |
216 | simd_ty!(f64x2[f64;2]: x0, x1); |
217 | |
218 | simd_m_ty!( |
219 | m8x16[i8;16]: |
220 | x0, |
221 | x1, |
222 | x2, |
223 | x3, |
224 | x4, |
225 | x5, |
226 | x6, |
227 | x7, |
228 | x8, |
229 | x9, |
230 | x10, |
231 | x11, |
232 | x12, |
233 | x13, |
234 | x14, |
235 | x15 |
236 | ); |
237 | simd_m_ty!( |
238 | m16x8[i16;8]: |
239 | x0, |
240 | x1, |
241 | x2, |
242 | x3, |
243 | x4, |
244 | x5, |
245 | x6, |
246 | x7 |
247 | ); |
248 | simd_m_ty!(m32x4[i32;4]: x0, x1, x2, x3); |
249 | simd_m_ty!(m64x2[i64;2]: x0, x1); |
250 | |
251 | // 256-bit wide types: |
252 | |
253 | simd_ty!( |
254 | u8x32[u8;32]: |
255 | x0, |
256 | x1, |
257 | x2, |
258 | x3, |
259 | x4, |
260 | x5, |
261 | x6, |
262 | x7, |
263 | x8, |
264 | x9, |
265 | x10, |
266 | x11, |
267 | x12, |
268 | x13, |
269 | x14, |
270 | x15, |
271 | x16, |
272 | x17, |
273 | x18, |
274 | x19, |
275 | x20, |
276 | x21, |
277 | x22, |
278 | x23, |
279 | x24, |
280 | x25, |
281 | x26, |
282 | x27, |
283 | x28, |
284 | x29, |
285 | x30, |
286 | x31 |
287 | ); |
288 | simd_ty!( |
289 | u16x16[u16;16]: |
290 | x0, |
291 | x1, |
292 | x2, |
293 | x3, |
294 | x4, |
295 | x5, |
296 | x6, |
297 | x7, |
298 | x8, |
299 | x9, |
300 | x10, |
301 | x11, |
302 | x12, |
303 | x13, |
304 | x14, |
305 | x15 |
306 | ); |
307 | simd_ty!( |
308 | u32x8[u32;8]: |
309 | x0, |
310 | x1, |
311 | x2, |
312 | x3, |
313 | x4, |
314 | x5, |
315 | x6, |
316 | x7 |
317 | ); |
318 | simd_ty!(u64x4[u64;4]: x0, x1, x2, x3); |
319 | |
320 | simd_ty!( |
321 | i8x32[i8;32]: |
322 | x0, |
323 | x1, |
324 | x2, |
325 | x3, |
326 | x4, |
327 | x5, |
328 | x6, |
329 | x7, |
330 | x8, |
331 | x9, |
332 | x10, |
333 | x11, |
334 | x12, |
335 | x13, |
336 | x14, |
337 | x15, |
338 | x16, |
339 | x17, |
340 | x18, |
341 | x19, |
342 | x20, |
343 | x21, |
344 | x22, |
345 | x23, |
346 | x24, |
347 | x25, |
348 | x26, |
349 | x27, |
350 | x28, |
351 | x29, |
352 | x30, |
353 | x31 |
354 | ); |
355 | simd_ty!( |
356 | i16x16[i16;16]: |
357 | x0, |
358 | x1, |
359 | x2, |
360 | x3, |
361 | x4, |
362 | x5, |
363 | x6, |
364 | x7, |
365 | x8, |
366 | x9, |
367 | x10, |
368 | x11, |
369 | x12, |
370 | x13, |
371 | x14, |
372 | x15 |
373 | ); |
374 | simd_ty!( |
375 | i32x8[i32;8]: |
376 | x0, |
377 | x1, |
378 | x2, |
379 | x3, |
380 | x4, |
381 | x5, |
382 | x6, |
383 | x7 |
384 | ); |
385 | simd_ty!(i64x4[i64;4]: x0, x1, x2, x3); |
386 | |
387 | simd_ty!( |
388 | f16x16[f16;16]: |
389 | x0, |
390 | x1, |
391 | x2, |
392 | x3, |
393 | x4, |
394 | x5, |
395 | x6, |
396 | x7, |
397 | x8, |
398 | x9, |
399 | x10, |
400 | x11, |
401 | x12, |
402 | x13, |
403 | x14, |
404 | x15 |
405 | ); |
406 | simd_ty!( |
407 | f32x8[f32;8]: |
408 | x0, |
409 | x1, |
410 | x2, |
411 | x3, |
412 | x4, |
413 | x5, |
414 | x6, |
415 | x7 |
416 | ); |
417 | simd_ty!(f64x4[f64;4]: x0, x1, x2, x3); |
418 | |
419 | simd_m_ty!( |
420 | m8x32[i8;32]: |
421 | x0, |
422 | x1, |
423 | x2, |
424 | x3, |
425 | x4, |
426 | x5, |
427 | x6, |
428 | x7, |
429 | x8, |
430 | x9, |
431 | x10, |
432 | x11, |
433 | x12, |
434 | x13, |
435 | x14, |
436 | x15, |
437 | x16, |
438 | x17, |
439 | x18, |
440 | x19, |
441 | x20, |
442 | x21, |
443 | x22, |
444 | x23, |
445 | x24, |
446 | x25, |
447 | x26, |
448 | x27, |
449 | x28, |
450 | x29, |
451 | x30, |
452 | x31 |
453 | ); |
454 | simd_m_ty!( |
455 | m16x16[i16;16]: |
456 | x0, |
457 | x1, |
458 | x2, |
459 | x3, |
460 | x4, |
461 | x5, |
462 | x6, |
463 | x7, |
464 | x8, |
465 | x9, |
466 | x10, |
467 | x11, |
468 | x12, |
469 | x13, |
470 | x14, |
471 | x15 |
472 | ); |
473 | simd_m_ty!( |
474 | m32x8[i32;8]: |
475 | x0, |
476 | x1, |
477 | x2, |
478 | x3, |
479 | x4, |
480 | x5, |
481 | x6, |
482 | x7 |
483 | ); |
484 | |
485 | // 512-bit wide types: |
486 | |
487 | simd_ty!( |
488 | i8x64[i8;64]: |
489 | x0, |
490 | x1, |
491 | x2, |
492 | x3, |
493 | x4, |
494 | x5, |
495 | x6, |
496 | x7, |
497 | x8, |
498 | x9, |
499 | x10, |
500 | x11, |
501 | x12, |
502 | x13, |
503 | x14, |
504 | x15, |
505 | x16, |
506 | x17, |
507 | x18, |
508 | x19, |
509 | x20, |
510 | x21, |
511 | x22, |
512 | x23, |
513 | x24, |
514 | x25, |
515 | x26, |
516 | x27, |
517 | x28, |
518 | x29, |
519 | x30, |
520 | x31, |
521 | x32, |
522 | x33, |
523 | x34, |
524 | x35, |
525 | x36, |
526 | x37, |
527 | x38, |
528 | x39, |
529 | x40, |
530 | x41, |
531 | x42, |
532 | x43, |
533 | x44, |
534 | x45, |
535 | x46, |
536 | x47, |
537 | x48, |
538 | x49, |
539 | x50, |
540 | x51, |
541 | x52, |
542 | x53, |
543 | x54, |
544 | x55, |
545 | x56, |
546 | x57, |
547 | x58, |
548 | x59, |
549 | x60, |
550 | x61, |
551 | x62, |
552 | x63 |
553 | ); |
554 | |
555 | simd_ty!( |
556 | u8x64[u8;64]: |
557 | x0, |
558 | x1, |
559 | x2, |
560 | x3, |
561 | x4, |
562 | x5, |
563 | x6, |
564 | x7, |
565 | x8, |
566 | x9, |
567 | x10, |
568 | x11, |
569 | x12, |
570 | x13, |
571 | x14, |
572 | x15, |
573 | x16, |
574 | x17, |
575 | x18, |
576 | x19, |
577 | x20, |
578 | x21, |
579 | x22, |
580 | x23, |
581 | x24, |
582 | x25, |
583 | x26, |
584 | x27, |
585 | x28, |
586 | x29, |
587 | x30, |
588 | x31, |
589 | x32, |
590 | x33, |
591 | x34, |
592 | x35, |
593 | x36, |
594 | x37, |
595 | x38, |
596 | x39, |
597 | x40, |
598 | x41, |
599 | x42, |
600 | x43, |
601 | x44, |
602 | x45, |
603 | x46, |
604 | x47, |
605 | x48, |
606 | x49, |
607 | x50, |
608 | x51, |
609 | x52, |
610 | x53, |
611 | x54, |
612 | x55, |
613 | x56, |
614 | x57, |
615 | x58, |
616 | x59, |
617 | x60, |
618 | x61, |
619 | x62, |
620 | x63 |
621 | ); |
622 | |
623 | simd_ty!( |
624 | i16x32[i16;32]: |
625 | x0, |
626 | x1, |
627 | x2, |
628 | x3, |
629 | x4, |
630 | x5, |
631 | x6, |
632 | x7, |
633 | x8, |
634 | x9, |
635 | x10, |
636 | x11, |
637 | x12, |
638 | x13, |
639 | x14, |
640 | x15, |
641 | x16, |
642 | x17, |
643 | x18, |
644 | x19, |
645 | x20, |
646 | x21, |
647 | x22, |
648 | x23, |
649 | x24, |
650 | x25, |
651 | x26, |
652 | x27, |
653 | x28, |
654 | x29, |
655 | x30, |
656 | x31 |
657 | ); |
658 | |
659 | simd_ty!( |
660 | u16x32[u16;32]: |
661 | x0, |
662 | x1, |
663 | x2, |
664 | x3, |
665 | x4, |
666 | x5, |
667 | x6, |
668 | x7, |
669 | x8, |
670 | x9, |
671 | x10, |
672 | x11, |
673 | x12, |
674 | x13, |
675 | x14, |
676 | x15, |
677 | x16, |
678 | x17, |
679 | x18, |
680 | x19, |
681 | x20, |
682 | x21, |
683 | x22, |
684 | x23, |
685 | x24, |
686 | x25, |
687 | x26, |
688 | x27, |
689 | x28, |
690 | x29, |
691 | x30, |
692 | x31 |
693 | ); |
694 | |
695 | simd_ty!( |
696 | i32x16[i32;16]: |
697 | x0, |
698 | x1, |
699 | x2, |
700 | x3, |
701 | x4, |
702 | x5, |
703 | x6, |
704 | x7, |
705 | x8, |
706 | x9, |
707 | x10, |
708 | x11, |
709 | x12, |
710 | x13, |
711 | x14, |
712 | x15 |
713 | ); |
714 | |
715 | simd_ty!( |
716 | u32x16[u32;16]: |
717 | x0, |
718 | x1, |
719 | x2, |
720 | x3, |
721 | x4, |
722 | x5, |
723 | x6, |
724 | x7, |
725 | x8, |
726 | x9, |
727 | x10, |
728 | x11, |
729 | x12, |
730 | x13, |
731 | x14, |
732 | x15 |
733 | ); |
734 | |
735 | simd_ty!( |
736 | f16x32[f16;32]: |
737 | x0, |
738 | x1, |
739 | x2, |
740 | x3, |
741 | x4, |
742 | x5, |
743 | x6, |
744 | x7, |
745 | x8, |
746 | x9, |
747 | x10, |
748 | x11, |
749 | x12, |
750 | x13, |
751 | x14, |
752 | x15, |
753 | x16, |
754 | x17, |
755 | x18, |
756 | x19, |
757 | x20, |
758 | x21, |
759 | x22, |
760 | x23, |
761 | x24, |
762 | x25, |
763 | x26, |
764 | x27, |
765 | x28, |
766 | x29, |
767 | x30, |
768 | x31 |
769 | ); |
770 | simd_ty!( |
771 | f32x16[f32;16]: |
772 | x0, |
773 | x1, |
774 | x2, |
775 | x3, |
776 | x4, |
777 | x5, |
778 | x6, |
779 | x7, |
780 | x8, |
781 | x9, |
782 | x10, |
783 | x11, |
784 | x12, |
785 | x13, |
786 | x14, |
787 | x15 |
788 | ); |
789 | |
790 | simd_ty!( |
791 | i64x8[i64;8]: |
792 | x0, |
793 | x1, |
794 | x2, |
795 | x3, |
796 | x4, |
797 | x5, |
798 | x6, |
799 | x7 |
800 | ); |
801 | |
802 | simd_ty!( |
803 | u64x8[u64;8]: |
804 | x0, |
805 | x1, |
806 | x2, |
807 | x3, |
808 | x4, |
809 | x5, |
810 | x6, |
811 | x7 |
812 | ); |
813 | |
814 | simd_ty!( |
815 | f64x8[f64;8]: |
816 | x0, |
817 | x1, |
818 | x2, |
819 | x3, |
820 | x4, |
821 | x5, |
822 | x6, |
823 | x7 |
824 | ); |
825 | |
826 | // 1024-bit wide types: |
827 | simd_ty!( |
828 | u16x64[u16;64]: |
829 | x0, |
830 | x1, |
831 | x2, |
832 | x3, |
833 | x4, |
834 | x5, |
835 | x6, |
836 | x7, |
837 | x8, |
838 | x9, |
839 | x10, |
840 | x11, |
841 | x12, |
842 | x13, |
843 | x14, |
844 | x15, |
845 | x16, |
846 | x17, |
847 | x18, |
848 | x19, |
849 | x20, |
850 | x21, |
851 | x22, |
852 | x23, |
853 | x24, |
854 | x25, |
855 | x26, |
856 | x27, |
857 | x28, |
858 | x29, |
859 | x30, |
860 | x31, |
861 | x32, |
862 | x33, |
863 | x34, |
864 | x35, |
865 | x36, |
866 | x37, |
867 | x38, |
868 | x39, |
869 | x40, |
870 | x41, |
871 | x42, |
872 | x43, |
873 | x44, |
874 | x45, |
875 | x46, |
876 | x47, |
877 | x48, |
878 | x49, |
879 | x50, |
880 | x51, |
881 | x52, |
882 | x53, |
883 | x54, |
884 | x55, |
885 | x56, |
886 | x57, |
887 | x58, |
888 | x59, |
889 | x60, |
890 | x61, |
891 | x62, |
892 | x63 |
893 | ); |
894 | simd_ty!( |
895 | i32x32[i32;32]: |
896 | x0, |
897 | x1, |
898 | x2, |
899 | x3, |
900 | x4, |
901 | x5, |
902 | x6, |
903 | x7, |
904 | x8, |
905 | x9, |
906 | x10, |
907 | x11, |
908 | x12, |
909 | x13, |
910 | x14, |
911 | x15, |
912 | x16, |
913 | x17, |
914 | x18, |
915 | x19, |
916 | x20, |
917 | x21, |
918 | x22, |
919 | x23, |
920 | x24, |
921 | x25, |
922 | x26, |
923 | x27, |
924 | x28, |
925 | x29, |
926 | x30, |
927 | x31 |
928 | ); |
929 | simd_ty!( |
930 | u32x32[u32;32]: |
931 | x0, |
932 | x1, |
933 | x2, |
934 | x3, |
935 | x4, |
936 | x5, |
937 | x6, |
938 | x7, |
939 | x8, |
940 | x9, |
941 | x10, |
942 | x11, |
943 | x12, |
944 | x13, |
945 | x14, |
946 | x15, |
947 | x16, |
948 | x17, |
949 | x18, |
950 | x19, |
951 | x20, |
952 | x21, |
953 | x22, |
954 | x23, |
955 | x24, |
956 | x25, |
957 | x26, |
958 | x27, |
959 | x28, |
960 | x29, |
961 | x30, |
962 | x31 |
963 | ); |
964 | |
/// Used to continue `Debug`ging SIMD types as `MySimd(1, 2, 3, 4)`, as they
/// were before moving to array-based simd.
///
/// Writes `type_name` followed by the elements of `array` as the fields of a
/// tuple struct, so an empty array prints just the name and the alternate
/// (`{:#?}`) flag pretty-prints one element per line.
///
/// # Errors
///
/// Returns whatever error the underlying `formatter` reports while writing.
#[inline]
pub(crate) fn debug_simd_finish<T: crate::fmt::Debug, const N: usize>(
    formatter: &mut crate::fmt::Formatter<'_>,
    type_name: &str,
    array: [T; N],
) -> crate::fmt::Result {
    // Use the public tuple builder rather than the hidden derive helper: it
    // produces the same output and needs no temporary array of trait objects.
    let mut tuple = formatter.debug_tuple(type_name);
    for lane in &array {
        tuple.field(lane);
    }
    tuple.finish()
}
979 | |