1//! Internal `#[repr(simd)]` types
2
3#![allow(non_camel_case_types)]
4
/// Defines a crate-internal `#[repr(simd)]` struct named `$name` with one public
/// `$lane_ty` field per listed lane, together with `new`, `splat` and a
/// test-oriented `extract`.
macro_rules! simd_ty {
    ($name:ident [$lane_ty:ident]: $($lane:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone, Debug, PartialEq)]
        pub(crate) struct $name { $(pub $lane: $lane_ty),* }

        #[allow(clippy::use_self)]
        impl $name {
            /// Builds a vector from one value per lane, given in field order.
            #[inline(always)]
            pub(crate) const fn new($($lane: $lane_ty),*) -> Self {
                Self { $($lane),* }
            }

            /// Builds a vector in which every lane holds `value`.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) const fn splat(value: $lane_ty) -> Self {
                Self { $($lane: value),* }
            }

            /// Returns the lane at position `index`.
            ///
            /// `index` is a runtime value rather than a constant, so this is slow;
            /// it is meant for tests only.
            ///
            /// # Panics
            ///
            /// Panics if `index` is not smaller than the number of lanes.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) fn extract(self, index: usize) -> $lane_ty {
                // The lane count is derived from the two sizes, which assumes the
                // struct contains no padding.
                let len = crate::mem::size_of::<Self>() / crate::mem::size_of::<$lane_ty>();
                assert!(index < len);
                // Reinterpret the vector as a run of lanes starting at its first byte.
                let whole: *const Self = &self;
                let first_lane = whole as *const $lane_ty;
                // SAFETY: `index < len` was asserted above and `len` lanes of
                // `$lane_ty` span exactly `size_of::<Self>()` bytes, so the read
                // stays inside `self`.
                unsafe { first_lane.add(index).read() }
            }
        }
    }
}
43
/// Defines a crate-internal `#[repr(simd)]` mask struct named `$name` whose lanes
/// are stored as `$lane_ty` values but constructed from `bool`s.
macro_rules! simd_m_ty {
    ($name:ident [$lane_ty:ident]: $($lane:ident),*) => {
        #[repr(simd)]
        #[derive(Copy, Clone, Debug, PartialEq)]
        pub(crate) struct $name { $(pub $lane: $lane_ty),* }

        #[allow(clippy::use_self)]
        impl $name {
            /// Converts a `bool` into its lane representation: `false` becomes `0`
            /// and `true` becomes the bitwise complement of `0` (all bits set).
            #[inline(always)]
            const fn bool_to_internal(x: bool) -> $lane_ty {
                // Table lookup indexed by the bool (0 or 1) instead of a branch.
                let cleared = 0 as $lane_ty;
                let lookup = [cleared, !cleared];
                lookup[x as usize]
            }

            /// Builds a mask from one `bool` per lane, given in field order.
            #[inline(always)]
            pub(crate) const fn new($($lane: bool),*) -> Self {
                Self { $($lane: Self::bool_to_internal($lane)),* }
            }

            /// Builds a mask in which every lane encodes `value`.
            // FIXME: Workaround rust@60637
            #[inline(always)]
            pub(crate) const fn splat(value: bool) -> Self {
                Self { $($lane: Self::bool_to_internal(value)),* }
            }
        }
    }
}
72
73// 16-bit wide types:
74
75simd_ty!(u8x2[u8]: x0, x1);
76simd_ty!(i8x2[i8]: x0, x1);
77
78// 32-bit wide types:
79
80simd_ty!(u8x4[u8]: x0, x1, x2, x3);
81simd_ty!(u16x2[u16]: x0, x1);
82
83simd_ty!(i8x4[i8]: x0, x1, x2, x3);
84simd_ty!(i16x2[i16]: x0, x1);
85
86// 64-bit wide types:
87
88simd_ty!(
89 u8x8[u8]:
90 x0,
91 x1,
92 x2,
93 x3,
94 x4,
95 x5,
96 x6,
97 x7
98);
99simd_ty!(u16x4[u16]: x0, x1, x2, x3);
100simd_ty!(u32x2[u32]: x0, x1);
101simd_ty!(u64x1[u64]: x1);
102
103simd_ty!(
104 i8x8[i8]:
105 x0,
106 x1,
107 x2,
108 x3,
109 x4,
110 x5,
111 x6,
112 x7
113);
114simd_ty!(i16x4[i16]: x0, x1, x2, x3);
115simd_ty!(i32x2[i32]: x0, x1);
116simd_ty!(i64x1[i64]: x1);
117
118simd_ty!(f32x2[f32]: x0, x1);
119simd_ty!(f64x1[f64]: x1);
120
121// 128-bit wide types:
122
123simd_ty!(
124 u8x16[u8]:
125 x0,
126 x1,
127 x2,
128 x3,
129 x4,
130 x5,
131 x6,
132 x7,
133 x8,
134 x9,
135 x10,
136 x11,
137 x12,
138 x13,
139 x14,
140 x15
141);
142simd_ty!(
143 u16x8[u16]:
144 x0,
145 x1,
146 x2,
147 x3,
148 x4,
149 x5,
150 x6,
151 x7
152);
153simd_ty!(u32x4[u32]: x0, x1, x2, x3);
154simd_ty!(u64x2[u64]: x0, x1);
155
156simd_ty!(
157 i8x16[i8]:
158 x0,
159 x1,
160 x2,
161 x3,
162 x4,
163 x5,
164 x6,
165 x7,
166 x8,
167 x9,
168 x10,
169 x11,
170 x12,
171 x13,
172 x14,
173 x15
174);
175simd_ty!(
176 i16x8[i16]:
177 x0,
178 x1,
179 x2,
180 x3,
181 x4,
182 x5,
183 x6,
184 x7
185);
186simd_ty!(i32x4[i32]: x0, x1, x2, x3);
187simd_ty!(i64x2[i64]: x0, x1);
188
189simd_ty!(f32x4[f32]: x0, x1, x2, x3);
190simd_ty!(f64x2[f64]: x0, x1);
191simd_ty!(f64x4[f64]: x0, x1, x2, x3);
192
193simd_m_ty!(
194 m8x16[i8]:
195 x0,
196 x1,
197 x2,
198 x3,
199 x4,
200 x5,
201 x6,
202 x7,
203 x8,
204 x9,
205 x10,
206 x11,
207 x12,
208 x13,
209 x14,
210 x15
211);
212simd_m_ty!(
213 m16x8[i16]:
214 x0,
215 x1,
216 x2,
217 x3,
218 x4,
219 x5,
220 x6,
221 x7
222);
223simd_m_ty!(m32x4[i32]: x0, x1, x2, x3);
224simd_m_ty!(m64x2[i64]: x0, x1);
225
226// 256-bit wide types:
227
228simd_ty!(
229 u8x32[u8]:
230 x0,
231 x1,
232 x2,
233 x3,
234 x4,
235 x5,
236 x6,
237 x7,
238 x8,
239 x9,
240 x10,
241 x11,
242 x12,
243 x13,
244 x14,
245 x15,
246 x16,
247 x17,
248 x18,
249 x19,
250 x20,
251 x21,
252 x22,
253 x23,
254 x24,
255 x25,
256 x26,
257 x27,
258 x28,
259 x29,
260 x30,
261 x31
262);
263simd_ty!(
264 u16x16[u16]:
265 x0,
266 x1,
267 x2,
268 x3,
269 x4,
270 x5,
271 x6,
272 x7,
273 x8,
274 x9,
275 x10,
276 x11,
277 x12,
278 x13,
279 x14,
280 x15
281);
282simd_ty!(
283 u32x8[u32]:
284 x0,
285 x1,
286 x2,
287 x3,
288 x4,
289 x5,
290 x6,
291 x7
292);
293simd_ty!(u64x4[u64]: x0, x1, x2, x3);
294
295simd_ty!(
296 i8x32[i8]:
297 x0,
298 x1,
299 x2,
300 x3,
301 x4,
302 x5,
303 x6,
304 x7,
305 x8,
306 x9,
307 x10,
308 x11,
309 x12,
310 x13,
311 x14,
312 x15,
313 x16,
314 x17,
315 x18,
316 x19,
317 x20,
318 x21,
319 x22,
320 x23,
321 x24,
322 x25,
323 x26,
324 x27,
325 x28,
326 x29,
327 x30,
328 x31
329);
330simd_ty!(
331 i16x16[i16]:
332 x0,
333 x1,
334 x2,
335 x3,
336 x4,
337 x5,
338 x6,
339 x7,
340 x8,
341 x9,
342 x10,
343 x11,
344 x12,
345 x13,
346 x14,
347 x15
348);
349simd_ty!(
350 i32x8[i32]:
351 x0,
352 x1,
353 x2,
354 x3,
355 x4,
356 x5,
357 x6,
358 x7
359);
360simd_ty!(i64x4[i64]: x0, x1, x2, x3);
361
362simd_ty!(
363 f32x8[f32]:
364 x0,
365 x1,
366 x2,
367 x3,
368 x4,
369 x5,
370 x6,
371 x7
372);
373
374simd_m_ty!(
375 m8x32[i8]:
376 x0,
377 x1,
378 x2,
379 x3,
380 x4,
381 x5,
382 x6,
383 x7,
384 x8,
385 x9,
386 x10,
387 x11,
388 x12,
389 x13,
390 x14,
391 x15,
392 x16,
393 x17,
394 x18,
395 x19,
396 x20,
397 x21,
398 x22,
399 x23,
400 x24,
401 x25,
402 x26,
403 x27,
404 x28,
405 x29,
406 x30,
407 x31
408);
409simd_m_ty!(
410 m16x16[i16]:
411 x0,
412 x1,
413 x2,
414 x3,
415 x4,
416 x5,
417 x6,
418 x7,
419 x8,
420 x9,
421 x10,
422 x11,
423 x12,
424 x13,
425 x14,
426 x15
427);
428simd_m_ty!(
429 m32x8[i32]:
430 x0,
431 x1,
432 x2,
433 x3,
434 x4,
435 x5,
436 x6,
437 x7
438);
439
440
441// 512-bit wide types:
442
443simd_ty!(
444 i8x64[i8]:
445 x0,
446 x1,
447 x2,
448 x3,
449 x4,
450 x5,
451 x6,
452 x7,
453 x8,
454 x9,
455 x10,
456 x11,
457 x12,
458 x13,
459 x14,
460 x15,
461 x16,
462 x17,
463 x18,
464 x19,
465 x20,
466 x21,
467 x22,
468 x23,
469 x24,
470 x25,
471 x26,
472 x27,
473 x28,
474 x29,
475 x30,
476 x31,
477 x32,
478 x33,
479 x34,
480 x35,
481 x36,
482 x37,
483 x38,
484 x39,
485 x40,
486 x41,
487 x42,
488 x43,
489 x44,
490 x45,
491 x46,
492 x47,
493 x48,
494 x49,
495 x50,
496 x51,
497 x52,
498 x53,
499 x54,
500 x55,
501 x56,
502 x57,
503 x58,
504 x59,
505 x60,
506 x61,
507 x62,
508 x63
509);
510
511simd_ty!(
512 u8x64[u8]:
513 x0,
514 x1,
515 x2,
516 x3,
517 x4,
518 x5,
519 x6,
520 x7,
521 x8,
522 x9,
523 x10,
524 x11,
525 x12,
526 x13,
527 x14,
528 x15,
529 x16,
530 x17,
531 x18,
532 x19,
533 x20,
534 x21,
535 x22,
536 x23,
537 x24,
538 x25,
539 x26,
540 x27,
541 x28,
542 x29,
543 x30,
544 x31,
545 x32,
546 x33,
547 x34,
548 x35,
549 x36,
550 x37,
551 x38,
552 x39,
553 x40,
554 x41,
555 x42,
556 x43,
557 x44,
558 x45,
559 x46,
560 x47,
561 x48,
562 x49,
563 x50,
564 x51,
565 x52,
566 x53,
567 x54,
568 x55,
569 x56,
570 x57,
571 x58,
572 x59,
573 x60,
574 x61,
575 x62,
576 x63
577);
578
579simd_ty!(
580 i16x32[i16]:
581 x0,
582 x1,
583 x2,
584 x3,
585 x4,
586 x5,
587 x6,
588 x7,
589 x8,
590 x9,
591 x10,
592 x11,
593 x12,
594 x13,
595 x14,
596 x15,
597 x16,
598 x17,
599 x18,
600 x19,
601 x20,
602 x21,
603 x22,
604 x23,
605 x24,
606 x25,
607 x26,
608 x27,
609 x28,
610 x29,
611 x30,
612 x31
613);
614
615simd_ty!(
616 u16x32[u16]:
617 x0,
618 x1,
619 x2,
620 x3,
621 x4,
622 x5,
623 x6,
624 x7,
625 x8,
626 x9,
627 x10,
628 x11,
629 x12,
630 x13,
631 x14,
632 x15,
633 x16,
634 x17,
635 x18,
636 x19,
637 x20,
638 x21,
639 x22,
640 x23,
641 x24,
642 x25,
643 x26,
644 x27,
645 x28,
646 x29,
647 x30,
648 x31
649);
650
651simd_ty!(
652 i32x16[i32]:
653 x0,
654 x1,
655 x2,
656 x3,
657 x4,
658 x5,
659 x6,
660 x7,
661 x8,
662 x9,
663 x10,
664 x11,
665 x12,
666 x13,
667 x14,
668 x15
669);
670
671simd_ty!(
672 u32x16[u32]:
673 x0,
674 x1,
675 x2,
676 x3,
677 x4,
678 x5,
679 x6,
680 x7,
681 x8,
682 x9,
683 x10,
684 x11,
685 x12,
686 x13,
687 x14,
688 x15
689);
690
691simd_ty!(
692 f32x16[f32]:
693 x0,
694 x1,
695 x2,
696 x3,
697 x4,
698 x5,
699 x6,
700 x7,
701 x8,
702 x9,
703 x10,
704 x11,
705 x12,
706 x13,
707 x14,
708 x15
709);
710
711simd_ty!(
712 i64x8[i64]:
713 x0,
714 x1,
715 x2,
716 x3,
717 x4,
718 x5,
719 x6,
720 x7
721);
722
723simd_ty!(
724 u64x8[u64]:
725 x0,
726 x1,
727 x2,
728 x3,
729 x4,
730 x5,
731 x6,
732 x7
733);
734
735simd_ty!(
736 f64x8[f64]:
737 x0,
738 x1,
739 x2,
740 x3,
741 x4,
742 x5,
743 x6,
744 x7
745);
746