| 1 | #include <cstdint> |
| 2 | |
// 128-bit value split into two 64-bit halves. The 16-byte alignment lets an
// array of these be read with aligned 128-bit loads (movaps).
struct alignas(16) xmm_t {
    uint64_t a;  // bytes 0..7 of the 16-byte value
    uint64_t b;  // bytes 8..15 of the 16-byte value
};
| 6 | |
| 7 | int main() { |
| 8 | constexpr uint64_t mm[] = { |
| 9 | 0x0001020304050607, |
| 10 | 0x1011121314151617, |
| 11 | 0x2021222324252627, |
| 12 | 0x3031323334353637, |
| 13 | 0x4041424344454647, |
| 14 | 0x5051525354555657, |
| 15 | 0x6061626364656667, |
| 16 | 0x7071727374757677, |
| 17 | }; |
| 18 | |
| 19 | constexpr xmm_t xmm[] = { |
| 20 | { .a: 0x0706050403020100, .b: 0x0F0E0D0C0B0A0908, }, |
| 21 | { .a: 0x0807060504030201, .b: 0x100F0E0D0C0B0A09, }, |
| 22 | { .a: 0x0908070605040302, .b: 0x11100F0E0D0C0B0A, }, |
| 23 | { .a: 0x0A09080706050403, .b: 0x1211100F0E0D0C0B, }, |
| 24 | { .a: 0x0B0A090807060504, .b: 0x131211100F0E0D0C, }, |
| 25 | { .a: 0x0C0B0A0908070605, .b: 0x14131211100F0E0D, }, |
| 26 | { .a: 0x0D0C0B0A09080706, .b: 0x1514131211100F0E, }, |
| 27 | { .a: 0x0E0D0C0B0A090807, .b: 0x161514131211100F, }, |
| 28 | }; |
| 29 | |
| 30 | asm volatile( |
| 31 | "movq 0x00(%0), %%mm0\n\t" |
| 32 | "movq 0x08(%0), %%mm1\n\t" |
| 33 | "movq 0x10(%0), %%mm2\n\t" |
| 34 | "movq 0x18(%0), %%mm3\n\t" |
| 35 | "movq 0x20(%0), %%mm4\n\t" |
| 36 | "movq 0x28(%0), %%mm5\n\t" |
| 37 | "movq 0x30(%0), %%mm6\n\t" |
| 38 | "movq 0x38(%0), %%mm7\n\t" |
| 39 | "\n\t" |
| 40 | "movaps 0x00(%1), %%xmm0\n\t" |
| 41 | "movaps 0x10(%1), %%xmm1\n\t" |
| 42 | "movaps 0x20(%1), %%xmm2\n\t" |
| 43 | "movaps 0x30(%1), %%xmm3\n\t" |
| 44 | "movaps 0x40(%1), %%xmm4\n\t" |
| 45 | "movaps 0x50(%1), %%xmm5\n\t" |
| 46 | "movaps 0x60(%1), %%xmm6\n\t" |
| 47 | "movaps 0x70(%1), %%xmm7\n\t" |
| 48 | "\n\t" |
| 49 | "int3\n\t" |
| 50 | : |
| 51 | : "a" (mm), "b" (xmm) |
| 52 | : "%mm0" , "%mm1" , "%mm2" , "%mm3" , "%mm4" , "%mm5" , "%mm6" , "%mm7" , |
| 53 | "%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm6" , "%xmm7" |
| 54 | ); |
| 55 | |
| 56 | return 0; |
| 57 | } |
| 58 | |