1 | #include <cstdint> |
2 | |
// 128-bit value stored as two 64-bit halves. The alignas(16) makes every
// object (and every array element) start on a 16-byte boundary.
// Member `a` sits at offset 0 and `b` at offset 8.
struct alignas(16) xmm_t {
  uint64_t a;  // first 8 bytes of the object
  uint64_t b;  // last 8 bytes of the object
};
6 | |
7 | int main() { |
8 | constexpr uint64_t r8[] = { |
9 | 0x0001020304050607, |
10 | 0x1011121314151617, |
11 | 0x2021222324252627, |
12 | 0x3031323334353637, |
13 | 0x4041424344454647, |
14 | 0x5051525354555657, |
15 | 0x6061626364656667, |
16 | 0x7071727374757677, |
17 | }; |
18 | |
19 | constexpr xmm_t xmm8[] = { |
20 | { .a: 0x0F0E0D0C0B0A0908, .b: 0x1716151413121110, }, |
21 | { .a: 0x100F0E0D0C0B0A09, .b: 0x1817161514131211, }, |
22 | { .a: 0x11100F0E0D0C0B0A, .b: 0x1918171615141312, }, |
23 | { .a: 0x1211100F0E0D0C0B, .b: 0x1A19181716151413, }, |
24 | { .a: 0x131211100F0E0D0C, .b: 0x1B1A191817161514, }, |
25 | { .a: 0x14131211100F0E0D, .b: 0x1C1B1A1918171615, }, |
26 | { .a: 0x1514131211100F0E, .b: 0x1D1C1B1A19181716, }, |
27 | { .a: 0x161514131211100F, .b: 0x1E1D1C1B1A191817, }, |
28 | }; |
29 | |
30 | asm volatile( |
31 | "movq 0x00(%0), %%r8\n\t" |
32 | "movq 0x08(%0), %%r9\n\t" |
33 | "movq 0x10(%0), %%r10\n\t" |
34 | "movq 0x18(%0), %%r11\n\t" |
35 | "movq 0x20(%0), %%r12\n\t" |
36 | "movq 0x28(%0), %%r13\n\t" |
37 | "movq 0x30(%0), %%r14\n\t" |
38 | "movq 0x38(%0), %%r15\n\t" |
39 | "\n\t" |
40 | "movaps 0x00(%1), %%xmm8\n\t" |
41 | "movaps 0x10(%1), %%xmm9\n\t" |
42 | "movaps 0x20(%1), %%xmm10\n\t" |
43 | "movaps 0x30(%1), %%xmm11\n\t" |
44 | "movaps 0x40(%1), %%xmm12\n\t" |
45 | "movaps 0x50(%1), %%xmm13\n\t" |
46 | "movaps 0x60(%1), %%xmm14\n\t" |
47 | "movaps 0x70(%1), %%xmm15\n\t" |
48 | "\n\t" |
49 | "int3\n\t" |
50 | : |
51 | : "a" (r8), "b" (xmm8) |
52 | : "%r8" , "%r9" , "%r10" , "%r11" , "%r12" , "%r13" , "%r14" , "%r15" , "%xmm8" , |
53 | "%xmm9" , "%xmm10" , "%xmm11" , "%xmm12" , "%xmm13" , "%xmm14" , "%xmm15" |
54 | ); |
55 | |
56 | return 0; |
57 | } |
58 | |