1 | #include <cinttypes> |
2 | #include <cstdint> |
3 | #include <cstdio> |
4 | |
// Two overlapping views of one 32-byte block: four 64-bit lanes or
// thirty-two individual bytes. The 32-byte alignment lets main() use objects
// of this type directly as memory operands of `vmovaps` with YMM registers.
union alignas(32) ymm_t {
    std::uint64_t as_uint64[4];
    std::uint8_t as_uint8[32];
};
9 | |
10 | int main() { |
11 | constexpr ymm_t ymm_fill = { |
12 | .as_uint64 = { 0, 0, 0, 0 } |
13 | }; |
14 | |
15 | ymm_t ymm[16]; |
16 | |
17 | asm volatile( |
18 | "vmovaps %1, %%ymm0\n\t" |
19 | "vmovaps %1, %%ymm1\n\t" |
20 | "vmovaps %1, %%ymm2\n\t" |
21 | "vmovaps %1, %%ymm3\n\t" |
22 | "vmovaps %1, %%ymm4\n\t" |
23 | "vmovaps %1, %%ymm5\n\t" |
24 | "vmovaps %1, %%ymm6\n\t" |
25 | "vmovaps %1, %%ymm7\n\t" |
26 | #if defined(__x86_64__) || defined(_M_X64) |
27 | "vmovaps %1, %%ymm8\n\t" |
28 | "vmovaps %1, %%ymm9\n\t" |
29 | "vmovaps %1, %%ymm10\n\t" |
30 | "vmovaps %1, %%ymm11\n\t" |
31 | "vmovaps %1, %%ymm12\n\t" |
32 | "vmovaps %1, %%ymm13\n\t" |
33 | "vmovaps %1, %%ymm14\n\t" |
34 | "vmovaps %1, %%ymm15\n\t" |
35 | #endif |
36 | "\n\t" |
37 | "int3\n\t" |
38 | "\n\t" |
39 | "vmovaps %%ymm0, 0x000(%0)\n\t" |
40 | "vmovaps %%ymm1, 0x020(%0)\n\t" |
41 | "vmovaps %%ymm2, 0x040(%0)\n\t" |
42 | "vmovaps %%ymm3, 0x060(%0)\n\t" |
43 | "vmovaps %%ymm4, 0x080(%0)\n\t" |
44 | "vmovaps %%ymm5, 0x0A0(%0)\n\t" |
45 | "vmovaps %%ymm6, 0x0C0(%0)\n\t" |
46 | "vmovaps %%ymm7, 0x0E0(%0)\n\t" |
47 | #if defined(__x86_64__) || defined(_M_X64) |
48 | "vmovaps %%ymm8, 0x100(%0)\n\t" |
49 | "vmovaps %%ymm9, 0x120(%0)\n\t" |
50 | "vmovaps %%ymm10, 0x140(%0)\n\t" |
51 | "vmovaps %%ymm11, 0x160(%0)\n\t" |
52 | "vmovaps %%ymm12, 0x180(%0)\n\t" |
53 | "vmovaps %%ymm13, 0x1A0(%0)\n\t" |
54 | "vmovaps %%ymm14, 0x1C0(%0)\n\t" |
55 | "vmovaps %%ymm15, 0x1E0(%0)\n\t" |
56 | #endif |
57 | : |
58 | : "b" (ymm), "m" (ymm_fill) |
59 | : "%ymm0" , "%ymm1" , "%ymm2" , "%ymm3" , "%ymm4" , "%ymm5" , "%ymm6" , "%ymm7" |
60 | #if defined(__x86_64__) || defined(_M_X64) |
61 | , "%ymm8" , "%ymm9" , "%ymm10" , "%ymm11" , "%ymm12" , "%ymm13" , "%ymm14" , |
62 | "%ymm15" |
63 | #endif |
64 | ); |
65 | |
66 | for (int i = 0; i < 16; ++i) { |
67 | printf(format: "ymm%d = { " , i); |
68 | for (int j = 0; j < sizeof(ymm->as_uint8); ++j) |
69 | printf(format: "0x%02x " , ymm[i].as_uint8[j]); |
70 | printf(format: "}\n" ); |
71 | } |
72 | |
73 | return 0; |
74 | } |
75 | |