1 | #include <cinttypes> |
2 | #include <cstdint> |
3 | #include <cstdio> |
4 | |
// Storage for the contents of one 512-bit ZMM register, viewable either as
// 64-bit lanes or as raw bytes. Both views cover the same 64 bytes.
//
// The 64-byte alignment matters: main() accesses objects of this type with
// 512-bit vmovaps, which faults on a memory operand that is not 64-byte
// aligned.
union alignas(64) zmm_t {
  uint64_t as_uint64[512 / 64]; // eight 64-bit lanes
  uint8_t as_uint8[512 / 8];    // the same register as 64 individual bytes
};
9 | |
10 | int main() { |
11 | constexpr zmm_t zmm_fill = { |
12 | .as_uint64 = { 0, 0, 0, 0, 0, 0, 0, 0 } |
13 | }; |
14 | |
15 | zmm_t zmm[32]; |
16 | |
17 | asm volatile( |
18 | "vmovaps %1, %%zmm0\n\t" |
19 | "vmovaps %1, %%zmm1\n\t" |
20 | "vmovaps %1, %%zmm2\n\t" |
21 | "vmovaps %1, %%zmm3\n\t" |
22 | "vmovaps %1, %%zmm4\n\t" |
23 | "vmovaps %1, %%zmm5\n\t" |
24 | "vmovaps %1, %%zmm6\n\t" |
25 | "vmovaps %1, %%zmm7\n\t" |
26 | #if defined(__x86_64__) || defined(_M_X64) |
27 | "vmovaps %1, %%zmm8\n\t" |
28 | "vmovaps %1, %%zmm9\n\t" |
29 | "vmovaps %1, %%zmm10\n\t" |
30 | "vmovaps %1, %%zmm11\n\t" |
31 | "vmovaps %1, %%zmm12\n\t" |
32 | "vmovaps %1, %%zmm13\n\t" |
33 | "vmovaps %1, %%zmm14\n\t" |
34 | "vmovaps %1, %%zmm15\n\t" |
35 | "vmovaps %1, %%zmm16\n\t" |
36 | "vmovaps %1, %%zmm17\n\t" |
37 | "vmovaps %1, %%zmm18\n\t" |
38 | "vmovaps %1, %%zmm19\n\t" |
39 | "vmovaps %1, %%zmm20\n\t" |
40 | "vmovaps %1, %%zmm21\n\t" |
41 | "vmovaps %1, %%zmm22\n\t" |
42 | "vmovaps %1, %%zmm23\n\t" |
43 | "vmovaps %1, %%zmm24\n\t" |
44 | "vmovaps %1, %%zmm25\n\t" |
45 | "vmovaps %1, %%zmm26\n\t" |
46 | "vmovaps %1, %%zmm27\n\t" |
47 | "vmovaps %1, %%zmm28\n\t" |
48 | "vmovaps %1, %%zmm29\n\t" |
49 | "vmovaps %1, %%zmm30\n\t" |
50 | "vmovaps %1, %%zmm31\n\t" |
51 | #endif |
52 | "\n\t" |
53 | "int3\n\t" |
54 | "\n\t" |
55 | "vmovaps %%zmm0, 0x000(%0)\n\t" |
56 | "vmovaps %%zmm1, 0x040(%0)\n\t" |
57 | "vmovaps %%zmm2, 0x080(%0)\n\t" |
58 | "vmovaps %%zmm3, 0x0C0(%0)\n\t" |
59 | "vmovaps %%zmm4, 0x100(%0)\n\t" |
60 | "vmovaps %%zmm5, 0x140(%0)\n\t" |
61 | "vmovaps %%zmm6, 0x180(%0)\n\t" |
62 | "vmovaps %%zmm7, 0x1C0(%0)\n\t" |
63 | #if defined(__x86_64__) || defined(_M_X64) |
64 | "vmovaps %%zmm8, 0x200(%0)\n\t" |
65 | "vmovaps %%zmm9, 0x240(%0)\n\t" |
66 | "vmovaps %%zmm10, 0x280(%0)\n\t" |
67 | "vmovaps %%zmm11, 0x2C0(%0)\n\t" |
68 | "vmovaps %%zmm12, 0x300(%0)\n\t" |
69 | "vmovaps %%zmm13, 0x340(%0)\n\t" |
70 | "vmovaps %%zmm14, 0x380(%0)\n\t" |
71 | "vmovaps %%zmm15, 0x3C0(%0)\n\t" |
72 | "vmovaps %%zmm16, 0x400(%0)\n\t" |
73 | "vmovaps %%zmm17, 0x440(%0)\n\t" |
74 | "vmovaps %%zmm18, 0x480(%0)\n\t" |
75 | "vmovaps %%zmm19, 0x4C0(%0)\n\t" |
76 | "vmovaps %%zmm20, 0x500(%0)\n\t" |
77 | "vmovaps %%zmm21, 0x540(%0)\n\t" |
78 | "vmovaps %%zmm22, 0x580(%0)\n\t" |
79 | "vmovaps %%zmm23, 0x5C0(%0)\n\t" |
80 | "vmovaps %%zmm24, 0x600(%0)\n\t" |
81 | "vmovaps %%zmm25, 0x640(%0)\n\t" |
82 | "vmovaps %%zmm26, 0x680(%0)\n\t" |
83 | "vmovaps %%zmm27, 0x6C0(%0)\n\t" |
84 | "vmovaps %%zmm28, 0x700(%0)\n\t" |
85 | "vmovaps %%zmm29, 0x740(%0)\n\t" |
86 | "vmovaps %%zmm30, 0x780(%0)\n\t" |
87 | "vmovaps %%zmm31, 0x7C0(%0)\n\t" |
88 | #endif |
89 | : |
90 | : "b" (zmm), "m" (zmm_fill) |
91 | : "%zmm0" , "%zmm1" , "%zmm2" , "%zmm3" , "%zmm4" , "%zmm5" , "%zmm6" , "%zmm7" |
92 | #if defined(__x86_64__) || defined(_M_X64) |
93 | , "%zmm8" , "%zmm9" , "%zmm10" , "%zmm11" , "%zmm12" , "%zmm13" , "%zmm14" , |
94 | "%zmm15" , "%zmm16" , "%zmm17" , "%zmm18" , "%zmm19" , "%zmm20" , "%zmm21" , |
95 | "%zmm22" , "%zmm23" , "%zmm24" , "%zmm25" , "%zmm26" , "%zmm27" , "%zmm28" , |
96 | "%zmm29" , "%zmm30" , "%zmm31" |
97 | #endif |
98 | ); |
99 | |
100 | for (int i = 0; i < 32; ++i) { |
101 | printf(format: "zmm%d = { " , i); |
102 | for (int j = 0; j < sizeof(zmm->as_uint8); ++j) |
103 | printf(format: "0x%02x " , zmm[i].as_uint8[j]); |
104 | printf(format: "}\n" ); |
105 | } |
106 | |
107 | return 0; |
108 | } |
109 | |