#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstdio>

/**
 * Memory image of one 64-byte (512-bit) vector register slot.
 *
 * Both members overlay the same 64 bytes, so a slot can be addressed either
 * as eight 64-bit words or as sixty-four individual bytes. The type is
 * 64-byte aligned, which the aligned `vmovaps` loads/stores in this file
 * depend on.
 */
union alignas(64) zmm_t {
  uint64_t as_uint64[64 / sizeof(uint64_t)];  // view as 8 x 64-bit words
  uint8_t as_uint8[64 / sizeof(uint8_t)];     // view as 64 x bytes
};

| 10 | int main() { |
| 11 | constexpr zmm_t zmm_fill = { |
| 12 | .as_uint64 = { 0, 0, 0, 0, 0, 0, 0, 0 } |
| 13 | }; |
| 14 | |
| 15 | zmm_t zmm[32]; |
| 16 | |
| 17 | asm volatile( |
| 18 | "vmovaps %1, %%zmm0\n\t" |
| 19 | "vmovaps %1, %%zmm1\n\t" |
| 20 | "vmovaps %1, %%zmm2\n\t" |
| 21 | "vmovaps %1, %%zmm3\n\t" |
| 22 | "vmovaps %1, %%zmm4\n\t" |
| 23 | "vmovaps %1, %%zmm5\n\t" |
| 24 | "vmovaps %1, %%zmm6\n\t" |
| 25 | "vmovaps %1, %%zmm7\n\t" |
| 26 | #if defined(__x86_64__) || defined(_M_X64) |
| 27 | "vmovaps %1, %%zmm8\n\t" |
| 28 | "vmovaps %1, %%zmm9\n\t" |
| 29 | "vmovaps %1, %%zmm10\n\t" |
| 30 | "vmovaps %1, %%zmm11\n\t" |
| 31 | "vmovaps %1, %%zmm12\n\t" |
| 32 | "vmovaps %1, %%zmm13\n\t" |
| 33 | "vmovaps %1, %%zmm14\n\t" |
| 34 | "vmovaps %1, %%zmm15\n\t" |
| 35 | "vmovaps %1, %%zmm16\n\t" |
| 36 | "vmovaps %1, %%zmm17\n\t" |
| 37 | "vmovaps %1, %%zmm18\n\t" |
| 38 | "vmovaps %1, %%zmm19\n\t" |
| 39 | "vmovaps %1, %%zmm20\n\t" |
| 40 | "vmovaps %1, %%zmm21\n\t" |
| 41 | "vmovaps %1, %%zmm22\n\t" |
| 42 | "vmovaps %1, %%zmm23\n\t" |
| 43 | "vmovaps %1, %%zmm24\n\t" |
| 44 | "vmovaps %1, %%zmm25\n\t" |
| 45 | "vmovaps %1, %%zmm26\n\t" |
| 46 | "vmovaps %1, %%zmm27\n\t" |
| 47 | "vmovaps %1, %%zmm28\n\t" |
| 48 | "vmovaps %1, %%zmm29\n\t" |
| 49 | "vmovaps %1, %%zmm30\n\t" |
| 50 | "vmovaps %1, %%zmm31\n\t" |
| 51 | #endif |
| 52 | "\n\t" |
| 53 | "int3\n\t" |
| 54 | "\n\t" |
| 55 | "vmovaps %%zmm0, 0x000(%0)\n\t" |
| 56 | "vmovaps %%zmm1, 0x040(%0)\n\t" |
| 57 | "vmovaps %%zmm2, 0x080(%0)\n\t" |
| 58 | "vmovaps %%zmm3, 0x0C0(%0)\n\t" |
| 59 | "vmovaps %%zmm4, 0x100(%0)\n\t" |
| 60 | "vmovaps %%zmm5, 0x140(%0)\n\t" |
| 61 | "vmovaps %%zmm6, 0x180(%0)\n\t" |
| 62 | "vmovaps %%zmm7, 0x1C0(%0)\n\t" |
| 63 | #if defined(__x86_64__) || defined(_M_X64) |
| 64 | "vmovaps %%zmm8, 0x200(%0)\n\t" |
| 65 | "vmovaps %%zmm9, 0x240(%0)\n\t" |
| 66 | "vmovaps %%zmm10, 0x280(%0)\n\t" |
| 67 | "vmovaps %%zmm11, 0x2C0(%0)\n\t" |
| 68 | "vmovaps %%zmm12, 0x300(%0)\n\t" |
| 69 | "vmovaps %%zmm13, 0x340(%0)\n\t" |
| 70 | "vmovaps %%zmm14, 0x380(%0)\n\t" |
| 71 | "vmovaps %%zmm15, 0x3C0(%0)\n\t" |
| 72 | "vmovaps %%zmm16, 0x400(%0)\n\t" |
| 73 | "vmovaps %%zmm17, 0x440(%0)\n\t" |
| 74 | "vmovaps %%zmm18, 0x480(%0)\n\t" |
| 75 | "vmovaps %%zmm19, 0x4C0(%0)\n\t" |
| 76 | "vmovaps %%zmm20, 0x500(%0)\n\t" |
| 77 | "vmovaps %%zmm21, 0x540(%0)\n\t" |
| 78 | "vmovaps %%zmm22, 0x580(%0)\n\t" |
| 79 | "vmovaps %%zmm23, 0x5C0(%0)\n\t" |
| 80 | "vmovaps %%zmm24, 0x600(%0)\n\t" |
| 81 | "vmovaps %%zmm25, 0x640(%0)\n\t" |
| 82 | "vmovaps %%zmm26, 0x680(%0)\n\t" |
| 83 | "vmovaps %%zmm27, 0x6C0(%0)\n\t" |
| 84 | "vmovaps %%zmm28, 0x700(%0)\n\t" |
| 85 | "vmovaps %%zmm29, 0x740(%0)\n\t" |
| 86 | "vmovaps %%zmm30, 0x780(%0)\n\t" |
| 87 | "vmovaps %%zmm31, 0x7C0(%0)\n\t" |
| 88 | #endif |
| 89 | : |
| 90 | : "b" (zmm), "m" (zmm_fill) |
| 91 | : "%zmm0" , "%zmm1" , "%zmm2" , "%zmm3" , "%zmm4" , "%zmm5" , "%zmm6" , "%zmm7" |
| 92 | #if defined(__x86_64__) || defined(_M_X64) |
| 93 | , "%zmm8" , "%zmm9" , "%zmm10" , "%zmm11" , "%zmm12" , "%zmm13" , "%zmm14" , |
| 94 | "%zmm15" , "%zmm16" , "%zmm17" , "%zmm18" , "%zmm19" , "%zmm20" , "%zmm21" , |
| 95 | "%zmm22" , "%zmm23" , "%zmm24" , "%zmm25" , "%zmm26" , "%zmm27" , "%zmm28" , |
| 96 | "%zmm29" , "%zmm30" , "%zmm31" |
| 97 | #endif |
| 98 | ); |
| 99 | |
| 100 | for (int i = 0; i < 32; ++i) { |
| 101 | printf(format: "zmm%d = { " , i); |
| 102 | for (int j = 0; j < sizeof(zmm->as_uint8); ++j) |
| 103 | printf(format: "0x%02x " , zmm[i].as_uint8[j]); |
| 104 | printf(format: "}\n" ); |
| 105 | } |
| 106 | |
| 107 | return 0; |
| 108 | } |
| 109 | |