// clang-format off
// Compile with:
// clang -target aarch64-unknown-linux-gnu main.c -o a.out -g -march=armv8.6-a+sve+sme+sme2
//
// For minimal corefile size, do this before running the program:
// echo 0x20 > /proc/self/coredump_filter
//
// Must be run on a system that has SVE, SME and SME2, including the smefa64
// extension.
//
// Example command:
// ./a.out 0 32 64 1
//
// This would not enter streaming mode, set non-streaming VL to 32
// bytes, streaming VL to 64 bytes and enable ZA and ZT0.
//
// To generate all the test files, use the generate.sh script that's in this
// folder.
// clang-format on
| 20 | |
| 21 | #include <stdbool.h> |
| 22 | #include <stdint.h> |
| 23 | #include <stdlib.h> |
| 24 | #include <string.h> |
| 25 | #include <sys/prctl.h> |
| 26 | |
// Fallback definition of the prctl request used to set the streaming vector
// length, for builds where no earlier header has defined it.
#if !defined(PR_SME_SET_VL)
#define PR_SME_SET_VL 63
#endif
| 30 | |
// SM_INST(c) emits the instruction "msr s0_3_c4_c<c>_3, xzr", written with a
// raw system register encoding instead of a mnemonic. __asm__/__volatile__ are
// used rather than asm/volatile so the macro also compiles in strict ISO modes
// (e.g. -std=c11), where `asm` is not a keyword.
//
// SMSTART_SM: used by main() to enter streaming mode.
// SMSTART_ZA: used by main() to enable ZA before ZA and ZT0 are written.
//
// Each macro expands to a single statement without a trailing semicolon.
#define SM_INST(c) __asm__ __volatile__("msr s0_3_c4_c" #c "_3, xzr")
#define SMSTART_SM SM_INST(3)
#define SMSTART_ZA SM_INST(5)
| 34 | |
// Fills FFR, the predicate registers P0-P15 and the vector registers Z0-Z31
// with a fixed, recognisable pattern:
//   * FFR is set to all-true.
//   * P0-P14 repeat the sequence "all-true bytes, halfwords, words,
//     doublewords, all-false"; P15 is all-true bytes.
//   * Zn has every byte set to n + 1. The governing predicates used (P0, P5,
//     P10, P15) are all all-true at byte granularity, so every byte is written.
//
// No clobber lists are declared, so the compiler is not told to save/restore
// any of these registers around the asm; the values are meant to still be in
// the registers when the caller crashes later on.
void set_sve_registers(void) {
  // We assume the smefa64 feature is present, which allows ffr access
  // in streaming mode.
  __asm__ __volatile__("setffr\n\t");

  // Predicates must be initialised first: the cpy instructions below are
  // governed by P0, P5, P10 and P15.
  __asm__ __volatile__(
      "ptrue p0.b\n\t"
      "ptrue p1.h\n\t"
      "ptrue p2.s\n\t"
      "ptrue p3.d\n\t"
      "pfalse p4.b\n\t"
      "ptrue p5.b\n\t"
      "ptrue p6.h\n\t"
      "ptrue p7.s\n\t"
      "ptrue p8.d\n\t"
      "pfalse p9.b\n\t"
      "ptrue p10.b\n\t"
      "ptrue p11.h\n\t"
      "ptrue p12.s\n\t"
      "ptrue p13.d\n\t"
      "pfalse p14.b\n\t"
      "ptrue p15.b\n\t");

  // Zn <- bytes of value n + 1.
  __asm__ __volatile__(
      "cpy z0.b, p0/z, #1\n\t"
      "cpy z1.b, p5/z, #2\n\t"
      "cpy z2.b, p10/z, #3\n\t"
      "cpy z3.b, p15/z, #4\n\t"
      "cpy z4.b, p0/z, #5\n\t"
      "cpy z5.b, p5/z, #6\n\t"
      "cpy z6.b, p10/z, #7\n\t"
      "cpy z7.b, p15/z, #8\n\t"
      "cpy z8.b, p0/z, #9\n\t"
      "cpy z9.b, p5/z, #10\n\t"
      "cpy z10.b, p10/z, #11\n\t"
      "cpy z11.b, p15/z, #12\n\t"
      "cpy z12.b, p0/z, #13\n\t"
      "cpy z13.b, p5/z, #14\n\t"
      "cpy z14.b, p10/z, #15\n\t"
      "cpy z15.b, p15/z, #16\n\t"
      "cpy z16.b, p0/z, #17\n\t"
      "cpy z17.b, p5/z, #18\n\t"
      "cpy z18.b, p10/z, #19\n\t"
      "cpy z19.b, p15/z, #20\n\t"
      "cpy z20.b, p0/z, #21\n\t"
      "cpy z21.b, p5/z, #22\n\t"
      "cpy z22.b, p10/z, #23\n\t"
      "cpy z23.b, p15/z, #24\n\t"
      "cpy z24.b, p0/z, #25\n\t"
      "cpy z25.b, p5/z, #26\n\t"
      "cpy z26.b, p10/z, #27\n\t"
      "cpy z27.b, p15/z, #28\n\t"
      "cpy z28.b, p0/z, #29\n\t"
      "cpy z29.b, p5/z, #30\n\t"
      "cpy z30.b, p10/z, #31\n\t"
      "cpy z31.b, p15/z, #32\n\t");
}
| 89 | |
// Writes a recognisable pattern into ZA, one array vector (row) at a time:
// row i is loaded from a buffer whose bytes all hold (uint8_t)(i + 1).
//
// streaming_vl: number of rows to write; main() passes the streaming vector
//               length in bytes. Values <= 0 write nothing.
//
// ZA must already be enabled by the caller (main() issues SMSTART_ZA first).
void set_za_register(int streaming_vl) {
  // The source buffer is filled to this fixed size regardless of the actual
  // vector length. NOTE(review): presumably the largest streaming vector
  // length the test expects - confirm.
  enum { MAX_VL_BYTES = 256 };
  uint8_t data[MAX_VL_BYTES];

  // Guard before converting to unsigned: a negative length would otherwise
  // wrap to a huge row count.
  if (streaming_vl <= 0)
    return;

  const unsigned num_rows = (unsigned)streaming_vl;
  for (unsigned row = 0; row < num_rows; ++row) {
    // Plain byte loop rather than memset(): main() has already populated the
    // vector registers, and a library call here could overwrite them.
    for (unsigned byte = 0; byte < MAX_VL_BYTES; ++byte)
      data[byte] = (uint8_t)(row + 1);

    // w12 selects the ZA row. The "memory" clobber tells the compiler that
    // the asm reads `data` through the pointer, so the stores above must be
    // completed (and not discarded) before the load executes.
    __asm__ __volatile__("mov w12, %w[idx]\n\t"
                         "ldr za[w12, 0], [%[src]]\n\t"
                         : /* no outputs */
                         : [idx] "r"(row), [src] "r"(data)
                         : "w12", "memory");
  }
}
| 104 | |
// Loads ZT0 from a 64-byte (512-bit) buffer holding the byte values 1..64 in
// ascending order.
//
// ZA must already be enabled by the caller (main() issues SMSTART_ZA first).
void set_zt0_register(void) {
  // A block-scoped constant replaces the former #define/#undef pair, whose
  // names did not match (ZTO_LEN vs ZT0_LEN) and so leaked the macro into the
  // rest of the file.
  enum { ZT0_LEN = 512 / 8 };
  uint8_t data[ZT0_LEN];
  for (unsigned i = 0; i < ZT0_LEN; ++i)
    data[i] = (uint8_t)(i + 1);

  // The "memory" clobber tells the compiler that the asm reads `data` through
  // the pointer, so the stores above cannot be dropped or moved past it.
  __asm__ __volatile__("ldr zt0, [%[src]]"
                       : /* no outputs */
                       : [src] "r"(data)
                       : "memory");
}
| 114 | |
// Writes `value` to the system register encoded as S3_3_C13_C0_5, which
// main() refers to as tpidr2.
void set_tpidr2(uint64_t value) {
  __asm__ __volatile__("msr S3_3_C13_C0_5, %[val]"
                       : /* no outputs */
                       : [val] "r"(value));
}
| 118 | |
| 119 | int main(int argc, char **argv) { |
| 120 | // Arguments: |
| 121 | // SVE mode: 1 for streaming SVE (SSVE), any other value |
| 122 | // for non-streaming SVE mode. |
| 123 | // Non-Streaming Vector length: In bytes, an integer e.g. "32". |
| 124 | // Streaming Vector length: As above, but for streaming mode. |
| 125 | // ZA mode: 1 for enabled, any other value for disabled. |
| 126 | if (argc != 5) |
| 127 | return 1; |
| 128 | |
| 129 | // We assume this is run on a system with SME, so tpidr2 can always be |
| 130 | // accessed. |
| 131 | set_tpidr2(0x1122334455667788); |
| 132 | |
| 133 | // Streaming mode or not? |
| 134 | bool streaming_mode = strcmp(s1: argv[1], s2: "1" ) == 0; |
| 135 | |
| 136 | // Set vector length (is a syscall, resets modes). |
| 137 | int non_streaming_vl = atoi(nptr: argv[2]); |
| 138 | prctl(PR_SVE_SET_VL, non_streaming_vl); |
| 139 | int streaming_vl = atoi(nptr: argv[3]); |
| 140 | prctl(PR_SME_SET_VL, streaming_vl); |
| 141 | |
| 142 | if (streaming_mode) |
| 143 | SMSTART_SM; |
| 144 | |
| 145 | set_sve_registers(); |
| 146 | |
| 147 | // ZA enabled or disabled? |
| 148 | if (strcmp(s1: argv[4], s2: "1" ) == 0) { |
| 149 | SMSTART_ZA; |
| 150 | set_za_register(streaming_vl); |
| 151 | set_zt0_register(); |
| 152 | } |
| 153 | |
| 154 | *(volatile char *)(0) = 0; // Crashes here. |
| 155 | |
| 156 | return 0; |
| 157 | } |
| 158 | |