| 1 | #include <gpuintrin.h> |
| 2 | #include <stdint.h> |
| 3 | |
/* 64-entry array shared by this file: written by the three prioritized
 * constructors (ctora, ctorb, ctorc) and then updated and read by the
 * global_ctor kernel. It has no initializer, so it starts zeroed. */
| 4 | uint32_t global[64]; |
| 5 | |
| 6 | [[gnu::constructor(202)]] void ctorc() { |
| 7 | for (unsigned I = 0; I < 64; I++) |
| 8 | global[I] += 20; |
| 9 | } |
| 10 | |
| 11 | [[gnu::constructor(200)]] void ctora() { |
| 12 | for (unsigned I = 0; I < 64; I++) |
| 13 | global[I] = 40; |
| 14 | } |
| 15 | |
| 16 | [[gnu::constructor(201)]] void ctorb() { |
| 17 | for (unsigned I = 0; I < 64; I++) |
| 18 | global[I] *= 2; |
| 19 | } |
| 20 | |
| 21 | __gpu_kernel void global_ctor(uint32_t *out) { |
| 22 | global[__gpu_thread_id(dim: 0)] += __gpu_thread_id(dim: 0); |
| 23 | out[__gpu_thread_id(dim: 0) + (__gpu_num_threads(dim: 0) * __gpu_block_id(dim: 0))] = |
| 24 | global[__gpu_thread_id(dim: 0)]; |
| 25 | } |
| 26 | |