| 1 | // RUN: %libomptarget-compilexx-generic |
| 2 | // RUN: env LIBOMPTARGET_INFO=63 %libomptarget-run-generic 2>&1 | %fcheck-generic |
| 3 | // REQUIRES: gpu |
| 4 | |
| 5 | #include <ompx.h> |
| 6 | |
| 7 | #include <cassert> |
| 8 | #include <vector> |
| 9 | |
| 10 | // CHECK: "PluginInterface" device 0 info: Launching kernel __omp_offloading_{{.*}} with [2,4,6] blocks and [32,4,2] threads in BARE mode |
| 11 | |
| 12 | int main(int argc, char *argv[]) { |
| 13 | int bs[3] = {32u, 4u, 2u}; |
| 14 | int gs[3] = {2u, 4u, 6u}; |
| 15 | int n = bs[0] * bs[1] * bs[2] * gs[0] * gs[1] * gs[2]; |
| 16 | std::vector<int> x_buf(n); |
| 17 | std::vector<int> y_buf(n); |
| 18 | std::vector<int> z_buf(n); |
| 19 | |
| 20 | auto x = x_buf.data(); |
| 21 | auto y = y_buf.data(); |
| 22 | auto z = z_buf.data(); |
| 23 | for (int i = 0; i < n; ++i) { |
| 24 | x[i] = i; |
| 25 | y[i] = i + 1; |
| 26 | } |
| 27 | |
| 28 | #pragma omp target teams ompx_bare num_teams(gs[0], gs[1], gs[2]) \ |
| 29 | thread_limit(bs[0], bs[1], bs[2]) map(to : x[ : n], y[ : n]) \ |
| 30 | map(from : z[ : n]) |
| 31 | { |
| 32 | int tid_x = ompx_thread_id_x(); |
| 33 | int tid_y = ompx_thread_id_y(); |
| 34 | int tid_z = ompx_thread_id_z(); |
| 35 | int gid_x = ompx_block_id_x(); |
| 36 | int gid_y = ompx_block_id_y(); |
| 37 | int gid_z = ompx_block_id_z(); |
| 38 | int bs_x = ompx_block_dim_x(); |
| 39 | int bs_y = ompx_block_dim_y(); |
| 40 | int bs_z = ompx_block_dim_z(); |
| 41 | int bs = bs_x * bs_y * bs_z; |
| 42 | int gs_x = ompx_grid_dim_x(); |
| 43 | int gs_y = ompx_grid_dim_y(); |
| 44 | int gid = gid_z * gs_y * gs_x + gid_y * gs_x + gid_x; |
| 45 | int tid = tid_z * bs_x * bs_y + tid_y * bs_x + tid_x; |
| 46 | int i = gid * bs + tid; |
| 47 | z[i] = x[i] + y[i]; |
| 48 | } |
| 49 | |
| 50 | for (int i = 0; i < n; ++i) { |
| 51 | if (z[i] != (2 * i + 1)) |
| 52 | return 1; |
| 53 | } |
| 54 | |
| 55 | return 0; |
| 56 | } |
| 57 | |