// RUN: %libomptarget-compileopt-run-and-check-generic
//
// REQUIRES: gpu
| 4 | |
| 5 | #include <math.h> |
| 6 | #include <omp.h> |
| 7 | #include <ompx.h> |
| 8 | #include <stdio.h> |
| 9 | #include <stdlib.h> |
| 10 | |
/*
 * SAXPY offload test: computes Y = a * X + Y inside an OpenMP target teams
 * region, selecting the element for each thread with the ompx_* queries from
 * <ompx.h>, then verifies on the host that every element of Y equals 4.0f
 * (2 * 1 + 2).
 *
 * argv[1], when present, overrides the default element count (1 << 29).
 *
 * Returns 0 after printing the "Max error" line matched by the CHECK
 * directive below. Returns EXIT_FAILURE, with a message on stderr, when the
 * element count is not positive or the host arrays cannot be allocated.
 */
int main(int argc, char **argv) {
  int N = 1 << 29;
  if (argc > 1)
    N = atoi(argv[1]);

  // A non-positive count would produce a non-positive num_teams value and a
  // wrapped-around allocation size, so reject it before doing anything else.
  if (N <= 0) {
    fprintf(stderr, "Element count must be positive, got %d\n", N);
    return EXIT_FAILURE;
  }

  float a = 2.f;

  // Size each array from its own element type and refuse counts whose byte
  // size does not fit in size_t.
  const size_t count = (size_t)N;
  float *X = NULL;
  float *Y = NULL;
  if (count <= (size_t)-1 / sizeof *X) {
    X = malloc(count * sizeof *X);
    Y = malloc(count * sizeof *Y);
  }
  if (X == NULL || Y == NULL) {
    fprintf(stderr, "Failed to allocate two arrays of %d floats\n", N);
    free(X);
    free(Y);
    return EXIT_FAILURE;
  }

  for (int i = 0; i < N; i++) {
    X[i] = 1.0f;
    Y[i] = 2.0f;
  }

  // One team per TL elements, rounded up. Written as quotient plus remainder
  // test so the computation cannot overflow int for N close to INT_MAX.
  int TL = 256;
  int NT = N / TL + (N % TL != 0);

  // X is only read by the kernel (map "to"); Y uses the default map type so
  // the updated values are visible to the host check below.
#pragma omp target data map(to : X [0:N]) map(Y [0:N])
#pragma omp target teams num_teams(NT) thread_limit(TL)
  {
#pragma omp parallel
    {
      // NOTE(review): presumably CUDA-style thread index, block index and
      // block size in the x dimension -- confirm against <ompx.h>.
      int tid = ompx_thread_id_x();
      int bid = ompx_block_id_x();
      int tdim = ompx_block_dim_x();
      int gid = tid + bid * tdim;
      // NT is rounded up, so ids past the end of the arrays must be skipped.
      if (gid < N)
        Y[gid] = a * X[gid] + Y[gid];
    }
  }

  // Report the first element that differs from the expected 4.0f. The
  // comparison is written with != so that a NaN result is reported as well;
  // fmax() drops a NaN operand and would let such a result pass unnoticed.
  float maxError = 0.0f;
  for (int i = 0; i < N; i++) {
    const float err = fabsf(Y[i] - 4.0f);
    if (err != 0.0f) {
      maxError = err;
      printf("%i %f %f\n", i, maxError, Y[i]);
      break;
    }
  }
  // CHECK: Max error: 0.00
  printf("Max error: %f\n", maxError);

  free(X);
  free(Y);
  return 0;
}
| 55 | |