// RUN: %libomptarget-compileopt-run-and-check-generic
//
// UNSUPPORTED: x86_64-pc-linux-gnu
// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
// UNSUPPORTED: aarch64-unknown-linux-gnu
// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
// UNSUPPORTED: s390x-ibm-linux-gnu
// UNSUPPORTED: s390x-ibm-linux-gnu-LTO

#include <errno.h>
#include <limits.h>
#include <math.h>
#include <omp.h>
#include <ompx.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Offloaded SAXPY test: computes Y[i] = a * X[i] + Y[i] inside an OpenMP
 * target teams region, indexing elements with the ompx thread/block id
 * helpers, then verifies the result on the host.
 *
 * argv[1] (optional) overrides the element count N (default 1 << 29); it must
 * be a positive decimal integer.
 *
 * Prints "Max error: <value>", which the CHECK directive below matches.
 * Returns 0 once that line has been printed, or EXIT_FAILURE when the
 * argument is invalid or the host buffers cannot be allocated.
 */
int main(int argc, char **argv) {
  // Threads requested per team; also used to size the number of teams.
  const int TL = 256;

  int N = 1 << 29;
  if (argc > 1) {
    char *end = NULL;
    errno = 0;
    long parsed = strtol(argv[1], &end, 10);
    // Reject non-numeric input, trailing garbage, out-of-range values and
    // non-positive counts. The INT_MAX - TL bound keeps both the round-up
    // below and the per-thread index arithmetic free of signed overflow.
    if (end == argv[1] || *end != '\0' || errno == ERANGE || parsed <= 0 ||
        parsed > INT_MAX - TL) {
      fprintf(stderr, "invalid element count: %s\n", argv[1]);
      return EXIT_FAILURE;
    }
    N = (int)parsed;
  }
  float a = 2.f;

  // calloc checks the count * size multiplication for overflow.
  float *X = calloc((size_t)N, sizeof(*X));
  float *Y = calloc((size_t)N, sizeof(*Y));
  if (!X || !Y) {
    fprintf(stderr, "failed to allocate %d elements\n", N);
    free(Y);
    free(X);
    return EXIT_FAILURE;
  }

  for (int i = 0; i < N; i++) {
    X[i] = 1.0f;
    Y[i] = 2.0f;
  }

  // Number of teams: N / TL rounded up, so NT * TL >= N.
  int NT = (N + TL - 1) / TL;

  // X is only read on the device; Y uses the default map type so the result
  // is copied back to the host when the data region ends.
  // NOTE(review): each thread handles exactly one element, so this assumes
  // every team really runs TL threads -- confirm against the runtime.
#pragma omp target data map(to : X [0:N]) map(Y [0:N])
#pragma omp target teams num_teams(NT) thread_limit(TL)
  {
#pragma omp parallel
    {
      int tid = ompx_thread_id_x();
      int bid = ompx_block_id_x();
      int tdim = ompx_block_dim_x();
      int gid = tid + bid * tdim;
      // The last team may extend past N; guard the tail.
      if (gid < N)
        Y[gid] = a * X[gid] + Y[gid];
    }
  }

  // With X = 1, Y = 2 and a = 2 every element must come back as exactly 4.
  // Only the first mismatch is recorded and reported. The comparison is
  // written so that a NaN result counts as a mismatch; fmax() would have
  // silently discarded it and reported success.
  float maxError = 0.0f;
  for (int i = 0; i < N; i++) {
    float err = fabsf(Y[i] - 4.0f);
    if (err != 0.0f) {
      maxError = err;
      printf("%i %f %f\n", i, maxError, Y[i]);
      break;
    }
  }
  // CHECK: Max error: 0.00
  printf("Max error: %f\n", maxError);

  free(Y);
  free(X);

  return 0;
}