1 | // clang-format off |
2 | // RUN: %libomptarget-compilexx-generic |
3 | // RUN: env OMPX_APU_MAPS=1 HSA_XNACK=1 LIBOMPTARGET_INFO=60 %libomptarget-run-generic 2>&1 \ |
4 | // RUN: | %fcheck-generic -check-prefix=CHECK |
5 | |
6 | // UNSUPPORTED: aarch64-unknown-linux-gnu |
7 | // UNSUPPORTED: aarch64-unknown-linux-gnu-LTO |
8 | // UNSUPPORTED: nvptx64-nvidia-cuda |
9 | // UNSUPPORTED: nvptx64-nvidia-cuda-LTO |
10 | // UNSUPPORTED: x86_64-pc-linux-gnu |
11 | // UNSUPPORTED: x86_64-pc-linux-gnu-LTO |
12 | // UNSUPPORTED: s390x-ibm-linux-gnu |
13 | // UNSUPPORTED: s390x-ibm-linux-gnu-LTO |
14 | |
15 | // REQUIRES: unified_shared_memory |
16 | |
17 | // clang-format on |
18 | |
19 | #include <cstdint> |
20 | #include <cstdio> |
21 | |
22 | /// Test for globals under automatic zero-copy. |
23 | /// Because we are building without unified_shared_memory |
24 | /// requirement pragma, all globals are allocated in the device |
25 | /// memory of all used GPUs. To ensure those globals contain the intended |
26 | /// values, we need to execute H2D and D2H memory copies even if we are running |
27 | /// in automatic zero-copy. This only applies to globals. Local variables (their |
28 | /// host pointers) are passed to the kernels by-value, according to the |
29 | /// automatic zero-copy behavior. |
30 | |
31 | #pragma omp begin declare target |
32 | int32_t x; // 4 bytes |
33 | int32_t z[10]; // 40 bytes |
34 | int32_t *k; // 20 bytes |
35 | #pragma omp end declare target |
36 | |
37 | int main() { |
38 | int32_t *dev_k = nullptr; |
39 | x = 3; |
40 | int32_t y = -1; |
41 | for (size_t t = 0; t < 10; t++) |
42 | z[t] = t; |
43 | k = new int32_t[5]; |
44 | |
45 | printf(format: "Host pointer for k = %p\n" , k); |
46 | for (size_t t = 0; t < 5; t++) |
47 | k[t] = -t; |
48 | |
49 | /// target update to forces a copy between host and device global, which we must |
50 | /// execute to keep the two global copies consistent. CHECK: Copying data from |
51 | /// host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=40, Name=z |
52 | #pragma omp target update to(z[ : 10]) |
53 | |
54 | /// target map with always modifier (for x) forces a copy between host and |
55 | /// device global, which we must execute to keep the two global copies |
56 | /// consistent. k's content (host address) is passed by-value to the kernel |
57 | /// (Size=20 case). y, being a local variable, is also passed by-value to the |
58 | /// kernel (Size=4 case) CHECK: Return HstPtrBegin {{.*}} Size=4 for unified |
59 | /// shared memory CHECK: Return HstPtrBegin {{.*}} Size=20 for unified shared |
60 | /// memory CHECK: Copying data from host to device, HstPtr={{.*}}, |
61 | /// TgtPtr={{.*}}, Size=4, Name=x |
62 | #pragma omp target map(to : k[ : 5]) map(always, tofrom : x) map(tofrom : y) \ |
63 | map(from : dev_k) |
64 | { |
65 | x++; |
66 | y++; |
67 | for (size_t t = 0; t < 10; t++) |
68 | z[t]++; |
69 | dev_k = k; |
70 | } |
71 | /// CHECK-NOT: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}}, |
72 | /// Size=20, Name=k |
73 | |
74 | /// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}}, |
75 | /// Size=4, Name=x |
76 | |
77 | /// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}}, |
78 | /// Size=40, Name=z |
79 | #pragma omp target update from(z[ : 10]) |
80 | |
81 | /// CHECK-NOT: k pointer not correctly passed to kernel |
82 | if (dev_k != k) |
83 | printf(format: "k pointer not correctly passed to kernel\n" ); |
84 | |
85 | delete[] k; |
86 | return 0; |
87 | } |
88 | |