1// clang-format off
2// RUN: %libomptarget-compilexx-generic
3// RUN: env OMPX_APU_MAPS=1 HSA_XNACK=1 LIBOMPTARGET_INFO=60 %libomptarget-run-generic 2>&1 \
4// RUN: | %fcheck-generic -check-prefix=CHECK
5
6// UNSUPPORTED: aarch64-unknown-linux-gnu
7// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
8// UNSUPPORTED: nvptx64-nvidia-cuda
9// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
10// UNSUPPORTED: x86_64-pc-linux-gnu
11// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
12// UNSUPPORTED: s390x-ibm-linux-gnu
13// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
14
15// REQUIRES: unified_shared_memory
16
17// clang-format on
18
19#include <cstdint>
20#include <cstdio>
21
22/// Test for globals under automatic zero-copy.
23/// Because we are building without unified_shared_memory
24/// requirement pragma, all globals are allocated in the device
25/// memory of all used GPUs. To ensure those globals contain the intended
26/// values, we need to execute H2D and D2H memory copies even if we are running
27/// in automatic zero-copy. This only applies to globals. Local variables (their
28/// host pointers) are passed to the kernels by-value, according to the
29/// automatic zero-copy behavior.
30
31#pragma omp begin declare target
32int32_t x; // 4 bytes
33int32_t z[10]; // 40 bytes
34int32_t *k; // 20 bytes
35#pragma omp end declare target
36
37int main() {
38 int32_t *dev_k = nullptr;
39 x = 3;
40 int32_t y = -1;
41 for (size_t t = 0; t < 10; t++)
42 z[t] = t;
43 k = new int32_t[5];
44
45 printf(format: "Host pointer for k = %p\n", k);
46 for (size_t t = 0; t < 5; t++)
47 k[t] = -t;
48
49/// target update to forces a copy between host and device global, which we must
50/// execute to keep the two global copies consistent. CHECK: Copying data from
51/// host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=40, Name=z
52#pragma omp target update to(z[ : 10])
53
54/// target map with always modifier (for x) forces a copy between host and
55/// device global, which we must execute to keep the two global copies
56/// consistent. k's content (host address) is passed by-value to the kernel
57/// (Size=20 case). y, being a local variable, is also passed by-value to the
58/// kernel (Size=4 case) CHECK: Return HstPtrBegin {{.*}} Size=4 for unified
59/// shared memory CHECK: Return HstPtrBegin {{.*}} Size=20 for unified shared
60/// memory CHECK: Copying data from host to device, HstPtr={{.*}},
61/// TgtPtr={{.*}}, Size=4, Name=x
62#pragma omp target map(to : k[ : 5]) map(always, tofrom : x) map(tofrom : y) \
63 map(from : dev_k)
64 {
65 x++;
66 y++;
67 for (size_t t = 0; t < 10; t++)
68 z[t]++;
69 dev_k = k;
70 }
71/// CHECK-NOT: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
72/// Size=20, Name=k
73
74/// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
75/// Size=4, Name=x
76
77/// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
78/// Size=40, Name=z
79#pragma omp target update from(z[ : 10])
80
81 /// CHECK-NOT: k pointer not correctly passed to kernel
82 if (dev_k != k)
83 printf(format: "k pointer not correctly passed to kernel\n");
84
85 delete[] k;
86 return 0;
87}
88

source code of offload/test/mapping/auto_zero_copy_globals.cpp