1// clang-format off
2// RUN: %libomptarget-compilexx-generic
3// RUN: env OMPX_APU_MAPS=1 HSA_XNACK=1 LIBOMPTARGET_INFO=60 %libomptarget-run-generic 2>&1 \
4// RUN: | %fcheck-generic -check-prefix=CHECK
5
6// REQUIRES: amdgpu
7// REQUIRES: unified_shared_memory
8
9// clang-format on
10
11#include <cstdint>
12#include <cstdio>
13
14/// Test for globals under automatic zero-copy.
15/// Because we are building without unified_shared_memory
16/// requirement pragma, all globals are allocated in the device
17/// memory of all used GPUs. To ensure those globals contain the intended
18/// values, we need to execute H2D and D2H memory copies even if we are running
19/// in automatic zero-copy. This only applies to globals. Local variables (their
20/// host pointers) are passed to the kernels by-value, according to the
21/// automatic zero-copy behavior.
22
23#pragma omp begin declare target
24int32_t x; // 4 bytes
25int32_t z[10]; // 40 bytes
26int32_t *k; // 20 bytes
27#pragma omp end declare target
28
29int main() {
30 int32_t *dev_k = nullptr;
31 x = 3;
32 int32_t y = -1;
33 for (size_t t = 0; t < 10; t++)
34 z[t] = t;
35 k = new int32_t[5];
36
37 printf(format: "Host pointer for k = %p\n", k);
38 for (size_t t = 0; t < 5; t++)
39 k[t] = -t;
40
41/// target update to forces a copy between host and device global, which we must
42/// execute to keep the two global copies consistent. CHECK: Copying data from
43/// host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=40, Name=z
44#pragma omp target update to(z[ : 10])
45
46/// target map with always modifier (for x) forces a copy between host and
47/// device global, which we must execute to keep the two global copies
48/// consistent. k's content (host address) is passed by-value to the kernel
49/// (Size=20 case). y, being a local variable, is also passed by-value to the
50/// kernel (Size=4 case) CHECK: Return HstPtrBegin {{.*}} Size=4 for unified
51/// shared memory CHECK: Return HstPtrBegin {{.*}} Size=20 for unified shared
52/// memory CHECK: Copying data from host to device, HstPtr={{.*}},
53/// TgtPtr={{.*}}, Size=4, Name=x
54#pragma omp target map(to : k[ : 5]) map(always, tofrom : x) map(tofrom : y) \
55 map(from : dev_k)
56 {
57 x++;
58 y++;
59 for (size_t t = 0; t < 10; t++)
60 z[t]++;
61 dev_k = k;
62 }
63/// CHECK-NOT: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
64/// Size=20, Name=k
65
66/// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
67/// Size=4, Name=x
68
69/// CHECK: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}},
70/// Size=40, Name=z
71#pragma omp target update from(z[ : 10])
72
73 /// CHECK-NOT: k pointer not correctly passed to kernel
74 if (dev_k != k)
75 printf(format: "k pointer not correctly passed to kernel\n");
76
77 delete[] k;
78 return 0;
79}
80

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of offload/test/mapping/auto_zero_copy_globals.cpp