1 | // RUN: %libomptarget-compile-generic |
2 | // RUN: env HSA_XNACK=1 \ |
3 | // RUN: %libomptarget-run-generic | %fcheck-generic |
4 | // XFAIL: nvptx64-nvidia-cuda |
5 | // XFAIL: nvptx64-nvidia-cuda-LTO |
6 | |
7 | // REQUIRES: unified_shared_memory |
8 | |
9 | #include <omp.h> |
10 | #include <stdio.h> |
11 | |
12 | // --------------------------------------------------------------------------- |
13 | // Various definitions copied from OpenMP RTL |
14 | |
15 | extern void __tgt_register_requires(int64_t); |
16 | |
17 | // End of definitions copied from OpenMP RTL. |
18 | // --------------------------------------------------------------------------- |
19 | |
20 | #pragma omp requires unified_shared_memory |
21 | |
22 | #define N 1024 |
23 | |
24 | void init(int A[], int B[], int C[]) { |
25 | for (int i = 0; i < N; ++i) { |
26 | A[i] = 0; |
27 | B[i] = 1; |
28 | C[i] = i; |
29 | } |
30 | } |
31 | |
32 | int main(int argc, char *argv[]) { |
33 | const int device = omp_get_default_device(); |
34 | |
35 | // Manual registration of requires flags for Clang versions |
36 | // that do not support requires. |
37 | __tgt_register_requires(8); |
38 | |
39 | // CHECK: Initial device: [[INITIAL_DEVICE:[0-9]+]] |
40 | printf(format: "Initial device: %d\n" , omp_get_initial_device()); |
41 | // CHECK: Num devices: [[INITIAL_DEVICE]] |
42 | printf(format: "Num devices: %d\n" , omp_get_num_devices()); |
43 | |
44 | // |
45 | // Target alloc & target memcpy |
46 | // |
47 | int A[N], B[N], C[N]; |
48 | |
49 | // Init |
50 | init(A, B, C); |
51 | |
52 | int *pA, *pB, *pC; |
53 | |
54 | // map ptrs |
55 | pA = &A[0]; |
56 | pB = &B[0]; |
57 | pC = &C[0]; |
58 | |
59 | int *d_A = (int *)omp_target_alloc(N * sizeof(int), device); |
60 | int *d_B = (int *)omp_target_alloc(N * sizeof(int), device); |
61 | int *d_C = (int *)omp_target_alloc(N * sizeof(int), device); |
62 | |
63 | // CHECK: omp_target_alloc succeeded |
64 | printf(format: "omp_target_alloc %s\n" , d_A && d_B && d_C ? "succeeded" : "failed" ); |
65 | |
66 | omp_target_memcpy(d_B, pB, N * sizeof(int), 0, 0, device, |
67 | omp_get_initial_device()); |
68 | omp_target_memcpy(d_C, pC, N * sizeof(int), 0, 0, device, |
69 | omp_get_initial_device()); |
70 | |
71 | #pragma omp target is_device_ptr(d_A, d_B, d_C) device(device) |
72 | { |
73 | #pragma omp parallel for schedule(static, 1) |
74 | for (int i = 0; i < N; i++) { |
75 | d_A[i] = d_B[i] + d_C[i] + 1; |
76 | } |
77 | } |
78 | |
79 | omp_target_memcpy(pA, d_A, N * sizeof(int), 0, 0, omp_get_initial_device(), |
80 | device); |
81 | |
82 | // CHECK: Test omp_target_memcpy: Succeeded |
83 | int fail = 0; |
84 | for (int i = 0; i < N; ++i) { |
85 | if (A[i] != i + 2) |
86 | fail++; |
87 | } |
88 | if (fail) { |
89 | printf(format: "Test omp_target_memcpy: Failed\n" ); |
90 | } else { |
91 | printf(format: "Test omp_target_memcpy: Succeeded\n" ); |
92 | } |
93 | |
94 | // |
95 | // target_is_present and target_associate/disassociate_ptr |
96 | // |
97 | init(A, B, C); |
98 | |
99 | // CHECK: B is not present, associating it... |
100 | // CHECK: omp_target_associate_ptr B succeeded |
101 | if (!omp_target_is_present(B, device)) { |
102 | printf(format: "B is not present, associating it...\n" ); |
103 | int rc = omp_target_associate_ptr(B, d_B, N * sizeof(int), 0, device); |
104 | printf(format: "omp_target_associate_ptr B %s\n" , !rc ? "succeeded" : "failed" ); |
105 | } |
106 | |
107 | // CHECK: C is not present, associating it... |
108 | // CHECK: omp_target_associate_ptr C succeeded |
109 | if (!omp_target_is_present(C, device)) { |
110 | printf(format: "C is not present, associating it...\n" ); |
111 | int rc = omp_target_associate_ptr(C, d_C, N * sizeof(int), 0, device); |
112 | printf(format: "omp_target_associate_ptr C %s\n" , !rc ? "succeeded" : "failed" ); |
113 | } |
114 | |
115 | // CHECK: Inside target data: A is not present |
116 | // CHECK: Inside target data: B is present |
117 | // CHECK: Inside target data: C is present |
118 | #pragma omp target data map(from : B, C) device(device) |
119 | { |
120 | printf(format: "Inside target data: A is%s present\n" , |
121 | omp_target_is_present(A, device) ? "" : " not" ); |
122 | printf(format: "Inside target data: B is%s present\n" , |
123 | omp_target_is_present(B, device) ? "" : " not" ); |
124 | printf(format: "Inside target data: C is%s present\n" , |
125 | omp_target_is_present(C, device) ? "" : " not" ); |
126 | |
127 | #pragma omp target map(from : A) device(device) |
128 | { |
129 | #pragma omp parallel for schedule(static, 1) |
130 | for (int i = 0; i < N; i++) |
131 | A[i] = B[i] + C[i] + 1; |
132 | } |
133 | } |
134 | |
135 | // CHECK: B is present, disassociating it... |
136 | // CHECK: omp_target_disassociate_ptr B succeeded |
137 | // CHECK: C is present, disassociating it... |
138 | // CHECK: omp_target_disassociate_ptr C succeeded |
139 | if (omp_target_is_present(B, device)) { |
140 | printf(format: "B is present, disassociating it...\n" ); |
141 | int rc = omp_target_disassociate_ptr(B, device); |
142 | printf(format: "omp_target_disassociate_ptr B %s\n" , !rc ? "succeeded" : "failed" ); |
143 | } |
144 | if (omp_target_is_present(C, device)) { |
145 | printf(format: "C is present, disassociating it...\n" ); |
146 | int rc = omp_target_disassociate_ptr(C, device); |
147 | printf(format: "omp_target_disassociate_ptr C %s\n" , !rc ? "succeeded" : "failed" ); |
148 | } |
149 | |
150 | // CHECK: Test omp_target_associate_ptr: Succeeded |
151 | fail = 0; |
152 | for (int i = 0; i < N; ++i) { |
153 | if (A[i] != i + 2) |
154 | fail++; |
155 | } |
156 | if (fail) { |
157 | printf(format: "Test omp_target_associate_ptr: Failed\n" ); |
158 | } else { |
159 | printf(format: "Test omp_target_associate_ptr: Succeeded\n" ); |
160 | } |
161 | |
162 | omp_target_free(d_A, device); |
163 | omp_target_free(d_B, device); |
164 | omp_target_free(d_C, device); |
165 | |
166 | printf(format: "Done!\n" ); |
167 | |
168 | return 0; |
169 | } |
170 | |