1 | // RUN: %libomptarget-compile-generic |
2 | // RUN: env HSA_XNACK=1 \ |
3 | // RUN: %libomptarget-run-generic | %fcheck-generic |
4 | // XFAIL: nvptx64-nvidia-cuda |
5 | // XFAIL: nvptx64-nvidia-cuda-LTO |
6 | |
7 | // REQUIRES: unified_shared_memory |
8 | |
9 | #include <omp.h> |
10 | #include <stdio.h> |
11 | |
// This translation unit requires unified shared memory (see the directive below).
13 | // --------------------------------------------------------------------------- |
14 | |
15 | #pragma omp requires unified_shared_memory |
16 | |
17 | #define N 1024 |
18 | |
19 | void init(int A[], int B[], int C[]) { |
20 | for (int i = 0; i < N; ++i) { |
21 | A[i] = 0; |
22 | B[i] = 1; |
23 | C[i] = i; |
24 | } |
25 | } |
26 | |
27 | int main(int argc, char *argv[]) { |
28 | const int device = omp_get_default_device(); |
29 | |
30 | // CHECK: Initial device: [[INITIAL_DEVICE:[0-9]+]] |
31 | printf(format: "Initial device: %d\n" , omp_get_initial_device()); |
32 | // CHECK: Num devices: [[INITIAL_DEVICE]] |
33 | printf(format: "Num devices: %d\n" , omp_get_num_devices()); |
34 | |
35 | // |
36 | // Target alloc & target memcpy |
37 | // |
38 | int A[N], B[N], C[N]; |
39 | |
40 | // Init |
41 | init(A, B, C); |
42 | |
43 | int *pA, *pB, *pC; |
44 | |
45 | // map ptrs |
46 | pA = &A[0]; |
47 | pB = &B[0]; |
48 | pC = &C[0]; |
49 | |
50 | int *d_A = (int *)omp_target_alloc(N * sizeof(int), device); |
51 | int *d_B = (int *)omp_target_alloc(N * sizeof(int), device); |
52 | int *d_C = (int *)omp_target_alloc(N * sizeof(int), device); |
53 | |
54 | // CHECK: omp_target_alloc succeeded |
55 | printf(format: "omp_target_alloc %s\n" , d_A && d_B && d_C ? "succeeded" : "failed" ); |
56 | |
57 | omp_target_memcpy(d_B, pB, N * sizeof(int), 0, 0, device, |
58 | omp_get_initial_device()); |
59 | omp_target_memcpy(d_C, pC, N * sizeof(int), 0, 0, device, |
60 | omp_get_initial_device()); |
61 | |
62 | #pragma omp target is_device_ptr(d_A, d_B, d_C) device(device) |
63 | { |
64 | #pragma omp parallel for schedule(static, 1) |
65 | for (int i = 0; i < N; i++) { |
66 | d_A[i] = d_B[i] + d_C[i] + 1; |
67 | } |
68 | } |
69 | |
70 | omp_target_memcpy(pA, d_A, N * sizeof(int), 0, 0, omp_get_initial_device(), |
71 | device); |
72 | |
73 | // CHECK: Test omp_target_memcpy: Succeeded |
74 | int fail = 0; |
75 | for (int i = 0; i < N; ++i) { |
76 | if (A[i] != i + 2) |
77 | fail++; |
78 | } |
79 | if (fail) { |
80 | printf(format: "Test omp_target_memcpy: Failed\n" ); |
81 | } else { |
82 | printf(format: "Test omp_target_memcpy: Succeeded\n" ); |
83 | } |
84 | |
85 | // |
86 | // target_is_present and target_associate/disassociate_ptr |
87 | // |
88 | init(A, B, C); |
89 | |
90 | // CHECK: B is not present, associating it... |
91 | // CHECK: omp_target_associate_ptr B succeeded |
92 | if (!omp_target_is_present(B, device)) { |
93 | printf(format: "B is not present, associating it...\n" ); |
94 | int rc = omp_target_associate_ptr(B, d_B, N * sizeof(int), 0, device); |
95 | printf(format: "omp_target_associate_ptr B %s\n" , !rc ? "succeeded" : "failed" ); |
96 | } |
97 | |
98 | // CHECK: C is not present, associating it... |
99 | // CHECK: omp_target_associate_ptr C succeeded |
100 | if (!omp_target_is_present(C, device)) { |
101 | printf(format: "C is not present, associating it...\n" ); |
102 | int rc = omp_target_associate_ptr(C, d_C, N * sizeof(int), 0, device); |
103 | printf(format: "omp_target_associate_ptr C %s\n" , !rc ? "succeeded" : "failed" ); |
104 | } |
105 | |
106 | // CHECK: Inside target data: A is not present |
107 | // CHECK: Inside target data: B is present |
108 | // CHECK: Inside target data: C is present |
109 | #pragma omp target data map(from : B, C) device(device) |
110 | { |
111 | printf(format: "Inside target data: A is%s present\n" , |
112 | omp_target_is_present(A, device) ? "" : " not" ); |
113 | printf(format: "Inside target data: B is%s present\n" , |
114 | omp_target_is_present(B, device) ? "" : " not" ); |
115 | printf(format: "Inside target data: C is%s present\n" , |
116 | omp_target_is_present(C, device) ? "" : " not" ); |
117 | |
118 | #pragma omp target map(from : A) device(device) |
119 | { |
120 | #pragma omp parallel for schedule(static, 1) |
121 | for (int i = 0; i < N; i++) |
122 | A[i] = B[i] + C[i] + 1; |
123 | } |
124 | } |
125 | |
126 | // CHECK: B is present, disassociating it... |
127 | // CHECK: omp_target_disassociate_ptr B succeeded |
128 | // CHECK: C is present, disassociating it... |
129 | // CHECK: omp_target_disassociate_ptr C succeeded |
130 | if (omp_target_is_present(B, device)) { |
131 | printf(format: "B is present, disassociating it...\n" ); |
132 | int rc = omp_target_disassociate_ptr(B, device); |
133 | printf(format: "omp_target_disassociate_ptr B %s\n" , !rc ? "succeeded" : "failed" ); |
134 | } |
135 | if (omp_target_is_present(C, device)) { |
136 | printf(format: "C is present, disassociating it...\n" ); |
137 | int rc = omp_target_disassociate_ptr(C, device); |
138 | printf(format: "omp_target_disassociate_ptr C %s\n" , !rc ? "succeeded" : "failed" ); |
139 | } |
140 | |
141 | // CHECK: Test omp_target_associate_ptr: Succeeded |
142 | fail = 0; |
143 | for (int i = 0; i < N; ++i) { |
144 | if (A[i] != i + 2) |
145 | fail++; |
146 | } |
147 | if (fail) { |
148 | printf(format: "Test omp_target_associate_ptr: Failed\n" ); |
149 | } else { |
150 | printf(format: "Test omp_target_associate_ptr: Succeeded\n" ); |
151 | } |
152 | |
153 | omp_target_free(d_A, device); |
154 | omp_target_free(d_B, device); |
155 | omp_target_free(d_C, device); |
156 | |
157 | printf(format: "Done!\n" ); |
158 | |
159 | return 0; |
160 | } |
161 | |