// RUN: %libomptarget-compile-generic && \
// RUN: env LIBOMPTARGET_AMDGPU_MAX_ASYNC_COPY_BYTES=0 %libomptarget-run-generic | \
// RUN: %fcheck-generic -allow-empty
// REQUIRES: amdgcn-amd-amdhsa
5 | |
6 | #include <assert.h> |
7 | #include <omp.h> |
8 | #include <stdio.h> |
9 | #include <stdlib.h> |
10 | |
// Value written to every element of the source device buffer and added once
// more on the destination device; the host-side check in main therefore
// expects each element to equal 2 * magic_num.
const int magic_num = 7;
12 | |
13 | int main(int argc, char *argv[]) { |
14 | const int N = 128; |
15 | const int num_devices = omp_get_num_devices(); |
16 | |
17 | // No target device, just return |
18 | if (num_devices == 0) { |
19 | printf(format: "PASS\n" ); |
20 | return 0; |
21 | } |
22 | |
23 | const int src_device = 0; |
24 | int dst_device = num_devices - 1; |
25 | |
26 | int length = N * sizeof(int); |
27 | int *src_ptr = omp_target_alloc(length, src_device); |
28 | int *dst_ptr = omp_target_alloc(length, dst_device); |
29 | |
30 | if (!src_ptr || !dst_ptr) { |
31 | printf(format: "FAIL\n" ); |
32 | return 1; |
33 | } |
34 | |
35 | #pragma omp target teams distribute parallel for device(src_device) \ |
36 | is_device_ptr(src_ptr) |
37 | for (int i = 0; i < N; ++i) { |
38 | src_ptr[i] = magic_num; |
39 | } |
40 | |
41 | if (omp_target_memcpy(dst_ptr, src_ptr, length, 0, 0, dst_device, |
42 | src_device)) { |
43 | printf(format: "FAIL\n" ); |
44 | return 1; |
45 | } |
46 | |
47 | int *buffer = malloc(size: length); |
48 | if (!buffer) { |
49 | printf(format: "FAIL\n" ); |
50 | return 1; |
51 | } |
52 | |
53 | #pragma omp target teams distribute parallel for device(dst_device) \ |
54 | map(from : buffer[0 : N]) is_device_ptr(dst_ptr) |
55 | for (int i = 0; i < N; ++i) { |
56 | buffer[i] = dst_ptr[i] + magic_num; |
57 | } |
58 | |
59 | for (int i = 0; i < N; ++i) |
60 | assert(buffer[i] == 2 * magic_num); |
61 | |
62 | printf(format: "PASS\n" ); |
63 | |
64 | // Free host and device memory |
65 | free(ptr: buffer); |
66 | omp_target_free(src_ptr, src_device); |
67 | omp_target_free(dst_ptr, dst_device); |
68 | |
69 | return 0; |
70 | } |
71 | |
// CHECK: PASS
73 | |