| 1 | // Use the generic state machine. On some architectures, other threads in the |
| 2 | // main thread's warp must avoid barrier instructions. |
| 3 | // |
| 4 | // RUN: %libomptarget-compile-run-and-check-generic |
| 5 | |
| 6 | // SPMDize. There is no main thread, so there's no issue. |
| 7 | // |
| 8 | // RUN: %libomptarget-compile-generic -O2 -foffload-lto -Rpass=openmp-opt > %t.spmd 2>&1 |
| 9 | // RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=SPMD -input-file=%t.spmd |
| 10 | // RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=SPMD -input-file=%t.spmd |
| 11 | // RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic |
| 12 | // |
| 13 | // SPMD: Transformed generic-mode kernel to SPMD-mode. |
| 14 | |
| 15 | // Use the custom state machine, which must avoid the same barrier problem as |
| 16 | // the generic state machine. |
| 17 | // |
| 18 | // RUN: %libomptarget-compile-generic -O2 -foffload-lto -Rpass=openmp-opt \ |
| 19 | // RUN: -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1 |
| 20 | // RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom |
| 21 | // RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom |
| 22 | // RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic |
| 23 | // |
| 24 | // Repeat with reduction clause, which has managed to break the custom state |
| 25 | // machine in the past. |
| 26 | // |
| 27 | // RUN: %libomptarget-compile-generic -O2 -foffload-lto -Rpass=openmp-opt -DADD_REDUCTION \ |
| 28 | // RUN: -mllvm -openmp-opt-disable-spmdization > %t.custom 2>&1 |
| 29 | // RUN: %fcheck-nvptx64-nvidia-cuda -check-prefix=CUSTOM -input-file=%t.custom |
| 30 | // RUN: %fcheck-amdgcn-amd-amdhsa -check-prefix=CUSTOM -input-file=%t.custom |
| 31 | // RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic |
| 32 | // |
| 33 | // CUSTOM: Rewriting generic-mode kernel with a customized state machine. |
| 34 | |
// REDUCTION(...) expands to an OpenMP reduction clause only when the test is
// built with ADD_REDUCTION set to a nonzero value; otherwise it vanishes so
// the same pragma line serves both configurations.
#if !ADD_REDUCTION
#define REDUCTION(...)
#else
#define REDUCTION(...) reduction(__VA_ARGS__)
#endif
| 40 | |
| 41 | #include <stdio.h> |
// Runs one target region with a single team: the team's sequential part adds
// 5 to x, then a nested parallel region stores 6 to y. Both values are mapped
// back to the host and printed so the output can be verified.
int main() {
  int x = 0, y = 1;
  // REDUCTION(+ : x) is empty unless the file is built with ADD_REDUCTION.
#pragma omp target teams num_teams(1) map(tofrom : x, y) REDUCTION(+ : x)
  {
    x += 5;
    // Every thread of the parallel region writes the same value to y.
#pragma omp parallel
    y = 6;
  }
  // CHECK: 5, 6
  printf("%d, %d\n", x, y);
  return 0;
}
| 54 | |