| 1 | // RUN: %libomptarget-compilexx-generic -O3 && %libomptarget-run-generic |
| 2 | // RUN: %libomptarget-compilexx-generic -O3 -ffast-math && %libomptarget-run-generic |
| 3 | // RUN: %libomptarget-compileoptxx-generic -O3 && %libomptarget-run-generic |
| 4 | // RUN: %libomptarget-compileoptxx-generic -O3 -ffast-math && %libomptarget-run-generic |
| 5 | |
| 6 | #include <iostream> |
| 7 | |
/// Round-trips a scalar of type T through an `omp target` region.
///
/// The value `a` is read inside the target region and stored into `a_check`,
/// which is the only variable given an explicit map clause (`map(from)`).
///
/// \tparam T scalar type constructible from a floating-point literal and
///           comparable with `!=`.
/// \returns 0 when the value copied back compares equal to the original,
///          1 otherwise.
template <typename T> int test_map() {
  std::cout << "map(T)" << std::endl;
  T a(0.2), a_check;
#pragma omp target map(from : a_check)
  { a_check = a; }

  if (a_check != a) {
    // Terminate the diagnostic like every other message in this file so it is
    // flushed and does not run into the next test's banner.
    std::cout << " wrong results" << std::endl;
    return 1;
  }

  return 0;
}
| 21 | |
/// Checks `+` reductions over T in two shapes and compares each against a
/// host-computed reference sum:
///
///  1. flat: a single `target teams distribute parallel for` with a
///     `reduction(+ : sum)` clause;
///  2. hierarchical: `target teams distribute` over blocks, with an inner
///     `parallel for reduction` producing one partial sum per block, summed
///     on the host afterwards.
///
/// The inputs are the integers 0..99, so every partial sum is a small integer
/// and the exact `!=` comparison does not depend on summation order.
///
/// \tparam T arithmetic type constructible from an integer.
/// \returns 0 when both phases match the host reference, 1 if either differs.
template <typename T> int test_reduction() {
  int ret = 0;

  std::cout << "flat parallelism" << std::endl;
  T sum(0), sum_host(0);
  const int size = 100;
  T array[size];
  for (int i = 0; i < size; i++) {
    array[i] = i;
    sum_host += array[i];
  }

#pragma omp target teams distribute parallel for map(to : array[ : size])      \
    reduction(+ : sum)
  for (int i = 0; i < size; i++)
    sum += array[i];

  if (sum != sum_host) {
    std::cout << " wrong results " << sum << " host " << sum_host << std::endl;
    // Record the failure: `sum` is reset and reused by the hierarchical phase
    // below, so without this a wrong flat reduction would go unreported in
    // the return value.
    ret = 1;
  }

  std::cout << "hierarchical parallelism" << std::endl;
  const int nblock(10), block_size(10);
  T block_sum[nblock];
#pragma omp target teams distribute map(to : array[ : size])                   \
    map(from : block_sum[ : nblock])
  for (int ib = 0; ib < nblock; ib++) {
    T partial_sum = 0;
    const int istart = ib * block_size;
    const int iend = (ib + 1) * block_size;
#pragma omp parallel for reduction(+ : partial_sum)
    for (int i = istart; i < iend; i++)
      partial_sum += array[i];
    block_sum[ib] = partial_sum;
  }

  // Combine the per-block partial sums on the host.
  sum = 0;
  for (int ib = 0; ib < nblock; ib++) {
    sum += block_sum[ib];
  }

  if (sum != sum_host) {
    std::cout << " wrong results " << sum << " host " << sum_host << std::endl;
    ret = 1;
  }

  return ret;
}
| 67 | |
| 68 | template <typename T> int test_POD() { |
| 69 | int ret = 0; |
| 70 | ret |= test_map<T>(); |
| 71 | ret |= test_reduction<T>(); |
| 72 | return ret; |
| 73 | } |
| 74 | |
| 75 | int main() { |
| 76 | int ret = 0; |
| 77 | std::cout << "Testing float" << std::endl; |
| 78 | ret |= test_POD<float>(); |
| 79 | std::cout << "Testing double" << std::endl; |
| 80 | ret |= test_POD<double>(); |
| 81 | return ret; |
| 82 | } |
| 83 | |