| 1 | // XFAIL: msvc |
| 2 | // Fails on windows due to issue #129023 |
| 3 | // RUN: %libomp-compile-and-run |
| 4 | #include <stdio.h> |
| 5 | #include <stdlib.h> |
| 6 | #include <string.h> |
| 7 | #include "omp.h" |
| 8 | |
/* Exclusive upper limit on the loop bounds exercised by main(); also the
   dimension of the execution_count matrix allocated there. May be overridden
   by defining it before this point (e.g. on the compiler command line). */
#ifndef MAX_BOUND
#define MAX_BOUND 64
#endif
/* When NO_EFFICIENCY_CHECK is defined, test() compiles out its per-thread
   load-balance check. It is defined here for every compiler except MSVC. */
#ifndef _MSC_VER
#define NO_EFFICIENCY_CHECK
#endif
| 15 | |
/* Correctness bookkeeping: every valid iteration must be executed exactly
   once and no invalid iteration may be executed at all. Entry [i, j] (located
   via Index()) holds the number of times iteration (i, j) was executed. */
unsigned *execution_count = NULL;
/* Stores the number of iterations executed by each thread, indexed by
   omp_get_thread_num(). */
unsigned *iterations_per_thread = NULL;
| 21 | |
/* Allocates an uninitialized bound1 x bound2 matrix of unsigned values.
 *
 * Returns a pointer the caller releases with Free(), or NULL when the
 * requested element count does not fit in size_t or the allocation fails. */
unsigned *Alloc(unsigned bound1, unsigned bound2) {
  const size_t max_elems = (size_t)-1 / sizeof(unsigned);

  /* Do the size arithmetic in size_t and reject products that would wrap, so
     large bounds can never produce a silently undersized buffer. */
  if (bound2 != 0 && (size_t)bound1 > max_elems / bound2)
    return NULL;
  return malloc((size_t)bound1 * bound2 * sizeof(unsigned));
}
| 25 | |
/* Sets the first bound1 * bound2 elements of p to zero.
 *
 * p must point to at least bound1 * bound2 unsigned values. */
void ZeroOut(unsigned *p, unsigned bound1, unsigned bound2) {
  /* Widen before multiplying so the element count is not truncated to the
     range of unsigned. */
  size_t n_elems = (size_t)bound1 * bound2;

  memset(p, 0, n_elems * sizeof(unsigned));
}
| 29 | |
/* Releases a buffer obtained from Alloc(). Forwards directly to free(), so
   passing NULL is a no-op. */
void Free(unsigned *p) {
  free(p);
}
| 31 | |
/* Returns the address of element (i, j) of the row-major matrix p whose rows
   hold bound2 elements each. No bounds checking is performed. */
unsigned *Index(unsigned *p, unsigned i, unsigned j, unsigned bound2) {
  unsigned offset = i * bound2 + j;

  return p + offset;
}
| 35 | |
| 36 | int test(unsigned upper_bound) { |
| 37 | |
| 38 | unsigned total_iterations = upper_bound * (upper_bound - 1) / 2; |
| 39 | unsigned num_threads = omp_get_max_threads(); |
| 40 | unsigned lower_per_chunk = total_iterations / num_threads; |
| 41 | unsigned upper_per_chunk = |
| 42 | lower_per_chunk + ((total_iterations % num_threads) ? 1 : 0); |
| 43 | int i, j; |
| 44 | |
| 45 | omp_set_num_threads(num_threads); |
| 46 | |
| 47 | ZeroOut(p: execution_count, bound1: upper_bound, bound2: upper_bound); |
| 48 | ZeroOut(p: iterations_per_thread, bound1: num_threads, bound2: 1); |
| 49 | |
| 50 | #ifdef VERBOSE |
| 51 | fprintf(stderr, |
| 52 | "INFO: Using %6d threads for %6d outer iterations with %6d [%6d:%6d] " |
| 53 | "chunks " |
| 54 | "loop type lower triangle <,< - " , |
| 55 | num_threads, upper_bound, total_iterations, lower_per_chunk, |
| 56 | upper_per_chunk); |
| 57 | #endif |
| 58 | |
| 59 | #pragma omp parallel shared(iterations_per_thread, execution_count) |
| 60 | { /* begin of parallel */ |
| 61 | /* Lower triangular execution_count matrix */ |
| 62 | #pragma omp for schedule(static) collapse(2) |
| 63 | for (i = 0; i < upper_bound; i++) { |
| 64 | for (j = 0; j < i; j++) { |
| 65 | (*Index(p: iterations_per_thread, i: omp_get_thread_num(), j: 0, bound2: 1))++; |
| 66 | (*Index(p: execution_count, i, j, bound2: upper_bound))++; |
| 67 | } |
| 68 | } /* end of for*/ |
| 69 | } /* end of parallel */ |
| 70 | |
| 71 | /* check the execution_count array */ |
| 72 | for (i = 0; i < upper_bound; i++) { |
| 73 | for (j = 0; j < i; j++) { |
| 74 | unsigned value = *Index(p: execution_count, i, j, bound2: upper_bound); |
| 75 | /* iteration with j<=i are valid, but should have been executed only once |
| 76 | */ |
| 77 | if (value != 1) { |
| 78 | fprintf(stderr, format: "ERROR: valid iteration [%i,%i] executed %i times.\n" , |
| 79 | i, j, value); |
| 80 | return 0; |
| 81 | } |
| 82 | } |
| 83 | for (j = i; j < upper_bound; j++) { |
| 84 | unsigned value = *Index(p: execution_count, i, j, bound2: upper_bound); |
| 85 | /* iteration with j>=i are invalid and should not have been executed |
| 86 | */ |
| 87 | if (value > 0) { |
| 88 | fprintf(stderr, format: "ERROR: invalid iteration [%i,%i] executed %i times.\n" , |
| 89 | i, j, value); |
| 90 | return 0; |
| 91 | } |
| 92 | } |
| 93 | } |
| 94 | |
| 95 | #ifndef NO_EFFICIENCY_CHECK |
| 96 | /* Ensure the number of iterations executed by each thread is within bounds */ |
| 97 | for (i = 0; i < num_threads; i++) { |
| 98 | unsigned value = *Index(iterations_per_thread, i, 0, 1); |
| 99 | if (value < lower_per_chunk || value > upper_per_chunk) { |
| 100 | fprintf(stderr, |
| 101 | "ERROR: Inefficient Collapse thread %d of %d assigned %i " |
| 102 | "iterations; must be between %d and %d\n" , |
| 103 | i, num_threads, value, lower_per_chunk, upper_per_chunk); |
| 104 | return 0; |
| 105 | } |
| 106 | } |
| 107 | #endif |
| 108 | #ifdef VERBOSE |
| 109 | fprintf(stderr, "PASSED\r\n" ); |
| 110 | #endif |
| 111 | return 1; |
| 112 | } |
| 113 | |
| 114 | int main() { |
| 115 | |
| 116 | execution_count = Alloc(MAX_BOUND, MAX_BOUND); |
| 117 | iterations_per_thread = Alloc(bound1: omp_get_max_threads(), bound2: 1); |
| 118 | |
| 119 | for (unsigned j = 0; j < MAX_BOUND; j++) { |
| 120 | if (!test(upper_bound: j)) |
| 121 | return 1; |
| 122 | } |
| 123 | Free(p: execution_count); |
| 124 | Free(p: iterations_per_thread); |
| 125 | return 0; |
| 126 | } |
| 127 | |