1 | // RUN: %libomp-compile-and-run |
2 | #include <stdio.h> |
3 | #include <stdlib.h> |
4 | #include <string.h> |
5 | #include "omp.h" |
6 | |
/* Exclusive upper limit of the loop bounds tried by main(), and the side
   length of the execution_count matrix. Define MAX_BOUND before this point
   (for example on the compiler command line) to override the default. */
#if !defined(MAX_BOUND)
#define MAX_BOUND 64
#endif
/* The per-thread load-balance check in test() is compiled only when _MSC_VER
   is defined; for every other compiler it is switched off here. */
#if !defined(_MSC_VER)
#define NO_EFFICIENCY_CHECK
#endif
13 | |
/* Per-iteration hit counters, stored as a flattened 2-D array: the entry for
   (i, j) holds how many times the loop body ran for that index pair. The run
   is correct when every valid pair was executed exactly once and no invalid
   pair was executed at all. */
unsigned int *execution_count = NULL;
/* Per-thread tally of executed loop iterations, indexed by the OpenMP thread
   number. */
unsigned int *iterations_per_thread = NULL;
19 | |
/* Allocates an uninitialized array of bound1 * bound2 unsigned counters.
   The byte count is computed in size_t, so the element count cannot wrap
   around in unsigned arithmetic and silently produce a too-small buffer. If
   the total size cannot be represented in size_t at all, no allocation is
   attempted.
   Returns the malloc() result, or NULL when the size is unrepresentable or
   the allocation fails. */
unsigned *Alloc(unsigned bound1, unsigned bound2) {
  const size_t max_elems = (size_t)-1 / sizeof(unsigned);
  size_t rows = bound1;
  size_t cols = bound2;

  /* rows * cols * sizeof(unsigned) must not exceed the largest size_t. */
  if (cols != 0 && rows > max_elems / cols) {
    return NULL;
  }
  return (unsigned *)malloc(rows * cols * sizeof(unsigned));
}
23 | |
/* Sets the first bound1 * bound2 unsigned counters at p to zero.
   The byte count is computed in size_t so the product does not wrap in
   unsigned arithmetic. A NULL pointer or an empty range is a no-op, which
   keeps memset() from ever being handed a null destination. */
void ZeroOut(unsigned *p, unsigned bound1, unsigned bound2) {
  size_t bytes = (size_t)bound1 * bound2 * sizeof(unsigned);

  if (p == NULL || bytes == 0) {
    return;
  }
  memset(p, 0, bytes);
}
27 | |
/* Hands the counter array p back to the allocator via free(). */
void Free(unsigned *p) {
  void *block = (void *)p;
  free(block);
}
29 | |
/* Returns the address of element (i, j) in the flattened row-major array p
   whose rows are bound2 elements long. */
unsigned *Index(unsigned *p, unsigned i, unsigned j, unsigned bound2) {
  unsigned offset = i * bound2 + j;
  return p + offset;
}
33 | |
34 | int test(unsigned upper_bound) { |
35 | |
36 | unsigned total_iterations = upper_bound * (upper_bound - 1) / 2; |
37 | unsigned num_threads = omp_get_max_threads(); |
38 | unsigned lower_per_chunk = total_iterations / num_threads; |
39 | unsigned upper_per_chunk = |
40 | lower_per_chunk + ((total_iterations % num_threads) ? 1 : 0); |
41 | int i, j; |
42 | |
43 | omp_set_num_threads(num_threads); |
44 | |
45 | ZeroOut(p: execution_count, bound1: upper_bound, bound2: upper_bound); |
46 | ZeroOut(p: iterations_per_thread, bound1: num_threads, bound2: 1); |
47 | |
48 | #ifdef VERBOSE |
49 | fprintf(stderr, |
50 | "INFO: Using %6d threads for %6d outer iterations with %6d [%6d:%6d] " |
51 | "chunks " |
52 | "loop type lower triangle <,< - " , |
53 | num_threads, upper_bound, total_iterations, lower_per_chunk, |
54 | upper_per_chunk); |
55 | #endif |
56 | |
57 | #pragma omp parallel shared(iterations_per_thread, execution_count) |
58 | { /* begin of parallel */ |
59 | /* Lower triangular execution_count matrix */ |
60 | #pragma omp for schedule(static) collapse(2) |
61 | for (i = 0; i < upper_bound; i++) { |
62 | for (j = 0; j < i; j++) { |
63 | (*Index(p: iterations_per_thread, i: omp_get_thread_num(), j: 0, bound2: 1))++; |
64 | (*Index(p: execution_count, i, j, bound2: upper_bound))++; |
65 | } |
66 | } /* end of for*/ |
67 | } /* end of parallel */ |
68 | |
69 | /* check the execution_count array */ |
70 | for (i = 0; i < upper_bound; i++) { |
71 | for (j = 0; j < i; j++) { |
72 | unsigned value = *Index(p: execution_count, i, j, bound2: upper_bound); |
73 | /* iteration with j<=i are valid, but should have been executed only once |
74 | */ |
75 | if (value != 1) { |
76 | fprintf(stderr, format: "ERROR: valid iteration [%i,%i] executed %i times.\n" , |
77 | i, j, value); |
78 | return 0; |
79 | } |
80 | } |
81 | for (j = i; j < upper_bound; j++) { |
82 | unsigned value = *Index(p: execution_count, i, j, bound2: upper_bound); |
83 | /* iteration with j>=i are invalid and should not have been executed |
84 | */ |
85 | if (value > 0) { |
86 | fprintf(stderr, format: "ERROR: invalid iteration [%i,%i] executed %i times.\n" , |
87 | i, j, value); |
88 | return 0; |
89 | } |
90 | } |
91 | } |
92 | |
93 | #ifndef NO_EFFICIENCY_CHECK |
94 | /* Ensure the number of iterations executed by each thread is within bounds */ |
95 | for (i = 0; i < num_threads; i++) { |
96 | unsigned value = *Index(iterations_per_thread, i, 0, 1); |
97 | if (value < lower_per_chunk || value > upper_per_chunk) { |
98 | fprintf(stderr, |
99 | "ERROR: Inefficient Collapse thread %d of %d assigned %i " |
100 | "iterations; must be between %d and %d\n" , |
101 | i, num_threads, value, lower_per_chunk, upper_per_chunk); |
102 | return 0; |
103 | } |
104 | } |
105 | #endif |
106 | #ifdef VERBOSE |
107 | fprintf(stderr, "PASSED\r\n" ); |
108 | #endif |
109 | return 1; |
110 | } |
111 | |
112 | int main() { |
113 | |
114 | execution_count = Alloc(MAX_BOUND, MAX_BOUND); |
115 | iterations_per_thread = Alloc(bound1: omp_get_max_threads(), bound2: 1); |
116 | |
117 | for (unsigned j = 0; j < MAX_BOUND; j++) { |
118 | if (!test(upper_bound: j)) |
119 | return 1; |
120 | } |
121 | Free(p: execution_count); |
122 | Free(p: iterations_per_thread); |
123 | return 0; |
124 | } |
125 | |