1 | // RUN: %libomp-cxx-compile-and-run |
2 | |
3 | #include <stdio.h> |
4 | #include <omp.h> |
5 | |
// Number of threads requested through omp_set_num_threads() in main(); also
// the trip count of the worksharing loop there.
#define NT 4
// Initial value of the reduction variable `var` in main().
#define INIT 10
8 | |
/*
  The test emulates the code generation needed for a reduction with the task
  modifier on a worksharing (`for`) construct.

  Note: the tasks could simply use the in_reduction clause, but the compiler
  rejects it because of a bug: it mistakenly requires the reduction item to be
  shared, which holds only for reductions on worksharing constructs and is
  wrong for task reductions.
*/
17 | |
18 | //------------------------------------------------ |
19 | // OpenMP runtime library routines |
#ifdef __cplusplus
extern "C" {
#endif

// Entry points are listed in the order main() calls them. The test passes
// NULL wherever a location (`loc`) argument is expected.

// Result is used as the `gtid` argument of the calls below.
extern int __kmpc_global_thread_num(void *);

// Sets up the task reduction for `num` items described by `data`; the returned
// pointer is used by the test as the taskgroup handle `tg`.
// (The older __kmpc_task_reduction_modifier_init(loc, gtid, flags, num, data)
// entry point is deliberately not declared; this test uses the taskred one.)
extern void *__kmpc_taskred_modifier_init(void *loc, int gtid, int is_ws,
                                          int num, void *data);

// Called inside tasks to obtain the pointer through which `item` is updated.
extern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *item);

// Closes what __kmpc_taskred_modifier_init opened; called with the same
// `is_ws` value.
extern void __kmpc_task_reduction_modifier_fini(void *loc, int gtid, int is_ws);

#ifdef __cplusplus
}
#endif
33 | |
34 | //------------------------------------------------ |
35 | // Compiler-generated code |
36 | |
/**
 * Descriptor of one reduction item, filled in by main() and handed to
 * __kmpc_taskred_modifier_init() by address.
 * NOTE(review): field order and types presumably have to mirror the runtime's
 * own input structure -- confirm before changing the layout.
 */
typedef struct red_input {
  /** Item shared between tasks that the reduction accumulates into. */
  void *reduce_shar;
  /** Original reduction item, used for initialization. */
  void *reduce_orig;
  /** Size of the data item in bytes. */
  size_t reduce_size;

  /* Three compiler-generated routines; init and fini are optional. */

  /** Data initialization routine.
   *  NOTE(review): previously described as taking a single parameter --
   *  confirm against the runtime interface this test targets. */
  void *reduce_init;
  /** Data finalization routine. */
  void *reduce_fini;
  /** Data combiner routine. */
  void *reduce_comb;

  /** Flags carrying additional information from the compiler. */
  unsigned flags;
} red_input_t;
47 | |
/* Combiner for int reduction items: adds the int at `rhs` into the int at
 * `lhs`. Both arguments are untyped so the routine can be stored in
 * red_input_t::reduce_comb. */
void i_comb(void *lhs, void *rhs) {
  int *accum = (int *)lhs;
  const int *addend = (const int *)rhs;
  *accum += *addend;
}
49 | |
50 | int main() { |
51 | int var = INIT; |
52 | int *p_var_orig = &var; |
53 | int i; |
54 | omp_set_dynamic(0); |
55 | omp_set_num_threads(NT); |
56 | #pragma omp parallel private(i) shared(p_var_orig) |
57 | // #pragma omp for reduction(task,+:var) |
58 | #pragma omp for reduction(+ : var) |
59 | for (i = 0; i < NT; ++i) // single iteration per thread |
60 | { |
61 | // generated code, which actually should be placed before |
62 | // loop iterations distribution, but placed here just to show the idea, |
63 | // and to keep correctness the loop count is equal to number of threads |
64 | int gtid = __kmpc_global_thread_num(NULL); |
65 | void *tg; // pointer to taskgroup (optional) |
66 | red_input_t r_var; |
67 | r_var.reduce_shar = &var; |
68 | r_var.reduce_orig = |
69 | p_var_orig; // not used in this test but illustrates codegen |
70 | r_var.reduce_size = sizeof(var); |
71 | r_var.reduce_init = NULL; |
72 | r_var.reduce_fini = NULL; |
73 | r_var.reduce_comb = (void *)&i_comb; |
74 | tg = __kmpc_taskred_modifier_init( |
75 | NULL, // ident_t loc; |
76 | gtid, |
77 | is_ws: 1, // 1 - worksharing construct, 0 - parallel |
78 | num: 1, // number of reduction objects |
79 | data: &r_var // related data |
80 | ); |
81 | // end of generated code |
82 | var++; |
83 | #pragma omp task /*in_reduction(+:var)*/ shared(var) |
84 | { |
85 | // emulate task reduction here because of compiler bug: |
86 | // it mistakenly declines to accept in_reduction because var is private |
87 | // outside. |
88 | int gtid = __kmpc_global_thread_num(NULL); |
89 | int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, item: &var); |
90 | *p_var += 1; |
91 | } |
92 | if (omp_get_thread_num() > 0) { |
93 | #pragma omp task /*in_reduction(+:var)*/ shared(var) |
94 | { |
95 | int gtid = __kmpc_global_thread_num(NULL); |
96 | int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, item: &var); |
97 | *p_var += 1; |
98 | } |
99 | } |
100 | // generated code, which actually should be placed after loop completion |
101 | // but before barrier and before loop reduction. It placed here just to show |
102 | // the idea, |
103 | // and to keep correctness the loop count is equal to number of threads |
104 | __kmpc_task_reduction_modifier_fini(NULL, gtid, is_ws: 1); |
105 | // end of generated code |
106 | } |
107 | if (var == INIT + NT * 3 - 1) { |
108 | printf(format: "passed\n" ); |
109 | return 0; |
110 | } else { |
111 | printf(format: "failed: var = %d (!= %d)\n" , var, INIT + NT * 3 - 1); |
112 | return 1; |
113 | } |
114 | } |
115 | |