// RUN: %libomp-cxx-compile-and-run

#include <stdio.h>
#include <omp.h>

#define NT 4
#define INIT 10

/*
The test emulates code generation needed for reduction with task modifier on
parallel construct.

Note: tasks could just use in_reduction clause, but compiler does not accept
this because of bug: it mistakenly requires reduction item to be shared, which
is only true for reduction on worksharing and wrong for task reductions.
*/
| 17 | |
//------------------------------------------------
// OpenMP runtime library routines
#ifdef __cplusplus
extern "C" {
#endif
extern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *item);
// extern void* __kmpc_task_reduction_modifier_init(void *loc, int gtid, int
// is_ws, int num, void* data);
extern void *__kmpc_taskred_modifier_init(void *loc, int gtid, int is_ws,
                                          int num, void *data);
extern void __kmpc_task_reduction_modifier_fini(void *loc, int gtid, int is_ws);
extern int __kmpc_global_thread_num(void *);
#ifdef __cplusplus
}
#endif

//------------------------------------------------
// Compiler-generated code
| 36 | |
// Reduction-item descriptor passed to __kmpc_taskred_modifier_init.
// Field layout must match the runtime's expectation (one entry per
// reduction object); do not reorder or rename members.
typedef struct red_input {
  void *reduce_shar; /**< shared between tasks item to reduce into */
  void *reduce_orig; /**< original reduction item used for initialization */
  size_t reduce_size; /**< size of data item in bytes */
  // three compiler-generated routines (init, fini are optional):
  void *reduce_init; /**< data initialization routine (single parameter) */
  void *reduce_fini; /**< data finalization routine */
  void *reduce_comb; /**< data combiner routine */
  unsigned flags; /**< flags for additional info from compiler */
} red_input_t;
| 47 | |
// Combiner routine for an int '+' reduction: accumulates *rhs into *lhs.
// Signature matches the runtime's expected combiner (void*, void*).
void i_comb(void *lhs, void *rhs) {
  int *dst = (int *)lhs;
  int *src = (int *)rhs;
  *dst = *dst + *src;
}
| 49 | |
| 50 | int main() { |
| 51 | int var = INIT; |
| 52 | int *p_var_orig = &var; |
| 53 | omp_set_dynamic(0); |
| 54 | omp_set_num_threads(NT); |
| 55 | // #pragma omp parallel reduction(task,+:var) |
| 56 | #pragma omp parallel reduction(+ : var) shared(p_var_orig) |
| 57 | { |
| 58 | int gtid = __kmpc_global_thread_num(NULL); |
| 59 | void *tg; // pointer to taskgroup (optional) |
| 60 | red_input_t r_var; |
| 61 | r_var.reduce_shar = &var; |
| 62 | r_var.reduce_orig = |
| 63 | p_var_orig; // not used in this test but illustrates codegen |
| 64 | r_var.reduce_size = sizeof(var); |
| 65 | r_var.reduce_init = NULL; |
| 66 | r_var.reduce_fini = NULL; |
| 67 | r_var.reduce_comb = (void *)&i_comb; |
| 68 | tg = __kmpc_taskred_modifier_init( |
| 69 | NULL, // ident_t loc; |
| 70 | gtid, |
| 71 | is_ws: 0, // 1 - worksharing construct, 0 - parallel |
| 72 | num: 1, // number of reduction objects |
| 73 | data: &r_var // related data |
| 74 | ); |
| 75 | var++; |
| 76 | #pragma omp task /*in_reduction(+:var)*/ shared(var) |
| 77 | { |
| 78 | int gtid = __kmpc_global_thread_num(NULL); |
| 79 | int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, item: &var); |
| 80 | *p_var += 1; |
| 81 | } |
| 82 | if (omp_get_thread_num() > 0) { |
| 83 | #pragma omp task /*in_reduction(+:var)*/ shared(var) |
| 84 | { |
| 85 | int gtid = __kmpc_global_thread_num(NULL); |
| 86 | int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, item: &var); |
| 87 | *p_var += 1; |
| 88 | } |
| 89 | } |
| 90 | __kmpc_task_reduction_modifier_fini(NULL, gtid, is_ws: 0); |
| 91 | } |
| 92 | if (var == INIT + NT * 3 - 1) { |
| 93 | printf(format: "passed\n" ); |
| 94 | return 0; |
| 95 | } else { |
| 96 | printf(format: "failed: var = %d (!= %d)\n" , var, INIT + NT * 3 - 1); |
| 97 | return 1; |
| 98 | } |
| 99 | } |
| 100 | |