1 | /* |
2 | * Copyright 2011 INRIA Saclay |
3 | * Copyright 2012-2014 Ecole Normale Superieure |
4 | * Copyright 2015-2016 Sven Verdoolaege |
5 | * Copyright 2016 INRIA Paris |
6 | * Copyright 2017 Sven Verdoolaege |
7 | * |
8 | * Use of this software is governed by the MIT license |
9 | * |
10 | * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France, |
11 | * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod, |
12 | * 91893 Orsay, France |
13 | * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France |
14 | * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12, |
15 | * CS 42112, 75589 Paris Cedex 12, France |
16 | */ |
17 | |
18 | #include <isl_ctx_private.h> |
19 | #include <isl_map_private.h> |
20 | #include <isl_space_private.h> |
21 | #include <isl_aff_private.h> |
22 | #include <isl/hash.h> |
23 | #include <isl/id.h> |
24 | #include <isl/constraint.h> |
25 | #include <isl/schedule.h> |
26 | #include <isl_schedule_constraints.h> |
27 | #include <isl/schedule_node.h> |
28 | #include <isl_mat_private.h> |
29 | #include <isl_vec_private.h> |
30 | #include <isl/set.h> |
31 | #include <isl_union_set_private.h> |
32 | #include <isl_seq.h> |
33 | #include <isl_tab.h> |
34 | #include <isl_dim_map.h> |
35 | #include <isl/map_to_basic_set.h> |
36 | #include <isl_sort.h> |
37 | #include <isl_options_private.h> |
38 | #include <isl_tarjan.h> |
39 | #include <isl_morph.h> |
40 | #include <isl/ilp.h> |
41 | #include <isl_val_private.h> |
42 | |
43 | #include "isl_scheduler.h" |
44 | #include "isl_scheduler_clustering.h" |
45 | |
46 | /* |
47 | * The scheduling algorithm implemented in this file was inspired by |
48 | * Bondhugula et al., "Automatic Transformations for Communication-Minimized |
49 | * Parallelization and Locality Optimization in the Polyhedral Model". |
50 | * |
51 | * For a detailed description of the variant implemented in isl, |
52 | * see Verdoolaege and Janssens, "Scheduling for PPCG" (2017). |
53 | */ |
54 | |
55 | |
56 | static isl_bool node_has_tuples(const void *entry, const void *val) |
57 | { |
58 | struct isl_sched_node *node = (struct isl_sched_node *)entry; |
59 | isl_space *space = (isl_space *) val; |
60 | |
61 | return isl_space_has_equal_tuples(space1: node->space, space2: space); |
62 | } |
63 | |
64 | int isl_sched_node_scc_exactly(struct isl_sched_node *node, int scc) |
65 | { |
66 | return node->scc == scc; |
67 | } |
68 | |
69 | static int node_scc_at_most(struct isl_sched_node *node, int scc) |
70 | { |
71 | return node->scc <= scc; |
72 | } |
73 | |
74 | static int node_scc_at_least(struct isl_sched_node *node, int scc) |
75 | { |
76 | return node->scc >= scc; |
77 | } |
78 | |
79 | /* Is "edge" marked as being of type "type"? |
80 | */ |
81 | int isl_sched_edge_has_type(struct isl_sched_edge *edge, |
82 | enum isl_edge_type type) |
83 | { |
84 | return ISL_FL_ISSET(edge->types, 1 << type); |
85 | } |
86 | |
87 | /* Mark "edge" as being of type "type". |
88 | */ |
89 | static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type) |
90 | { |
91 | ISL_FL_SET(edge->types, 1 << type); |
92 | } |
93 | |
94 | /* No longer mark "edge" as being of type "type"? |
95 | */ |
96 | static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type) |
97 | { |
98 | ISL_FL_CLR(edge->types, 1 << type); |
99 | } |
100 | |
101 | /* Is "edge" marked as a validity edge? |
102 | */ |
103 | static int is_validity(struct isl_sched_edge *edge) |
104 | { |
105 | return isl_sched_edge_has_type(edge, type: isl_edge_validity); |
106 | } |
107 | |
108 | /* Mark "edge" as a validity edge. |
109 | */ |
110 | static void set_validity(struct isl_sched_edge *edge) |
111 | { |
112 | set_type(edge, type: isl_edge_validity); |
113 | } |
114 | |
115 | /* Is "edge" marked as a proximity edge? |
116 | */ |
117 | int isl_sched_edge_is_proximity(struct isl_sched_edge *edge) |
118 | { |
119 | return isl_sched_edge_has_type(edge, type: isl_edge_proximity); |
120 | } |
121 | |
122 | /* Is "edge" marked as a local edge? |
123 | */ |
124 | static int is_local(struct isl_sched_edge *edge) |
125 | { |
126 | return isl_sched_edge_has_type(edge, type: isl_edge_local); |
127 | } |
128 | |
129 | /* Mark "edge" as a local edge. |
130 | */ |
131 | static void set_local(struct isl_sched_edge *edge) |
132 | { |
133 | set_type(edge, type: isl_edge_local); |
134 | } |
135 | |
136 | /* No longer mark "edge" as a local edge. |
137 | */ |
138 | static void clear_local(struct isl_sched_edge *edge) |
139 | { |
140 | clear_type(edge, type: isl_edge_local); |
141 | } |
142 | |
143 | /* Is "edge" marked as a coincidence edge? |
144 | */ |
145 | static int is_coincidence(struct isl_sched_edge *edge) |
146 | { |
147 | return isl_sched_edge_has_type(edge, type: isl_edge_coincidence); |
148 | } |
149 | |
150 | /* Is "edge" marked as a condition edge? |
151 | */ |
152 | int isl_sched_edge_is_condition(struct isl_sched_edge *edge) |
153 | { |
154 | return isl_sched_edge_has_type(edge, type: isl_edge_condition); |
155 | } |
156 | |
157 | /* Is "edge" marked as a conditional validity edge? |
158 | */ |
159 | int isl_sched_edge_is_conditional_validity(struct isl_sched_edge *edge) |
160 | { |
161 | return isl_sched_edge_has_type(edge, type: isl_edge_conditional_validity); |
162 | } |
163 | |
/* Can more than one edge of the type(s) of "edge" connect
 * the same pair of nodes?
 *
 * This is the case for condition edges and conditional validity edges.
 * Their dependence relations may be tagged, and then a separate edge
 * is created for each pair of tags.
 */
static int is_multi_edge_type(struct isl_sched_edge *edge)
{
	if (isl_sched_edge_is_condition(edge))
		return 1;
	return isl_sched_edge_is_conditional_validity(edge) != 0;
}
176 | |
177 | /* Initialize node_table based on the list of nodes. |
178 | */ |
179 | static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph) |
180 | { |
181 | int i; |
182 | |
183 | graph->node_table = isl_hash_table_alloc(ctx, min_size: graph->n); |
184 | if (!graph->node_table) |
185 | return -1; |
186 | |
187 | for (i = 0; i < graph->n; ++i) { |
188 | struct isl_hash_table_entry *entry; |
189 | uint32_t hash; |
190 | |
191 | hash = isl_space_get_tuple_hash(space: graph->node[i].space); |
192 | entry = isl_hash_table_find(ctx, table: graph->node_table, key_hash: hash, |
193 | eq: &node_has_tuples, |
194 | val: graph->node[i].space, reserve: 1); |
195 | if (!entry) |
196 | return -1; |
197 | entry->data = &graph->node[i]; |
198 | } |
199 | |
200 | return 0; |
201 | } |
202 | |
203 | /* Return a pointer to the node that lives within the given space, |
204 | * an invalid node if there is no such node, or NULL in case of error. |
205 | */ |
206 | struct isl_sched_node *isl_sched_graph_find_node(isl_ctx *ctx, |
207 | struct isl_sched_graph *graph, __isl_keep isl_space *space) |
208 | { |
209 | struct isl_hash_table_entry *entry; |
210 | uint32_t hash; |
211 | |
212 | if (!space) |
213 | return NULL; |
214 | |
215 | hash = isl_space_get_tuple_hash(space); |
216 | entry = isl_hash_table_find(ctx, table: graph->node_table, key_hash: hash, |
217 | eq: &node_has_tuples, val: space, reserve: 0); |
218 | if (!entry) |
219 | return NULL; |
220 | if (entry == isl_hash_table_entry_none) |
221 | return graph->node + graph->n; |
222 | |
223 | return entry->data; |
224 | } |
225 | |
226 | /* Is "node" a node in "graph"? |
227 | */ |
228 | int isl_sched_graph_is_node(struct isl_sched_graph *graph, |
229 | struct isl_sched_node *node) |
230 | { |
231 | return node && node >= &graph->node[0] && node < &graph->node[graph->n]; |
232 | } |
233 | |
234 | static isl_bool edge_has_src_and_dst(const void *entry, const void *val) |
235 | { |
236 | const struct isl_sched_edge *edge = entry; |
237 | const struct isl_sched_edge *temp = val; |
238 | |
239 | return isl_bool_ok(b: edge->src == temp->src && edge->dst == temp->dst); |
240 | } |
241 | |
242 | /* Add the given edge to graph->edge_table[type]. |
243 | */ |
244 | static isl_stat graph_edge_table_add(isl_ctx *ctx, |
245 | struct isl_sched_graph *graph, enum isl_edge_type type, |
246 | struct isl_sched_edge *edge) |
247 | { |
248 | struct isl_hash_table_entry *entry; |
249 | uint32_t hash; |
250 | |
251 | hash = isl_hash_init(); |
252 | hash = isl_hash_builtin(hash, edge->src); |
253 | hash = isl_hash_builtin(hash, edge->dst); |
254 | entry = isl_hash_table_find(ctx, table: graph->edge_table[type], key_hash: hash, |
255 | eq: &edge_has_src_and_dst, val: edge, reserve: 1); |
256 | if (!entry) |
257 | return isl_stat_error; |
258 | entry->data = edge; |
259 | |
260 | return isl_stat_ok; |
261 | } |
262 | |
263 | /* Add "edge" to all relevant edge tables. |
264 | * That is, for every type of the edge, add it to the corresponding table. |
265 | */ |
266 | static isl_stat graph_edge_tables_add(isl_ctx *ctx, |
267 | struct isl_sched_graph *graph, struct isl_sched_edge *edge) |
268 | { |
269 | enum isl_edge_type t; |
270 | |
271 | for (t = isl_edge_first; t <= isl_edge_last; ++t) { |
272 | if (!isl_sched_edge_has_type(edge, type: t)) |
273 | continue; |
274 | if (graph_edge_table_add(ctx, graph, type: t, edge) < 0) |
275 | return isl_stat_error; |
276 | } |
277 | |
278 | return isl_stat_ok; |
279 | } |
280 | |
281 | /* Allocate the edge_tables based on the maximal number of edges of |
282 | * each type. |
283 | */ |
284 | static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph) |
285 | { |
286 | int i; |
287 | |
288 | for (i = 0; i <= isl_edge_last; ++i) { |
289 | graph->edge_table[i] = isl_hash_table_alloc(ctx, |
290 | min_size: graph->max_edge[i]); |
291 | if (!graph->edge_table[i]) |
292 | return -1; |
293 | } |
294 | |
295 | return 0; |
296 | } |
297 | |
298 | /* If graph->edge_table[type] contains an edge from the given source |
299 | * to the given destination, then return the hash table entry of this edge. |
300 | * Otherwise, return NULL. |
301 | */ |
302 | static struct isl_hash_table_entry *graph_find_edge_entry( |
303 | struct isl_sched_graph *graph, |
304 | enum isl_edge_type type, |
305 | struct isl_sched_node *src, struct isl_sched_node *dst) |
306 | { |
307 | isl_ctx *ctx = isl_space_get_ctx(space: src->space); |
308 | uint32_t hash; |
309 | struct isl_sched_edge temp = { .src = src, .dst = dst }; |
310 | |
311 | hash = isl_hash_init(); |
312 | hash = isl_hash_builtin(hash, temp.src); |
313 | hash = isl_hash_builtin(hash, temp.dst); |
314 | return isl_hash_table_find(ctx, table: graph->edge_table[type], key_hash: hash, |
315 | eq: &edge_has_src_and_dst, val: &temp, reserve: 0); |
316 | } |
317 | |
318 | |
319 | /* If graph->edge_table[type] contains an edge from the given source |
320 | * to the given destination, then return this edge. |
321 | * Return "none" if no such edge can be found. |
322 | * Return NULL on error. |
323 | */ |
324 | static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph, |
325 | enum isl_edge_type type, |
326 | struct isl_sched_node *src, struct isl_sched_node *dst, |
327 | struct isl_sched_edge *none) |
328 | { |
329 | struct isl_hash_table_entry *entry; |
330 | |
331 | entry = graph_find_edge_entry(graph, type, src, dst); |
332 | if (!entry) |
333 | return NULL; |
334 | if (entry == isl_hash_table_entry_none) |
335 | return none; |
336 | |
337 | return entry->data; |
338 | } |
339 | |
340 | /* Check whether the dependence graph has an edge of the given type |
341 | * between the given two nodes. |
342 | */ |
343 | static isl_bool graph_has_edge(struct isl_sched_graph *graph, |
344 | enum isl_edge_type type, |
345 | struct isl_sched_node *src, struct isl_sched_node *dst) |
346 | { |
347 | struct isl_sched_edge dummy; |
348 | struct isl_sched_edge *edge; |
349 | isl_bool empty; |
350 | |
351 | edge = graph_find_edge(graph, type, src, dst, none: &dummy); |
352 | if (!edge) |
353 | return isl_bool_error; |
354 | if (edge == &dummy) |
355 | return isl_bool_false; |
356 | |
357 | empty = isl_map_plain_is_empty(map: edge->map); |
358 | |
359 | return isl_bool_not(b: empty); |
360 | } |
361 | |
362 | /* Look for any edge with the same src, dst and map fields as "model". |
363 | * |
364 | * Return the matching edge if one can be found. |
365 | * Return "model" if no matching edge is found. |
366 | * Return NULL on error. |
367 | */ |
368 | static struct isl_sched_edge *graph_find_matching_edge( |
369 | struct isl_sched_graph *graph, struct isl_sched_edge *model) |
370 | { |
371 | enum isl_edge_type i; |
372 | struct isl_sched_edge *edge; |
373 | |
374 | for (i = isl_edge_first; i <= isl_edge_last; ++i) { |
375 | int is_equal; |
376 | |
377 | edge = graph_find_edge(graph, type: i, src: model->src, dst: model->dst, none: model); |
378 | if (!edge) |
379 | return NULL; |
380 | if (edge == model) |
381 | continue; |
382 | is_equal = isl_map_plain_is_equal(map1: model->map, map2: edge->map); |
383 | if (is_equal < 0) |
384 | return NULL; |
385 | if (is_equal) |
386 | return edge; |
387 | } |
388 | |
389 | return model; |
390 | } |
391 | |
392 | /* Remove the given edge from all the edge_tables that refer to it. |
393 | */ |
394 | static isl_stat graph_remove_edge(struct isl_sched_graph *graph, |
395 | struct isl_sched_edge *edge) |
396 | { |
397 | isl_ctx *ctx = isl_map_get_ctx(map: edge->map); |
398 | enum isl_edge_type i; |
399 | |
400 | for (i = isl_edge_first; i <= isl_edge_last; ++i) { |
401 | struct isl_hash_table_entry *entry; |
402 | |
403 | entry = graph_find_edge_entry(graph, type: i, src: edge->src, dst: edge->dst); |
404 | if (!entry) |
405 | return isl_stat_error; |
406 | if (entry == isl_hash_table_entry_none) |
407 | continue; |
408 | if (entry->data != edge) |
409 | continue; |
410 | isl_hash_table_remove(ctx, table: graph->edge_table[i], entry); |
411 | } |
412 | |
413 | return isl_stat_ok; |
414 | } |
415 | |
416 | /* Check whether the dependence graph has any edge |
417 | * between the given two nodes. |
418 | */ |
419 | static isl_bool graph_has_any_edge(struct isl_sched_graph *graph, |
420 | struct isl_sched_node *src, struct isl_sched_node *dst) |
421 | { |
422 | enum isl_edge_type i; |
423 | isl_bool r; |
424 | |
425 | for (i = isl_edge_first; i <= isl_edge_last; ++i) { |
426 | r = graph_has_edge(graph, type: i, src, dst); |
427 | if (r < 0 || r) |
428 | return r; |
429 | } |
430 | |
431 | return r; |
432 | } |
433 | |
434 | /* Check whether the dependence graph has a validity edge |
435 | * between the given two nodes. |
436 | * |
437 | * Conditional validity edges are essentially validity edges that |
438 | * can be ignored if the corresponding condition edges are iteration private. |
439 | * Here, we are only checking for the presence of validity |
440 | * edges, so we need to consider the conditional validity edges too. |
441 | * In particular, this function is used during the detection |
442 | * of strongly connected components and we cannot ignore |
443 | * conditional validity edges during this detection. |
444 | */ |
445 | isl_bool isl_sched_graph_has_validity_edge(struct isl_sched_graph *graph, |
446 | struct isl_sched_node *src, struct isl_sched_node *dst) |
447 | { |
448 | isl_bool r; |
449 | |
450 | r = graph_has_edge(graph, type: isl_edge_validity, src, dst); |
451 | if (r < 0 || r) |
452 | return r; |
453 | |
454 | return graph_has_edge(graph, type: isl_edge_conditional_validity, src, dst); |
455 | } |
456 | |
457 | /* Perform all the required memory allocations for a schedule graph "graph" |
458 | * with "n_node" nodes and "n_edge" edge and initialize the corresponding |
459 | * fields. |
460 | */ |
461 | static isl_stat graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph, |
462 | int n_node, int n_edge) |
463 | { |
464 | int i; |
465 | |
466 | graph->n = n_node; |
467 | graph->n_edge = n_edge; |
468 | graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n); |
469 | graph->sorted = isl_calloc_array(ctx, int, graph->n); |
470 | graph->region = isl_alloc_array(ctx, |
471 | struct isl_trivial_region, graph->n); |
472 | graph->edge = isl_calloc_array(ctx, |
473 | struct isl_sched_edge, graph->n_edge); |
474 | |
475 | graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, min_size: 2 * n_edge); |
476 | graph->intra_hmap_param = isl_map_to_basic_set_alloc(ctx, min_size: 2 * n_edge); |
477 | graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, min_size: 2 * n_edge); |
478 | |
479 | if (!graph->node || !graph->region || (graph->n_edge && !graph->edge) || |
480 | !graph->sorted) |
481 | return isl_stat_error; |
482 | |
483 | for(i = 0; i < graph->n; ++i) |
484 | graph->sorted[i] = i; |
485 | |
486 | return isl_stat_ok; |
487 | } |
488 | |
489 | /* Free the memory associated to node "node" in "graph". |
490 | * The "coincident" field is shared by nodes in a graph and its subgraph. |
491 | * It therefore only needs to be freed for the original dependence graph, |
492 | * i.e., one that is not the result of splitting. |
493 | */ |
494 | static void clear_node(struct isl_sched_graph *graph, |
495 | struct isl_sched_node *node) |
496 | { |
497 | isl_space_free(space: node->space); |
498 | isl_set_free(set: node->hull); |
499 | isl_multi_aff_free(multi: node->compress); |
500 | isl_pw_multi_aff_free(pma: node->decompress); |
501 | isl_mat_free(mat: node->sched); |
502 | isl_map_free(map: node->sched_map); |
503 | isl_mat_free(mat: node->indep); |
504 | isl_mat_free(mat: node->vmap); |
505 | if (graph->root == graph) |
506 | free(ptr: node->coincident); |
507 | isl_multi_val_free(multi: node->sizes); |
508 | isl_basic_set_free(bset: node->bounds); |
509 | isl_vec_free(vec: node->max); |
510 | } |
511 | |
512 | void isl_sched_graph_free(isl_ctx *ctx, struct isl_sched_graph *graph) |
513 | { |
514 | int i; |
515 | |
516 | isl_map_to_basic_set_free(hmap: graph->intra_hmap); |
517 | isl_map_to_basic_set_free(hmap: graph->intra_hmap_param); |
518 | isl_map_to_basic_set_free(hmap: graph->inter_hmap); |
519 | |
520 | if (graph->node) |
521 | for (i = 0; i < graph->n; ++i) |
522 | clear_node(graph, node: &graph->node[i]); |
523 | free(ptr: graph->node); |
524 | free(ptr: graph->sorted); |
525 | if (graph->edge) |
526 | for (i = 0; i < graph->n_edge; ++i) { |
527 | isl_map_free(map: graph->edge[i].map); |
528 | isl_union_map_free(umap: graph->edge[i].tagged_condition); |
529 | isl_union_map_free(umap: graph->edge[i].tagged_validity); |
530 | } |
531 | free(ptr: graph->edge); |
532 | free(ptr: graph->region); |
533 | for (i = 0; i <= isl_edge_last; ++i) |
534 | isl_hash_table_free(ctx, table: graph->edge_table[i]); |
535 | isl_hash_table_free(ctx, table: graph->node_table); |
536 | isl_basic_set_free(bset: graph->lp); |
537 | } |
538 | |
539 | /* For each "set" on which this function is called, increment |
540 | * graph->n by one and update graph->maxvar. |
541 | */ |
542 | static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user) |
543 | { |
544 | struct isl_sched_graph *graph = user; |
545 | isl_size nvar = isl_set_dim(set, type: isl_dim_set); |
546 | |
547 | graph->n++; |
548 | if (nvar > graph->maxvar) |
549 | graph->maxvar = nvar; |
550 | |
551 | isl_set_free(set); |
552 | |
553 | if (nvar < 0) |
554 | return isl_stat_error; |
555 | return isl_stat_ok; |
556 | } |
557 | |
558 | /* Compute the number of rows that should be allocated for the schedule. |
559 | * In particular, we need one row for each variable or one row |
560 | * for each basic map in the dependences. |
561 | * Note that it is practically impossible to exhaust both |
562 | * the number of dependences and the number of variables. |
563 | */ |
564 | static isl_stat compute_max_row(struct isl_sched_graph *graph, |
565 | __isl_keep isl_schedule_constraints *sc) |
566 | { |
567 | int n_edge; |
568 | isl_stat r; |
569 | isl_union_set *domain; |
570 | |
571 | graph->n = 0; |
572 | graph->maxvar = 0; |
573 | domain = isl_schedule_constraints_get_domain(sc); |
574 | r = isl_union_set_foreach_set(uset: domain, fn: &init_n_maxvar, user: graph); |
575 | isl_union_set_free(uset: domain); |
576 | if (r < 0) |
577 | return isl_stat_error; |
578 | n_edge = isl_schedule_constraints_n_basic_map(sc); |
579 | if (n_edge < 0) |
580 | return isl_stat_error; |
581 | graph->max_row = n_edge + graph->maxvar; |
582 | |
583 | return isl_stat_ok; |
584 | } |
585 | |
586 | /* Does "bset" have any defining equalities for its set variables? |
587 | */ |
588 | static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset) |
589 | { |
590 | int i; |
591 | isl_size n; |
592 | |
593 | n = isl_basic_set_dim(bset, type: isl_dim_set); |
594 | if (n < 0) |
595 | return isl_bool_error; |
596 | |
597 | for (i = 0; i < n; ++i) { |
598 | isl_bool has; |
599 | |
600 | has = isl_basic_set_has_defining_equality(bset, type: isl_dim_set, pos: i, |
601 | NULL); |
602 | if (has < 0 || has) |
603 | return has; |
604 | } |
605 | |
606 | return isl_bool_false; |
607 | } |
608 | |
609 | /* Set the entries of node->max to the value of the schedule_max_coefficient |
610 | * option, if set. |
611 | */ |
612 | static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node) |
613 | { |
614 | int max; |
615 | |
616 | max = isl_options_get_schedule_max_coefficient(ctx); |
617 | if (max == -1) |
618 | return isl_stat_ok; |
619 | |
620 | node->max = isl_vec_alloc(ctx, size: node->nvar); |
621 | node->max = isl_vec_set_si(vec: node->max, v: max); |
622 | if (!node->max) |
623 | return isl_stat_error; |
624 | |
625 | return isl_stat_ok; |
626 | } |
627 | |
628 | /* Set the entries of node->max to the minimum of the schedule_max_coefficient |
629 | * option (if set) and half of the minimum of the sizes in the other |
630 | * dimensions. Round up when computing the half such that |
631 | * if the minimum of the sizes is one, half of the size is taken to be one |
632 | * rather than zero. |
633 | * If the global minimum is unbounded (i.e., if both |
634 | * the schedule_max_coefficient is not set and the sizes in the other |
635 | * dimensions are unbounded), then store a negative value. |
636 | * If the schedule coefficient is close to the size of the instance set |
637 | * in another dimension, then the schedule may represent a loop |
638 | * coalescing transformation (especially if the coefficient |
639 | * in that other dimension is one). Forcing the coefficient to be |
640 | * smaller than or equal to half the minimal size should avoid this |
641 | * situation. |
642 | */ |
643 | static isl_stat compute_max_coefficient(isl_ctx *ctx, |
644 | struct isl_sched_node *node) |
645 | { |
646 | int max; |
647 | int i, j; |
648 | isl_vec *v; |
649 | |
650 | max = isl_options_get_schedule_max_coefficient(ctx); |
651 | v = isl_vec_alloc(ctx, size: node->nvar); |
652 | if (!v) |
653 | return isl_stat_error; |
654 | |
655 | for (i = 0; i < node->nvar; ++i) { |
656 | isl_int_set_si(v->el[i], max); |
657 | isl_int_mul_si(v->el[i], v->el[i], 2); |
658 | } |
659 | |
660 | for (i = 0; i < node->nvar; ++i) { |
661 | isl_val *size; |
662 | |
663 | size = isl_multi_val_get_val(multi: node->sizes, pos: i); |
664 | if (!size) |
665 | goto error; |
666 | if (!isl_val_is_int(v: size)) { |
667 | isl_val_free(v: size); |
668 | continue; |
669 | } |
670 | for (j = 0; j < node->nvar; ++j) { |
671 | if (j == i) |
672 | continue; |
673 | if (isl_int_is_neg(v->el[j]) || |
674 | isl_int_gt(v->el[j], size->n)) |
675 | isl_int_set(v->el[j], size->n); |
676 | } |
677 | isl_val_free(v: size); |
678 | } |
679 | |
680 | for (i = 0; i < node->nvar; ++i) |
681 | isl_int_cdiv_q_ui(v->el[i], v->el[i], 2); |
682 | |
683 | node->max = v; |
684 | return isl_stat_ok; |
685 | error: |
686 | isl_vec_free(vec: v); |
687 | return isl_stat_error; |
688 | } |
689 | |
690 | /* Construct an identifier for node "node", which will represent "set". |
691 | * The name of the identifier is either "compressed" or |
692 | * "compressed_<name>", with <name> the name of the space of "set". |
693 | * The user pointer of the identifier points to "node". |
694 | */ |
695 | static __isl_give isl_id *construct_compressed_id(__isl_keep isl_set *set, |
696 | struct isl_sched_node *node) |
697 | { |
698 | isl_bool has_name; |
699 | isl_ctx *ctx; |
700 | isl_id *id; |
701 | isl_printer *p; |
702 | const char *name; |
703 | char *id_name; |
704 | |
705 | has_name = isl_set_has_tuple_name(set); |
706 | if (has_name < 0) |
707 | return NULL; |
708 | |
709 | ctx = isl_set_get_ctx(set); |
710 | if (!has_name) |
711 | return isl_id_alloc(ctx, name: "compressed" , user: node); |
712 | |
713 | p = isl_printer_to_str(ctx); |
714 | name = isl_set_get_tuple_name(set); |
715 | p = isl_printer_print_str(p, s: "compressed_" ); |
716 | p = isl_printer_print_str(p, s: name); |
717 | id_name = isl_printer_get_str(printer: p); |
718 | isl_printer_free(printer: p); |
719 | |
720 | id = isl_id_alloc(ctx, name: id_name, user: node); |
721 | free(ptr: id_name); |
722 | |
723 | return id; |
724 | } |
725 | |
726 | /* Construct a map that isolates the variable in position "pos" in "set". |
727 | * |
728 | * That is, construct |
729 | * |
730 | * [i_0, ..., i_pos-1, i_pos+1, ...] -> [i_pos] |
731 | */ |
732 | static __isl_give isl_map *isolate(__isl_take isl_set *set, int pos) |
733 | { |
734 | isl_map *map; |
735 | |
736 | map = isl_set_project_onto_map(set, type: isl_dim_set, first: pos, n: 1); |
737 | map = isl_map_project_out(map, type: isl_dim_in, first: pos, n: 1); |
738 | return map; |
739 | } |
740 | |
741 | /* Compute and return the size of "set" in dimension "dim". |
742 | * The size is taken to be the difference in values for that variable |
743 | * for fixed values of the other variables. |
744 | * This assumes that "set" is convex. |
745 | * In particular, the variable is first isolated from the other variables |
746 | * in the range of a map |
747 | * |
748 | * [i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim] |
749 | * |
750 | * and then duplicated |
751 | * |
752 | * [i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']] |
753 | * |
754 | * The shared variables are then projected out and the maximal value |
755 | * of i_dim' - i_dim is computed. |
756 | */ |
757 | static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim) |
758 | { |
759 | isl_map *map; |
760 | isl_local_space *ls; |
761 | isl_aff *obj; |
762 | isl_val *v; |
763 | |
764 | map = isolate(set, pos: dim); |
765 | map = isl_map_range_product(map1: map, map2: isl_map_copy(map)); |
766 | map = isl_set_unwrap(set: isl_map_range(map)); |
767 | set = isl_map_deltas(map); |
768 | ls = isl_local_space_from_space(space: isl_set_get_space(set)); |
769 | obj = isl_aff_var_on_domain(ls, type: isl_dim_set, pos: 0); |
770 | v = isl_set_max_val(set, obj); |
771 | isl_aff_free(aff: obj); |
772 | isl_set_free(set); |
773 | |
774 | return v; |
775 | } |
776 | |
777 | /* Perform a compression on "node" where "hull" represents the constraints |
778 | * that were used to derive the compression, while "compress" and |
779 | * "decompress" map the original space to the compressed space and |
780 | * vice versa. |
781 | * |
782 | * If "node" was not compressed already, then simply store |
783 | * the compression information. |
784 | * Otherwise the "original" space is actually the result |
785 | * of a previous compression, which is then combined |
786 | * with the present compression. |
787 | * |
788 | * The dimensionality of the compressed domain is also adjusted. |
789 | * Other information, such as the sizes and the maximal coefficient values, |
790 | * has not been computed yet and therefore does not need to be adjusted. |
791 | */ |
792 | static isl_stat compress_node(struct isl_sched_node *node, |
793 | __isl_take isl_set *hull, __isl_take isl_multi_aff *compress, |
794 | __isl_take isl_pw_multi_aff *decompress) |
795 | { |
796 | node->nvar = isl_multi_aff_dim(multi: compress, type: isl_dim_out); |
797 | if (!node->compressed) { |
798 | node->compressed = 1; |
799 | node->hull = hull; |
800 | node->compress = compress; |
801 | node->decompress = decompress; |
802 | } else { |
803 | hull = isl_set_preimage_multi_aff(set: hull, |
804 | ma: isl_multi_aff_copy(multi: node->compress)); |
805 | node->hull = isl_set_intersect(set1: node->hull, set2: hull); |
806 | node->compress = isl_multi_aff_pullback_multi_aff( |
807 | ma1: compress, ma2: node->compress); |
808 | node->decompress = isl_pw_multi_aff_pullback_pw_multi_aff( |
809 | pma1: node->decompress, pma2: decompress); |
810 | } |
811 | |
812 | if (!node->hull || !node->compress || !node->decompress) |
813 | return isl_stat_error; |
814 | |
815 | return isl_stat_ok; |
816 | } |
817 | |
818 | /* Given that dimension "pos" in "set" has a fixed value |
819 | * in terms of the other dimensions, (further) compress "node" |
820 | * by projecting out this dimension. |
821 | * "set" may be the result of a previous compression. |
822 | * "uncompressed" is the original domain (without compression). |
823 | * |
824 | * The compression function simply projects out the dimension. |
825 | * The decompression function adds back the dimension |
826 | * in the right position as an expression of the other dimensions |
827 | * derived from "set". |
828 | * As in extract_node, the compressed space has an identifier |
829 | * that references "node" such that each compressed space is unique and |
830 | * such that the node can be recovered from the compressed space. |
831 | * |
832 | * The constraint removed through the compression is added to the "hull" |
833 | * such that only edges that relate to the original domains |
834 | * are taken into account. |
835 | * In particular, it is obtained by composing compression and decompression and |
836 | * taking the relation among the variables in the range. |
837 | */ |
838 | static isl_stat project_out_fixed(struct isl_sched_node *node, |
839 | __isl_keep isl_set *uncompressed, __isl_take isl_set *set, int pos) |
840 | { |
841 | isl_id *id; |
842 | isl_space *space; |
843 | isl_set *domain; |
844 | isl_map *map; |
845 | isl_multi_aff *compress; |
846 | isl_pw_multi_aff *decompress, *pma; |
847 | isl_multi_pw_aff *mpa; |
848 | isl_set *hull; |
849 | |
850 | map = isolate(set: isl_set_copy(set), pos); |
851 | pma = isl_pw_multi_aff_from_map(map); |
852 | domain = isl_pw_multi_aff_domain(pma: isl_pw_multi_aff_copy(pma)); |
853 | pma = isl_pw_multi_aff_gist(pma, set: domain); |
854 | space = isl_pw_multi_aff_get_domain_space(pma); |
855 | mpa = isl_multi_pw_aff_identity(space: isl_space_map_from_set(space)); |
856 | mpa = isl_multi_pw_aff_range_splice(multi1: mpa, pos, |
857 | multi2: isl_multi_pw_aff_from_pw_multi_aff(pma)); |
858 | decompress = isl_pw_multi_aff_from_multi_pw_aff(mpa); |
859 | space = isl_set_get_space(set); |
860 | compress = isl_multi_aff_project_out_map(space, type: isl_dim_set, first: pos, n: 1); |
861 | id = construct_compressed_id(set: uncompressed, node); |
862 | compress = isl_multi_aff_set_tuple_id(multi: compress, type: isl_dim_out, id); |
863 | space = isl_space_reverse(space: isl_multi_aff_get_space(multi: compress)); |
864 | decompress = isl_pw_multi_aff_reset_space(pwmaff: decompress, space); |
865 | pma = isl_pw_multi_aff_pullback_multi_aff( |
866 | pma: isl_pw_multi_aff_copy(pma: decompress), ma: isl_multi_aff_copy(multi: compress)); |
867 | hull = isl_map_range(map: isl_map_from_pw_multi_aff(pma)); |
868 | |
869 | isl_set_free(set); |
870 | |
871 | return compress_node(node, hull, compress, decompress); |
872 | } |
873 | |
874 | /* Compute the size of the compressed domain in each dimension and |
875 | * store the results in node->sizes. |
876 | * "uncompressed" is the original domain (without compression). |
877 | * |
878 | * First compress the domain if needed and then compute the size |
879 | * in each direction. |
880 | * If the domain is not convex, then the sizes are computed |
881 | * on a convex superset in order to avoid picking up sizes |
882 | * that are valid for the individual disjuncts, but not for |
883 | * the domain as a whole. |
884 | * |
885 | * If any of the sizes turns out to be zero, then this means |
886 | * that this dimension has a fixed value in terms of |
887 | * the other dimensions. Perform an (extra) compression |
888 | * to remove this dimension. |
889 | */ |
890 | static isl_stat compute_sizes(struct isl_sched_node *node, |
891 | __isl_keep isl_set *uncompressed) |
892 | { |
893 | int j; |
894 | isl_size n; |
895 | isl_multi_val *mv; |
896 | isl_set *set = isl_set_copy(set: uncompressed); |
897 | |
898 | if (node->compressed) |
899 | set = isl_set_preimage_pw_multi_aff(set, |
900 | pma: isl_pw_multi_aff_copy(pma: node->decompress)); |
901 | set = isl_set_from_basic_set(bset: isl_set_simple_hull(set)); |
902 | mv = isl_multi_val_zero(space: isl_set_get_space(set)); |
903 | n = isl_set_dim(set, type: isl_dim_set); |
904 | if (n < 0) |
905 | mv = isl_multi_val_free(multi: mv); |
906 | for (j = 0; j < n; ++j) { |
907 | isl_bool is_zero; |
908 | isl_val *v; |
909 | |
910 | v = compute_size(set: isl_set_copy(set), dim: j); |
911 | is_zero = isl_val_is_zero(v); |
912 | mv = isl_multi_val_set_val(multi: mv, pos: j, el: v); |
913 | if (is_zero >= 0 && is_zero) { |
914 | isl_multi_val_free(multi: mv); |
915 | if (project_out_fixed(node, uncompressed, set, pos: j) < 0) |
916 | return isl_stat_error; |
917 | return compute_sizes(node, uncompressed); |
918 | } |
919 | } |
920 | node->sizes = mv; |
921 | isl_set_free(set); |
922 | if (!node->sizes) |
923 | return isl_stat_error; |
924 | return isl_stat_ok; |
925 | } |
926 | |
927 | /* Compute the size of the instance set "set" of "node", after compression, |
928 | * as well as bounds on the corresponding coefficients, if needed. |
929 | * |
930 | * The sizes are needed when the schedule_treat_coalescing option is set. |
931 | * The bounds are needed when the schedule_treat_coalescing option or |
932 | * the schedule_max_coefficient option is set. |
933 | * |
934 | * If the schedule_treat_coalescing option is not set, then at most |
935 | * the bounds need to be set and this is done in set_max_coefficient. |
936 | * Otherwise, compute the size of the compressed domain |
937 | * in each direction and store the results in node->size. |
938 | * Finally, set the bounds on the coefficients based on the sizes |
939 | * and the schedule_max_coefficient option in compute_max_coefficient. |
940 | */ |
941 | static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node, |
942 | __isl_take isl_set *set) |
943 | { |
944 | isl_stat r; |
945 | |
946 | if (!isl_options_get_schedule_treat_coalescing(ctx)) { |
947 | isl_set_free(set); |
948 | return set_max_coefficient(ctx, node); |
949 | } |
950 | |
951 | r = compute_sizes(node, uncompressed: set); |
952 | isl_set_free(set); |
953 | if (r < 0) |
954 | return isl_stat_error; |
955 | return compute_max_coefficient(ctx, node); |
956 | } |
957 | |
958 | /* Add a new node to the graph representing the given instance set. |
959 | * "nvar" is the (possibly compressed) number of variables and |
960 | * may be smaller than then number of set variables in "set" |
961 | * if "compressed" is set. |
962 | * If "compressed" is set, then "hull" represents the constraints |
963 | * that were used to derive the compression, while "compress" and |
964 | * "decompress" map the original space to the compressed space and |
965 | * vice versa. |
966 | * If "compressed" is not set, then "hull", "compress" and "decompress" |
967 | * should be NULL. |
968 | * |
969 | * Compute the size of the instance set and bounds on the coefficients, |
970 | * if needed. |
971 | */ |
972 | static isl_stat add_node(struct isl_sched_graph *graph, |
973 | __isl_take isl_set *set, int nvar, int compressed, |
974 | __isl_take isl_set *hull, __isl_take isl_multi_aff *compress, |
975 | __isl_take isl_pw_multi_aff *decompress) |
976 | { |
977 | isl_size nparam; |
978 | isl_ctx *ctx; |
979 | isl_mat *sched; |
980 | isl_space *space; |
981 | int *coincident; |
982 | struct isl_sched_node *node; |
983 | |
984 | nparam = isl_set_dim(set, type: isl_dim_param); |
985 | if (nparam < 0) |
986 | goto error; |
987 | |
988 | ctx = isl_set_get_ctx(set); |
989 | if (!ctx->opt->schedule_parametric) |
990 | nparam = 0; |
991 | sched = isl_mat_alloc(ctx, n_row: 0, n_col: 1 + nparam + nvar); |
992 | node = &graph->node[graph->n]; |
993 | graph->n++; |
994 | space = isl_set_get_space(set); |
995 | node->space = space; |
996 | node->nvar = nvar; |
997 | node->nparam = nparam; |
998 | node->sched = sched; |
999 | node->sched_map = NULL; |
1000 | coincident = isl_calloc_array(ctx, int, graph->max_row); |
1001 | node->coincident = coincident; |
1002 | node->compressed = compressed; |
1003 | node->hull = hull; |
1004 | node->compress = compress; |
1005 | node->decompress = decompress; |
1006 | if (compute_sizes_and_max(ctx, node, set) < 0) |
1007 | return isl_stat_error; |
1008 | |
1009 | if (!space || !sched || (graph->max_row && !coincident)) |
1010 | return isl_stat_error; |
1011 | if (compressed && (!hull || !compress || !decompress)) |
1012 | return isl_stat_error; |
1013 | |
1014 | return isl_stat_ok; |
1015 | error: |
1016 | isl_set_free(set); |
1017 | isl_set_free(set: hull); |
1018 | isl_multi_aff_free(multi: compress); |
1019 | isl_pw_multi_aff_free(pma: decompress); |
1020 | return isl_stat_error; |
1021 | } |
1022 | |
1023 | /* Add a new node to the graph representing the given set. |
1024 | * |
1025 | * If any of the set variables is defined by an equality, then |
1026 | * we perform variable compression such that we can perform |
1027 | * the scheduling on the compressed domain. |
1028 | * In this case, an identifier is used that references the new node |
1029 | * such that each compressed space is unique and |
1030 | * such that the node can be recovered from the compressed space. |
1031 | */ |
1032 | static isl_stat (__isl_take isl_set *set, void *user) |
1033 | { |
1034 | isl_size nvar; |
1035 | isl_bool has_equality; |
1036 | isl_id *id; |
1037 | isl_basic_set *hull; |
1038 | isl_set *hull_set; |
1039 | isl_morph *morph; |
1040 | isl_multi_aff *compress, *decompress_ma; |
1041 | isl_pw_multi_aff *decompress; |
1042 | struct isl_sched_graph *graph = user; |
1043 | |
1044 | hull = isl_set_affine_hull(set: isl_set_copy(set)); |
1045 | hull = isl_basic_set_remove_divs(bset: hull); |
1046 | nvar = isl_set_dim(set, type: isl_dim_set); |
1047 | has_equality = has_any_defining_equality(bset: hull); |
1048 | |
1049 | if (nvar < 0 || has_equality < 0) |
1050 | goto error; |
1051 | if (!has_equality) { |
1052 | isl_basic_set_free(bset: hull); |
1053 | return add_node(graph, set, nvar, compressed: 0, NULL, NULL, NULL); |
1054 | } |
1055 | |
1056 | id = construct_compressed_id(set, node: &graph->node[graph->n]); |
1057 | morph = isl_basic_set_variable_compression_with_id(bset: hull, id); |
1058 | isl_id_free(id); |
1059 | nvar = isl_morph_ran_dim(morph, type: isl_dim_set); |
1060 | if (nvar < 0) |
1061 | set = isl_set_free(set); |
1062 | compress = isl_morph_get_var_multi_aff(morph); |
1063 | morph = isl_morph_inverse(morph); |
1064 | decompress_ma = isl_morph_get_var_multi_aff(morph); |
1065 | decompress = isl_pw_multi_aff_from_multi_aff(ma: decompress_ma); |
1066 | isl_morph_free(morph); |
1067 | |
1068 | hull_set = isl_set_from_basic_set(bset: hull); |
1069 | return add_node(graph, set, nvar, compressed: 1, hull: hull_set, compress, decompress); |
1070 | error: |
1071 | isl_basic_set_free(bset: hull); |
1072 | isl_set_free(set); |
1073 | return isl_stat_error; |
1074 | } |
1075 | |
1076 | struct { |
1077 | enum isl_edge_type ; |
1078 | struct isl_sched_graph *; |
1079 | }; |
1080 | |
1081 | /* Merge edge2 into edge1, freeing the contents of edge2. |
1082 | * Return 0 on success and -1 on failure. |
1083 | * |
1084 | * edge1 and edge2 are assumed to have the same value for the map field. |
1085 | */ |
1086 | static int merge_edge(struct isl_sched_edge *edge1, |
1087 | struct isl_sched_edge *edge2) |
1088 | { |
1089 | edge1->types |= edge2->types; |
1090 | isl_map_free(map: edge2->map); |
1091 | |
1092 | if (isl_sched_edge_is_condition(edge: edge2)) { |
1093 | if (!edge1->tagged_condition) |
1094 | edge1->tagged_condition = edge2->tagged_condition; |
1095 | else |
1096 | edge1->tagged_condition = |
1097 | isl_union_map_union(umap1: edge1->tagged_condition, |
1098 | umap2: edge2->tagged_condition); |
1099 | } |
1100 | |
1101 | if (isl_sched_edge_is_conditional_validity(edge: edge2)) { |
1102 | if (!edge1->tagged_validity) |
1103 | edge1->tagged_validity = edge2->tagged_validity; |
1104 | else |
1105 | edge1->tagged_validity = |
1106 | isl_union_map_union(umap1: edge1->tagged_validity, |
1107 | umap2: edge2->tagged_validity); |
1108 | } |
1109 | |
1110 | if (isl_sched_edge_is_condition(edge: edge2) && !edge1->tagged_condition) |
1111 | return -1; |
1112 | if (isl_sched_edge_is_conditional_validity(edge: edge2) && |
1113 | !edge1->tagged_validity) |
1114 | return -1; |
1115 | |
1116 | return 0; |
1117 | } |
1118 | |
1119 | /* Insert dummy tags in domain and range of "map". |
1120 | * |
1121 | * In particular, if "map" is of the form |
1122 | * |
1123 | * A -> B |
1124 | * |
1125 | * then return |
1126 | * |
1127 | * [A -> dummy_tag] -> [B -> dummy_tag] |
1128 | * |
1129 | * where the dummy_tags are identical and equal to any dummy tags |
1130 | * introduced by any other call to this function. |
1131 | */ |
1132 | static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map) |
1133 | { |
1134 | static char dummy; |
1135 | isl_ctx *ctx; |
1136 | isl_id *id; |
1137 | isl_space *space; |
1138 | isl_set *domain, *range; |
1139 | |
1140 | ctx = isl_map_get_ctx(map); |
1141 | |
1142 | id = isl_id_alloc(ctx, NULL, user: &dummy); |
1143 | space = isl_space_params(space: isl_map_get_space(map)); |
1144 | space = isl_space_set_from_params(space); |
1145 | space = isl_space_set_tuple_id(space, type: isl_dim_set, id); |
1146 | space = isl_space_map_from_set(space); |
1147 | |
1148 | domain = isl_map_wrap(map); |
1149 | range = isl_map_wrap(map: isl_map_universe(space)); |
1150 | map = isl_map_from_domain_and_range(domain, range); |
1151 | map = isl_map_zip(map); |
1152 | |
1153 | return map; |
1154 | } |
1155 | |
1156 | /* Given that at least one of "src" or "dst" is compressed, return |
1157 | * a map between the spaces of these nodes restricted to the affine |
1158 | * hull that was used in the compression. |
1159 | */ |
1160 | static __isl_give isl_map *(struct isl_sched_node *src, |
1161 | struct isl_sched_node *dst) |
1162 | { |
1163 | isl_set *dom, *ran; |
1164 | |
1165 | if (src->compressed) |
1166 | dom = isl_set_copy(set: src->hull); |
1167 | else |
1168 | dom = isl_set_universe(space: isl_space_copy(space: src->space)); |
1169 | if (dst->compressed) |
1170 | ran = isl_set_copy(set: dst->hull); |
1171 | else |
1172 | ran = isl_set_universe(space: isl_space_copy(space: dst->space)); |
1173 | |
1174 | return isl_map_from_domain_and_range(domain: dom, range: ran); |
1175 | } |
1176 | |
1177 | /* Intersect the domains of the nested relations in domain and range |
1178 | * of "tagged" with "map". |
1179 | */ |
1180 | static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged, |
1181 | __isl_keep isl_map *map) |
1182 | { |
1183 | isl_set *set; |
1184 | |
1185 | tagged = isl_map_zip(map: tagged); |
1186 | set = isl_map_wrap(map: isl_map_copy(map)); |
1187 | tagged = isl_map_intersect_domain(map: tagged, set); |
1188 | tagged = isl_map_zip(map: tagged); |
1189 | return tagged; |
1190 | } |
1191 | |
1192 | /* Return a pointer to the node that lives in the domain space of "map", |
1193 | * an invalid node if there is no such node, or NULL in case of error. |
1194 | */ |
1195 | static struct isl_sched_node *find_domain_node(isl_ctx *ctx, |
1196 | struct isl_sched_graph *graph, __isl_keep isl_map *map) |
1197 | { |
1198 | struct isl_sched_node *node; |
1199 | isl_space *space; |
1200 | |
1201 | space = isl_space_domain(space: isl_map_get_space(map)); |
1202 | node = isl_sched_graph_find_node(ctx, graph, space); |
1203 | isl_space_free(space); |
1204 | |
1205 | return node; |
1206 | } |
1207 | |
1208 | /* Return a pointer to the node that lives in the range space of "map", |
1209 | * an invalid node if there is no such node, or NULL in case of error. |
1210 | */ |
1211 | static struct isl_sched_node *find_range_node(isl_ctx *ctx, |
1212 | struct isl_sched_graph *graph, __isl_keep isl_map *map) |
1213 | { |
1214 | struct isl_sched_node *node; |
1215 | isl_space *space; |
1216 | |
1217 | space = isl_space_range(space: isl_map_get_space(map)); |
1218 | node = isl_sched_graph_find_node(ctx, graph, space); |
1219 | isl_space_free(space); |
1220 | |
1221 | return node; |
1222 | } |
1223 | |
1224 | /* Refrain from adding a new edge based on "map". |
1225 | * Instead, just free the map. |
1226 | * "tagged" is either a copy of "map" with additional tags or NULL. |
1227 | */ |
1228 | static isl_stat skip_edge(__isl_take isl_map *map, __isl_take isl_map *tagged) |
1229 | { |
1230 | isl_map_free(map); |
1231 | isl_map_free(map: tagged); |
1232 | |
1233 | return isl_stat_ok; |
1234 | } |
1235 | |
1236 | /* Add a new edge to the graph based on the given map |
1237 | * and add it to data->graph->edge_table[data->type]. |
1238 | * If a dependence relation of a given type happens to be identical |
1239 | * to one of the dependence relations of a type that was added before, |
1240 | * then we don't create a new edge, but instead mark the original edge |
1241 | * as also representing a dependence of the current type. |
1242 | * |
1243 | * Edges of type isl_edge_condition or isl_edge_conditional_validity |
1244 | * may be specified as "tagged" dependence relations. That is, "map" |
1245 | * may contain elements (i -> a) -> (j -> b), where i -> j denotes |
1246 | * the dependence on iterations and a and b are tags. |
1247 | * edge->map is set to the relation containing the elements i -> j, |
1248 | * while edge->tagged_condition and edge->tagged_validity contain |
1249 | * the union of all the "map" relations |
1250 | * for which extract_edge is called that result in the same edge->map. |
1251 | * |
1252 | * If the source or the destination node is compressed, then |
1253 | * intersect both "map" and "tagged" with the constraints that |
1254 | * were used to construct the compression. |
1255 | * This ensures that there are no schedule constraints defined |
1256 | * outside of these domains, while the scheduler no longer has |
1257 | * any control over those outside parts. |
1258 | */ |
1259 | static isl_stat (__isl_take isl_map *map, void *user) |
1260 | { |
1261 | isl_bool empty; |
1262 | isl_ctx *ctx = isl_map_get_ctx(map); |
1263 | struct isl_extract_edge_data *data = user; |
1264 | struct isl_sched_graph *graph = data->graph; |
1265 | struct isl_sched_node *src, *dst; |
1266 | struct isl_sched_edge *edge; |
1267 | isl_map *tagged = NULL; |
1268 | |
1269 | if (data->type == isl_edge_condition || |
1270 | data->type == isl_edge_conditional_validity) { |
1271 | if (isl_map_can_zip(map)) { |
1272 | tagged = isl_map_copy(map); |
1273 | map = isl_set_unwrap(set: isl_map_domain(bmap: isl_map_zip(map))); |
1274 | } else { |
1275 | tagged = insert_dummy_tags(map: isl_map_copy(map)); |
1276 | } |
1277 | } |
1278 | |
1279 | src = find_domain_node(ctx, graph, map); |
1280 | dst = find_range_node(ctx, graph, map); |
1281 | |
1282 | if (!src || !dst) |
1283 | goto error; |
1284 | if (!isl_sched_graph_is_node(graph, node: src) || |
1285 | !isl_sched_graph_is_node(graph, node: dst)) |
1286 | return skip_edge(map, tagged); |
1287 | |
1288 | if (src->compressed || dst->compressed) { |
1289 | isl_map *hull; |
1290 | hull = extract_hull(src, dst); |
1291 | if (tagged) |
1292 | tagged = map_intersect_domains(tagged, map: hull); |
1293 | map = isl_map_intersect(map1: map, map2: hull); |
1294 | } |
1295 | |
1296 | empty = isl_map_plain_is_empty(map); |
1297 | if (empty < 0) |
1298 | goto error; |
1299 | if (empty) |
1300 | return skip_edge(map, tagged); |
1301 | |
1302 | graph->edge[graph->n_edge].src = src; |
1303 | graph->edge[graph->n_edge].dst = dst; |
1304 | graph->edge[graph->n_edge].map = map; |
1305 | graph->edge[graph->n_edge].types = 0; |
1306 | graph->edge[graph->n_edge].tagged_condition = NULL; |
1307 | graph->edge[graph->n_edge].tagged_validity = NULL; |
1308 | set_type(edge: &graph->edge[graph->n_edge], type: data->type); |
1309 | if (data->type == isl_edge_condition) |
1310 | graph->edge[graph->n_edge].tagged_condition = |
1311 | isl_union_map_from_map(map: tagged); |
1312 | if (data->type == isl_edge_conditional_validity) |
1313 | graph->edge[graph->n_edge].tagged_validity = |
1314 | isl_union_map_from_map(map: tagged); |
1315 | |
1316 | edge = graph_find_matching_edge(graph, model: &graph->edge[graph->n_edge]); |
1317 | if (!edge) { |
1318 | graph->n_edge++; |
1319 | return isl_stat_error; |
1320 | } |
1321 | if (edge == &graph->edge[graph->n_edge]) |
1322 | return graph_edge_table_add(ctx, graph, type: data->type, |
1323 | edge: &graph->edge[graph->n_edge++]); |
1324 | |
1325 | if (merge_edge(edge1: edge, edge2: &graph->edge[graph->n_edge]) < 0) |
1326 | return isl_stat_error; |
1327 | |
1328 | return graph_edge_table_add(ctx, graph, type: data->type, edge); |
1329 | error: |
1330 | isl_map_free(map); |
1331 | isl_map_free(map: tagged); |
1332 | return isl_stat_error; |
1333 | } |
1334 | |
1335 | /* Initialize the schedule graph "graph" from the schedule constraints "sc". |
1336 | * |
1337 | * The context is included in the domain before the nodes of |
1338 | * the graphs are extracted in order to be able to exploit |
1339 | * any possible additional equalities. |
1340 | * Note that this intersection is only performed locally here. |
1341 | */ |
1342 | isl_stat isl_sched_graph_init(struct isl_sched_graph *graph, |
1343 | __isl_keep isl_schedule_constraints *sc) |
1344 | { |
1345 | isl_ctx *ctx; |
1346 | isl_union_set *domain; |
1347 | isl_union_map *c; |
1348 | struct isl_extract_edge_data data; |
1349 | enum isl_edge_type i; |
1350 | isl_stat r; |
1351 | isl_size n; |
1352 | |
1353 | if (!sc) |
1354 | return isl_stat_error; |
1355 | |
1356 | ctx = isl_schedule_constraints_get_ctx(sc); |
1357 | |
1358 | domain = isl_schedule_constraints_get_domain(sc); |
1359 | n = isl_union_set_n_set(uset: domain); |
1360 | graph->n = n; |
1361 | isl_union_set_free(uset: domain); |
1362 | if (n < 0) |
1363 | return isl_stat_error; |
1364 | |
1365 | n = isl_schedule_constraints_n_map(sc); |
1366 | if (n < 0 || graph_alloc(ctx, graph, n_node: graph->n, n_edge: n) < 0) |
1367 | return isl_stat_error; |
1368 | |
1369 | if (compute_max_row(graph, sc) < 0) |
1370 | return isl_stat_error; |
1371 | graph->root = graph; |
1372 | graph->n = 0; |
1373 | domain = isl_schedule_constraints_get_domain(sc); |
1374 | domain = isl_union_set_intersect_params(uset: domain, |
1375 | set: isl_schedule_constraints_get_context(sc)); |
1376 | r = isl_union_set_foreach_set(uset: domain, fn: &extract_node, user: graph); |
1377 | isl_union_set_free(uset: domain); |
1378 | if (r < 0) |
1379 | return isl_stat_error; |
1380 | if (graph_init_table(ctx, graph) < 0) |
1381 | return isl_stat_error; |
1382 | for (i = isl_edge_first; i <= isl_edge_last; ++i) { |
1383 | isl_size n; |
1384 | |
1385 | c = isl_schedule_constraints_get(sc, type: i); |
1386 | n = isl_union_map_n_map(umap: c); |
1387 | graph->max_edge[i] = n; |
1388 | isl_union_map_free(umap: c); |
1389 | if (n < 0) |
1390 | return isl_stat_error; |
1391 | } |
1392 | if (graph_init_edge_tables(ctx, graph) < 0) |
1393 | return isl_stat_error; |
1394 | graph->n_edge = 0; |
1395 | data.graph = graph; |
1396 | for (i = isl_edge_first; i <= isl_edge_last; ++i) { |
1397 | isl_stat r; |
1398 | |
1399 | data.type = i; |
1400 | c = isl_schedule_constraints_get(sc, type: i); |
1401 | r = isl_union_map_foreach_map(umap: c, fn: &extract_edge, user: &data); |
1402 | isl_union_map_free(umap: c); |
1403 | if (r < 0) |
1404 | return isl_stat_error; |
1405 | } |
1406 | |
1407 | return isl_stat_ok; |
1408 | } |
1409 | |
1410 | /* Check whether there is any dependence from node[j] to node[i] |
1411 | * or from node[i] to node[j]. |
1412 | */ |
1413 | static isl_bool node_follows_weak(int i, int j, void *user) |
1414 | { |
1415 | isl_bool f; |
1416 | struct isl_sched_graph *graph = user; |
1417 | |
1418 | f = graph_has_any_edge(graph, src: &graph->node[j], dst: &graph->node[i]); |
1419 | if (f < 0 || f) |
1420 | return f; |
1421 | return graph_has_any_edge(graph, src: &graph->node[i], dst: &graph->node[j]); |
1422 | } |
1423 | |
1424 | /* Check whether there is a (conditional) validity dependence from node[j] |
1425 | * to node[i], forcing node[i] to follow node[j]. |
1426 | */ |
1427 | static isl_bool node_follows_strong(int i, int j, void *user) |
1428 | { |
1429 | struct isl_sched_graph *graph = user; |
1430 | |
1431 | return isl_sched_graph_has_validity_edge(graph, src: &graph->node[j], |
1432 | dst: &graph->node[i]); |
1433 | } |
1434 | |
1435 | /* Use Tarjan's algorithm for computing the strongly connected components |
1436 | * in the dependence graph only considering those edges defined by "follows". |
1437 | */ |
1438 | isl_stat isl_sched_graph_detect_ccs(isl_ctx *ctx, |
1439 | struct isl_sched_graph *graph, |
1440 | isl_bool (*follows)(int i, int j, void *user)) |
1441 | { |
1442 | int i, n; |
1443 | struct isl_tarjan_graph *g = NULL; |
1444 | |
1445 | g = isl_tarjan_graph_init(ctx, len: graph->n, follows, user: graph); |
1446 | if (!g) |
1447 | return isl_stat_error; |
1448 | |
1449 | graph->scc = 0; |
1450 | i = 0; |
1451 | n = graph->n; |
1452 | while (n) { |
1453 | while (g->order[i] != -1) { |
1454 | graph->node[g->order[i]].scc = graph->scc; |
1455 | --n; |
1456 | ++i; |
1457 | } |
1458 | ++i; |
1459 | graph->scc++; |
1460 | } |
1461 | |
1462 | isl_tarjan_graph_free(g); |
1463 | |
1464 | return isl_stat_ok; |
1465 | } |
1466 | |
1467 | /* Apply Tarjan's algorithm to detect the strongly connected components |
1468 | * in the dependence graph. |
1469 | * Only consider the (conditional) validity dependences and clear "weak". |
1470 | */ |
1471 | static isl_stat detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph) |
1472 | { |
1473 | graph->weak = 0; |
1474 | return isl_sched_graph_detect_ccs(ctx, graph, follows: &node_follows_strong); |
1475 | } |
1476 | |
1477 | /* Apply Tarjan's algorithm to detect the (weakly) connected components |
1478 | * in the dependence graph. |
1479 | * Consider all dependences and set "weak". |
1480 | */ |
1481 | static isl_stat detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph) |
1482 | { |
1483 | graph->weak = 1; |
1484 | return isl_sched_graph_detect_ccs(ctx, graph, follows: &node_follows_weak); |
1485 | } |
1486 | |
1487 | static int cmp_scc(const void *a, const void *b, void *data) |
1488 | { |
1489 | struct isl_sched_graph *graph = data; |
1490 | const int *i1 = a; |
1491 | const int *i2 = b; |
1492 | |
1493 | return graph->node[*i1].scc - graph->node[*i2].scc; |
1494 | } |
1495 | |
1496 | /* Sort the elements of graph->sorted according to the corresponding SCCs. |
1497 | */ |
1498 | static int sort_sccs(struct isl_sched_graph *graph) |
1499 | { |
1500 | return isl_sort(pbase: graph->sorted, total_elems: graph->n, size: sizeof(int), cmp: &cmp_scc, arg: graph); |
1501 | } |
1502 | |
1503 | /* Return a non-parametric set in the compressed space of "node" that is |
1504 | * bounded by the size in each direction |
1505 | * |
1506 | * { [x] : -S_i <= x_i <= S_i } |
1507 | * |
1508 | * If S_i is infinity in direction i, then there are no constraints |
1509 | * in that direction. |
1510 | * |
1511 | * Cache the result in node->bounds. |
1512 | */ |
1513 | static __isl_give isl_basic_set *get_size_bounds(struct isl_sched_node *node) |
1514 | { |
1515 | isl_space *space; |
1516 | isl_basic_set *bounds; |
1517 | int i; |
1518 | |
1519 | if (node->bounds) |
1520 | return isl_basic_set_copy(bset: node->bounds); |
1521 | |
1522 | if (node->compressed) |
1523 | space = isl_pw_multi_aff_get_domain_space(pma: node->decompress); |
1524 | else |
1525 | space = isl_space_copy(space: node->space); |
1526 | space = isl_space_drop_all_params(space); |
1527 | bounds = isl_basic_set_universe(space); |
1528 | |
1529 | for (i = 0; i < node->nvar; ++i) { |
1530 | isl_val *size; |
1531 | |
1532 | size = isl_multi_val_get_val(multi: node->sizes, pos: i); |
1533 | if (!size) |
1534 | return isl_basic_set_free(bset: bounds); |
1535 | if (!isl_val_is_int(v: size)) { |
1536 | isl_val_free(v: size); |
1537 | continue; |
1538 | } |
1539 | bounds = isl_basic_set_upper_bound_val(bset: bounds, type: isl_dim_set, pos: i, |
1540 | value: isl_val_copy(v: size)); |
1541 | bounds = isl_basic_set_lower_bound_val(bset: bounds, type: isl_dim_set, pos: i, |
1542 | value: isl_val_neg(v: size)); |
1543 | } |
1544 | |
1545 | node->bounds = isl_basic_set_copy(bset: bounds); |
1546 | return bounds; |
1547 | } |
1548 | |
1549 | /* Compress the dependence relation "map", if needed, i.e., |
1550 | * when the source node "src" and/or the destination node "dst" |
1551 | * has been compressed. |
1552 | */ |
1553 | static __isl_give isl_map *compress(__isl_take isl_map *map, |
1554 | struct isl_sched_node *src, struct isl_sched_node *dst) |
1555 | { |
1556 | if (src->compressed) |
1557 | map = isl_map_preimage_domain_pw_multi_aff(map, |
1558 | pma: isl_pw_multi_aff_copy(pma: src->decompress)); |
1559 | if (dst->compressed) |
1560 | map = isl_map_preimage_range_pw_multi_aff(map, |
1561 | pma: isl_pw_multi_aff_copy(pma: dst->decompress)); |
1562 | return map; |
1563 | } |
1564 | |
1565 | /* Drop some constraints from "delta" that could be exploited |
1566 | * to construct loop coalescing schedules. |
1567 | * In particular, drop those constraint that bound the difference |
1568 | * to the size of the domain. |
1569 | * First project out the parameters to improve the effectiveness. |
1570 | */ |
1571 | static __isl_give isl_set *drop_coalescing_constraints( |
1572 | __isl_take isl_set *delta, struct isl_sched_node *node) |
1573 | { |
1574 | isl_size nparam; |
1575 | isl_basic_set *bounds; |
1576 | |
1577 | nparam = isl_set_dim(set: delta, type: isl_dim_param); |
1578 | if (nparam < 0) |
1579 | return isl_set_free(set: delta); |
1580 | |
1581 | bounds = get_size_bounds(node); |
1582 | |
1583 | delta = isl_set_project_out(set: delta, type: isl_dim_param, first: 0, n: nparam); |
1584 | delta = isl_set_remove_divs(set: delta); |
1585 | delta = isl_set_plain_gist_basic_set(set: delta, context: bounds); |
1586 | return delta; |
1587 | } |
1588 | |
1589 | /* Given a dependence relation R from "node" to itself, |
1590 | * construct the set of coefficients of valid constraints for elements |
1591 | * in that dependence relation. |
1592 | * In particular, the result contains tuples of coefficients |
1593 | * c_0, c_n, c_x such that |
1594 | * |
1595 | * c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R |
1596 | * |
1597 | * or, equivalently, |
1598 | * |
1599 | * c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R } |
1600 | * |
1601 | * We choose here to compute the dual of delta R. |
1602 | * Alternatively, we could have computed the dual of R, resulting |
1603 | * in a set of tuples c_0, c_n, c_x, c_y, and then |
1604 | * plugged in (c_0, c_n, c_x, -c_x). |
1605 | * |
1606 | * If "need_param" is set, then the resulting coefficients effectively |
1607 | * include coefficients for the parameters c_n. Otherwise, they may |
1608 | * have been projected out already. |
1609 | * Since the constraints may be different for these two cases, |
1610 | * they are stored in separate caches. |
1611 | * In particular, if no parameter coefficients are required and |
1612 | * the schedule_treat_coalescing option is set, then the parameters |
1613 | * are projected out and some constraints that could be exploited |
1614 | * to construct coalescing schedules are removed before the dual |
1615 | * is computed. |
1616 | * |
1617 | * If "node" has been compressed, then the dependence relation |
1618 | * is also compressed before the set of coefficients is computed. |
1619 | */ |
1620 | static __isl_give isl_basic_set *intra_coefficients( |
1621 | struct isl_sched_graph *graph, struct isl_sched_node *node, |
1622 | __isl_take isl_map *map, int need_param) |
1623 | { |
1624 | isl_ctx *ctx; |
1625 | isl_set *delta; |
1626 | isl_map *key; |
1627 | isl_basic_set *coef; |
1628 | isl_maybe_isl_basic_set m; |
1629 | isl_map_to_basic_set **hmap = &graph->intra_hmap; |
1630 | int treat; |
1631 | |
1632 | if (!map) |
1633 | return NULL; |
1634 | |
1635 | ctx = isl_map_get_ctx(map); |
1636 | treat = !need_param && isl_options_get_schedule_treat_coalescing(ctx); |
1637 | if (!treat) |
1638 | hmap = &graph->intra_hmap_param; |
1639 | m = isl_map_to_basic_set_try_get(hmap: *hmap, key: map); |
1640 | if (m.valid < 0 || m.valid) { |
1641 | isl_map_free(map); |
1642 | return m.value; |
1643 | } |
1644 | |
1645 | key = isl_map_copy(map); |
1646 | map = compress(map, src: node, dst: node); |
1647 | delta = isl_map_deltas(map); |
1648 | if (treat) |
1649 | delta = drop_coalescing_constraints(delta, node); |
1650 | delta = isl_set_remove_divs(set: delta); |
1651 | coef = isl_set_coefficients(set: delta); |
1652 | *hmap = isl_map_to_basic_set_set(hmap: *hmap, key, val: isl_basic_set_copy(bset: coef)); |
1653 | |
1654 | return coef; |
1655 | } |
1656 | |
1657 | /* Given a dependence relation R, construct the set of coefficients |
1658 | * of valid constraints for elements in that dependence relation. |
1659 | * In particular, the result contains tuples of coefficients |
1660 | * c_0, c_n, c_x, c_y such that |
1661 | * |
1662 | * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R |
1663 | * |
1664 | * If the source or destination nodes of "edge" have been compressed, |
1665 | * then the dependence relation is also compressed before |
1666 | * the set of coefficients is computed. |
1667 | */ |
1668 | static __isl_give isl_basic_set *inter_coefficients( |
1669 | struct isl_sched_graph *graph, struct isl_sched_edge *edge, |
1670 | __isl_take isl_map *map) |
1671 | { |
1672 | isl_set *set; |
1673 | isl_map *key; |
1674 | isl_basic_set *coef; |
1675 | isl_maybe_isl_basic_set m; |
1676 | |
1677 | m = isl_map_to_basic_set_try_get(hmap: graph->inter_hmap, key: map); |
1678 | if (m.valid < 0 || m.valid) { |
1679 | isl_map_free(map); |
1680 | return m.value; |
1681 | } |
1682 | |
1683 | key = isl_map_copy(map); |
1684 | map = compress(map, src: edge->src, dst: edge->dst); |
1685 | set = isl_map_wrap(map: isl_map_remove_divs(map)); |
1686 | coef = isl_set_coefficients(set); |
1687 | graph->inter_hmap = isl_map_to_basic_set_set(hmap: graph->inter_hmap, key, |
1688 | val: isl_basic_set_copy(bset: coef)); |
1689 | |
1690 | return coef; |
1691 | } |
1692 | |
1693 | /* Return the position of the coefficients of the variables in |
1694 | * the coefficients constraints "coef". |
1695 | * |
1696 | * The space of "coef" is of the form |
1697 | * |
1698 | * { coefficients[[cst, params] -> S] } |
1699 | * |
1700 | * Return the position of S. |
1701 | */ |
1702 | static isl_size coef_var_offset(__isl_keep isl_basic_set *coef) |
1703 | { |
1704 | isl_size offset; |
1705 | isl_space *space; |
1706 | |
1707 | space = isl_space_unwrap(space: isl_basic_set_get_space(bset: coef)); |
1708 | offset = isl_space_dim(space, type: isl_dim_in); |
1709 | isl_space_free(space); |
1710 | |
1711 | return offset; |
1712 | } |
1713 | |
1714 | /* Return the offset of the coefficient of the constant term of "node" |
1715 | * within the (I)LP. |
1716 | * |
1717 | * Within each node, the coefficients have the following order: |
1718 | * - positive and negative parts of c_i_x |
1719 | * - c_i_n (if parametric) |
1720 | * - c_i_0 |
1721 | */ |
1722 | static int node_cst_coef_offset(struct isl_sched_node *node) |
1723 | { |
1724 | return node->start + 2 * node->nvar + node->nparam; |
1725 | } |
1726 | |
1727 | /* Return the offset of the coefficients of the parameters of "node" |
1728 | * within the (I)LP. |
1729 | * |
1730 | * Within each node, the coefficients have the following order: |
1731 | * - positive and negative parts of c_i_x |
1732 | * - c_i_n (if parametric) |
1733 | * - c_i_0 |
1734 | */ |
1735 | static int node_par_coef_offset(struct isl_sched_node *node) |
1736 | { |
1737 | return node->start + 2 * node->nvar; |
1738 | } |
1739 | |
1740 | /* Return the offset of the coefficients of the variables of "node" |
1741 | * within the (I)LP. |
1742 | * |
1743 | * Within each node, the coefficients have the following order: |
1744 | * - positive and negative parts of c_i_x |
1745 | * - c_i_n (if parametric) |
1746 | * - c_i_0 |
1747 | */ |
1748 | static int node_var_coef_offset(struct isl_sched_node *node) |
1749 | { |
1750 | return node->start; |
1751 | } |
1752 | |
1753 | /* Return the position of the pair of variables encoding |
1754 | * coefficient "i" of "node". |
1755 | * |
1756 | * The order of these variable pairs is the opposite of |
1757 | * that of the coefficients, with 2 variables per coefficient. |
1758 | */ |
1759 | static int node_var_coef_pos(struct isl_sched_node *node, int i) |
1760 | { |
1761 | return node_var_coef_offset(node) + 2 * (node->nvar - 1 - i); |
1762 | } |
1763 | |
1764 | /* Construct an isl_dim_map for mapping constraints on coefficients |
1765 | * for "node" to the corresponding positions in graph->lp. |
1766 | * "offset" is the offset of the coefficients for the variables |
1767 | * in the input constraints. |
1768 | * "s" is the sign of the mapping. |
1769 | * |
1770 | * The input constraints are given in terms of the coefficients |
1771 | * (c_0, c_x) or (c_0, c_n, c_x). |
1772 | * The mapping produced by this function essentially plugs in |
1773 | * (0, c_i_x^+ - c_i_x^-) if s = 1 and |
1774 | * (0, -c_i_x^+ + c_i_x^-) if s = -1 or |
1775 | * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and |
1776 | * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1. |
1777 | * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart. |
1778 | * Furthermore, the order of these pairs is the opposite of that |
1779 | * of the corresponding coefficients. |
1780 | * |
1781 | * The caller can extend the mapping to also map the other coefficients |
1782 | * (and therefore not plug in 0). |
1783 | */ |
1784 | static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx, |
1785 | struct isl_sched_graph *graph, struct isl_sched_node *node, |
1786 | int offset, int s) |
1787 | { |
1788 | int pos; |
1789 | isl_size total; |
1790 | isl_dim_map *dim_map; |
1791 | |
1792 | total = isl_basic_set_dim(bset: graph->lp, type: isl_dim_all); |
1793 | if (!node || total < 0) |
1794 | return NULL; |
1795 | |
1796 | pos = node_var_coef_pos(node, i: 0); |
1797 | dim_map = isl_dim_map_alloc(ctx, len: total); |
1798 | isl_dim_map_range(dim_map, dst_pos: pos, dst_stride: -2, src_pos: offset, src_stride: 1, n: node->nvar, sign: -s); |
1799 | isl_dim_map_range(dim_map, dst_pos: pos + 1, dst_stride: -2, src_pos: offset, src_stride: 1, n: node->nvar, sign: s); |
1800 | |
1801 | return dim_map; |
1802 | } |
1803 | |
1804 | /* Construct an isl_dim_map for mapping constraints on coefficients |
1805 | * for "src" (node i) and "dst" (node j) to the corresponding positions |
1806 | * in graph->lp. |
1807 | * "offset" is the offset of the coefficients for the variables of "src" |
1808 | * in the input constraints. |
1809 | * "s" is the sign of the mapping. |
1810 | * |
1811 | * The input constraints are given in terms of the coefficients |
1812 | * (c_0, c_n, c_x, c_y). |
1813 | * The mapping produced by this function essentially plugs in |
1814 | * (c_j_0 - c_i_0, c_j_n - c_i_n, |
1815 | * -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and |
1816 | * (-c_j_0 + c_i_0, -c_j_n + c_i_n, |
1817 | * c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1. |
1818 | * In graph->lp, the c_*^- appear before their c_*^+ counterpart. |
1819 | * Furthermore, the order of these pairs is the opposite of that |
1820 | * of the corresponding coefficients. |
1821 | * |
1822 | * The caller can further extend the mapping. |
1823 | */ |
1824 | static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx, |
1825 | struct isl_sched_graph *graph, struct isl_sched_node *src, |
1826 | struct isl_sched_node *dst, int offset, int s) |
1827 | { |
1828 | int pos; |
1829 | isl_size total; |
1830 | isl_dim_map *dim_map; |
1831 | |
1832 | total = isl_basic_set_dim(bset: graph->lp, type: isl_dim_all); |
1833 | if (!src || !dst || total < 0) |
1834 | return NULL; |
1835 | |
1836 | dim_map = isl_dim_map_alloc(ctx, len: total); |
1837 | |
1838 | pos = node_cst_coef_offset(node: dst); |
1839 | isl_dim_map_range(dim_map, dst_pos: pos, dst_stride: 0, src_pos: 0, src_stride: 0, n: 1, sign: s); |
1840 | pos = node_par_coef_offset(node: dst); |
1841 | isl_dim_map_range(dim_map, dst_pos: pos, dst_stride: 1, src_pos: 1, src_stride: 1, n: dst->nparam, sign: s); |
1842 | pos = node_var_coef_pos(node: dst, i: 0); |
1843 | isl_dim_map_range(dim_map, dst_pos: pos, dst_stride: -2, src_pos: offset + src->nvar, src_stride: 1, |
1844 | n: dst->nvar, sign: -s); |
1845 | isl_dim_map_range(dim_map, dst_pos: pos + 1, dst_stride: -2, src_pos: offset + src->nvar, src_stride: 1, |
1846 | n: dst->nvar, sign: s); |
1847 | |
1848 | pos = node_cst_coef_offset(node: src); |
1849 | isl_dim_map_range(dim_map, dst_pos: pos, dst_stride: 0, src_pos: 0, src_stride: 0, n: 1, sign: -s); |
1850 | pos = node_par_coef_offset(node: src); |
1851 | isl_dim_map_range(dim_map, dst_pos: pos, dst_stride: 1, src_pos: 1, src_stride: 1, n: src->nparam, sign: -s); |
1852 | pos = node_var_coef_pos(node: src, i: 0); |
1853 | isl_dim_map_range(dim_map, dst_pos: pos, dst_stride: -2, src_pos: offset, src_stride: 1, n: src->nvar, sign: s); |
1854 | isl_dim_map_range(dim_map, dst_pos: pos + 1, dst_stride: -2, src_pos: offset, src_stride: 1, n: src->nvar, sign: -s); |
1855 | |
1856 | return dim_map; |
1857 | } |
1858 | |
1859 | /* Add the constraints from "src" to "dst" using "dim_map", |
1860 | * after making sure there is enough room in "dst" for the extra constraints. |
1861 | */ |
1862 | static __isl_give isl_basic_set *add_constraints_dim_map( |
1863 | __isl_take isl_basic_set *dst, __isl_take isl_basic_set *src, |
1864 | __isl_take isl_dim_map *dim_map) |
1865 | { |
1866 | isl_size n_eq, n_ineq; |
1867 | |
1868 | n_eq = isl_basic_set_n_equality(bset: src); |
1869 | n_ineq = isl_basic_set_n_inequality(bset: src); |
1870 | if (n_eq < 0 || n_ineq < 0) |
1871 | dst = isl_basic_set_free(bset: dst); |
1872 | dst = isl_basic_set_extend_constraints(base: dst, n_eq, n_ineq); |
1873 | dst = isl_basic_set_add_constraints_dim_map(dst, src, dim_map); |
1874 | return dst; |
1875 | } |
1876 | |
1877 | /* Add constraints to graph->lp that force validity for the given |
1878 | * dependence from a node i to itself. |
1879 | * That is, add constraints that enforce |
1880 | * |
1881 | * (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x) |
1882 | * = c_i_x (y - x) >= 0 |
1883 | * |
1884 | * for each (x,y) in R. |
1885 | * We obtain general constraints on coefficients (c_0, c_x) |
1886 | * of valid constraints for (y - x) and then plug in (0, c_i_x^+ - c_i_x^-), |
1887 | * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative. |
1888 | * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart. |
1889 | * Note that the result of intra_coefficients may also contain |
1890 | * parameter coefficients c_n, in which case 0 is plugged in for them as well. |
1891 | */ |
1892 | static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph, |
1893 | struct isl_sched_edge *edge) |
1894 | { |
1895 | isl_size offset; |
1896 | isl_map *map = isl_map_copy(map: edge->map); |
1897 | isl_ctx *ctx = isl_map_get_ctx(map); |
1898 | isl_dim_map *dim_map; |
1899 | isl_basic_set *coef; |
1900 | struct isl_sched_node *node = edge->src; |
1901 | |
1902 | coef = intra_coefficients(graph, node, map, need_param: 0); |
1903 | |
1904 | offset = coef_var_offset(coef); |
1905 | if (offset < 0) |
1906 | coef = isl_basic_set_free(bset: coef); |
1907 | if (!coef) |
1908 | return isl_stat_error; |
1909 | |
1910 | dim_map = intra_dim_map(ctx, graph, node, offset, s: 1); |
1911 | graph->lp = add_constraints_dim_map(dst: graph->lp, src: coef, dim_map); |
1912 | |
1913 | return isl_stat_ok; |
1914 | } |
1915 | |
1916 | /* Add constraints to graph->lp that force validity for the given |
1917 | * dependence from node i to node j. |
1918 | * That is, add constraints that enforce |
1919 | * |
1920 | * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0 |
1921 | * |
1922 | * for each (x,y) in R. |
1923 | * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y) |
1924 | * of valid constraints for R and then plug in |
1925 | * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-), |
1926 | * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative. |
1927 | * In graph->lp, the c_*^- appear before their c_*^+ counterpart. |
1928 | */ |
1929 | static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph, |
1930 | struct isl_sched_edge *edge) |
1931 | { |
1932 | isl_size offset; |
1933 | isl_map *map; |
1934 | isl_ctx *ctx; |
1935 | isl_dim_map *dim_map; |
1936 | isl_basic_set *coef; |
1937 | struct isl_sched_node *src = edge->src; |
1938 | struct isl_sched_node *dst = edge->dst; |
1939 | |
1940 | if (!graph->lp) |
1941 | return isl_stat_error; |
1942 | |
1943 | map = isl_map_copy(map: edge->map); |
1944 | ctx = isl_map_get_ctx(map); |
1945 | coef = inter_coefficients(graph, edge, map); |
1946 | |
1947 | offset = coef_var_offset(coef); |
1948 | if (offset < 0) |
1949 | coef = isl_basic_set_free(bset: coef); |
1950 | if (!coef) |
1951 | return isl_stat_error; |
1952 | |
1953 | dim_map = inter_dim_map(ctx, graph, src, dst, offset, s: 1); |
1954 | |
1955 | edge->start = graph->lp->n_ineq; |
1956 | graph->lp = add_constraints_dim_map(dst: graph->lp, src: coef, dim_map); |
1957 | if (!graph->lp) |
1958 | return isl_stat_error; |
1959 | edge->end = graph->lp->n_ineq; |
1960 | |
1961 | return isl_stat_ok; |
1962 | } |
1963 | |
1964 | /* Add constraints to graph->lp that bound the dependence distance for the given |
1965 | * dependence from a node i to itself. |
1966 | * If s = 1, we add the constraint |
1967 | * |
1968 | * c_i_x (y - x) <= m_0 + m_n n |
1969 | * |
1970 | * or |
1971 | * |
1972 | * -c_i_x (y - x) + m_0 + m_n n >= 0 |
1973 | * |
1974 | * for each (x,y) in R. |
1975 | * If s = -1, we add the constraint |
1976 | * |
1977 | * -c_i_x (y - x) <= m_0 + m_n n |
1978 | * |
1979 | * or |
1980 | * |
1981 | * c_i_x (y - x) + m_0 + m_n n >= 0 |
1982 | * |
1983 | * for each (x,y) in R. |
1984 | * We obtain general constraints on coefficients (c_0, c_n, c_x) |
1985 | * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x), |
1986 | * with each coefficient (except m_0) represented as a pair of non-negative |
1987 | * coefficients. |
1988 | * |
1989 | * |
1990 | * If "local" is set, then we add constraints |
1991 | * |
1992 | * c_i_x (y - x) <= 0 |
1993 | * |
1994 | * or |
1995 | * |
1996 | * -c_i_x (y - x) <= 0 |
1997 | * |
1998 | * instead, forcing the dependence distance to be (less than or) equal to 0. |
 * That is, we plug in (0, 0, -s * c_i_x).
2000 | * intra_coefficients is not required to have c_n in its result when |
2001 | * "local" is set. If they are missing, then (0, -s * c_i_x) is plugged in. |
2002 | * Note that dependences marked local are treated as validity constraints |
2003 | * by add_all_validity_constraints and therefore also have |
2004 | * their distances bounded by 0 from below. |
2005 | */ |
2006 | static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph, |
2007 | struct isl_sched_edge *edge, int s, int local) |
2008 | { |
2009 | isl_size offset; |
2010 | isl_size nparam; |
2011 | isl_map *map = isl_map_copy(map: edge->map); |
2012 | isl_ctx *ctx = isl_map_get_ctx(map); |
2013 | isl_dim_map *dim_map; |
2014 | isl_basic_set *coef; |
2015 | struct isl_sched_node *node = edge->src; |
2016 | |
2017 | coef = intra_coefficients(graph, node, map, need_param: !local); |
2018 | nparam = isl_space_dim(space: node->space, type: isl_dim_param); |
2019 | |
2020 | offset = coef_var_offset(coef); |
2021 | if (nparam < 0 || offset < 0) |
2022 | coef = isl_basic_set_free(bset: coef); |
2023 | if (!coef) |
2024 | return isl_stat_error; |
2025 | |
2026 | dim_map = intra_dim_map(ctx, graph, node, offset, s: -s); |
2027 | |
2028 | if (!local) { |
2029 | isl_dim_map_range(dim_map, dst_pos: 1, dst_stride: 0, src_pos: 0, src_stride: 0, n: 1, sign: 1); |
2030 | isl_dim_map_range(dim_map, dst_pos: 4, dst_stride: 2, src_pos: 1, src_stride: 1, n: nparam, sign: -1); |
2031 | isl_dim_map_range(dim_map, dst_pos: 5, dst_stride: 2, src_pos: 1, src_stride: 1, n: nparam, sign: 1); |
2032 | } |
2033 | graph->lp = add_constraints_dim_map(dst: graph->lp, src: coef, dim_map); |
2034 | |
2035 | return isl_stat_ok; |
2036 | } |
2037 | |
2038 | /* Add constraints to graph->lp that bound the dependence distance for the given |
2039 | * dependence from node i to node j. |
2040 | * If s = 1, we add the constraint |
2041 | * |
2042 | * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) |
2043 | * <= m_0 + m_n n |
2044 | * |
2045 | * or |
2046 | * |
2047 | * -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) + |
2048 | * m_0 + m_n n >= 0 |
2049 | * |
2050 | * for each (x,y) in R. |
2051 | * If s = -1, we add the constraint |
2052 | * |
2053 | * -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)) |
2054 | * <= m_0 + m_n n |
2055 | * |
2056 | * or |
2057 | * |
2058 | * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) + |
2059 | * m_0 + m_n n >= 0 |
2060 | * |
2061 | * for each (x,y) in R. |
2062 | * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y) |
2063 | * of valid constraints for R and then plug in |
2064 | * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n, |
2065 | * s*c_i_x, -s*c_j_x) |
2066 | * with each coefficient (except m_0, c_*_0 and c_*_n) |
2067 | * represented as a pair of non-negative coefficients. |
2068 | * |
2069 | * |
2070 | * If "local" is set (and s = 1), then we add constraints |
2071 | * |
2072 | * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0 |
2073 | * |
2074 | * or |
2075 | * |
 *	-((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)) >= 0
2077 | * |
2078 | * instead, forcing the dependence distance to be (less than or) equal to 0. |
2079 | * That is, we plug in |
2080 | * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x). |
2081 | * Note that dependences marked local are treated as validity constraints |
2082 | * by add_all_validity_constraints and therefore also have |
2083 | * their distances bounded by 0 from below. |
2084 | */ |
2085 | static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph, |
2086 | struct isl_sched_edge *edge, int s, int local) |
2087 | { |
2088 | isl_size offset; |
2089 | isl_size nparam; |
2090 | isl_map *map = isl_map_copy(map: edge->map); |
2091 | isl_ctx *ctx = isl_map_get_ctx(map); |
2092 | isl_dim_map *dim_map; |
2093 | isl_basic_set *coef; |
2094 | struct isl_sched_node *src = edge->src; |
2095 | struct isl_sched_node *dst = edge->dst; |
2096 | |
2097 | coef = inter_coefficients(graph, edge, map); |
2098 | nparam = isl_space_dim(space: src->space, type: isl_dim_param); |
2099 | |
2100 | offset = coef_var_offset(coef); |
2101 | if (nparam < 0 || offset < 0) |
2102 | coef = isl_basic_set_free(bset: coef); |
2103 | if (!coef) |
2104 | return isl_stat_error; |
2105 | |
2106 | dim_map = inter_dim_map(ctx, graph, src, dst, offset, s: -s); |
2107 | |
2108 | if (!local) { |
2109 | isl_dim_map_range(dim_map, dst_pos: 1, dst_stride: 0, src_pos: 0, src_stride: 0, n: 1, sign: 1); |
2110 | isl_dim_map_range(dim_map, dst_pos: 4, dst_stride: 2, src_pos: 1, src_stride: 1, n: nparam, sign: -1); |
2111 | isl_dim_map_range(dim_map, dst_pos: 5, dst_stride: 2, src_pos: 1, src_stride: 1, n: nparam, sign: 1); |
2112 | } |
2113 | |
2114 | graph->lp = add_constraints_dim_map(dst: graph->lp, src: coef, dim_map); |
2115 | |
2116 | return isl_stat_ok; |
2117 | } |
2118 | |
/* Does the distance over "edge" need to be forced to zero?
 *
 * This is the case for edges marked local and,
 * if "use_coincidence" is set, also for coincidence edges.
 */
static int force_zero(struct isl_sched_edge *edge, int use_coincidence)
{
	if (is_local(edge))
		return 1;
	if (!use_coincidence)
		return 0;
	return is_coincidence(edge) ? 1 : 0;
}
2128 | |
2129 | /* Add all validity constraints to graph->lp. |
2130 | * |
2131 | * An edge that is forced to be local needs to have its dependence |
2132 | * distances equal to zero. We take care of bounding them by 0 from below |
2133 | * here. add_all_proximity_constraints takes care of bounding them by 0 |
2134 | * from above. |
2135 | * |
2136 | * If "use_coincidence" is set, then we treat coincidence edges as local edges. |
2137 | * Otherwise, we ignore them. |
2138 | */ |
2139 | static int add_all_validity_constraints(struct isl_sched_graph *graph, |
2140 | int use_coincidence) |
2141 | { |
2142 | int i; |
2143 | |
2144 | for (i = 0; i < graph->n_edge; ++i) { |
2145 | struct isl_sched_edge *edge = &graph->edge[i]; |
2146 | int zero; |
2147 | |
2148 | zero = force_zero(edge, use_coincidence); |
2149 | if (!is_validity(edge) && !zero) |
2150 | continue; |
2151 | if (edge->src != edge->dst) |
2152 | continue; |
2153 | if (add_intra_validity_constraints(graph, edge) < 0) |
2154 | return -1; |
2155 | } |
2156 | |
2157 | for (i = 0; i < graph->n_edge; ++i) { |
2158 | struct isl_sched_edge *edge = &graph->edge[i]; |
2159 | int zero; |
2160 | |
2161 | zero = force_zero(edge, use_coincidence); |
2162 | if (!is_validity(edge) && !zero) |
2163 | continue; |
2164 | if (edge->src == edge->dst) |
2165 | continue; |
2166 | if (add_inter_validity_constraints(graph, edge) < 0) |
2167 | return -1; |
2168 | } |
2169 | |
2170 | return 0; |
2171 | } |
2172 | |
2173 | /* Add constraints to graph->lp that bound the dependence distance |
2174 | * for all dependence relations. |
2175 | * If a given proximity dependence is identical to a validity |
2176 | * dependence, then the dependence distance is already bounded |
2177 | * from below (by zero), so we only need to bound the distance |
2178 | * from above. (This includes the case of "local" dependences |
2179 | * which are treated as validity dependence by add_all_validity_constraints.) |
2180 | * Otherwise, we need to bound the distance both from above and from below. |
2181 | * |
2182 | * If "use_coincidence" is set, then we treat coincidence edges as local edges. |
2183 | * Otherwise, we ignore them. |
2184 | */ |
2185 | static int add_all_proximity_constraints(struct isl_sched_graph *graph, |
2186 | int use_coincidence) |
2187 | { |
2188 | int i; |
2189 | |
2190 | for (i = 0; i < graph->n_edge; ++i) { |
2191 | struct isl_sched_edge *edge = &graph->edge[i]; |
2192 | int zero; |
2193 | |
2194 | zero = force_zero(edge, use_coincidence); |
2195 | if (!isl_sched_edge_is_proximity(edge) && !zero) |
2196 | continue; |
2197 | if (edge->src == edge->dst && |
2198 | add_intra_proximity_constraints(graph, edge, s: 1, local: zero) < 0) |
2199 | return -1; |
2200 | if (edge->src != edge->dst && |
2201 | add_inter_proximity_constraints(graph, edge, s: 1, local: zero) < 0) |
2202 | return -1; |
2203 | if (is_validity(edge) || zero) |
2204 | continue; |
2205 | if (edge->src == edge->dst && |
2206 | add_intra_proximity_constraints(graph, edge, s: -1, local: 0) < 0) |
2207 | return -1; |
2208 | if (edge->src != edge->dst && |
2209 | add_inter_proximity_constraints(graph, edge, s: -1, local: 0) < 0) |
2210 | return -1; |
2211 | } |
2212 | |
2213 | return 0; |
2214 | } |
2215 | |
2216 | /* Normalize the rows of "indep" such that all rows are lexicographically |
2217 | * positive and such that each row contains as many final zeros as possible, |
2218 | * given the choice for the previous rows. |
2219 | * Do this by performing elementary row operations. |
2220 | */ |
2221 | static __isl_give isl_mat *normalize_independent(__isl_take isl_mat *indep) |
2222 | { |
2223 | indep = isl_mat_reverse_gauss(mat: indep); |
2224 | indep = isl_mat_lexnonneg_rows(mat: indep); |
2225 | return indep; |
2226 | } |
2227 | |
2228 | /* Extract the linear part of the current schedule for node "node". |
2229 | */ |
2230 | static __isl_give isl_mat *(struct isl_sched_node *node) |
2231 | { |
2232 | isl_size n_row = isl_mat_rows(mat: node->sched); |
2233 | |
2234 | if (n_row < 0) |
2235 | return NULL; |
2236 | return isl_mat_sub_alloc(mat: node->sched, first_row: 0, n_row, |
2237 | first_col: 1 + node->nparam, n_col: node->nvar); |
2238 | } |
2239 | |
2240 | /* Compute a basis for the rows in the linear part of the schedule |
2241 | * and extend this basis to a full basis. The remaining rows |
2242 | * can then be used to force linear independence from the rows |
2243 | * in the schedule. |
2244 | * |
2245 | * In particular, given the schedule rows S, we compute |
2246 | * |
2247 | * S = H Q |
2248 | * S U = H |
2249 | * |
2250 | * with H the Hermite normal form of S. That is, all but the |
2251 | * first rank columns of H are zero and so each row in S is |
2252 | * a linear combination of the first rank rows of Q. |
2253 | * The matrix Q can be used as a variable transformation |
2254 | * that isolates the directions of S in the first rank rows. |
2255 | * Transposing S U = H yields |
2256 | * |
2257 | * U^T S^T = H^T |
2258 | * |
2259 | * with all but the first rank rows of H^T zero. |
2260 | * The last rows of U^T are therefore linear combinations |
2261 | * of schedule coefficients that are all zero on schedule |
2262 | * coefficients that are linearly dependent on the rows of S. |
2263 | * At least one of these combinations is non-zero on |
2264 | * linearly independent schedule coefficients. |
2265 | * The rows are normalized to involve as few of the last |
2266 | * coefficients as possible and to have a positive initial value. |
2267 | */ |
2268 | isl_stat isl_sched_node_update_vmap(struct isl_sched_node *node) |
2269 | { |
2270 | isl_mat *H, *U, *Q; |
2271 | |
2272 | H = extract_linear_schedule(node); |
2273 | |
2274 | H = isl_mat_left_hermite(M: H, neg: 0, U: &U, Q: &Q); |
2275 | isl_mat_free(mat: node->indep); |
2276 | isl_mat_free(mat: node->vmap); |
2277 | node->vmap = Q; |
2278 | node->indep = isl_mat_transpose(mat: U); |
2279 | node->rank = isl_mat_initial_non_zero_cols(mat: H); |
2280 | node->indep = isl_mat_drop_rows(mat: node->indep, row: 0, n: node->rank); |
2281 | node->indep = normalize_independent(indep: node->indep); |
2282 | isl_mat_free(mat: H); |
2283 | |
2284 | if (!node->indep || !node->vmap || node->rank < 0) |
2285 | return isl_stat_error; |
2286 | return isl_stat_ok; |
2287 | } |
2288 | |
/* Is "edge" marked as either a validity edge or
 * a conditional validity edge?
 */
static int is_any_validity(struct isl_sched_edge *edge)
{
	if (is_validity(edge))
		return 1;
	return isl_sched_edge_is_conditional_validity(edge) ? 1 : 0;
}
2296 | |
2297 | /* How many times should we count the constraints in "edge"? |
2298 | * |
2299 | * We count as follows |
2300 | * validity -> 1 (>= 0) |
2301 | * validity+proximity -> 2 (>= 0 and upper bound) |
2302 | * proximity -> 2 (lower and upper bound) |
2303 | * local(+any) -> 2 (>= 0 and <= 0) |
2304 | * |
2305 | * If an edge is only marked conditional_validity then it counts |
2306 | * as zero since it is only checked afterwards. |
2307 | * |
2308 | * If "use_coincidence" is set, then we treat coincidence edges as local edges. |
2309 | * Otherwise, we ignore them. |
2310 | */ |
static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence)
{
	/* Proximity edges and edges with a forced zero distance
	 * get both a lower and an upper bound.
	 */
	int two_sided = isl_sched_edge_is_proximity(edge) ||
			force_zero(edge, use_coincidence);

	if (two_sided)
		return 2;
	/* Pure validity edges only get a lower bound;
	 * anything else does not contribute at all.
	 */
	return is_validity(edge) ? 1 : 0;
}
2320 | |
2321 | /* How many times should the constraints in "edge" be counted |
2322 | * as a parametric intra-node constraint? |
2323 | * |
2324 | * Only proximity edges that are not forced zero need |
2325 | * coefficient constraints that include coefficients for parameters. |
2326 | * If the edge is also a validity edge, then only |
2327 | * an upper bound is introduced. Otherwise, both lower and upper bounds |
2328 | * are introduced. |
2329 | */ |
2330 | static int parametric_intra_edge_multiplicity(struct isl_sched_edge *edge, |
2331 | int use_coincidence) |
2332 | { |
2333 | if (edge->src != edge->dst) |
2334 | return 0; |
2335 | if (!isl_sched_edge_is_proximity(edge)) |
2336 | return 0; |
2337 | if (force_zero(edge, use_coincidence)) |
2338 | return 0; |
2339 | if (is_validity(edge)) |
2340 | return 1; |
2341 | else |
2342 | return 2; |
2343 | } |
2344 | |
2345 | /* Add "f" times the number of equality and inequality constraints of "bset" |
2346 | * to "n_eq" and "n_ineq" and free "bset". |
2347 | */ |
2348 | static isl_stat update_count(__isl_take isl_basic_set *bset, |
2349 | int f, int *n_eq, int *n_ineq) |
2350 | { |
2351 | isl_size eq, ineq; |
2352 | |
2353 | eq = isl_basic_set_n_equality(bset); |
2354 | ineq = isl_basic_set_n_inequality(bset); |
2355 | isl_basic_set_free(bset); |
2356 | |
2357 | if (eq < 0 || ineq < 0) |
2358 | return isl_stat_error; |
2359 | |
2360 | *n_eq += eq; |
2361 | *n_ineq += ineq; |
2362 | |
2363 | return isl_stat_ok; |
2364 | } |
2365 | |
2366 | /* Count the number of equality and inequality constraints |
2367 | * that will be added for the given map. |
2368 | * |
2369 | * The edges that require parameter coefficients are counted separately. |
2370 | * |
2371 | * "use_coincidence" is set if we should take into account coincidence edges. |
2372 | */ |
2373 | static isl_stat count_map_constraints(struct isl_sched_graph *graph, |
2374 | struct isl_sched_edge *edge, __isl_take isl_map *map, |
2375 | int *n_eq, int *n_ineq, int use_coincidence) |
2376 | { |
2377 | isl_map *copy; |
2378 | isl_basic_set *coef; |
2379 | int f = edge_multiplicity(edge, use_coincidence); |
2380 | int fp = parametric_intra_edge_multiplicity(edge, use_coincidence); |
2381 | |
2382 | if (f == 0) { |
2383 | isl_map_free(map); |
2384 | return isl_stat_ok; |
2385 | } |
2386 | |
2387 | if (edge->src != edge->dst) { |
2388 | coef = inter_coefficients(graph, edge, map); |
2389 | return update_count(bset: coef, f, n_eq, n_ineq); |
2390 | } |
2391 | |
2392 | if (fp > 0) { |
2393 | copy = isl_map_copy(map); |
2394 | coef = intra_coefficients(graph, node: edge->src, map: copy, need_param: 1); |
2395 | if (update_count(bset: coef, f: fp, n_eq, n_ineq) < 0) |
2396 | goto error; |
2397 | } |
2398 | |
2399 | if (f > fp) { |
2400 | copy = isl_map_copy(map); |
2401 | coef = intra_coefficients(graph, node: edge->src, map: copy, need_param: 0); |
2402 | if (update_count(bset: coef, f: f - fp, n_eq, n_ineq) < 0) |
2403 | goto error; |
2404 | } |
2405 | |
2406 | isl_map_free(map); |
2407 | return isl_stat_ok; |
2408 | error: |
2409 | isl_map_free(map); |
2410 | return isl_stat_error; |
2411 | } |
2412 | |
2413 | /* Count the number of equality and inequality constraints |
2414 | * that will be added to the main lp problem. |
2415 | * We count as follows |
2416 | * validity -> 1 (>= 0) |
2417 | * validity+proximity -> 2 (>= 0 and upper bound) |
2418 | * proximity -> 2 (lower and upper bound) |
2419 | * local(+any) -> 2 (>= 0 and <= 0) |
2420 | * |
2421 | * If "use_coincidence" is set, then we treat coincidence edges as local edges. |
2422 | * Otherwise, we ignore them. |
2423 | */ |
2424 | static int count_constraints(struct isl_sched_graph *graph, |
2425 | int *n_eq, int *n_ineq, int use_coincidence) |
2426 | { |
2427 | int i; |
2428 | |
2429 | *n_eq = *n_ineq = 0; |
2430 | for (i = 0; i < graph->n_edge; ++i) { |
2431 | struct isl_sched_edge *edge = &graph->edge[i]; |
2432 | isl_map *map = isl_map_copy(map: edge->map); |
2433 | |
2434 | if (count_map_constraints(graph, edge, map, n_eq, n_ineq, |
2435 | use_coincidence) < 0) |
2436 | return -1; |
2437 | } |
2438 | |
2439 | return 0; |
2440 | } |
2441 | |
2442 | /* Count the number of constraints that will be added by |
2443 | * add_bound_constant_constraints to bound the values of the constant terms |
2444 | * and increment *n_eq and *n_ineq accordingly. |
2445 | * |
2446 | * In practice, add_bound_constant_constraints only adds inequalities. |
2447 | */ |
2448 | static isl_stat count_bound_constant_constraints(isl_ctx *ctx, |
2449 | struct isl_sched_graph *graph, int *n_eq, int *n_ineq) |
2450 | { |
2451 | if (isl_options_get_schedule_max_constant_term(ctx) == -1) |
2452 | return isl_stat_ok; |
2453 | |
2454 | *n_ineq += graph->n; |
2455 | |
2456 | return isl_stat_ok; |
2457 | } |
2458 | |
2459 | /* Add constraints to bound the values of the constant terms in the schedule, |
2460 | * if requested by the user. |
2461 | * |
2462 | * The maximal value of the constant terms is defined by the option |
2463 | * "schedule_max_constant_term". |
2464 | */ |
2465 | static isl_stat add_bound_constant_constraints(isl_ctx *ctx, |
2466 | struct isl_sched_graph *graph) |
2467 | { |
2468 | int i, k; |
2469 | int max; |
2470 | isl_size total; |
2471 | |
2472 | max = isl_options_get_schedule_max_constant_term(ctx); |
2473 | if (max == -1) |
2474 | return isl_stat_ok; |
2475 | |
2476 | total = isl_basic_set_dim(bset: graph->lp, type: isl_dim_set); |
2477 | if (total < 0) |
2478 | return isl_stat_error; |
2479 | |
2480 | for (i = 0; i < graph->n; ++i) { |
2481 | struct isl_sched_node *node = &graph->node[i]; |
2482 | int pos; |
2483 | |
2484 | k = isl_basic_set_alloc_inequality(bset: graph->lp); |
2485 | if (k < 0) |
2486 | return isl_stat_error; |
2487 | isl_seq_clr(p: graph->lp->ineq[k], len: 1 + total); |
2488 | pos = node_cst_coef_offset(node); |
2489 | isl_int_set_si(graph->lp->ineq[k][1 + pos], -1); |
2490 | isl_int_set_si(graph->lp->ineq[k][0], max); |
2491 | } |
2492 | |
2493 | return isl_stat_ok; |
2494 | } |
2495 | |
2496 | /* Count the number of constraints that will be added by |
2497 | * add_bound_coefficient_constraints and increment *n_eq and *n_ineq |
2498 | * accordingly. |
2499 | * |
2500 | * In practice, add_bound_coefficient_constraints only adds inequalities. |
2501 | */ |
2502 | static int count_bound_coefficient_constraints(isl_ctx *ctx, |
2503 | struct isl_sched_graph *graph, int *n_eq, int *n_ineq) |
2504 | { |
2505 | int i; |
2506 | |
2507 | if (isl_options_get_schedule_max_coefficient(ctx) == -1 && |
2508 | !isl_options_get_schedule_treat_coalescing(ctx)) |
2509 | return 0; |
2510 | |
2511 | for (i = 0; i < graph->n; ++i) |
2512 | *n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar; |
2513 | |
2514 | return 0; |
2515 | } |
2516 | |
2517 | /* Add constraints to graph->lp that bound the values of |
2518 | * the parameter schedule coefficients of "node" to "max" and |
2519 | * the variable schedule coefficients to the corresponding entry |
2520 | * in node->max. |
2521 | * In either case, a negative value means that no bound needs to be imposed. |
2522 | * |
2523 | * For parameter coefficients, this amounts to adding a constraint |
2524 | * |
2525 | * c_n <= max |
2526 | * |
2527 | * i.e., |
2528 | * |
2529 | * -c_n + max >= 0 |
2530 | * |
2531 | * The variables coefficients are, however, not represented directly. |
2532 | * Instead, the variable coefficients c_x are written as differences |
2533 | * c_x = c_x^+ - c_x^-. |
2534 | * That is, |
2535 | * |
2536 | * -max_i <= c_x_i <= max_i |
2537 | * |
2538 | * is encoded as |
2539 | * |
2540 | * -max_i <= c_x_i^+ - c_x_i^- <= max_i |
2541 | * |
2542 | * or |
2543 | * |
2544 | * -(c_x_i^+ - c_x_i^-) + max_i >= 0 |
2545 | * c_x_i^+ - c_x_i^- + max_i >= 0 |
2546 | */ |
2547 | static isl_stat node_add_coefficient_constraints(isl_ctx *ctx, |
2548 | struct isl_sched_graph *graph, struct isl_sched_node *node, int max) |
2549 | { |
2550 | int i, j, k; |
2551 | isl_size total; |
2552 | isl_vec *ineq; |
2553 | |
2554 | total = isl_basic_set_dim(bset: graph->lp, type: isl_dim_set); |
2555 | if (total < 0) |
2556 | return isl_stat_error; |
2557 | |
2558 | for (j = 0; j < node->nparam; ++j) { |
2559 | int dim; |
2560 | |
2561 | if (max < 0) |
2562 | continue; |
2563 | |
2564 | k = isl_basic_set_alloc_inequality(bset: graph->lp); |
2565 | if (k < 0) |
2566 | return isl_stat_error; |
2567 | dim = 1 + node_par_coef_offset(node) + j; |
2568 | isl_seq_clr(p: graph->lp->ineq[k], len: 1 + total); |
2569 | isl_int_set_si(graph->lp->ineq[k][dim], -1); |
2570 | isl_int_set_si(graph->lp->ineq[k][0], max); |
2571 | } |
2572 | |
2573 | ineq = isl_vec_alloc(ctx, size: 1 + total); |
2574 | ineq = isl_vec_clr(vec: ineq); |
2575 | if (!ineq) |
2576 | return isl_stat_error; |
2577 | for (i = 0; i < node->nvar; ++i) { |
2578 | int pos = 1 + node_var_coef_pos(node, i); |
2579 | |
2580 | if (isl_int_is_neg(node->max->el[i])) |
2581 | continue; |
2582 | |
2583 | isl_int_set_si(ineq->el[pos], 1); |
2584 | isl_int_set_si(ineq->el[pos + 1], -1); |
2585 | isl_int_set(ineq->el[0], node->max->el[i]); |
2586 | |
2587 | k = isl_basic_set_alloc_inequality(bset: graph->lp); |
2588 | if (k < 0) |
2589 | goto error; |
2590 | isl_seq_cpy(dst: graph->lp->ineq[k], src: ineq->el, len: 1 + total); |
2591 | |
2592 | isl_seq_neg(dst: ineq->el + pos, src: ineq->el + pos, len: 2); |
2593 | k = isl_basic_set_alloc_inequality(bset: graph->lp); |
2594 | if (k < 0) |
2595 | goto error; |
2596 | isl_seq_cpy(dst: graph->lp->ineq[k], src: ineq->el, len: 1 + total); |
2597 | |
2598 | isl_seq_clr(p: ineq->el + pos, len: 2); |
2599 | } |
2600 | isl_vec_free(vec: ineq); |
2601 | |
2602 | return isl_stat_ok; |
2603 | error: |
2604 | isl_vec_free(vec: ineq); |
2605 | return isl_stat_error; |
2606 | } |
2607 | |
2608 | /* Add constraints that bound the values of the variable and parameter |
2609 | * coefficients of the schedule. |
2610 | * |
2611 | * The maximal value of the coefficients is defined by the option |
2612 | * 'schedule_max_coefficient' and the entries in node->max. |
2613 | * These latter entries are only set if either the schedule_max_coefficient |
2614 | * option or the schedule_treat_coalescing option is set. |
2615 | */ |
2616 | static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx, |
2617 | struct isl_sched_graph *graph) |
2618 | { |
2619 | int i; |
2620 | int max; |
2621 | |
2622 | max = isl_options_get_schedule_max_coefficient(ctx); |
2623 | |
2624 | if (max == -1 && !isl_options_get_schedule_treat_coalescing(ctx)) |
2625 | return isl_stat_ok; |
2626 | |
2627 | for (i = 0; i < graph->n; ++i) { |
2628 | struct isl_sched_node *node = &graph->node[i]; |
2629 | |
2630 | if (node_add_coefficient_constraints(ctx, graph, node, max) < 0) |
2631 | return isl_stat_error; |
2632 | } |
2633 | |
2634 | return isl_stat_ok; |
2635 | } |
2636 | |
2637 | /* Add a constraint to graph->lp that equates the value at position |
2638 | * "sum_pos" to the sum of the "n" values starting at "first". |
2639 | */ |
2640 | static isl_stat add_sum_constraint(struct isl_sched_graph *graph, |
2641 | int sum_pos, int first, int n) |
2642 | { |
2643 | int i, k; |
2644 | isl_size total; |
2645 | |
2646 | total = isl_basic_set_dim(bset: graph->lp, type: isl_dim_set); |
2647 | if (total < 0) |
2648 | return isl_stat_error; |
2649 | |
2650 | k = isl_basic_set_alloc_equality(bset: graph->lp); |
2651 | if (k < 0) |
2652 | return isl_stat_error; |
2653 | isl_seq_clr(p: graph->lp->eq[k], len: 1 + total); |
2654 | isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1); |
2655 | for (i = 0; i < n; ++i) |
2656 | isl_int_set_si(graph->lp->eq[k][1 + first + i], 1); |
2657 | |
2658 | return isl_stat_ok; |
2659 | } |
2660 | |
2661 | /* Add a constraint to graph->lp that equates the value at position |
2662 | * "sum_pos" to the sum of the parameter coefficients of all nodes. |
2663 | */ |
2664 | static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph, |
2665 | int sum_pos) |
2666 | { |
2667 | int i, j, k; |
2668 | isl_size total; |
2669 | |
2670 | total = isl_basic_set_dim(bset: graph->lp, type: isl_dim_set); |
2671 | if (total < 0) |
2672 | return isl_stat_error; |
2673 | |
2674 | k = isl_basic_set_alloc_equality(bset: graph->lp); |
2675 | if (k < 0) |
2676 | return isl_stat_error; |
2677 | isl_seq_clr(p: graph->lp->eq[k], len: 1 + total); |
2678 | isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1); |
2679 | for (i = 0; i < graph->n; ++i) { |
2680 | int pos = 1 + node_par_coef_offset(node: &graph->node[i]); |
2681 | |
2682 | for (j = 0; j < graph->node[i].nparam; ++j) |
2683 | isl_int_set_si(graph->lp->eq[k][pos + j], 1); |
2684 | } |
2685 | |
2686 | return isl_stat_ok; |
2687 | } |
2688 | |
2689 | /* Add a constraint to graph->lp that equates the value at position |
2690 | * "sum_pos" to the sum of the variable coefficients of all nodes. |
2691 | */ |
2692 | static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph, |
2693 | int sum_pos) |
2694 | { |
2695 | int i, j, k; |
2696 | isl_size total; |
2697 | |
2698 | total = isl_basic_set_dim(bset: graph->lp, type: isl_dim_set); |
2699 | if (total < 0) |
2700 | return isl_stat_error; |
2701 | |
2702 | k = isl_basic_set_alloc_equality(bset: graph->lp); |
2703 | if (k < 0) |
2704 | return isl_stat_error; |
2705 | isl_seq_clr(p: graph->lp->eq[k], len: 1 + total); |
2706 | isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1); |
2707 | for (i = 0; i < graph->n; ++i) { |
2708 | struct isl_sched_node *node = &graph->node[i]; |
2709 | int pos = 1 + node_var_coef_offset(node); |
2710 | |
2711 | for (j = 0; j < 2 * node->nvar; ++j) |
2712 | isl_int_set_si(graph->lp->eq[k][pos + j], 1); |
2713 | } |
2714 | |
2715 | return isl_stat_ok; |
2716 | } |
2717 | |
2718 | /* Construct an ILP problem for finding schedule coefficients |
2719 | * that result in non-negative, but small dependence distances |
2720 | * over all dependences. |
2721 | * In particular, the dependence distances over proximity edges |
2722 | * are bounded by m_0 + m_n n and we compute schedule coefficients |
2723 | * with small values (preferably zero) of m_n and m_0. |
2724 | * |
2725 | * All variables of the ILP are non-negative. The actual coefficients |
2726 | * may be negative, so each coefficient is represented as the difference |
2727 | * of two non-negative variables. The negative part always appears |
2728 | * immediately before the positive part. |
2729 | * Other than that, the variables have the following order |
2730 | * |
2731 | * - sum of positive and negative parts of m_n coefficients |
2732 | * - m_0 |
2733 | * - sum of all c_n coefficients |
2734 | * (unconstrained when computing non-parametric schedules) |
2735 | * - sum of positive and negative parts of all c_x coefficients |
2736 | * - positive and negative parts of m_n coefficients |
2737 | * - for each node |
2738 | * - positive and negative parts of c_i_x, in opposite order |
2739 | * - c_i_n (if parametric) |
2740 | * - c_i_0 |
2741 | * |
2742 | * The constraints are those from the edges plus two or three equalities |
2743 | * to express the sums. |
2744 | * |
2745 | * If "use_coincidence" is set, then we treat coincidence edges as local edges. |
2746 | * Otherwise, we ignore them. |
2747 | */ |
2748 | static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph, |
2749 | int use_coincidence) |
2750 | { |
2751 | int i; |
2752 | isl_size nparam; |
2753 | unsigned total; |
2754 | isl_space *space; |
2755 | int parametric; |
2756 | int param_pos; |
2757 | int n_eq, n_ineq; |
2758 | |
2759 | parametric = ctx->opt->schedule_parametric; |
2760 | nparam = isl_space_dim(space: graph->node[0].space, type: isl_dim_param); |
2761 | if (nparam < 0) |
2762 | return isl_stat_error; |
2763 | param_pos = 4; |
2764 | total = param_pos + 2 * nparam; |
2765 | for (i = 0; i < graph->n; ++i) { |
2766 | struct isl_sched_node *node = &graph->node[graph->sorted[i]]; |
2767 | if (isl_sched_node_update_vmap(node) < 0) |
2768 | return isl_stat_error; |
2769 | node->start = total; |
2770 | total += 1 + node->nparam + 2 * node->nvar; |
2771 | } |
2772 | |
2773 | if (count_constraints(graph, n_eq: &n_eq, n_ineq: &n_ineq, use_coincidence) < 0) |
2774 | return isl_stat_error; |
2775 | if (count_bound_constant_constraints(ctx, graph, n_eq: &n_eq, n_ineq: &n_ineq) < 0) |
2776 | return isl_stat_error; |
2777 | if (count_bound_coefficient_constraints(ctx, graph, n_eq: &n_eq, n_ineq: &n_ineq) < 0) |
2778 | return isl_stat_error; |
2779 | |
2780 | space = isl_space_set_alloc(ctx, nparam: 0, dim: total); |
2781 | isl_basic_set_free(bset: graph->lp); |
2782 | n_eq += 2 + parametric; |
2783 | |
2784 | graph->lp = isl_basic_set_alloc_space(space, extra: 0, n_eq, n_ineq); |
2785 | |
2786 | if (add_sum_constraint(graph, sum_pos: 0, first: param_pos, n: 2 * nparam) < 0) |
2787 | return isl_stat_error; |
2788 | if (parametric && add_param_sum_constraint(graph, sum_pos: 2) < 0) |
2789 | return isl_stat_error; |
2790 | if (add_var_sum_constraint(graph, sum_pos: 3) < 0) |
2791 | return isl_stat_error; |
2792 | if (add_bound_constant_constraints(ctx, graph) < 0) |
2793 | return isl_stat_error; |
2794 | if (add_bound_coefficient_constraints(ctx, graph) < 0) |
2795 | return isl_stat_error; |
2796 | if (add_all_validity_constraints(graph, use_coincidence) < 0) |
2797 | return isl_stat_error; |
2798 | if (add_all_proximity_constraints(graph, use_coincidence) < 0) |
2799 | return isl_stat_error; |
2800 | |
2801 | return isl_stat_ok; |
2802 | } |
2803 | |
2804 | /* Analyze the conflicting constraint found by |
2805 | * isl_tab_basic_set_non_trivial_lexmin. If it corresponds to the validity |
2806 | * constraint of one of the edges between distinct nodes, living, moreover |
2807 | * in distinct SCCs, then record the source and sink SCC as this may |
2808 | * be a good place to cut between SCCs. |
2809 | */ |
2810 | static int check_conflict(int con, void *user) |
2811 | { |
2812 | int i; |
2813 | struct isl_sched_graph *graph = user; |
2814 | |
2815 | if (graph->src_scc >= 0) |
2816 | return 0; |
2817 | |
2818 | con -= graph->lp->n_eq; |
2819 | |
2820 | if (con >= graph->lp->n_ineq) |
2821 | return 0; |
2822 | |
2823 | for (i = 0; i < graph->n_edge; ++i) { |
2824 | if (!is_validity(edge: &graph->edge[i])) |
2825 | continue; |
2826 | if (graph->edge[i].src == graph->edge[i].dst) |
2827 | continue; |
2828 | if (graph->edge[i].src->scc == graph->edge[i].dst->scc) |
2829 | continue; |
2830 | if (graph->edge[i].start > con) |
2831 | continue; |
2832 | if (graph->edge[i].end <= con) |
2833 | continue; |
2834 | graph->src_scc = graph->edge[i].src->scc; |
2835 | graph->dst_scc = graph->edge[i].dst->scc; |
2836 | } |
2837 | |
2838 | return 0; |
2839 | } |
2840 | |
2841 | /* Check whether the next schedule row of the given node needs to be |
2842 | * non-trivial. Lower-dimensional domains may have some trivial rows, |
2843 | * but as soon as the number of remaining required non-trivial rows |
2844 | * is as large as the number or remaining rows to be computed, |
2845 | * all remaining rows need to be non-trivial. |
2846 | */ |
2847 | static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node) |
2848 | { |
2849 | return node->nvar - node->rank >= graph->maxvar - graph->n_row; |
2850 | } |
2851 | |
2852 | /* Construct a non-triviality region with triviality directions |
2853 | * corresponding to the rows of "indep". |
2854 | * The rows of "indep" are expressed in terms of the schedule coefficients c_i, |
2855 | * while the triviality directions are expressed in terms of |
2856 | * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing |
2857 | * before c^+_i. Furthermore, |
2858 | * the pairs of non-negative variables representing the coefficients |
2859 | * are stored in the opposite order. |
2860 | */ |
2861 | static __isl_give isl_mat *construct_trivial(__isl_keep isl_mat *indep) |
2862 | { |
2863 | isl_ctx *ctx; |
2864 | isl_mat *mat; |
2865 | int i, j; |
2866 | isl_size n, n_var; |
2867 | |
2868 | n = isl_mat_rows(mat: indep); |
2869 | n_var = isl_mat_cols(mat: indep); |
2870 | if (n < 0 || n_var < 0) |
2871 | return NULL; |
2872 | |
2873 | ctx = isl_mat_get_ctx(mat: indep); |
2874 | mat = isl_mat_alloc(ctx, n_row: n, n_col: 2 * n_var); |
2875 | if (!mat) |
2876 | return NULL; |
2877 | for (i = 0; i < n; ++i) { |
2878 | for (j = 0; j < n_var; ++j) { |
2879 | int nj = n_var - 1 - j; |
2880 | isl_int_neg(mat->row[i][2 * nj], indep->row[i][j]); |
2881 | isl_int_set(mat->row[i][2 * nj + 1], indep->row[i][j]); |
2882 | } |
2883 | } |
2884 | |
2885 | return mat; |
2886 | } |
2887 | |
2888 | /* Solve the ILP problem constructed in setup_lp. |
2889 | * For each node such that all the remaining rows of its schedule |
2890 | * need to be non-trivial, we construct a non-triviality region. |
2891 | * This region imposes that the next row is independent of previous rows. |
2892 | * In particular, the non-triviality region enforces that at least |
2893 | * one of the linear combinations in the rows of node->indep is non-zero. |
2894 | */ |
2895 | static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph) |
2896 | { |
2897 | int i; |
2898 | isl_vec *sol; |
2899 | isl_basic_set *lp; |
2900 | |
2901 | for (i = 0; i < graph->n; ++i) { |
2902 | struct isl_sched_node *node = &graph->node[i]; |
2903 | isl_mat *trivial; |
2904 | |
2905 | graph->region[i].pos = node_var_coef_offset(node); |
2906 | if (needs_row(graph, node)) |
2907 | trivial = construct_trivial(indep: node->indep); |
2908 | else |
2909 | trivial = isl_mat_zero(ctx, n_row: 0, n_col: 0); |
2910 | graph->region[i].trivial = trivial; |
2911 | } |
2912 | lp = isl_basic_set_copy(bset: graph->lp); |
2913 | sol = isl_tab_basic_set_non_trivial_lexmin(bset: lp, n_op: 2, n_region: graph->n, |
2914 | region: graph->region, conflict: &check_conflict, user: graph); |
2915 | for (i = 0; i < graph->n; ++i) |
2916 | isl_mat_free(mat: graph->region[i].trivial); |
2917 | return sol; |
2918 | } |
2919 | |
2920 | /* Extract the coefficients for the variables of "node" from "sol". |
2921 | * |
2922 | * Each schedule coefficient c_i_x is represented as the difference |
2923 | * between two non-negative variables c_i_x^+ - c_i_x^-. |
2924 | * The c_i_x^- appear before their c_i_x^+ counterpart. |
2925 | * Furthermore, the order of these pairs is the opposite of that |
2926 | * of the corresponding coefficients. |
2927 | * |
2928 | * Return c_i_x = c_i_x^+ - c_i_x^- |
2929 | */ |
2930 | static __isl_give isl_vec *(struct isl_sched_node *node, |
2931 | __isl_keep isl_vec *sol) |
2932 | { |
2933 | int i; |
2934 | int pos; |
2935 | isl_vec *csol; |
2936 | |
2937 | if (!sol) |
2938 | return NULL; |
2939 | csol = isl_vec_alloc(ctx: isl_vec_get_ctx(vec: sol), size: node->nvar); |
2940 | if (!csol) |
2941 | return NULL; |
2942 | |
2943 | pos = 1 + node_var_coef_offset(node); |
2944 | for (i = 0; i < node->nvar; ++i) |
2945 | isl_int_sub(csol->el[node->nvar - 1 - i], |
2946 | sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]); |
2947 | |
2948 | return csol; |
2949 | } |
2950 | |
2951 | /* Update the schedules of all nodes based on the given solution |
2952 | * of the LP problem. |
2953 | * The new row is added to the current band. |
2954 | * All possibly negative coefficients are encoded as a difference |
2955 | * of two non-negative variables, so we need to perform the subtraction |
2956 | * here. |
2957 | * |
2958 | * If coincident is set, then the caller guarantees that the new |
2959 | * row satisfies the coincidence constraints. |
2960 | */ |
2961 | static int update_schedule(struct isl_sched_graph *graph, |
2962 | __isl_take isl_vec *sol, int coincident) |
2963 | { |
2964 | int i, j; |
2965 | isl_vec *csol = NULL; |
2966 | |
2967 | if (!sol) |
2968 | goto error; |
2969 | if (sol->size == 0) |
2970 | isl_die(sol->ctx, isl_error_internal, |
2971 | "no solution found" , goto error); |
2972 | if (graph->n_total_row >= graph->max_row) |
2973 | isl_die(sol->ctx, isl_error_internal, |
2974 | "too many schedule rows" , goto error); |
2975 | |
2976 | for (i = 0; i < graph->n; ++i) { |
2977 | struct isl_sched_node *node = &graph->node[i]; |
2978 | int pos; |
2979 | isl_size row = isl_mat_rows(mat: node->sched); |
2980 | |
2981 | isl_vec_free(vec: csol); |
2982 | csol = extract_var_coef(node, sol); |
2983 | if (row < 0 || !csol) |
2984 | goto error; |
2985 | |
2986 | isl_map_free(map: node->sched_map); |
2987 | node->sched_map = NULL; |
2988 | node->sched = isl_mat_add_rows(mat: node->sched, n: 1); |
2989 | if (!node->sched) |
2990 | goto error; |
2991 | pos = node_cst_coef_offset(node); |
2992 | node->sched = isl_mat_set_element(mat: node->sched, |
2993 | row, col: 0, v: sol->el[1 + pos]); |
2994 | pos = node_par_coef_offset(node); |
2995 | for (j = 0; j < node->nparam; ++j) |
2996 | node->sched = isl_mat_set_element(mat: node->sched, |
2997 | row, col: 1 + j, v: sol->el[1 + pos + j]); |
2998 | for (j = 0; j < node->nvar; ++j) |
2999 | node->sched = isl_mat_set_element(mat: node->sched, |
3000 | row, col: 1 + node->nparam + j, v: csol->el[j]); |
3001 | node->coincident[graph->n_total_row] = coincident; |
3002 | } |
3003 | isl_vec_free(vec: sol); |
3004 | isl_vec_free(vec: csol); |
3005 | |
3006 | graph->n_row++; |
3007 | graph->n_total_row++; |
3008 | |
3009 | return 0; |
3010 | error: |
3011 | isl_vec_free(vec: sol); |
3012 | isl_vec_free(vec: csol); |
3013 | return -1; |
3014 | } |
3015 | |
3016 | /* Convert row "row" of node->sched into an isl_aff living in "ls" |
3017 | * and return this isl_aff. |
3018 | */ |
3019 | static __isl_give isl_aff *(__isl_take isl_local_space *ls, |
3020 | struct isl_sched_node *node, int row) |
3021 | { |
3022 | int j; |
3023 | isl_int v; |
3024 | isl_aff *aff; |
3025 | |
3026 | isl_int_init(v); |
3027 | |
3028 | aff = isl_aff_zero_on_domain(ls); |
3029 | if (isl_mat_get_element(mat: node->sched, row, col: 0, v: &v) < 0) |
3030 | goto error; |
3031 | aff = isl_aff_set_constant(aff, v); |
3032 | for (j = 0; j < node->nparam; ++j) { |
3033 | if (isl_mat_get_element(mat: node->sched, row, col: 1 + j, v: &v) < 0) |
3034 | goto error; |
3035 | aff = isl_aff_set_coefficient(aff, type: isl_dim_param, pos: j, v); |
3036 | } |
3037 | for (j = 0; j < node->nvar; ++j) { |
3038 | if (isl_mat_get_element(mat: node->sched, row, |
3039 | col: 1 + node->nparam + j, v: &v) < 0) |
3040 | goto error; |
3041 | aff = isl_aff_set_coefficient(aff, type: isl_dim_in, pos: j, v); |
3042 | } |
3043 | |
3044 | isl_int_clear(v); |
3045 | |
3046 | return aff; |
3047 | error: |
3048 | isl_int_clear(v); |
3049 | isl_aff_free(aff); |
3050 | return NULL; |
3051 | } |
3052 | |
3053 | /* Convert the "n" rows starting at "first" of node->sched into a multi_aff |
3054 | * and return this multi_aff. |
3055 | * |
3056 | * The result is defined over the uncompressed node domain. |
3057 | */ |
3058 | __isl_give isl_multi_aff *( |
3059 | struct isl_sched_node *node, int first, int n) |
3060 | { |
3061 | int i; |
3062 | isl_space *space; |
3063 | isl_local_space *ls; |
3064 | isl_aff *aff; |
3065 | isl_multi_aff *ma; |
3066 | isl_size nrow; |
3067 | |
3068 | if (!node) |
3069 | return NULL; |
3070 | nrow = isl_mat_rows(mat: node->sched); |
3071 | if (nrow < 0) |
3072 | return NULL; |
3073 | if (node->compressed) |
3074 | space = isl_pw_multi_aff_get_domain_space(pma: node->decompress); |
3075 | else |
3076 | space = isl_space_copy(space: node->space); |
3077 | ls = isl_local_space_from_space(space: isl_space_copy(space)); |
3078 | space = isl_space_from_domain(space); |
3079 | space = isl_space_add_dims(space, type: isl_dim_out, n); |
3080 | ma = isl_multi_aff_zero(space); |
3081 | |
3082 | for (i = first; i < first + n; ++i) { |
3083 | aff = extract_schedule_row(ls: isl_local_space_copy(ls), node, row: i); |
3084 | ma = isl_multi_aff_set_aff(multi: ma, pos: i - first, el: aff); |
3085 | } |
3086 | |
3087 | isl_local_space_free(ls); |
3088 | |
3089 | if (node->compressed) |
3090 | ma = isl_multi_aff_pullback_multi_aff(ma1: ma, |
3091 | ma2: isl_multi_aff_copy(multi: node->compress)); |
3092 | |
3093 | return ma; |
3094 | } |
3095 | |
3096 | /* Convert node->sched into a multi_aff and return this multi_aff. |
3097 | * |
3098 | * The result is defined over the uncompressed node domain. |
3099 | */ |
3100 | static __isl_give isl_multi_aff *( |
3101 | struct isl_sched_node *node) |
3102 | { |
3103 | isl_size nrow; |
3104 | |
3105 | nrow = isl_mat_rows(mat: node->sched); |
3106 | if (nrow < 0) |
3107 | return NULL; |
3108 | return isl_sched_node_extract_partial_schedule_multi_aff(node, first: 0, n: nrow); |
3109 | } |
3110 | |
3111 | /* Convert node->sched into a map and return this map. |
3112 | * |
3113 | * The result is cached in node->sched_map, which needs to be released |
3114 | * whenever node->sched is updated. |
3115 | * It is defined over the uncompressed node domain. |
3116 | */ |
3117 | static __isl_give isl_map *(struct isl_sched_node *node) |
3118 | { |
3119 | if (!node->sched_map) { |
3120 | isl_multi_aff *ma; |
3121 | |
3122 | ma = node_extract_schedule_multi_aff(node); |
3123 | node->sched_map = isl_map_from_multi_aff(maff: ma); |
3124 | } |
3125 | |
3126 | return isl_map_copy(map: node->sched_map); |
3127 | } |
3128 | |
3129 | /* Construct a map that can be used to update a dependence relation |
3130 | * based on the current schedule. |
3131 | * That is, construct a map expressing that source and sink |
3132 | * are executed within the same iteration of the current schedule. |
3133 | * This map can then be intersected with the dependence relation. |
3134 | * This is not the most efficient way, but this shouldn't be a critical |
3135 | * operation. |
3136 | */ |
3137 | static __isl_give isl_map *specializer(struct isl_sched_node *src, |
3138 | struct isl_sched_node *dst) |
3139 | { |
3140 | isl_map *src_sched, *dst_sched; |
3141 | |
3142 | src_sched = node_extract_schedule(node: src); |
3143 | dst_sched = node_extract_schedule(node: dst); |
3144 | return isl_map_apply_range(map1: src_sched, map2: isl_map_reverse(map: dst_sched)); |
3145 | } |
3146 | |
3147 | /* Intersect the domains of the nested relations in domain and range |
3148 | * of "umap" with "map". |
3149 | */ |
3150 | static __isl_give isl_union_map *intersect_domains( |
3151 | __isl_take isl_union_map *umap, __isl_keep isl_map *map) |
3152 | { |
3153 | isl_union_set *uset; |
3154 | |
3155 | umap = isl_union_map_zip(umap); |
3156 | uset = isl_union_set_from_set(set: isl_map_wrap(map: isl_map_copy(map))); |
3157 | umap = isl_union_map_intersect_domain(umap, uset); |
3158 | umap = isl_union_map_zip(umap); |
3159 | return umap; |
3160 | } |
3161 | |
3162 | /* Update the dependence relation of the given edge based |
3163 | * on the current schedule. |
3164 | * If the dependence is carried completely by the current schedule, then |
3165 | * it is removed from the edge_tables. It is kept in the list of edges |
3166 | * as otherwise all edge_tables would have to be recomputed. |
3167 | * |
3168 | * If the edge is of a type that can appear multiple times |
3169 | * between the same pair of nodes, then it is added to |
3170 | * the edge table (again). This prevents the situation |
3171 | * where none of these edges is referenced from the edge table |
3172 | * because the one that was referenced turned out to be empty and |
3173 | * was therefore removed from the table. |
3174 | */ |
3175 | static isl_stat update_edge(isl_ctx *ctx, struct isl_sched_graph *graph, |
3176 | struct isl_sched_edge *edge) |
3177 | { |
3178 | int empty; |
3179 | isl_map *id; |
3180 | |
3181 | id = specializer(src: edge->src, dst: edge->dst); |
3182 | edge->map = isl_map_intersect(map1: edge->map, map2: isl_map_copy(map: id)); |
3183 | if (!edge->map) |
3184 | goto error; |
3185 | |
3186 | if (edge->tagged_condition) { |
3187 | edge->tagged_condition = |
3188 | intersect_domains(umap: edge->tagged_condition, map: id); |
3189 | if (!edge->tagged_condition) |
3190 | goto error; |
3191 | } |
3192 | if (edge->tagged_validity) { |
3193 | edge->tagged_validity = |
3194 | intersect_domains(umap: edge->tagged_validity, map: id); |
3195 | if (!edge->tagged_validity) |
3196 | goto error; |
3197 | } |
3198 | |
3199 | empty = isl_map_plain_is_empty(map: edge->map); |
3200 | if (empty < 0) |
3201 | goto error; |
3202 | if (empty) { |
3203 | if (graph_remove_edge(graph, edge) < 0) |
3204 | goto error; |
3205 | } else if (is_multi_edge_type(edge)) { |
3206 | if (graph_edge_tables_add(ctx, graph, edge) < 0) |
3207 | goto error; |
3208 | } |
3209 | |
3210 | isl_map_free(map: id); |
3211 | return isl_stat_ok; |
3212 | error: |
3213 | isl_map_free(map: id); |
3214 | return isl_stat_error; |
3215 | } |
3216 | |
3217 | /* Does the domain of "umap" intersect "uset"? |
3218 | */ |
3219 | static int domain_intersects(__isl_keep isl_union_map *umap, |
3220 | __isl_keep isl_union_set *uset) |
3221 | { |
3222 | int empty; |
3223 | |
3224 | umap = isl_union_map_copy(umap); |
3225 | umap = isl_union_map_intersect_domain(umap, uset: isl_union_set_copy(uset)); |
3226 | empty = isl_union_map_is_empty(umap); |
3227 | isl_union_map_free(umap); |
3228 | |
3229 | return empty < 0 ? -1 : !empty; |
3230 | } |
3231 | |
3232 | /* Does the range of "umap" intersect "uset"? |
3233 | */ |
3234 | static int range_intersects(__isl_keep isl_union_map *umap, |
3235 | __isl_keep isl_union_set *uset) |
3236 | { |
3237 | int empty; |
3238 | |
3239 | umap = isl_union_map_copy(umap); |
3240 | umap = isl_union_map_intersect_range(umap, uset: isl_union_set_copy(uset)); |
3241 | empty = isl_union_map_is_empty(umap); |
3242 | isl_union_map_free(umap); |
3243 | |
3244 | return empty < 0 ? -1 : !empty; |
3245 | } |
3246 | |
3247 | /* Are the condition dependences of "edge" local with respect to |
3248 | * the current schedule? |
3249 | * |
3250 | * That is, are domain and range of the condition dependences mapped |
3251 | * to the same point? |
3252 | * |
3253 | * In other words, is the condition false? |
3254 | */ |
3255 | static int is_condition_false(struct isl_sched_edge *edge) |
3256 | { |
3257 | isl_union_map *umap; |
3258 | isl_map *map, *sched, *test; |
3259 | int empty, local; |
3260 | |
3261 | empty = isl_union_map_is_empty(umap: edge->tagged_condition); |
3262 | if (empty < 0 || empty) |
3263 | return empty; |
3264 | |
3265 | umap = isl_union_map_copy(umap: edge->tagged_condition); |
3266 | umap = isl_union_map_zip(umap); |
3267 | umap = isl_union_set_unwrap(uset: isl_union_map_domain(umap)); |
3268 | map = isl_map_from_union_map(umap); |
3269 | |
3270 | sched = node_extract_schedule(node: edge->src); |
3271 | map = isl_map_apply_domain(map1: map, map2: sched); |
3272 | sched = node_extract_schedule(node: edge->dst); |
3273 | map = isl_map_apply_range(map1: map, map2: sched); |
3274 | |
3275 | test = isl_map_identity(space: isl_map_get_space(map)); |
3276 | local = isl_map_is_subset(map1: map, map2: test); |
3277 | isl_map_free(map); |
3278 | isl_map_free(map: test); |
3279 | |
3280 | return local; |
3281 | } |
3282 | |
3283 | /* For each conditional validity constraint that is adjacent |
3284 | * to a condition with domain in condition_source or range in condition_sink, |
3285 | * turn it into an unconditional validity constraint. |
3286 | */ |
3287 | static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph, |
3288 | __isl_take isl_union_set *condition_source, |
3289 | __isl_take isl_union_set *condition_sink) |
3290 | { |
3291 | int i; |
3292 | |
3293 | condition_source = isl_union_set_coalesce(uset: condition_source); |
3294 | condition_sink = isl_union_set_coalesce(uset: condition_sink); |
3295 | |
3296 | for (i = 0; i < graph->n_edge; ++i) { |
3297 | int adjacent; |
3298 | isl_union_map *validity; |
3299 | |
3300 | if (!isl_sched_edge_is_conditional_validity(edge: &graph->edge[i])) |
3301 | continue; |
3302 | if (is_validity(edge: &graph->edge[i])) |
3303 | continue; |
3304 | |
3305 | validity = graph->edge[i].tagged_validity; |
3306 | adjacent = domain_intersects(umap: validity, uset: condition_sink); |
3307 | if (adjacent >= 0 && !adjacent) |
3308 | adjacent = range_intersects(umap: validity, uset: condition_source); |
3309 | if (adjacent < 0) |
3310 | goto error; |
3311 | if (!adjacent) |
3312 | continue; |
3313 | |
3314 | set_validity(&graph->edge[i]); |
3315 | } |
3316 | |
3317 | isl_union_set_free(uset: condition_source); |
3318 | isl_union_set_free(uset: condition_sink); |
3319 | return 0; |
3320 | error: |
3321 | isl_union_set_free(uset: condition_source); |
3322 | isl_union_set_free(uset: condition_sink); |
3323 | return -1; |
3324 | } |
3325 | |
3326 | /* Update the dependence relations of all edges based on the current schedule |
3327 | * and enforce conditional validity constraints that are adjacent |
3328 | * to satisfied condition constraints. |
3329 | * |
3330 | * First check if any of the condition constraints are satisfied |
3331 | * (i.e., not local to the outer schedule) and keep track of |
3332 | * their domain and range. |
3333 | * Then update all dependence relations (which removes the non-local |
3334 | * constraints). |
3335 | * Finally, if any condition constraints turned out to be satisfied, |
3336 | * then turn all adjacent conditional validity constraints into |
3337 | * unconditional validity constraints. |
3338 | */ |
3339 | static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph) |
3340 | { |
3341 | int i; |
3342 | int any = 0; |
3343 | isl_union_set *source, *sink; |
3344 | |
3345 | source = isl_union_set_empty(space: isl_space_params_alloc(ctx, nparam: 0)); |
3346 | sink = isl_union_set_empty(space: isl_space_params_alloc(ctx, nparam: 0)); |
3347 | for (i = 0; i < graph->n_edge; ++i) { |
3348 | int local; |
3349 | isl_union_set *uset; |
3350 | isl_union_map *umap; |
3351 | |
3352 | if (!isl_sched_edge_is_condition(edge: &graph->edge[i])) |
3353 | continue; |
3354 | if (is_local(edge: &graph->edge[i])) |
3355 | continue; |
3356 | local = is_condition_false(edge: &graph->edge[i]); |
3357 | if (local < 0) |
3358 | goto error; |
3359 | if (local) |
3360 | continue; |
3361 | |
3362 | any = 1; |
3363 | |
3364 | umap = isl_union_map_copy(umap: graph->edge[i].tagged_condition); |
3365 | uset = isl_union_map_domain(umap); |
3366 | source = isl_union_set_union(uset1: source, uset2: uset); |
3367 | |
3368 | umap = isl_union_map_copy(umap: graph->edge[i].tagged_condition); |
3369 | uset = isl_union_map_range(umap); |
3370 | sink = isl_union_set_union(uset1: sink, uset2: uset); |
3371 | } |
3372 | |
3373 | for (i = 0; i < graph->n_edge; ++i) { |
3374 | if (update_edge(ctx, graph, edge: &graph->edge[i]) < 0) |
3375 | goto error; |
3376 | } |
3377 | |
3378 | if (any) |
3379 | return unconditionalize_adjacent_validity(graph, condition_source: source, condition_sink: sink); |
3380 | |
3381 | isl_union_set_free(uset: source); |
3382 | isl_union_set_free(uset: sink); |
3383 | return 0; |
3384 | error: |
3385 | isl_union_set_free(uset: source); |
3386 | isl_union_set_free(uset: sink); |
3387 | return -1; |
3388 | } |
3389 | |
3390 | static void next_band(struct isl_sched_graph *graph) |
3391 | { |
3392 | graph->band_start = graph->n_total_row; |
3393 | } |
3394 | |
3395 | /* Return the union of the universe domains of the nodes in "graph" |
3396 | * that satisfy "pred". |
3397 | */ |
3398 | static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx, |
3399 | struct isl_sched_graph *graph, |
3400 | int (*pred)(struct isl_sched_node *node, int data), int data) |
3401 | { |
3402 | int i; |
3403 | isl_set *set; |
3404 | isl_union_set *dom; |
3405 | |
3406 | for (i = 0; i < graph->n; ++i) |
3407 | if (pred(&graph->node[i], data)) |
3408 | break; |
3409 | |
3410 | if (i >= graph->n) |
3411 | isl_die(ctx, isl_error_internal, |
3412 | "empty component" , return NULL); |
3413 | |
3414 | set = isl_set_universe(space: isl_space_copy(space: graph->node[i].space)); |
3415 | dom = isl_union_set_from_set(set); |
3416 | |
3417 | for (i = i + 1; i < graph->n; ++i) { |
3418 | if (!pred(&graph->node[i], data)) |
3419 | continue; |
3420 | set = isl_set_universe(space: isl_space_copy(space: graph->node[i].space)); |
3421 | dom = isl_union_set_union(uset1: dom, uset2: isl_union_set_from_set(set)); |
3422 | } |
3423 | |
3424 | return dom; |
3425 | } |
3426 | |
3427 | /* Return a union of universe domains corresponding to the nodes |
3428 | * in the SCC with index "scc". |
3429 | */ |
3430 | __isl_give isl_union_set *(isl_ctx *ctx, |
3431 | struct isl_sched_graph *graph, int scc) |
3432 | { |
3433 | return isl_sched_graph_domain(ctx, graph, |
3434 | pred: &isl_sched_node_scc_exactly, data: scc); |
3435 | } |
3436 | |
3437 | /* Return a list of unions of universe domains, where each element |
3438 | * in the list corresponds to an SCC (or WCC) indexed by node->scc. |
3439 | */ |
3440 | __isl_give isl_union_set_list *(isl_ctx *ctx, |
3441 | struct isl_sched_graph *graph) |
3442 | { |
3443 | int i; |
3444 | isl_union_set_list *filters; |
3445 | |
3446 | filters = isl_union_set_list_alloc(ctx, n: graph->scc); |
3447 | for (i = 0; i < graph->scc; ++i) { |
3448 | isl_union_set *dom; |
3449 | |
3450 | dom = isl_sched_graph_extract_scc(ctx, graph, scc: i); |
3451 | filters = isl_union_set_list_add(list: filters, el: dom); |
3452 | } |
3453 | |
3454 | return filters; |
3455 | } |
3456 | |
3457 | /* Return a list of two unions of universe domains, one for the SCCs up |
3458 | * to and including graph->src_scc and another for the other SCCs. |
3459 | */ |
3460 | static __isl_give isl_union_set_list *(isl_ctx *ctx, |
3461 | struct isl_sched_graph *graph) |
3462 | { |
3463 | isl_union_set *dom; |
3464 | isl_union_set_list *filters; |
3465 | |
3466 | filters = isl_union_set_list_alloc(ctx, n: 2); |
3467 | dom = isl_sched_graph_domain(ctx, graph, |
3468 | pred: &node_scc_at_most, data: graph->src_scc); |
3469 | filters = isl_union_set_list_add(list: filters, el: dom); |
3470 | dom = isl_sched_graph_domain(ctx, graph, |
3471 | pred: &node_scc_at_least, data: graph->src_scc + 1); |
3472 | filters = isl_union_set_list_add(list: filters, el: dom); |
3473 | |
3474 | return filters; |
3475 | } |
3476 | |
3477 | /* Copy nodes that satisfy node_pred from the src dependence graph |
3478 | * to the dst dependence graph. |
3479 | */ |
3480 | static isl_stat copy_nodes(struct isl_sched_graph *dst, |
3481 | struct isl_sched_graph *src, |
3482 | int (*node_pred)(struct isl_sched_node *node, int data), int data) |
3483 | { |
3484 | int i; |
3485 | |
3486 | dst->n = 0; |
3487 | for (i = 0; i < src->n; ++i) { |
3488 | int j; |
3489 | |
3490 | if (!node_pred(&src->node[i], data)) |
3491 | continue; |
3492 | |
3493 | j = dst->n; |
3494 | dst->node[j].space = isl_space_copy(space: src->node[i].space); |
3495 | dst->node[j].compressed = src->node[i].compressed; |
3496 | dst->node[j].hull = isl_set_copy(set: src->node[i].hull); |
3497 | dst->node[j].compress = |
3498 | isl_multi_aff_copy(multi: src->node[i].compress); |
3499 | dst->node[j].decompress = |
3500 | isl_pw_multi_aff_copy(pma: src->node[i].decompress); |
3501 | dst->node[j].nvar = src->node[i].nvar; |
3502 | dst->node[j].nparam = src->node[i].nparam; |
3503 | dst->node[j].sched = isl_mat_copy(mat: src->node[i].sched); |
3504 | dst->node[j].sched_map = isl_map_copy(map: src->node[i].sched_map); |
3505 | dst->node[j].coincident = src->node[i].coincident; |
3506 | dst->node[j].sizes = isl_multi_val_copy(multi: src->node[i].sizes); |
3507 | dst->node[j].bounds = isl_basic_set_copy(bset: src->node[i].bounds); |
3508 | dst->node[j].max = isl_vec_copy(vec: src->node[i].max); |
3509 | dst->n++; |
3510 | |
3511 | if (!dst->node[j].space || !dst->node[j].sched) |
3512 | return isl_stat_error; |
3513 | if (dst->node[j].compressed && |
3514 | (!dst->node[j].hull || !dst->node[j].compress || |
3515 | !dst->node[j].decompress)) |
3516 | return isl_stat_error; |
3517 | } |
3518 | |
3519 | return isl_stat_ok; |
3520 | } |
3521 | |
3522 | /* Copy non-empty edges that satisfy edge_pred from the src dependence graph |
3523 | * to the dst dependence graph. |
3524 | * If the source or destination node of the edge is not in the destination |
3525 | * graph, then it must be a backward proximity edge and it should simply |
3526 | * be ignored. |
3527 | */ |
3528 | static isl_stat copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst, |
3529 | struct isl_sched_graph *src, |
3530 | int (*edge_pred)(struct isl_sched_edge *edge, int data), int data) |
3531 | { |
3532 | int i; |
3533 | |
3534 | dst->n_edge = 0; |
3535 | for (i = 0; i < src->n_edge; ++i) { |
3536 | struct isl_sched_edge *edge = &src->edge[i]; |
3537 | isl_map *map; |
3538 | isl_union_map *tagged_condition; |
3539 | isl_union_map *tagged_validity; |
3540 | struct isl_sched_node *dst_src, *dst_dst; |
3541 | |
3542 | if (!edge_pred(edge, data)) |
3543 | continue; |
3544 | |
3545 | if (isl_map_plain_is_empty(map: edge->map)) |
3546 | continue; |
3547 | |
3548 | dst_src = isl_sched_graph_find_node(ctx, graph: dst, space: edge->src->space); |
3549 | dst_dst = isl_sched_graph_find_node(ctx, graph: dst, space: edge->dst->space); |
3550 | if (!dst_src || !dst_dst) |
3551 | return isl_stat_error; |
3552 | if (!isl_sched_graph_is_node(graph: dst, node: dst_src) || |
3553 | !isl_sched_graph_is_node(graph: dst, node: dst_dst)) { |
3554 | if (is_validity(edge) || |
3555 | isl_sched_edge_is_conditional_validity(edge)) |
3556 | isl_die(ctx, isl_error_internal, |
3557 | "backward (conditional) validity edge" , |
3558 | return isl_stat_error); |
3559 | continue; |
3560 | } |
3561 | |
3562 | map = isl_map_copy(map: edge->map); |
3563 | tagged_condition = isl_union_map_copy(umap: edge->tagged_condition); |
3564 | tagged_validity = isl_union_map_copy(umap: edge->tagged_validity); |
3565 | |
3566 | dst->edge[dst->n_edge].src = dst_src; |
3567 | dst->edge[dst->n_edge].dst = dst_dst; |
3568 | dst->edge[dst->n_edge].map = map; |
3569 | dst->edge[dst->n_edge].tagged_condition = tagged_condition; |
3570 | dst->edge[dst->n_edge].tagged_validity = tagged_validity; |
3571 | dst->edge[dst->n_edge].types = edge->types; |
3572 | dst->n_edge++; |
3573 | |
3574 | if (edge->tagged_condition && !tagged_condition) |
3575 | return isl_stat_error; |
3576 | if (edge->tagged_validity && !tagged_validity) |
3577 | return isl_stat_error; |
3578 | |
3579 | if (graph_edge_tables_add(ctx, graph: dst, |
3580 | edge: &dst->edge[dst->n_edge - 1]) < 0) |
3581 | return isl_stat_error; |
3582 | } |
3583 | |
3584 | return isl_stat_ok; |
3585 | } |
3586 | |
3587 | /* Compute the maximal number of variables over all nodes. |
3588 | * This is the maximal number of linearly independent schedule |
3589 | * rows that we need to compute. |
3590 | * Just in case we end up in a part of the dependence graph |
3591 | * with only lower-dimensional domains, we make sure we will |
3592 | * compute the required amount of extra linearly independent rows. |
3593 | */ |
3594 | isl_stat isl_sched_graph_compute_maxvar(struct isl_sched_graph *graph) |
3595 | { |
3596 | int i; |
3597 | |
3598 | graph->maxvar = 0; |
3599 | for (i = 0; i < graph->n; ++i) { |
3600 | struct isl_sched_node *node = &graph->node[i]; |
3601 | int nvar; |
3602 | |
3603 | if (isl_sched_node_update_vmap(node) < 0) |
3604 | return isl_stat_error; |
3605 | nvar = node->nvar + graph->n_row - node->rank; |
3606 | if (nvar > graph->maxvar) |
3607 | graph->maxvar = nvar; |
3608 | } |
3609 | |
3610 | return isl_stat_ok; |
3611 | } |
3612 | |
3613 | /* Extract the subgraph of "graph" that consists of the nodes satisfying |
3614 | * "node_pred" and the edges satisfying "edge_pred" and store |
3615 | * the result in "sub". |
3616 | */ |
3617 | isl_stat (isl_ctx *ctx, |
3618 | struct isl_sched_graph *graph, |
3619 | int (*node_pred)(struct isl_sched_node *node, int data), |
3620 | int (*edge_pred)(struct isl_sched_edge *edge, int data), |
3621 | int data, struct isl_sched_graph *sub) |
3622 | { |
3623 | int i, n = 0, n_edge = 0; |
3624 | int t; |
3625 | |
3626 | for (i = 0; i < graph->n; ++i) |
3627 | if (node_pred(&graph->node[i], data)) |
3628 | ++n; |
3629 | for (i = 0; i < graph->n_edge; ++i) |
3630 | if (edge_pred(&graph->edge[i], data)) |
3631 | ++n_edge; |
3632 | if (graph_alloc(ctx, graph: sub, n_node: n, n_edge) < 0) |
3633 | return isl_stat_error; |
3634 | sub->root = graph->root; |
3635 | if (copy_nodes(dst: sub, src: graph, node_pred, data) < 0) |
3636 | return isl_stat_error; |
3637 | if (graph_init_table(ctx, graph: sub) < 0) |
3638 | return isl_stat_error; |
3639 | for (t = 0; t <= isl_edge_last; ++t) |
3640 | sub->max_edge[t] = graph->max_edge[t]; |
3641 | if (graph_init_edge_tables(ctx, graph: sub) < 0) |
3642 | return isl_stat_error; |
3643 | if (copy_edges(ctx, dst: sub, src: graph, edge_pred, data) < 0) |
3644 | return isl_stat_error; |
3645 | sub->n_row = graph->n_row; |
3646 | sub->max_row = graph->max_row; |
3647 | sub->n_total_row = graph->n_total_row; |
3648 | sub->band_start = graph->band_start; |
3649 | |
3650 | return isl_stat_ok; |
3651 | } |
3652 | |
3653 | static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node, |
3654 | struct isl_sched_graph *graph); |
3655 | static __isl_give isl_schedule_node *compute_schedule_wcc( |
3656 | isl_schedule_node *node, struct isl_sched_graph *graph); |
3657 | |
3658 | /* Compute a schedule for a subgraph of "graph". In particular, for |
3659 | * the graph composed of nodes that satisfy node_pred and edges that |
3660 | * that satisfy edge_pred. |
3661 | * If the subgraph is known to consist of a single component, then wcc should |
3662 | * be set and then we call compute_schedule_wcc on the constructed subgraph. |
3663 | * Otherwise, we call compute_schedule, which will check whether the subgraph |
3664 | * is connected. |
3665 | * |
3666 | * The schedule is inserted at "node" and the updated schedule node |
3667 | * is returned. |
3668 | */ |
3669 | static __isl_give isl_schedule_node *compute_sub_schedule( |
3670 | __isl_take isl_schedule_node *node, isl_ctx *ctx, |
3671 | struct isl_sched_graph *graph, |
3672 | int (*node_pred)(struct isl_sched_node *node, int data), |
3673 | int (*edge_pred)(struct isl_sched_edge *edge, int data), |
3674 | int data, int wcc) |
3675 | { |
3676 | struct isl_sched_graph split = { 0 }; |
3677 | |
3678 | if (isl_sched_graph_extract_sub_graph(ctx, graph, node_pred, edge_pred, |
3679 | data, sub: &split) < 0) |
3680 | goto error; |
3681 | |
3682 | if (wcc) |
3683 | node = compute_schedule_wcc(node, graph: &split); |
3684 | else |
3685 | node = compute_schedule(node, graph: &split); |
3686 | |
3687 | isl_sched_graph_free(ctx, graph: &split); |
3688 | return node; |
3689 | error: |
3690 | isl_sched_graph_free(ctx, graph: &split); |
3691 | return isl_schedule_node_free(node); |
3692 | } |
3693 | |
3694 | int isl_sched_edge_scc_exactly(struct isl_sched_edge *edge, int scc) |
3695 | { |
3696 | return edge->src->scc == scc && edge->dst->scc == scc; |
3697 | } |
3698 | |
3699 | static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc) |
3700 | { |
3701 | return edge->dst->scc <= scc; |
3702 | } |
3703 | |
3704 | static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc) |
3705 | { |
3706 | return edge->src->scc >= scc; |
3707 | } |
3708 | |
3709 | /* Reset the current band by dropping all its schedule rows. |
3710 | */ |
3711 | static isl_stat reset_band(struct isl_sched_graph *graph) |
3712 | { |
3713 | int i; |
3714 | int drop; |
3715 | |
3716 | drop = graph->n_total_row - graph->band_start; |
3717 | graph->n_total_row -= drop; |
3718 | graph->n_row -= drop; |
3719 | |
3720 | for (i = 0; i < graph->n; ++i) { |
3721 | struct isl_sched_node *node = &graph->node[i]; |
3722 | |
3723 | isl_map_free(map: node->sched_map); |
3724 | node->sched_map = NULL; |
3725 | |
3726 | node->sched = isl_mat_drop_rows(mat: node->sched, |
3727 | row: graph->band_start, n: drop); |
3728 | |
3729 | if (!node->sched) |
3730 | return isl_stat_error; |
3731 | } |
3732 | |
3733 | return isl_stat_ok; |
3734 | } |
3735 | |
3736 | /* Split the current graph into two parts and compute a schedule for each |
3737 | * part individually. In particular, one part consists of all SCCs up |
3738 | * to and including graph->src_scc, while the other part contains the other |
3739 | * SCCs. The split is enforced by a sequence node inserted at position "node" |
3740 | * in the schedule tree. Return the updated schedule node. |
3741 | * If either of these two parts consists of a sequence, then it is spliced |
3742 | * into the sequence containing the two parts. |
3743 | * |
3744 | * The current band is reset. It would be possible to reuse |
3745 | * the previously computed rows as the first rows in the next |
3746 | * band, but recomputing them may result in better rows as we are looking |
3747 | * at a smaller part of the dependence graph. |
3748 | */ |
3749 | static __isl_give isl_schedule_node *compute_split_schedule( |
3750 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
3751 | { |
3752 | isl_ctx *ctx; |
3753 | isl_union_set_list *filters; |
3754 | |
3755 | if (!node) |
3756 | return NULL; |
3757 | |
3758 | if (reset_band(graph) < 0) |
3759 | return isl_schedule_node_free(node); |
3760 | |
3761 | next_band(graph); |
3762 | |
3763 | ctx = isl_schedule_node_get_ctx(node); |
3764 | filters = extract_split(ctx, graph); |
3765 | node = isl_schedule_node_insert_sequence(node, filters); |
3766 | node = isl_schedule_node_grandchild(node, pos1: 1, pos2: 0); |
3767 | |
3768 | node = compute_sub_schedule(node, ctx, graph, |
3769 | node_pred: &node_scc_at_least, edge_pred: &edge_src_scc_at_least, |
3770 | data: graph->src_scc + 1, wcc: 0); |
3771 | node = isl_schedule_node_grandparent(node); |
3772 | node = isl_schedule_node_grandchild(node, pos1: 0, pos2: 0); |
3773 | node = compute_sub_schedule(node, ctx, graph, |
3774 | node_pred: &node_scc_at_most, edge_pred: &edge_dst_scc_at_most, |
3775 | data: graph->src_scc, wcc: 0); |
3776 | node = isl_schedule_node_grandparent(node); |
3777 | |
3778 | node = isl_schedule_node_sequence_splice_children(node); |
3779 | |
3780 | return node; |
3781 | } |
3782 | |
3783 | /* Insert a band node at position "node" in the schedule tree corresponding |
3784 | * to the current band in "graph". Mark the band node permutable |
3785 | * if "permutable" is set. |
3786 | * The partial schedules and the coincidence property are extracted |
3787 | * from the graph nodes. |
3788 | * Return the updated schedule node. |
3789 | */ |
3790 | static __isl_give isl_schedule_node *insert_current_band( |
3791 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
3792 | int permutable) |
3793 | { |
3794 | int i; |
3795 | int start, end, n; |
3796 | isl_multi_aff *ma; |
3797 | isl_multi_pw_aff *mpa; |
3798 | isl_multi_union_pw_aff *mupa; |
3799 | |
3800 | if (!node) |
3801 | return NULL; |
3802 | |
3803 | if (graph->n < 1) |
3804 | isl_die(isl_schedule_node_get_ctx(node), isl_error_internal, |
3805 | "graph should have at least one node" , |
3806 | return isl_schedule_node_free(node)); |
3807 | |
3808 | start = graph->band_start; |
3809 | end = graph->n_total_row; |
3810 | n = end - start; |
3811 | |
3812 | ma = isl_sched_node_extract_partial_schedule_multi_aff(node: &graph->node[0], |
3813 | first: start, n); |
3814 | mpa = isl_multi_pw_aff_from_multi_aff(ma); |
3815 | mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa); |
3816 | |
3817 | for (i = 1; i < graph->n; ++i) { |
3818 | isl_multi_union_pw_aff *mupa_i; |
3819 | |
3820 | ma = isl_sched_node_extract_partial_schedule_multi_aff( |
3821 | node: &graph->node[i], first: start, n); |
3822 | mpa = isl_multi_pw_aff_from_multi_aff(ma); |
3823 | mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa); |
3824 | mupa = isl_multi_union_pw_aff_union_add(mupa1: mupa, mupa2: mupa_i); |
3825 | } |
3826 | node = isl_schedule_node_insert_partial_schedule(node, schedule: mupa); |
3827 | |
3828 | for (i = 0; i < n; ++i) |
3829 | node = isl_schedule_node_band_member_set_coincident(node, pos: i, |
3830 | coincident: graph->node[0].coincident[start + i]); |
3831 | node = isl_schedule_node_band_set_permutable(node, permutable); |
3832 | |
3833 | return node; |
3834 | } |
3835 | |
3836 | /* Update the dependence relations based on the current schedule, |
3837 | * add the current band to "node" and then continue with the computation |
3838 | * of the next band. |
3839 | * Return the updated schedule node. |
3840 | */ |
3841 | static __isl_give isl_schedule_node *compute_next_band( |
3842 | __isl_take isl_schedule_node *node, |
3843 | struct isl_sched_graph *graph, int permutable) |
3844 | { |
3845 | isl_ctx *ctx; |
3846 | |
3847 | if (!node) |
3848 | return NULL; |
3849 | |
3850 | ctx = isl_schedule_node_get_ctx(node); |
3851 | if (update_edges(ctx, graph) < 0) |
3852 | return isl_schedule_node_free(node); |
3853 | node = insert_current_band(node, graph, permutable); |
3854 | next_band(graph); |
3855 | |
3856 | node = isl_schedule_node_child(node, pos: 0); |
3857 | node = compute_schedule(node, graph); |
3858 | node = isl_schedule_node_parent(node); |
3859 | |
3860 | return node; |
3861 | } |
3862 | |
3863 | /* Add the constraints "coef" derived from an edge from "node" to itself |
3864 | * to graph->lp in order to respect the dependences and to try and carry them. |
3865 | * "pos" is the sequence number of the edge that needs to be carried. |
3866 | * "coef" represents general constraints on coefficients (c_0, c_x) |
3867 | * of valid constraints for (y - x) with x and y instances of the node. |
3868 | * |
3869 | * The constraints added to graph->lp need to enforce |
3870 | * |
3871 | * (c_j_0 + c_j_x y) - (c_j_0 + c_j_x x) |
3872 | * = c_j_x (y - x) >= e_i |
3873 | * |
3874 | * for each (x,y) in the dependence relation of the edge. |
3875 | * That is, (-e_i, c_j_x) needs to be plugged in for (c_0, c_x), |
3876 | * taking into account that each coefficient in c_j_x is represented |
3877 | * as a pair of non-negative coefficients. |
3878 | */ |
3879 | static isl_stat add_intra_constraints(struct isl_sched_graph *graph, |
3880 | struct isl_sched_node *node, __isl_take isl_basic_set *coef, int pos) |
3881 | { |
3882 | isl_size offset; |
3883 | isl_ctx *ctx; |
3884 | isl_dim_map *dim_map; |
3885 | |
3886 | offset = coef_var_offset(coef); |
3887 | if (offset < 0) |
3888 | coef = isl_basic_set_free(bset: coef); |
3889 | if (!coef) |
3890 | return isl_stat_error; |
3891 | |
3892 | ctx = isl_basic_set_get_ctx(bset: coef); |
3893 | dim_map = intra_dim_map(ctx, graph, node, offset, s: 1); |
3894 | isl_dim_map_range(dim_map, dst_pos: 3 + pos, dst_stride: 0, src_pos: 0, src_stride: 0, n: 1, sign: -1); |
3895 | graph->lp = add_constraints_dim_map(dst: graph->lp, src: coef, dim_map); |
3896 | |
3897 | return isl_stat_ok; |
3898 | } |
3899 | |
3900 | /* Add the constraints "coef" derived from an edge from "src" to "dst" |
3901 | * to graph->lp in order to respect the dependences and to try and carry them. |
3902 | * "pos" is the sequence number of the edge that needs to be carried or |
3903 | * -1 if no attempt should be made to carry the dependences. |
3904 | * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y) |
3905 | * of valid constraints for (x, y) with x and y instances of "src" and "dst". |
3906 | * |
3907 | * The constraints added to graph->lp need to enforce |
3908 | * |
3909 | * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i |
3910 | * |
3911 | * for each (x,y) in the dependence relation of the edge or |
3912 | * |
3913 | * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0 |
3914 | * |
3915 | * if pos is -1. |
3916 | * That is, |
3917 | * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x) |
3918 | * or |
3919 | * (c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x) |
3920 | * needs to be plugged in for (c_0, c_n, c_x, c_y), |
3921 | * taking into account that each coefficient in c_j_x and c_k_x is represented |
3922 | * as a pair of non-negative coefficients. |
3923 | */ |
3924 | static isl_stat add_inter_constraints(struct isl_sched_graph *graph, |
3925 | struct isl_sched_node *src, struct isl_sched_node *dst, |
3926 | __isl_take isl_basic_set *coef, int pos) |
3927 | { |
3928 | isl_size offset; |
3929 | isl_ctx *ctx; |
3930 | isl_dim_map *dim_map; |
3931 | |
3932 | offset = coef_var_offset(coef); |
3933 | if (offset < 0) |
3934 | coef = isl_basic_set_free(bset: coef); |
3935 | if (!coef) |
3936 | return isl_stat_error; |
3937 | |
3938 | ctx = isl_basic_set_get_ctx(bset: coef); |
3939 | dim_map = inter_dim_map(ctx, graph, src, dst, offset, s: 1); |
3940 | if (pos >= 0) |
3941 | isl_dim_map_range(dim_map, dst_pos: 3 + pos, dst_stride: 0, src_pos: 0, src_stride: 0, n: 1, sign: -1); |
3942 | graph->lp = add_constraints_dim_map(dst: graph->lp, src: coef, dim_map); |
3943 | |
3944 | return isl_stat_ok; |
3945 | } |
3946 | |
3947 | /* Data structure for keeping track of the data needed |
3948 | * to exploit non-trivial lineality spaces. |
3949 | * |
3950 | * "any_non_trivial" is true if there are any non-trivial lineality spaces. |
3951 | * If "any_non_trivial" is not true, then "equivalent" and "mask" may be NULL. |
3952 | * "equivalent" connects instances to other instances on the same line(s). |
3953 | * "mask" contains the domain spaces of "equivalent". |
3954 | * Any instance set not in "mask" does not have a non-trivial lineality space. |
3955 | */ |
3956 | struct isl_exploit_lineality_data { |
3957 | isl_bool any_non_trivial; |
3958 | isl_union_map *equivalent; |
3959 | isl_union_set *mask; |
3960 | }; |
3961 | |
3962 | /* Data structure collecting information used during the construction |
3963 | * of an LP for carrying dependences. |
3964 | * |
3965 | * "intra" is a sequence of coefficient constraints for intra-node edges. |
3966 | * "inter" is a sequence of coefficient constraints for inter-node edges. |
3967 | * "lineality" contains data used to exploit non-trivial lineality spaces. |
3968 | */ |
3969 | struct isl_carry { |
3970 | isl_basic_set_list *intra; |
3971 | isl_basic_set_list *inter; |
3972 | struct isl_exploit_lineality_data lineality; |
3973 | }; |
3974 | |
3975 | /* Free all the data stored in "carry". |
3976 | */ |
3977 | static void isl_carry_clear(struct isl_carry *carry) |
3978 | { |
3979 | isl_basic_set_list_free(list: carry->intra); |
3980 | isl_basic_set_list_free(list: carry->inter); |
3981 | isl_union_map_free(umap: carry->lineality.equivalent); |
3982 | isl_union_set_free(uset: carry->lineality.mask); |
3983 | } |
3984 | |
3985 | /* Return a pointer to the node in "graph" that lives in "space". |
3986 | * If the requested node has been compressed, then "space" |
3987 | * corresponds to the compressed space. |
3988 | * The graph is assumed to have such a node. |
3989 | * Return NULL in case of error. |
3990 | * |
3991 | * First try and see if "space" is the space of an uncompressed node. |
3992 | * If so, return that node. |
3993 | * Otherwise, "space" was constructed by construct_compressed_id and |
3994 | * contains a user pointer pointing to the node in the tuple id. |
3995 | * However, this node belongs to the original dependence graph. |
3996 | * If "graph" is a subgraph of this original dependence graph, |
3997 | * then the node with the same space still needs to be looked up |
3998 | * in the current graph. |
3999 | */ |
4000 | static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx, |
4001 | struct isl_sched_graph *graph, __isl_keep isl_space *space) |
4002 | { |
4003 | isl_id *id; |
4004 | struct isl_sched_node *node; |
4005 | |
4006 | if (!space) |
4007 | return NULL; |
4008 | |
4009 | node = isl_sched_graph_find_node(ctx, graph, space); |
4010 | if (!node) |
4011 | return NULL; |
4012 | if (isl_sched_graph_is_node(graph, node)) |
4013 | return node; |
4014 | |
4015 | id = isl_space_get_tuple_id(space, type: isl_dim_set); |
4016 | node = isl_id_get_user(id); |
4017 | isl_id_free(id); |
4018 | |
4019 | if (!node) |
4020 | return NULL; |
4021 | |
4022 | if (!isl_sched_graph_is_node(graph: graph->root, node)) |
4023 | isl_die(ctx, isl_error_internal, |
4024 | "space points to invalid node" , return NULL); |
4025 | if (graph != graph->root) |
4026 | node = isl_sched_graph_find_node(ctx, graph, space: node->space); |
4027 | if (!isl_sched_graph_is_node(graph, node)) |
4028 | isl_die(ctx, isl_error_internal, |
4029 | "unable to find node" , return NULL); |
4030 | |
4031 | return node; |
4032 | } |
4033 | |
4034 | /* Internal data structure for add_all_constraints. |
4035 | * |
4036 | * "graph" is the schedule constraint graph for which an LP problem |
4037 | * is being constructed. |
4038 | * "carry_inter" indicates whether inter-node edges should be carried. |
4039 | * "pos" is the position of the next edge that needs to be carried. |
4040 | */ |
4041 | struct isl_add_all_constraints_data { |
4042 | isl_ctx *ctx; |
4043 | struct isl_sched_graph *graph; |
4044 | int carry_inter; |
4045 | int pos; |
4046 | }; |
4047 | |
4048 | /* Add the constraints "coef" derived from an edge from a node to itself |
4049 | * to data->graph->lp in order to respect the dependences and |
4050 | * to try and carry them. |
4051 | * |
4052 | * The space of "coef" is of the form |
4053 | * |
4054 | * coefficients[[c_cst] -> S[c_x]] |
4055 | * |
4056 | * with S[c_x] the (compressed) space of the node. |
4057 | * Extract the node from the space and call add_intra_constraints. |
4058 | */ |
4059 | static isl_stat lp_add_intra(__isl_take isl_basic_set *coef, void *user) |
4060 | { |
4061 | struct isl_add_all_constraints_data *data = user; |
4062 | isl_space *space; |
4063 | struct isl_sched_node *node; |
4064 | |
4065 | space = isl_basic_set_get_space(bset: coef); |
4066 | space = isl_space_range(space: isl_space_unwrap(space)); |
4067 | node = graph_find_compressed_node(ctx: data->ctx, graph: data->graph, space); |
4068 | isl_space_free(space); |
4069 | return add_intra_constraints(graph: data->graph, node, coef, pos: data->pos++); |
4070 | } |
4071 | |
4072 | /* Add the constraints "coef" derived from an edge from a node j |
4073 | * to a node k to data->graph->lp in order to respect the dependences and |
4074 | * to try and carry them (provided data->carry_inter is set). |
4075 | * |
4076 | * The space of "coef" is of the form |
4077 | * |
4078 | * coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]] |
4079 | * |
4080 | * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes. |
4081 | * Extract the nodes from the space and call add_inter_constraints. |
4082 | */ |
4083 | static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user) |
4084 | { |
4085 | struct isl_add_all_constraints_data *data = user; |
4086 | isl_space *space, *dom; |
4087 | struct isl_sched_node *src, *dst; |
4088 | int pos; |
4089 | |
4090 | space = isl_basic_set_get_space(bset: coef); |
4091 | space = isl_space_unwrap(space: isl_space_range(space: isl_space_unwrap(space))); |
4092 | dom = isl_space_domain(space: isl_space_copy(space)); |
4093 | src = graph_find_compressed_node(ctx: data->ctx, graph: data->graph, space: dom); |
4094 | isl_space_free(space: dom); |
4095 | space = isl_space_range(space); |
4096 | dst = graph_find_compressed_node(ctx: data->ctx, graph: data->graph, space); |
4097 | isl_space_free(space); |
4098 | |
4099 | pos = data->carry_inter ? data->pos++ : -1; |
4100 | return add_inter_constraints(graph: data->graph, src, dst, coef, pos); |
4101 | } |
4102 | |
4103 | /* Add constraints to graph->lp that force all (conditional) validity |
4104 | * dependences to be respected and attempt to carry them. |
4105 | * "intra" is the sequence of coefficient constraints for intra-node edges. |
4106 | * "inter" is the sequence of coefficient constraints for inter-node edges. |
4107 | * "carry_inter" indicates whether inter-node edges should be carried or |
4108 | * only respected. |
4109 | */ |
4110 | static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph, |
4111 | __isl_keep isl_basic_set_list *intra, |
4112 | __isl_keep isl_basic_set_list *inter, int carry_inter) |
4113 | { |
4114 | struct isl_add_all_constraints_data data = { ctx, graph, carry_inter }; |
4115 | |
4116 | data.pos = 0; |
4117 | if (isl_basic_set_list_foreach(list: intra, fn: &lp_add_intra, user: &data) < 0) |
4118 | return isl_stat_error; |
4119 | if (isl_basic_set_list_foreach(list: inter, fn: &lp_add_inter, user: &data) < 0) |
4120 | return isl_stat_error; |
4121 | return isl_stat_ok; |
4122 | } |
4123 | |
/* Internal data structure for count_all_constraints.
 *
 * "n_eq" accumulates the number of equality constraints.
 * "n_ineq" accumulates the number of inequality constraints.
 */
struct isl_sched_count {
	int n_eq;
	int n_ineq;
};
4131 | |
4132 | /* Add the number of equality and inequality constraints of "bset" |
4133 | * to data->n_eq and data->n_ineq. |
4134 | */ |
4135 | static isl_stat bset_update_count(__isl_take isl_basic_set *bset, void *user) |
4136 | { |
4137 | struct isl_sched_count *data = user; |
4138 | |
4139 | return update_count(bset, f: 1, n_eq: &data->n_eq, n_ineq: &data->n_ineq); |
4140 | } |
4141 | |
4142 | /* Count the number of equality and inequality constraints |
4143 | * that will be added to the carry_lp problem. |
4144 | * We count each edge exactly once. |
4145 | * "intra" is the sequence of coefficient constraints for intra-node edges. |
4146 | * "inter" is the sequence of coefficient constraints for inter-node edges. |
4147 | */ |
4148 | static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra, |
4149 | __isl_keep isl_basic_set_list *inter, int *n_eq, int *n_ineq) |
4150 | { |
4151 | struct isl_sched_count data; |
4152 | |
4153 | data.n_eq = data.n_ineq = 0; |
4154 | if (isl_basic_set_list_foreach(list: inter, fn: &bset_update_count, user: &data) < 0) |
4155 | return isl_stat_error; |
4156 | if (isl_basic_set_list_foreach(list: intra, fn: &bset_update_count, user: &data) < 0) |
4157 | return isl_stat_error; |
4158 | |
4159 | *n_eq = data.n_eq; |
4160 | *n_ineq = data.n_ineq; |
4161 | |
4162 | return isl_stat_ok; |
4163 | } |
4164 | |
4165 | /* Construct an LP problem for finding schedule coefficients |
4166 | * such that the schedule carries as many validity dependences as possible. |
4167 | * In particular, for each dependence i, we bound the dependence distance |
4168 | * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum |
4169 | * of all e_i's. Dependences with e_i = 0 in the solution are simply |
4170 | * respected, while those with e_i > 0 (in practice e_i = 1) are carried. |
4171 | * "intra" is the sequence of coefficient constraints for intra-node edges. |
4172 | * "inter" is the sequence of coefficient constraints for inter-node edges. |
4173 | * "n_edge" is the total number of edges. |
4174 | * "carry_inter" indicates whether inter-node edges should be carried or |
4175 | * only respected. That is, if "carry_inter" is not set, then |
4176 | * no e_i variables are introduced for the inter-node edges. |
4177 | * |
4178 | * All variables of the LP are non-negative. The actual coefficients |
4179 | * may be negative, so each coefficient is represented as the difference |
4180 | * of two non-negative variables. The negative part always appears |
4181 | * immediately before the positive part. |
4182 | * Other than that, the variables have the following order |
4183 | * |
4184 | * - sum of (1 - e_i) over all edges |
4185 | * - sum of all c_n coefficients |
4186 | * (unconstrained when computing non-parametric schedules) |
4187 | * - sum of positive and negative parts of all c_x coefficients |
4188 | * - for each edge |
4189 | * - e_i |
4190 | * - for each node |
4191 | * - positive and negative parts of c_i_x, in opposite order |
4192 | * - c_i_n (if parametric) |
4193 | * - c_i_0 |
4194 | * |
4195 | * The constraints are those from the (validity) edges plus three equalities |
4196 | * to express the sums and n_edge inequalities to express e_i <= 1. |
4197 | */ |
4198 | static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph, |
4199 | int n_edge, __isl_keep isl_basic_set_list *intra, |
4200 | __isl_keep isl_basic_set_list *inter, int carry_inter) |
4201 | { |
4202 | int i; |
4203 | int k; |
4204 | isl_space *space; |
4205 | unsigned total; |
4206 | int n_eq, n_ineq; |
4207 | |
4208 | total = 3 + n_edge; |
4209 | for (i = 0; i < graph->n; ++i) { |
4210 | struct isl_sched_node *node = &graph->node[graph->sorted[i]]; |
4211 | node->start = total; |
4212 | total += 1 + node->nparam + 2 * node->nvar; |
4213 | } |
4214 | |
4215 | if (count_all_constraints(intra, inter, n_eq: &n_eq, n_ineq: &n_ineq) < 0) |
4216 | return isl_stat_error; |
4217 | |
4218 | space = isl_space_set_alloc(ctx, nparam: 0, dim: total); |
4219 | isl_basic_set_free(bset: graph->lp); |
4220 | n_eq += 3; |
4221 | n_ineq += n_edge; |
4222 | graph->lp = isl_basic_set_alloc_space(space, extra: 0, n_eq, n_ineq); |
4223 | graph->lp = isl_basic_set_set_rational(bset: graph->lp); |
4224 | |
4225 | k = isl_basic_set_alloc_equality(bset: graph->lp); |
4226 | if (k < 0) |
4227 | return isl_stat_error; |
4228 | isl_seq_clr(p: graph->lp->eq[k], len: 1 + total); |
4229 | isl_int_set_si(graph->lp->eq[k][0], -n_edge); |
4230 | isl_int_set_si(graph->lp->eq[k][1], 1); |
4231 | for (i = 0; i < n_edge; ++i) |
4232 | isl_int_set_si(graph->lp->eq[k][4 + i], 1); |
4233 | |
4234 | if (add_param_sum_constraint(graph, sum_pos: 1) < 0) |
4235 | return isl_stat_error; |
4236 | if (add_var_sum_constraint(graph, sum_pos: 2) < 0) |
4237 | return isl_stat_error; |
4238 | |
4239 | for (i = 0; i < n_edge; ++i) { |
4240 | k = isl_basic_set_alloc_inequality(bset: graph->lp); |
4241 | if (k < 0) |
4242 | return isl_stat_error; |
4243 | isl_seq_clr(p: graph->lp->ineq[k], len: 1 + total); |
4244 | isl_int_set_si(graph->lp->ineq[k][4 + i], -1); |
4245 | isl_int_set_si(graph->lp->ineq[k][0], 1); |
4246 | } |
4247 | |
4248 | if (add_all_constraints(ctx, graph, intra, inter, carry_inter) < 0) |
4249 | return isl_stat_error; |
4250 | |
4251 | return isl_stat_ok; |
4252 | } |
4253 | |
/* Forward declaration, allowing compute_component_schedule
 * to be referenced before its definition.
 */
static __isl_give isl_schedule_node *compute_component_schedule(
	__isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
	int wcc);
4257 | |
4258 | /* If the schedule_split_scaled option is set and if the linear |
4259 | * parts of the scheduling rows for all nodes in the graphs have |
4260 | * a non-trivial common divisor, then remove this |
4261 | * common divisor from the linear part. |
4262 | * Otherwise, insert a band node directly and continue with |
4263 | * the construction of the schedule. |
4264 | * |
4265 | * If a non-trivial common divisor is found, then |
4266 | * the linear part is reduced and the remainder is ignored. |
4267 | * The pieces of the graph that are assigned different remainders |
4268 | * form (groups of) strongly connected components within |
4269 | * the scaled down band. If needed, they can therefore |
4270 | * be ordered along this remainder in a sequence node. |
4271 | * However, this ordering is not enforced here in order to allow |
4272 | * the scheduler to combine some of the strongly connected components. |
4273 | */ |
4274 | static __isl_give isl_schedule_node *split_scaled( |
4275 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
4276 | { |
4277 | int i; |
4278 | int row; |
4279 | isl_ctx *ctx; |
4280 | isl_int gcd, gcd_i; |
4281 | isl_size n_row; |
4282 | |
4283 | if (!node) |
4284 | return NULL; |
4285 | |
4286 | ctx = isl_schedule_node_get_ctx(node); |
4287 | if (!ctx->opt->schedule_split_scaled) |
4288 | return compute_next_band(node, graph, permutable: 0); |
4289 | if (graph->n <= 1) |
4290 | return compute_next_band(node, graph, permutable: 0); |
4291 | n_row = isl_mat_rows(mat: graph->node[0].sched); |
4292 | if (n_row < 0) |
4293 | return isl_schedule_node_free(node); |
4294 | |
4295 | isl_int_init(gcd); |
4296 | isl_int_init(gcd_i); |
4297 | |
4298 | isl_int_set_si(gcd, 0); |
4299 | |
4300 | row = n_row - 1; |
4301 | |
4302 | for (i = 0; i < graph->n; ++i) { |
4303 | struct isl_sched_node *node = &graph->node[i]; |
4304 | isl_size cols = isl_mat_cols(mat: node->sched); |
4305 | |
4306 | if (cols < 0) |
4307 | break; |
4308 | isl_seq_gcd(p: node->sched->row[row] + 1, len: cols - 1, gcd: &gcd_i); |
4309 | isl_int_gcd(gcd, gcd, gcd_i); |
4310 | } |
4311 | |
4312 | isl_int_clear(gcd_i); |
4313 | if (i < graph->n) |
4314 | goto error; |
4315 | |
4316 | if (isl_int_cmp_si(gcd, 1) <= 0) { |
4317 | isl_int_clear(gcd); |
4318 | return compute_next_band(node, graph, permutable: 0); |
4319 | } |
4320 | |
4321 | for (i = 0; i < graph->n; ++i) { |
4322 | struct isl_sched_node *node = &graph->node[i]; |
4323 | |
4324 | isl_int_fdiv_q(node->sched->row[row][0], |
4325 | node->sched->row[row][0], gcd); |
4326 | isl_int_mul(node->sched->row[row][0], |
4327 | node->sched->row[row][0], gcd); |
4328 | node->sched = isl_mat_scale_down_row(mat: node->sched, row, m: gcd); |
4329 | if (!node->sched) |
4330 | goto error; |
4331 | } |
4332 | |
4333 | isl_int_clear(gcd); |
4334 | |
4335 | return compute_next_band(node, graph, permutable: 0); |
4336 | error: |
4337 | isl_int_clear(gcd); |
4338 | return isl_schedule_node_free(node); |
4339 | } |
4340 | |
4341 | /* Is the schedule row "sol" trivial on node "node"? |
4342 | * That is, is the solution zero on the dimensions linearly independent of |
4343 | * the previously found solutions? |
4344 | * Return 1 if the solution is trivial, 0 if it is not and -1 on error. |
4345 | * |
4346 | * Each coefficient is represented as the difference between |
4347 | * two non-negative values in "sol". |
4348 | * We construct the schedule row s and check if it is linearly |
4349 | * independent of previously computed schedule rows |
4350 | * by computing T s, with T the linear combinations that are zero |
4351 | * on linearly dependent schedule rows. |
4352 | * If the result consists of all zeros, then the solution is trivial. |
4353 | */ |
4354 | static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol) |
4355 | { |
4356 | int trivial; |
4357 | isl_vec *node_sol; |
4358 | |
4359 | if (!sol) |
4360 | return -1; |
4361 | if (node->nvar == node->rank) |
4362 | return 0; |
4363 | |
4364 | node_sol = extract_var_coef(node, sol); |
4365 | node_sol = isl_mat_vec_product(mat: isl_mat_copy(mat: node->indep), vec: node_sol); |
4366 | if (!node_sol) |
4367 | return -1; |
4368 | |
4369 | trivial = isl_seq_first_non_zero(p: node_sol->el, |
4370 | len: node->nvar - node->rank) == -1; |
4371 | |
4372 | isl_vec_free(vec: node_sol); |
4373 | |
4374 | return trivial; |
4375 | } |
4376 | |
4377 | /* Is the schedule row "sol" trivial on any node where it should |
4378 | * not be trivial? |
4379 | * Return 1 if any solution is trivial, 0 if they are not and -1 on error. |
4380 | */ |
4381 | static int is_any_trivial(struct isl_sched_graph *graph, |
4382 | __isl_keep isl_vec *sol) |
4383 | { |
4384 | int i; |
4385 | |
4386 | for (i = 0; i < graph->n; ++i) { |
4387 | struct isl_sched_node *node = &graph->node[i]; |
4388 | int trivial; |
4389 | |
4390 | if (!needs_row(graph, node)) |
4391 | continue; |
4392 | trivial = is_trivial(node, sol); |
4393 | if (trivial < 0 || trivial) |
4394 | return trivial; |
4395 | } |
4396 | |
4397 | return 0; |
4398 | } |
4399 | |
4400 | /* Does the schedule represented by "sol" perform loop coalescing on "node"? |
4401 | * If so, return the position of the coalesced dimension. |
4402 | * Otherwise, return node->nvar or -1 on error. |
4403 | * |
4404 | * In particular, look for pairs of coefficients c_i and c_j such that |
4405 | * |c_j/c_i| > ceil(size_i/2), i.e., |c_j| > |c_i * ceil(size_i/2)|. |
4406 | * If any such pair is found, then return i. |
4407 | * If size_i is infinity, then no check on c_i needs to be performed. |
4408 | */ |
4409 | static int find_node_coalescing(struct isl_sched_node *node, |
4410 | __isl_keep isl_vec *sol) |
4411 | { |
4412 | int i, j; |
4413 | isl_int max; |
4414 | isl_vec *csol; |
4415 | |
4416 | if (node->nvar <= 1) |
4417 | return node->nvar; |
4418 | |
4419 | csol = extract_var_coef(node, sol); |
4420 | if (!csol) |
4421 | return -1; |
4422 | isl_int_init(max); |
4423 | for (i = 0; i < node->nvar; ++i) { |
4424 | isl_val *v; |
4425 | |
4426 | if (isl_int_is_zero(csol->el[i])) |
4427 | continue; |
4428 | v = isl_multi_val_get_val(multi: node->sizes, pos: i); |
4429 | if (!v) |
4430 | goto error; |
4431 | if (!isl_val_is_int(v)) { |
4432 | isl_val_free(v); |
4433 | continue; |
4434 | } |
4435 | v = isl_val_div_ui(v1: v, v2: 2); |
4436 | v = isl_val_ceil(v); |
4437 | if (!v) |
4438 | goto error; |
4439 | isl_int_mul(max, v->n, csol->el[i]); |
4440 | isl_val_free(v); |
4441 | |
4442 | for (j = 0; j < node->nvar; ++j) { |
4443 | if (j == i) |
4444 | continue; |
4445 | if (isl_int_abs_gt(csol->el[j], max)) |
4446 | break; |
4447 | } |
4448 | if (j < node->nvar) |
4449 | break; |
4450 | } |
4451 | |
4452 | isl_int_clear(max); |
4453 | isl_vec_free(vec: csol); |
4454 | return i; |
4455 | error: |
4456 | isl_int_clear(max); |
4457 | isl_vec_free(vec: csol); |
4458 | return -1; |
4459 | } |
4460 | |
4461 | /* Force the schedule coefficient at position "pos" of "node" to be zero |
4462 | * in "tl". |
4463 | * The coefficient is encoded as the difference between two non-negative |
4464 | * variables. Force these two variables to have the same value. |
4465 | */ |
4466 | static __isl_give isl_tab_lexmin *zero_out_node_coef( |
4467 | __isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos) |
4468 | { |
4469 | int dim; |
4470 | isl_ctx *ctx; |
4471 | isl_vec *eq; |
4472 | |
4473 | ctx = isl_space_get_ctx(space: node->space); |
4474 | dim = isl_tab_lexmin_dim(tl); |
4475 | if (dim < 0) |
4476 | return isl_tab_lexmin_free(tl); |
4477 | eq = isl_vec_alloc(ctx, size: 1 + dim); |
4478 | eq = isl_vec_clr(vec: eq); |
4479 | if (!eq) |
4480 | return isl_tab_lexmin_free(tl); |
4481 | |
4482 | pos = 1 + node_var_coef_pos(node, i: pos); |
4483 | isl_int_set_si(eq->el[pos], 1); |
4484 | isl_int_set_si(eq->el[pos + 1], -1); |
4485 | tl = isl_tab_lexmin_add_eq(tl, eq: eq->el); |
4486 | isl_vec_free(vec: eq); |
4487 | |
4488 | return tl; |
4489 | } |
4490 | |
4491 | /* Return the lexicographically smallest rational point in the basic set |
4492 | * from which "tl" was constructed, double checking that this input set |
4493 | * was not empty. |
4494 | */ |
4495 | static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl) |
4496 | { |
4497 | isl_vec *sol; |
4498 | |
4499 | sol = isl_tab_lexmin_get_solution(tl); |
4500 | if (!sol) |
4501 | return NULL; |
4502 | if (sol->size == 0) |
4503 | isl_die(isl_vec_get_ctx(sol), isl_error_internal, |
4504 | "error in schedule construction" , |
4505 | return isl_vec_free(sol)); |
4506 | return sol; |
4507 | } |
4508 | |
4509 | /* Does the solution "sol" of the LP problem constructed by setup_carry_lp |
4510 | * carry any of the "n_edge" groups of dependences? |
4511 | * The value in the first position is the sum of (1 - e_i) over all "n_edge" |
4512 | * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented |
4513 | * by the edge are carried by the solution. |
4514 | * If the sum of the (1 - e_i) is smaller than "n_edge" then at least |
4515 | * one of those is carried. |
4516 | * |
4517 | * Note that despite the fact that the problem is solved using a rational |
4518 | * solver, the solution is guaranteed to be integral. |
4519 | * Specifically, the dependence distance lower bounds e_i (and therefore |
4520 | * also their sum) are integers. See Lemma 5 of [1]. |
4521 | * |
4522 | * Any potential denominator of the sum is cleared by this function. |
4523 | * The denominator is not relevant for any of the other elements |
4524 | * in the solution. |
4525 | * |
4526 | * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling |
4527 | * Problem, Part II: Multi-Dimensional Time. |
4528 | * In Intl. Journal of Parallel Programming, 1992. |
4529 | */ |
4530 | static int carries_dependences(__isl_keep isl_vec *sol, int n_edge) |
4531 | { |
4532 | isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]); |
4533 | isl_int_set_si(sol->el[0], 1); |
4534 | return isl_int_cmp_si(sol->el[1], n_edge) < 0; |
4535 | } |
4536 | |
4537 | /* Return the lexicographically smallest rational point in "lp", |
4538 | * assuming that all variables are non-negative and performing some |
4539 | * additional sanity checks. |
4540 | * If "want_integral" is set, then compute the lexicographically smallest |
4541 | * integer point instead. |
4542 | * In particular, "lp" should not be empty by construction. |
4543 | * Double check that this is the case. |
4544 | * If dependences are not carried for any of the "n_edge" edges, |
4545 | * then return an empty vector. |
4546 | * |
4547 | * If the schedule_treat_coalescing option is set and |
4548 | * if the computed schedule performs loop coalescing on a given node, |
4549 | * i.e., if it is of the form |
4550 | * |
4551 | * c_i i + c_j j + ... |
4552 | * |
4553 | * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero |
4554 | * to cut out this solution. Repeat this process until no more loop |
4555 | * coalescing occurs or until no more dependences can be carried. |
4556 | * In the latter case, revert to the previously computed solution. |
4557 | * |
4558 | * If the caller requests an integral solution and if coalescing should |
4559 | * be treated, then perform the coalescing treatment first as |
4560 | * an integral solution computed before coalescing treatment |
4561 | * would carry the same number of edges and would therefore probably |
4562 | * also be coalescing. |
4563 | * |
4564 | * To allow the coalescing treatment to be performed first, |
4565 | * the initial solution is allowed to be rational and it is only |
4566 | * cut out (if needed) in the next iteration, if no coalescing measures |
4567 | * were taken. |
4568 | */ |
4569 | static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph, |
4570 | __isl_take isl_basic_set *lp, int n_edge, int want_integral) |
4571 | { |
4572 | int i, pos, cut; |
4573 | isl_ctx *ctx; |
4574 | isl_tab_lexmin *tl; |
4575 | isl_vec *sol = NULL, *prev; |
4576 | int treat_coalescing; |
4577 | int try_again; |
4578 | |
4579 | if (!lp) |
4580 | return NULL; |
4581 | ctx = isl_basic_set_get_ctx(bset: lp); |
4582 | treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx); |
4583 | tl = isl_tab_lexmin_from_basic_set(bset: lp); |
4584 | |
4585 | cut = 0; |
4586 | do { |
4587 | int integral; |
4588 | |
4589 | try_again = 0; |
4590 | if (cut) |
4591 | tl = isl_tab_lexmin_cut_to_integer(tl); |
4592 | prev = sol; |
4593 | sol = non_empty_solution(tl); |
4594 | if (!sol) |
4595 | goto error; |
4596 | |
4597 | integral = isl_int_is_one(sol->el[0]); |
4598 | if (!carries_dependences(sol, n_edge)) { |
4599 | if (!prev) |
4600 | prev = isl_vec_alloc(ctx, size: 0); |
4601 | isl_vec_free(vec: sol); |
4602 | sol = prev; |
4603 | break; |
4604 | } |
4605 | prev = isl_vec_free(vec: prev); |
4606 | cut = want_integral && !integral; |
4607 | if (cut) |
4608 | try_again = 1; |
4609 | if (!treat_coalescing) |
4610 | continue; |
4611 | for (i = 0; i < graph->n; ++i) { |
4612 | struct isl_sched_node *node = &graph->node[i]; |
4613 | |
4614 | pos = find_node_coalescing(node, sol); |
4615 | if (pos < 0) |
4616 | goto error; |
4617 | if (pos < node->nvar) |
4618 | break; |
4619 | } |
4620 | if (i < graph->n) { |
4621 | try_again = 1; |
4622 | tl = zero_out_node_coef(tl, node: &graph->node[i], pos); |
4623 | cut = 0; |
4624 | } |
4625 | } while (try_again); |
4626 | |
4627 | isl_tab_lexmin_free(tl); |
4628 | |
4629 | return sol; |
4630 | error: |
4631 | isl_tab_lexmin_free(tl); |
4632 | isl_vec_free(vec: prev); |
4633 | isl_vec_free(vec: sol); |
4634 | return NULL; |
4635 | } |
4636 | |
4637 | /* If "edge" is an edge from a node to itself, then add the corresponding |
4638 | * dependence relation to "umap". |
4639 | * If "node" has been compressed, then the dependence relation |
4640 | * is also compressed first. |
4641 | */ |
4642 | static __isl_give isl_union_map *add_intra(__isl_take isl_union_map *umap, |
4643 | struct isl_sched_edge *edge) |
4644 | { |
4645 | isl_map *map; |
4646 | struct isl_sched_node *node = edge->src; |
4647 | |
4648 | if (edge->src != edge->dst) |
4649 | return umap; |
4650 | |
4651 | map = isl_map_copy(map: edge->map); |
4652 | map = compress(map, src: node, dst: node); |
4653 | umap = isl_union_map_add_map(umap, map); |
4654 | return umap; |
4655 | } |
4656 | |
4657 | /* If "edge" is an edge from a node to another node, then add the corresponding |
4658 | * dependence relation to "umap". |
4659 | * If the source or destination nodes of "edge" have been compressed, |
4660 | * then the dependence relation is also compressed first. |
4661 | */ |
4662 | static __isl_give isl_union_map *add_inter(__isl_take isl_union_map *umap, |
4663 | struct isl_sched_edge *edge) |
4664 | { |
4665 | isl_map *map; |
4666 | |
4667 | if (edge->src == edge->dst) |
4668 | return umap; |
4669 | |
4670 | map = isl_map_copy(map: edge->map); |
4671 | map = compress(map, src: edge->src, dst: edge->dst); |
4672 | umap = isl_union_map_add_map(umap, map); |
4673 | return umap; |
4674 | } |
4675 | |
/* Internal data structure used by union_drop_coalescing_constraints
 * to collect bounds on all relevant statements.
 *
 * "ctx" is the isl_ctx that collect_bounds passes
 * to graph_find_compressed_node when looking up a node.
 * "graph" is the schedule constraint graph for which an LP problem
 * is being constructed.
 * "bounds" collects the bounds.
 *
 * Note that union_drop_coalescing_constraints initializes "ctx" and "graph"
 * positionally, so the order of the fields matters.
 */
struct isl_collect_bounds_data {
	isl_ctx *ctx;
	struct isl_sched_graph *graph;
	isl_union_set *bounds;
};
4688 | |
4689 | /* Add the size bounds for the node with instance deltas in "set" |
4690 | * to data->bounds. |
4691 | */ |
4692 | static isl_stat collect_bounds(__isl_take isl_set *set, void *user) |
4693 | { |
4694 | struct isl_collect_bounds_data *data = user; |
4695 | struct isl_sched_node *node; |
4696 | isl_space *space; |
4697 | isl_set *bounds; |
4698 | |
4699 | space = isl_set_get_space(set); |
4700 | isl_set_free(set); |
4701 | |
4702 | node = graph_find_compressed_node(ctx: data->ctx, graph: data->graph, space); |
4703 | isl_space_free(space); |
4704 | |
4705 | bounds = isl_set_from_basic_set(bset: get_size_bounds(node)); |
4706 | data->bounds = isl_union_set_add_set(uset: data->bounds, set: bounds); |
4707 | |
4708 | return isl_stat_ok; |
4709 | } |
4710 | |
4711 | /* Drop some constraints from "delta" that could be exploited |
4712 | * to construct loop coalescing schedules. |
4713 | * In particular, drop those constraint that bound the difference |
4714 | * to the size of the domain. |
4715 | * Do this for each set/node in "delta" separately. |
4716 | * The parameters are assumed to have been projected out by the caller. |
4717 | */ |
4718 | static __isl_give isl_union_set *union_drop_coalescing_constraints(isl_ctx *ctx, |
4719 | struct isl_sched_graph *graph, __isl_take isl_union_set *delta) |
4720 | { |
4721 | struct isl_collect_bounds_data data = { ctx, graph }; |
4722 | |
4723 | data.bounds = isl_union_set_empty(space: isl_space_params_alloc(ctx, nparam: 0)); |
4724 | if (isl_union_set_foreach_set(uset: delta, fn: &collect_bounds, user: &data) < 0) |
4725 | data.bounds = isl_union_set_free(uset: data.bounds); |
4726 | delta = isl_union_set_plain_gist(uset: delta, context: data.bounds); |
4727 | |
4728 | return delta; |
4729 | } |
4730 | |
4731 | /* Given a non-trivial lineality space "lineality", add the corresponding |
4732 | * universe set to data->mask and add a map from elements to |
4733 | * other elements along the lines in "lineality" to data->equivalent. |
4734 | * If this is the first time this function gets called |
4735 | * (data->any_non_trivial is still false), then set data->any_non_trivial and |
4736 | * initialize data->mask and data->equivalent. |
4737 | * |
4738 | * In particular, if the lineality space is defined by equality constraints |
4739 | * |
4740 | * E x = 0 |
4741 | * |
4742 | * then construct an affine mapping |
4743 | * |
4744 | * f : x -> E x |
4745 | * |
4746 | * and compute the equivalence relation of having the same image under f: |
4747 | * |
4748 | * { x -> x' : E x = E x' } |
4749 | */ |
4750 | static isl_stat add_non_trivial_lineality(__isl_take isl_basic_set *lineality, |
4751 | struct isl_exploit_lineality_data *data) |
4752 | { |
4753 | isl_mat *eq; |
4754 | isl_space *space; |
4755 | isl_set *univ; |
4756 | isl_multi_aff *ma; |
4757 | isl_multi_pw_aff *mpa; |
4758 | isl_map *map; |
4759 | isl_size n; |
4760 | |
4761 | if (isl_basic_set_check_no_locals(bset: lineality) < 0) |
4762 | goto error; |
4763 | |
4764 | space = isl_basic_set_get_space(bset: lineality); |
4765 | if (!data->any_non_trivial) { |
4766 | data->equivalent = isl_union_map_empty(space: isl_space_copy(space)); |
4767 | data->mask = isl_union_set_empty(space: isl_space_copy(space)); |
4768 | } |
4769 | data->any_non_trivial = isl_bool_true; |
4770 | |
4771 | univ = isl_set_universe(space: isl_space_copy(space)); |
4772 | data->mask = isl_union_set_add_set(uset: data->mask, set: univ); |
4773 | |
4774 | eq = isl_basic_set_extract_equalities(bset: lineality); |
4775 | n = isl_mat_rows(mat: eq); |
4776 | if (n < 0) |
4777 | space = isl_space_free(space); |
4778 | eq = isl_mat_insert_zero_rows(mat: eq, row: 0, n: 1); |
4779 | eq = isl_mat_set_element_si(mat: eq, row: 0, col: 0, v: 1); |
4780 | space = isl_space_from_domain(space); |
4781 | space = isl_space_add_dims(space, type: isl_dim_out, n); |
4782 | ma = isl_multi_aff_from_aff_mat(space, mat: eq); |
4783 | mpa = isl_multi_pw_aff_from_multi_aff(ma); |
4784 | map = isl_multi_pw_aff_eq_map(mpa1: mpa, mpa2: isl_multi_pw_aff_copy(multi: mpa)); |
4785 | data->equivalent = isl_union_map_add_map(umap: data->equivalent, map); |
4786 | |
4787 | isl_basic_set_free(bset: lineality); |
4788 | return isl_stat_ok; |
4789 | error: |
4790 | isl_basic_set_free(bset: lineality); |
4791 | return isl_stat_error; |
4792 | } |
4793 | |
4794 | /* Check if the lineality space "set" is non-trivial (i.e., is not just |
4795 | * the origin or, in other words, satisfies a number of equality constraints |
4796 | * that is smaller than the dimension of the set). |
4797 | * If so, extend data->mask and data->equivalent accordingly. |
4798 | * |
4799 | * The input should not have any local variables already, but |
4800 | * isl_set_remove_divs is called to make sure it does not. |
4801 | */ |
4802 | static isl_stat add_lineality(__isl_take isl_set *set, void *user) |
4803 | { |
4804 | struct isl_exploit_lineality_data *data = user; |
4805 | isl_basic_set *hull; |
4806 | isl_size dim; |
4807 | isl_size n_eq; |
4808 | |
4809 | set = isl_set_remove_divs(set); |
4810 | hull = isl_set_unshifted_simple_hull(set); |
4811 | dim = isl_basic_set_dim(bset: hull, type: isl_dim_set); |
4812 | n_eq = isl_basic_set_n_equality(bset: hull); |
4813 | if (dim < 0 || n_eq < 0) |
4814 | goto error; |
4815 | if (dim != n_eq) |
4816 | return add_non_trivial_lineality(lineality: hull, data); |
4817 | isl_basic_set_free(bset: hull); |
4818 | return isl_stat_ok; |
4819 | error: |
4820 | isl_basic_set_free(bset: hull); |
4821 | return isl_stat_error; |
4822 | } |
4823 | |
4824 | /* Check if the difference set on intra-node schedule constraints "intra" |
4825 | * has any non-trivial lineality space. |
4826 | * If so, then extend the difference set to a difference set |
4827 | * on equivalent elements. That is, if "intra" is |
4828 | * |
4829 | * { y - x : (x,y) \in V } |
4830 | * |
4831 | * and elements are equivalent if they have the same image under f, |
4832 | * then return |
4833 | * |
4834 | * { y' - x' : (x,y) \in V and f(x) = f(x') and f(y) = f(y') } |
4835 | * |
4836 | * or, since f is linear, |
4837 | * |
4838 | * { y' - x' : (x,y) \in V and f(y - x) = f(y' - x') } |
4839 | * |
4840 | * The results of the search for non-trivial lineality spaces is stored |
4841 | * in "data". |
4842 | */ |
4843 | static __isl_give isl_union_set *exploit_intra_lineality( |
4844 | __isl_take isl_union_set *intra, |
4845 | struct isl_exploit_lineality_data *data) |
4846 | { |
4847 | isl_union_set *lineality; |
4848 | isl_union_set *uset; |
4849 | |
4850 | data->any_non_trivial = isl_bool_false; |
4851 | lineality = isl_union_set_copy(uset: intra); |
4852 | lineality = isl_union_set_combined_lineality_space(uset: lineality); |
4853 | if (isl_union_set_foreach_set(uset: lineality, fn: &add_lineality, user: data) < 0) |
4854 | data->any_non_trivial = isl_bool_error; |
4855 | isl_union_set_free(uset: lineality); |
4856 | |
4857 | if (data->any_non_trivial < 0) |
4858 | return isl_union_set_free(uset: intra); |
4859 | if (!data->any_non_trivial) |
4860 | return intra; |
4861 | |
4862 | uset = isl_union_set_copy(uset: intra); |
4863 | intra = isl_union_set_subtract(uset1: intra, uset2: isl_union_set_copy(uset: data->mask)); |
4864 | uset = isl_union_set_apply(uset, umap: isl_union_map_copy(umap: data->equivalent)); |
4865 | intra = isl_union_set_union(uset1: intra, uset2: uset); |
4866 | |
4867 | intra = isl_union_set_remove_divs(bset: intra); |
4868 | |
4869 | return intra; |
4870 | } |
4871 | |
4872 | /* If the difference set on intra-node schedule constraints was found to have |
4873 | * any non-trivial lineality space by exploit_intra_lineality, |
4874 | * as recorded in "data", then extend the inter-node |
4875 | * schedule constraints "inter" to schedule constraints on equivalent elements. |
4876 | * That is, if "inter" is V and |
4877 | * elements are equivalent if they have the same image under f, then return |
4878 | * |
4879 | * { (x', y') : (x,y) \in V and f(x) = f(x') and f(y) = f(y') } |
4880 | */ |
4881 | static __isl_give isl_union_map *exploit_inter_lineality( |
4882 | __isl_take isl_union_map *inter, |
4883 | struct isl_exploit_lineality_data *data) |
4884 | { |
4885 | isl_union_map *umap; |
4886 | |
4887 | if (data->any_non_trivial < 0) |
4888 | return isl_union_map_free(umap: inter); |
4889 | if (!data->any_non_trivial) |
4890 | return inter; |
4891 | |
4892 | umap = isl_union_map_copy(umap: inter); |
4893 | inter = isl_union_map_subtract_range(umap: inter, |
4894 | dom: isl_union_set_copy(uset: data->mask)); |
4895 | umap = isl_union_map_apply_range(umap1: umap, |
4896 | umap2: isl_union_map_copy(umap: data->equivalent)); |
4897 | inter = isl_union_map_union(umap1: inter, umap2: umap); |
4898 | umap = isl_union_map_copy(umap: inter); |
4899 | inter = isl_union_map_subtract_domain(umap: inter, |
4900 | dom: isl_union_set_copy(uset: data->mask)); |
4901 | umap = isl_union_map_apply_range(umap1: isl_union_map_copy(umap: data->equivalent), |
4902 | umap2: umap); |
4903 | inter = isl_union_map_union(umap1: inter, umap2: umap); |
4904 | |
4905 | inter = isl_union_map_remove_divs(bmap: inter); |
4906 | |
4907 | return inter; |
4908 | } |
4909 | |
4910 | /* For each (conditional) validity edge in "graph", |
4911 | * add the corresponding dependence relation using "add" |
4912 | * to a collection of dependence relations and return the result. |
4913 | * If "coincidence" is set, then coincidence edges are considered as well. |
4914 | */ |
4915 | static __isl_give isl_union_map *collect_validity(struct isl_sched_graph *graph, |
4916 | __isl_give isl_union_map *(*add)(__isl_take isl_union_map *umap, |
4917 | struct isl_sched_edge *edge), int coincidence) |
4918 | { |
4919 | int i; |
4920 | isl_space *space; |
4921 | isl_union_map *umap; |
4922 | |
4923 | space = isl_space_copy(space: graph->node[0].space); |
4924 | umap = isl_union_map_empty(space); |
4925 | |
4926 | for (i = 0; i < graph->n_edge; ++i) { |
4927 | struct isl_sched_edge *edge = &graph->edge[i]; |
4928 | |
4929 | if (!is_any_validity(edge) && |
4930 | (!coincidence || !is_coincidence(edge))) |
4931 | continue; |
4932 | |
4933 | umap = add(umap, edge); |
4934 | } |
4935 | |
4936 | return umap; |
4937 | } |
4938 | |
4939 | /* For each dependence relation on a (conditional) validity edge |
4940 | * from a node to itself, |
4941 | * construct the set of coefficients of valid constraints for elements |
4942 | * in that dependence relation and collect the results. |
4943 | * If "coincidence" is set, then coincidence edges are considered as well. |
4944 | * |
4945 | * In particular, for each dependence relation R, constraints |
4946 | * on coefficients (c_0, c_x) are constructed such that |
4947 | * |
4948 | * c_0 + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R } |
4949 | * |
4950 | * If the schedule_treat_coalescing option is set, then some constraints |
4951 | * that could be exploited to construct coalescing schedules |
4952 | * are removed before the dual is computed, but after the parameters |
4953 | * have been projected out. |
4954 | * The entire computation is essentially the same as that performed |
4955 | * by intra_coefficients, except that it operates on multiple |
4956 | * edges together and that the parameters are always projected out. |
4957 | * |
4958 | * Additionally, exploit any non-trivial lineality space |
4959 | * in the difference set after removing coalescing constraints and |
4960 | * store the results of the non-trivial lineality space detection in "data". |
4961 | * The procedure is currently run unconditionally, but it is unlikely |
4962 | * to find any non-trivial lineality spaces if no coalescing constraints |
4963 | * have been removed. |
4964 | * |
4965 | * Note that if a dependence relation is a union of basic maps, |
4966 | * then each basic map needs to be treated individually as it may only |
4967 | * be possible to carry the dependences expressed by some of those |
4968 | * basic maps and not all of them. |
4969 | * The collected validity constraints are therefore not coalesced and |
4970 | * it is assumed that they are not coalesced automatically. |
4971 | * Duplicate basic maps can be removed, however. |
4972 | * In particular, if the same basic map appears as a disjunct |
4973 | * in multiple edges, then it only needs to be carried once. |
4974 | */ |
4975 | static __isl_give isl_basic_set_list *collect_intra_validity(isl_ctx *ctx, |
4976 | struct isl_sched_graph *graph, int coincidence, |
4977 | struct isl_exploit_lineality_data *data) |
4978 | { |
4979 | isl_union_map *intra; |
4980 | isl_union_set *delta; |
4981 | isl_basic_set_list *list; |
4982 | |
4983 | intra = collect_validity(graph, add: &add_intra, coincidence); |
4984 | delta = isl_union_map_deltas(umap: intra); |
4985 | delta = isl_union_set_project_out_all_params(uset: delta); |
4986 | delta = isl_union_set_remove_divs(bset: delta); |
4987 | if (isl_options_get_schedule_treat_coalescing(ctx)) |
4988 | delta = union_drop_coalescing_constraints(ctx, graph, delta); |
4989 | delta = exploit_intra_lineality(intra: delta, data); |
4990 | list = isl_union_set_get_basic_set_list(uset: delta); |
4991 | isl_union_set_free(uset: delta); |
4992 | |
4993 | return isl_basic_set_list_coefficients(list); |
4994 | } |
4995 | |
4996 | /* For each dependence relation on a (conditional) validity edge |
4997 | * from a node to some other node, |
4998 | * construct the set of coefficients of valid constraints for elements |
4999 | * in that dependence relation and collect the results. |
5000 | * If "coincidence" is set, then coincidence edges are considered as well. |
5001 | * |
5002 | * In particular, for each dependence relation R, constraints |
5003 | * on coefficients (c_0, c_n, c_x, c_y) are constructed such that |
5004 | * |
5005 | * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R |
5006 | * |
5007 | * This computation is essentially the same as that performed |
5008 | * by inter_coefficients, except that it operates on multiple |
5009 | * edges together. |
5010 | * |
5011 | * Additionally, exploit any non-trivial lineality space |
5012 | * that may have been discovered by collect_intra_validity |
5013 | * (as stored in "data"). |
5014 | * |
5015 | * Note that if a dependence relation is a union of basic maps, |
5016 | * then each basic map needs to be treated individually as it may only |
5017 | * be possible to carry the dependences expressed by some of those |
5018 | * basic maps and not all of them. |
5019 | * The collected validity constraints are therefore not coalesced and |
5020 | * it is assumed that they are not coalesced automatically. |
5021 | * Duplicate basic maps can be removed, however. |
5022 | * In particular, if the same basic map appears as a disjunct |
5023 | * in multiple edges, then it only needs to be carried once. |
5024 | */ |
5025 | static __isl_give isl_basic_set_list *collect_inter_validity( |
5026 | struct isl_sched_graph *graph, int coincidence, |
5027 | struct isl_exploit_lineality_data *data) |
5028 | { |
5029 | isl_union_map *inter; |
5030 | isl_union_set *wrap; |
5031 | isl_basic_set_list *list; |
5032 | |
5033 | inter = collect_validity(graph, add: &add_inter, coincidence); |
5034 | inter = exploit_inter_lineality(inter, data); |
5035 | inter = isl_union_map_remove_divs(bmap: inter); |
5036 | wrap = isl_union_map_wrap(umap: inter); |
5037 | list = isl_union_set_get_basic_set_list(uset: wrap); |
5038 | isl_union_set_free(uset: wrap); |
5039 | return isl_basic_set_list_coefficients(list); |
5040 | } |
5041 | |
5042 | /* Construct an LP problem for finding schedule coefficients |
5043 | * such that the schedule carries as many of the "n_edge" groups of |
5044 | * dependences as possible based on the corresponding coefficient |
5045 | * constraints and return the lexicographically smallest non-trivial solution. |
5046 | * "intra" is the sequence of coefficient constraints for intra-node edges. |
5047 | * "inter" is the sequence of coefficient constraints for inter-node edges. |
5048 | * If "want_integral" is set, then compute an integral solution |
5049 | * for the coefficients rather than using the numerators |
5050 | * of a rational solution. |
5051 | * "carry_inter" indicates whether inter-node edges should be carried or |
5052 | * only respected. |
5053 | * |
5054 | * If none of the "n_edge" groups can be carried |
5055 | * then return an empty vector. |
5056 | */ |
5057 | static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx, |
5058 | struct isl_sched_graph *graph, int n_edge, |
5059 | __isl_keep isl_basic_set_list *intra, |
5060 | __isl_keep isl_basic_set_list *inter, int want_integral, |
5061 | int carry_inter) |
5062 | { |
5063 | isl_basic_set *lp; |
5064 | |
5065 | if (setup_carry_lp(ctx, graph, n_edge, intra, inter, carry_inter) < 0) |
5066 | return NULL; |
5067 | |
5068 | lp = isl_basic_set_copy(bset: graph->lp); |
5069 | return non_neg_lexmin(graph, lp, n_edge, want_integral); |
5070 | } |
5071 | |
5072 | /* Construct an LP problem for finding schedule coefficients |
5073 | * such that the schedule carries as many of the validity dependences |
5074 | * as possible and |
5075 | * return the lexicographically smallest non-trivial solution. |
5076 | * If "fallback" is set, then the carrying is performed as a fallback |
5077 | * for the Pluto-like scheduler. |
5078 | * If "coincidence" is set, then try and carry coincidence edges as well. |
5079 | * |
5080 | * The variable "n_edge" stores the number of groups that should be carried. |
5081 | * If none of the "n_edge" groups can be carried |
5082 | * then return an empty vector. |
5083 | * If, moreover, "n_edge" is zero, then the LP problem does not even |
5084 | * need to be constructed. |
5085 | * |
5086 | * If a fallback solution is being computed, then compute an integral solution |
5087 | * for the coefficients rather than using the numerators |
5088 | * of a rational solution. |
5089 | * |
5090 | * If a fallback solution is being computed, if there are any intra-node |
5091 | * dependences, and if requested by the user, then first try |
5092 | * to only carry those intra-node dependences. |
5093 | * If this fails to carry any dependences, then try again |
5094 | * with the inter-node dependences included. |
5095 | */ |
5096 | static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx, |
5097 | struct isl_sched_graph *graph, int fallback, int coincidence) |
5098 | { |
5099 | isl_size n_intra, n_inter; |
5100 | int n_edge; |
5101 | struct isl_carry carry = { 0 }; |
5102 | isl_vec *sol; |
5103 | |
5104 | carry.intra = collect_intra_validity(ctx, graph, coincidence, |
5105 | data: &carry.lineality); |
5106 | carry.inter = collect_inter_validity(graph, coincidence, |
5107 | data: &carry.lineality); |
5108 | n_intra = isl_basic_set_list_n_basic_set(list: carry.intra); |
5109 | n_inter = isl_basic_set_list_n_basic_set(list: carry.inter); |
5110 | if (n_intra < 0 || n_inter < 0) |
5111 | goto error; |
5112 | |
5113 | if (fallback && n_intra > 0 && |
5114 | isl_options_get_schedule_carry_self_first(ctx)) { |
5115 | sol = compute_carrying_sol_coef(ctx, graph, n_edge: n_intra, |
5116 | intra: carry.intra, inter: carry.inter, want_integral: fallback, carry_inter: 0); |
5117 | if (!sol || sol->size != 0 || n_inter == 0) { |
5118 | isl_carry_clear(carry: &carry); |
5119 | return sol; |
5120 | } |
5121 | isl_vec_free(vec: sol); |
5122 | } |
5123 | |
5124 | n_edge = n_intra + n_inter; |
5125 | if (n_edge == 0) { |
5126 | isl_carry_clear(carry: &carry); |
5127 | return isl_vec_alloc(ctx, size: 0); |
5128 | } |
5129 | |
5130 | sol = compute_carrying_sol_coef(ctx, graph, n_edge, |
5131 | intra: carry.intra, inter: carry.inter, want_integral: fallback, carry_inter: 1); |
5132 | isl_carry_clear(carry: &carry); |
5133 | return sol; |
5134 | error: |
5135 | isl_carry_clear(carry: &carry); |
5136 | return NULL; |
5137 | } |
5138 | |
5139 | /* Construct a schedule row for each node such that as many validity dependences |
5140 | * as possible are carried and then continue with the next band. |
5141 | * If "fallback" is set, then the carrying is performed as a fallback |
5142 | * for the Pluto-like scheduler. |
5143 | * If "coincidence" is set, then try and carry coincidence edges as well. |
5144 | * |
5145 | * If there are no validity dependences, then no dependence can be carried and |
5146 | * the procedure is guaranteed to fail. If there is more than one component, |
5147 | * then try computing a schedule on each component separately |
5148 | * to prevent or at least postpone this failure. |
5149 | * |
5150 | * If a schedule row is computed, then check that dependences are carried |
5151 | * for at least one of the edges. |
5152 | * |
5153 | * If the computed schedule row turns out to be trivial on one or |
5154 | * more nodes where it should not be trivial, then we throw it away |
5155 | * and try again on each component separately. |
5156 | * |
5157 | * If there is only one component, then we accept the schedule row anyway, |
5158 | * but we do not consider it as a complete row and therefore do not |
5159 | * increment graph->n_row. Note that the ranks of the nodes that |
5160 | * do get a non-trivial schedule part will get updated regardless and |
5161 | * graph->maxvar is computed based on these ranks. The test for |
5162 | * whether more schedule rows are required in compute_schedule_wcc |
5163 | * is therefore not affected. |
5164 | * |
5165 | * Insert a band corresponding to the schedule row at position "node" |
5166 | * of the schedule tree and continue with the construction of the schedule. |
5167 | * This insertion and the continued construction is performed by split_scaled |
5168 | * after optionally checking for non-trivial common divisors. |
5169 | */ |
5170 | static __isl_give isl_schedule_node *carry(__isl_take isl_schedule_node *node, |
5171 | struct isl_sched_graph *graph, int fallback, int coincidence) |
5172 | { |
5173 | int trivial; |
5174 | isl_ctx *ctx; |
5175 | isl_vec *sol; |
5176 | |
5177 | if (!node) |
5178 | return NULL; |
5179 | |
5180 | ctx = isl_schedule_node_get_ctx(node); |
5181 | sol = compute_carrying_sol(ctx, graph, fallback, coincidence); |
5182 | if (!sol) |
5183 | return isl_schedule_node_free(node); |
5184 | if (sol->size == 0) { |
5185 | isl_vec_free(vec: sol); |
5186 | if (graph->scc > 1) |
5187 | return compute_component_schedule(node, graph, wcc: 1); |
5188 | isl_die(ctx, isl_error_unknown, "unable to carry dependences" , |
5189 | return isl_schedule_node_free(node)); |
5190 | } |
5191 | |
5192 | trivial = is_any_trivial(graph, sol); |
5193 | if (trivial < 0) { |
5194 | sol = isl_vec_free(vec: sol); |
5195 | } else if (trivial && graph->scc > 1) { |
5196 | isl_vec_free(vec: sol); |
5197 | return compute_component_schedule(node, graph, wcc: 1); |
5198 | } |
5199 | |
5200 | if (update_schedule(graph, sol, coincident: 0) < 0) |
5201 | return isl_schedule_node_free(node); |
5202 | if (trivial) |
5203 | graph->n_row--; |
5204 | |
5205 | return split_scaled(node, graph); |
5206 | } |
5207 | |
5208 | /* Construct a schedule row for each node such that as many validity dependences |
5209 | * as possible are carried and then continue with the next band. |
5210 | * Do so as a fallback for the Pluto-like scheduler. |
5211 | * If "coincidence" is set, then try and carry coincidence edges as well. |
5212 | */ |
5213 | static __isl_give isl_schedule_node *carry_fallback( |
5214 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
5215 | int coincidence) |
5216 | { |
5217 | return carry(node, graph, fallback: 1, coincidence); |
5218 | } |
5219 | |
5220 | /* Construct a schedule row for each node such that as many validity dependences |
5221 | * as possible are carried and then continue with the next band. |
5222 | * Do so for the case where the Feautrier scheduler was selected |
5223 | * by the user. |
5224 | */ |
5225 | static __isl_give isl_schedule_node *carry_feautrier( |
5226 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
5227 | { |
5228 | return carry(node, graph, fallback: 0, coincidence: 0); |
5229 | } |
5230 | |
5231 | /* Construct a schedule row for each node such that as many validity dependences |
5232 | * as possible are carried and then continue with the next band. |
5233 | * Do so as a fallback for the Pluto-like scheduler. |
5234 | */ |
5235 | static __isl_give isl_schedule_node *carry_dependences( |
5236 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
5237 | { |
5238 | return carry_fallback(node, graph, coincidence: 0); |
5239 | } |
5240 | |
5241 | /* Construct a schedule row for each node such that as many validity or |
5242 | * coincidence dependences as possible are carried and |
5243 | * then continue with the next band. |
5244 | * Do so as a fallback for the Pluto-like scheduler. |
5245 | */ |
5246 | static __isl_give isl_schedule_node *carry_coincidence( |
5247 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
5248 | { |
5249 | return carry_fallback(node, graph, coincidence: 1); |
5250 | } |
5251 | |
5252 | /* Topologically sort statements mapped to the same schedule iteration |
5253 | * and add insert a sequence node in front of "node" |
5254 | * corresponding to this order. |
5255 | * If "initialized" is set, then it may be assumed that |
5256 | * isl_sched_graph_compute_maxvar |
5257 | * has been called on the current band. Otherwise, call |
5258 | * isl_sched_graph_compute_maxvar if and before carry_dependences gets called. |
5259 | * |
5260 | * If it turns out to be impossible to sort the statements apart, |
5261 | * because different dependences impose different orderings |
5262 | * on the statements, then we extend the schedule such that |
5263 | * it carries at least one more dependence. |
5264 | */ |
5265 | static __isl_give isl_schedule_node *sort_statements( |
5266 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
5267 | int initialized) |
5268 | { |
5269 | isl_ctx *ctx; |
5270 | isl_union_set_list *filters; |
5271 | |
5272 | if (!node) |
5273 | return NULL; |
5274 | |
5275 | ctx = isl_schedule_node_get_ctx(node); |
5276 | if (graph->n < 1) |
5277 | isl_die(ctx, isl_error_internal, |
5278 | "graph should have at least one node" , |
5279 | return isl_schedule_node_free(node)); |
5280 | |
5281 | if (graph->n == 1) |
5282 | return node; |
5283 | |
5284 | if (update_edges(ctx, graph) < 0) |
5285 | return isl_schedule_node_free(node); |
5286 | |
5287 | if (graph->n_edge == 0) |
5288 | return node; |
5289 | |
5290 | if (detect_sccs(ctx, graph) < 0) |
5291 | return isl_schedule_node_free(node); |
5292 | |
5293 | next_band(graph); |
5294 | if (graph->scc < graph->n) { |
5295 | if (!initialized && isl_sched_graph_compute_maxvar(graph) < 0) |
5296 | return isl_schedule_node_free(node); |
5297 | return carry_dependences(node, graph); |
5298 | } |
5299 | |
5300 | filters = isl_sched_graph_extract_sccs(ctx, graph); |
5301 | node = isl_schedule_node_insert_sequence(node, filters); |
5302 | |
5303 | return node; |
5304 | } |
5305 | |
5306 | /* Are there any (non-empty) (conditional) validity edges in the graph? |
5307 | */ |
5308 | static int has_validity_edges(struct isl_sched_graph *graph) |
5309 | { |
5310 | int i; |
5311 | |
5312 | for (i = 0; i < graph->n_edge; ++i) { |
5313 | int empty; |
5314 | |
5315 | empty = isl_map_plain_is_empty(map: graph->edge[i].map); |
5316 | if (empty < 0) |
5317 | return -1; |
5318 | if (empty) |
5319 | continue; |
5320 | if (is_any_validity(edge: &graph->edge[i])) |
5321 | return 1; |
5322 | } |
5323 | |
5324 | return 0; |
5325 | } |
5326 | |
5327 | /* Should we apply a Feautrier step? |
5328 | * That is, did the user request the Feautrier algorithm and are |
5329 | * there any validity dependences (left)? |
5330 | */ |
5331 | static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph) |
5332 | { |
5333 | if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER) |
5334 | return 0; |
5335 | |
5336 | return has_validity_edges(graph); |
5337 | } |
5338 | |
5339 | /* Compute a schedule for a connected dependence graph using Feautrier's |
5340 | * multi-dimensional scheduling algorithm and return the updated schedule node. |
5341 | * |
5342 | * The original algorithm is described in [1]. |
5343 | * The main idea is to minimize the number of scheduling dimensions, by |
5344 | * trying to satisfy as many dependences as possible per scheduling dimension. |
5345 | * |
5346 | * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling |
5347 | * Problem, Part II: Multi-Dimensional Time. |
5348 | * In Intl. Journal of Parallel Programming, 1992. |
5349 | */ |
5350 | static __isl_give isl_schedule_node *compute_schedule_wcc_feautrier( |
5351 | isl_schedule_node *node, struct isl_sched_graph *graph) |
5352 | { |
5353 | return carry_feautrier(node, graph); |
5354 | } |
5355 | |
5356 | /* Turn off the "local" bit on all (condition) edges. |
5357 | */ |
5358 | static void clear_local_edges(struct isl_sched_graph *graph) |
5359 | { |
5360 | int i; |
5361 | |
5362 | for (i = 0; i < graph->n_edge; ++i) |
5363 | if (isl_sched_edge_is_condition(edge: &graph->edge[i])) |
5364 | clear_local(edge: &graph->edge[i]); |
5365 | } |
5366 | |
5367 | /* Does "graph" have both condition and conditional validity edges? |
5368 | */ |
5369 | static int need_condition_check(struct isl_sched_graph *graph) |
5370 | { |
5371 | int i; |
5372 | int any_condition = 0; |
5373 | int any_conditional_validity = 0; |
5374 | |
5375 | for (i = 0; i < graph->n_edge; ++i) { |
5376 | if (isl_sched_edge_is_condition(edge: &graph->edge[i])) |
5377 | any_condition = 1; |
5378 | if (isl_sched_edge_is_conditional_validity(edge: &graph->edge[i])) |
5379 | any_conditional_validity = 1; |
5380 | } |
5381 | |
5382 | return any_condition && any_conditional_validity; |
5383 | } |
5384 | |
5385 | /* Does "graph" contain any coincidence edge? |
5386 | */ |
5387 | static int has_any_coincidence(struct isl_sched_graph *graph) |
5388 | { |
5389 | int i; |
5390 | |
5391 | for (i = 0; i < graph->n_edge; ++i) |
5392 | if (is_coincidence(edge: &graph->edge[i])) |
5393 | return 1; |
5394 | |
5395 | return 0; |
5396 | } |
5397 | |
5398 | /* Extract the final schedule row as a map with the iteration domain |
5399 | * of "node" as domain. |
5400 | */ |
5401 | static __isl_give isl_map *final_row(struct isl_sched_node *node) |
5402 | { |
5403 | isl_multi_aff *ma; |
5404 | isl_size n_row; |
5405 | |
5406 | n_row = isl_mat_rows(mat: node->sched); |
5407 | if (n_row < 0) |
5408 | return NULL; |
5409 | ma = isl_sched_node_extract_partial_schedule_multi_aff(node, |
5410 | first: n_row - 1, n: 1); |
5411 | return isl_map_from_multi_aff(maff: ma); |
5412 | } |
5413 | |
5414 | /* Is the conditional validity dependence in the edge with index "edge_index" |
5415 | * violated by the latest (i.e., final) row of the schedule? |
5416 | * That is, is i scheduled after j |
5417 | * for any conditional validity dependence i -> j? |
5418 | */ |
5419 | static int is_violated(struct isl_sched_graph *graph, int edge_index) |
5420 | { |
5421 | isl_map *src_sched, *dst_sched, *map; |
5422 | struct isl_sched_edge *edge = &graph->edge[edge_index]; |
5423 | int empty; |
5424 | |
5425 | src_sched = final_row(node: edge->src); |
5426 | dst_sched = final_row(node: edge->dst); |
5427 | map = isl_map_copy(map: edge->map); |
5428 | map = isl_map_apply_domain(map1: map, map2: src_sched); |
5429 | map = isl_map_apply_range(map1: map, map2: dst_sched); |
5430 | map = isl_map_order_gt(map, type1: isl_dim_in, pos1: 0, type2: isl_dim_out, pos2: 0); |
5431 | empty = isl_map_is_empty(map); |
5432 | isl_map_free(map); |
5433 | |
5434 | if (empty < 0) |
5435 | return -1; |
5436 | |
5437 | return !empty; |
5438 | } |
5439 | |
5440 | /* Does "graph" have any satisfied condition edges that |
5441 | * are adjacent to the conditional validity constraint with |
5442 | * domain "conditional_source" and range "conditional_sink"? |
5443 | * |
5444 | * A satisfied condition is one that is not local. |
5445 | * If a condition was forced to be local already (i.e., marked as local) |
5446 | * then there is no need to check if it is in fact local. |
5447 | * |
5448 | * Additionally, mark all adjacent condition edges found as local. |
5449 | */ |
5450 | static int has_adjacent_true_conditions(struct isl_sched_graph *graph, |
5451 | __isl_keep isl_union_set *conditional_source, |
5452 | __isl_keep isl_union_set *conditional_sink) |
5453 | { |
5454 | int i; |
5455 | int any = 0; |
5456 | |
5457 | for (i = 0; i < graph->n_edge; ++i) { |
5458 | int adjacent, local; |
5459 | isl_union_map *condition; |
5460 | |
5461 | if (!isl_sched_edge_is_condition(edge: &graph->edge[i])) |
5462 | continue; |
5463 | if (is_local(edge: &graph->edge[i])) |
5464 | continue; |
5465 | |
5466 | condition = graph->edge[i].tagged_condition; |
5467 | adjacent = domain_intersects(umap: condition, uset: conditional_sink); |
5468 | if (adjacent >= 0 && !adjacent) |
5469 | adjacent = range_intersects(umap: condition, |
5470 | uset: conditional_source); |
5471 | if (adjacent < 0) |
5472 | return -1; |
5473 | if (!adjacent) |
5474 | continue; |
5475 | |
5476 | set_local(&graph->edge[i]); |
5477 | |
5478 | local = is_condition_false(edge: &graph->edge[i]); |
5479 | if (local < 0) |
5480 | return -1; |
5481 | if (!local) |
5482 | any = 1; |
5483 | } |
5484 | |
5485 | return any; |
5486 | } |
5487 | |
5488 | /* Are there any violated conditional validity dependences with |
5489 | * adjacent condition dependences that are not local with respect |
5490 | * to the current schedule? |
5491 | * That is, is the conditional validity constraint violated? |
5492 | * |
5493 | * Additionally, mark all those adjacent condition dependences as local. |
5494 | * We also mark those adjacent condition dependences that were not marked |
5495 | * as local before, but just happened to be local already. This ensures |
5496 | * that they remain local if the schedule is recomputed. |
5497 | * |
5498 | * We first collect domain and range of all violated conditional validity |
5499 | * dependences and then check if there are any adjacent non-local |
5500 | * condition dependences. |
5501 | */ |
5502 | static int has_violated_conditional_constraint(isl_ctx *ctx, |
5503 | struct isl_sched_graph *graph) |
5504 | { |
5505 | int i; |
5506 | int any = 0; |
5507 | isl_union_set *source, *sink; |
5508 | |
5509 | source = isl_union_set_empty(space: isl_space_params_alloc(ctx, nparam: 0)); |
5510 | sink = isl_union_set_empty(space: isl_space_params_alloc(ctx, nparam: 0)); |
5511 | for (i = 0; i < graph->n_edge; ++i) { |
5512 | isl_union_set *uset; |
5513 | isl_union_map *umap; |
5514 | int violated; |
5515 | |
5516 | if (!isl_sched_edge_is_conditional_validity(edge: &graph->edge[i])) |
5517 | continue; |
5518 | |
5519 | violated = is_violated(graph, edge_index: i); |
5520 | if (violated < 0) |
5521 | goto error; |
5522 | if (!violated) |
5523 | continue; |
5524 | |
5525 | any = 1; |
5526 | |
5527 | umap = isl_union_map_copy(umap: graph->edge[i].tagged_validity); |
5528 | uset = isl_union_map_domain(umap); |
5529 | source = isl_union_set_union(uset1: source, uset2: uset); |
5530 | source = isl_union_set_coalesce(uset: source); |
5531 | |
5532 | umap = isl_union_map_copy(umap: graph->edge[i].tagged_validity); |
5533 | uset = isl_union_map_range(umap); |
5534 | sink = isl_union_set_union(uset1: sink, uset2: uset); |
5535 | sink = isl_union_set_coalesce(uset: sink); |
5536 | } |
5537 | |
5538 | if (any) |
5539 | any = has_adjacent_true_conditions(graph, conditional_source: source, conditional_sink: sink); |
5540 | |
5541 | isl_union_set_free(uset: source); |
5542 | isl_union_set_free(uset: sink); |
5543 | return any; |
5544 | error: |
5545 | isl_union_set_free(uset: source); |
5546 | isl_union_set_free(uset: sink); |
5547 | return -1; |
5548 | } |
5549 | |
5550 | /* Examine the current band (the rows between graph->band_start and |
5551 | * graph->n_total_row), deciding whether to drop it or add it to "node" |
5552 | * and then continue with the computation of the next band, if any. |
5553 | * If "initialized" is set, then it may be assumed that |
5554 | * isl_sched_graph_compute_maxvar |
5555 | * has been called on the current band. Otherwise, call |
5556 | * isl_sched_graph_compute_maxvar if and before carry_dependences gets called. |
5557 | * |
5558 | * The caller keeps looking for a new row as long as |
5559 | * graph->n_row < graph->maxvar. If the latest attempt to find |
5560 | * such a row failed (i.e., we still have graph->n_row < graph->maxvar), |
5561 | * then we either |
5562 | * - split between SCCs and start over (assuming we found an interesting |
5563 | * pair of SCCs between which to split) |
5564 | * - continue with the next band (assuming the current band has at least |
5565 | * one row) |
5566 | * - if there is more than one SCC left, then split along all SCCs |
5567 | * - if outer coincidence needs to be enforced, then try to carry as many |
5568 | * validity or coincidence dependences as possible and |
5569 | * continue with the next band |
5570 | * - try to carry as many validity dependences as possible and |
5571 | * continue with the next band |
5572 | * In each case, we first insert a band node in the schedule tree |
5573 | * if any rows have been computed. |
5574 | * |
5575 | * If the caller managed to complete the schedule and the current band |
5576 | * is empty, then finish off by topologically |
5577 | * sorting the statements based on the remaining dependences. |
5578 | * If, on the other hand, the current band has at least one row, |
5579 | * then continue with the next band. Note that this next band |
5580 | * will necessarily be empty, but the graph may still be split up |
5581 | * into weakly connected components before arriving back here. |
5582 | */ |
5583 | __isl_give isl_schedule_node *isl_schedule_node_compute_finish_band( |
5584 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
5585 | int initialized) |
5586 | { |
5587 | int empty; |
5588 | |
5589 | if (!node) |
5590 | return NULL; |
5591 | |
5592 | empty = graph->n_total_row == graph->band_start; |
5593 | if (graph->n_row < graph->maxvar) { |
5594 | isl_ctx *ctx; |
5595 | |
5596 | ctx = isl_schedule_node_get_ctx(node); |
5597 | if (!ctx->opt->schedule_maximize_band_depth && !empty) |
5598 | return compute_next_band(node, graph, permutable: 1); |
5599 | if (graph->src_scc >= 0) |
5600 | return compute_split_schedule(node, graph); |
5601 | if (!empty) |
5602 | return compute_next_band(node, graph, permutable: 1); |
5603 | if (graph->scc > 1) |
5604 | return compute_component_schedule(node, graph, wcc: 1); |
5605 | if (!initialized && isl_sched_graph_compute_maxvar(graph) < 0) |
5606 | return isl_schedule_node_free(node); |
5607 | if (isl_options_get_schedule_outer_coincidence(ctx)) |
5608 | return carry_coincidence(node, graph); |
5609 | return carry_dependences(node, graph); |
5610 | } |
5611 | |
5612 | if (!empty) |
5613 | return compute_next_band(node, graph, permutable: 1); |
5614 | return sort_statements(node, graph, initialized); |
5615 | } |
5616 | |
5617 | /* Construct a band of schedule rows for a connected dependence graph. |
5618 | * The caller is responsible for determining the strongly connected |
5619 | * components and calling isl_sched_graph_compute_maxvar first. |
5620 | * |
5621 | * We try to find a sequence of as many schedule rows as possible that result |
5622 | * in non-negative dependence distances (independent of the previous rows |
5623 | * in the sequence, i.e., such that the sequence is tilable), with as |
5624 | * many of the initial rows as possible satisfying the coincidence constraints. |
5625 | * The computation stops if we can't find any more rows or if we have found |
5626 | * all the rows we wanted to find. |
5627 | * |
5628 | * If ctx->opt->schedule_outer_coincidence is set, then we force the |
5629 | * outermost dimension to satisfy the coincidence constraints. If this |
5630 | * turns out to be impossible, we fall back on the general scheme above |
5631 | * and try to carry as many dependences as possible. |
5632 | * |
5633 | * If "graph" contains both condition and conditional validity dependences, |
5634 | * then we need to check that that the conditional schedule constraint |
5635 | * is satisfied, i.e., there are no violated conditional validity dependences |
5636 | * that are adjacent to any non-local condition dependences. |
5637 | * If there are, then we mark all those adjacent condition dependences |
5638 | * as local and recompute the current band. Those dependences that |
5639 | * are marked local will then be forced to be local. |
5640 | * The initial computation is performed with no dependences marked as local. |
5641 | * If we are lucky, then there will be no violated conditional validity |
5642 | * dependences adjacent to any non-local condition dependences. |
5643 | * Otherwise, we mark some additional condition dependences as local and |
5644 | * recompute. We continue this process until there are no violations left or |
5645 | * until we are no longer able to compute a schedule. |
5646 | * Since there are only a finite number of dependences, |
5647 | * there will only be a finite number of iterations. |
5648 | */ |
5649 | isl_stat isl_schedule_node_compute_wcc_band(isl_ctx *ctx, |
5650 | struct isl_sched_graph *graph) |
5651 | { |
5652 | int has_coincidence; |
5653 | int use_coincidence; |
5654 | int force_coincidence = 0; |
5655 | int check_conditional; |
5656 | |
5657 | if (sort_sccs(graph) < 0) |
5658 | return isl_stat_error; |
5659 | |
5660 | clear_local_edges(graph); |
5661 | check_conditional = need_condition_check(graph); |
5662 | has_coincidence = has_any_coincidence(graph); |
5663 | |
5664 | if (ctx->opt->schedule_outer_coincidence) |
5665 | force_coincidence = 1; |
5666 | |
5667 | use_coincidence = has_coincidence; |
5668 | while (graph->n_row < graph->maxvar) { |
5669 | isl_vec *sol; |
5670 | int violated; |
5671 | int coincident; |
5672 | |
5673 | graph->src_scc = -1; |
5674 | graph->dst_scc = -1; |
5675 | |
5676 | if (setup_lp(ctx, graph, use_coincidence) < 0) |
5677 | return isl_stat_error; |
5678 | sol = solve_lp(ctx, graph); |
5679 | if (!sol) |
5680 | return isl_stat_error; |
5681 | if (sol->size == 0) { |
5682 | int empty = graph->n_total_row == graph->band_start; |
5683 | |
5684 | isl_vec_free(vec: sol); |
5685 | if (use_coincidence && (!force_coincidence || !empty)) { |
5686 | use_coincidence = 0; |
5687 | continue; |
5688 | } |
5689 | return isl_stat_ok; |
5690 | } |
5691 | coincident = !has_coincidence || use_coincidence; |
5692 | if (update_schedule(graph, sol, coincident) < 0) |
5693 | return isl_stat_error; |
5694 | |
5695 | if (!check_conditional) |
5696 | continue; |
5697 | violated = has_violated_conditional_constraint(ctx, graph); |
5698 | if (violated < 0) |
5699 | return isl_stat_error; |
5700 | if (!violated) |
5701 | continue; |
5702 | if (reset_band(graph) < 0) |
5703 | return isl_stat_error; |
5704 | use_coincidence = has_coincidence; |
5705 | } |
5706 | |
5707 | return isl_stat_ok; |
5708 | } |
5709 | |
5710 | /* Compute a schedule for a connected dependence graph by considering |
5711 | * the graph as a whole and return the updated schedule node. |
5712 | * |
5713 | * The actual schedule rows of the current band are computed by |
5714 | * isl_schedule_node_compute_wcc_band. isl_schedule_node_compute_finish_band |
5715 | * takes care of integrating the band into "node" and continuing |
5716 | * the computation. |
5717 | */ |
5718 | static __isl_give isl_schedule_node *compute_schedule_wcc_whole( |
5719 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
5720 | { |
5721 | isl_ctx *ctx; |
5722 | |
5723 | if (!node) |
5724 | return NULL; |
5725 | |
5726 | ctx = isl_schedule_node_get_ctx(node); |
5727 | if (isl_schedule_node_compute_wcc_band(ctx, graph) < 0) |
5728 | return isl_schedule_node_free(node); |
5729 | |
5730 | return isl_schedule_node_compute_finish_band(node, graph, initialized: 1); |
5731 | } |
5732 | |
5733 | /* Compute a schedule for a connected dependence graph and return |
5734 | * the updated schedule node. |
5735 | * |
5736 | * If Feautrier's algorithm is selected, we first recursively try to satisfy |
5737 | * as many validity dependences as possible. When all validity dependences |
5738 | * are satisfied we extend the schedule to a full-dimensional schedule. |
5739 | * |
5740 | * Call compute_schedule_wcc_whole or isl_schedule_node_compute_wcc_clustering |
5741 | * depending on whether the user has selected the option to try and |
5742 | * compute a schedule for the entire (weakly connected) component first. |
5743 | * If there is only a single strongly connected component (SCC), then |
5744 | * there is no point in trying to combine SCCs |
5745 | * in isl_schedule_node_compute_wcc_clustering, so compute_schedule_wcc_whole |
5746 | * is called instead. |
5747 | */ |
5748 | static __isl_give isl_schedule_node *compute_schedule_wcc( |
5749 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
5750 | { |
5751 | isl_ctx *ctx; |
5752 | |
5753 | if (!node) |
5754 | return NULL; |
5755 | |
5756 | ctx = isl_schedule_node_get_ctx(node); |
5757 | if (detect_sccs(ctx, graph) < 0) |
5758 | return isl_schedule_node_free(node); |
5759 | |
5760 | if (isl_sched_graph_compute_maxvar(graph) < 0) |
5761 | return isl_schedule_node_free(node); |
5762 | |
5763 | if (need_feautrier_step(ctx, graph)) |
5764 | return compute_schedule_wcc_feautrier(node, graph); |
5765 | |
5766 | if (graph->scc <= 1 || isl_options_get_schedule_whole_component(ctx)) |
5767 | return compute_schedule_wcc_whole(node, graph); |
5768 | else |
5769 | return isl_schedule_node_compute_wcc_clustering(node, graph); |
5770 | } |
5771 | |
5772 | /* Compute a schedule for each group of nodes identified by node->scc |
5773 | * separately and then combine them in a sequence node (or as set node |
5774 | * if graph->weak is set) inserted at position "node" of the schedule tree. |
5775 | * Return the updated schedule node. |
5776 | * |
5777 | * If "wcc" is set then each of the groups belongs to a single |
5778 | * weakly connected component in the dependence graph so that |
5779 | * there is no need for compute_sub_schedule to look for weakly |
5780 | * connected components. |
5781 | * |
5782 | * If a set node would be introduced and if the number of components |
5783 | * is equal to the number of nodes, then check if the schedule |
5784 | * is already complete. If so, a redundant set node would be introduced |
5785 | * (without any further descendants) stating that the statements |
5786 | * can be executed in arbitrary order, which is also expressed |
5787 | * by the absence of any node. Refrain from inserting any nodes |
5788 | * in this case and simply return. |
5789 | */ |
5790 | static __isl_give isl_schedule_node *compute_component_schedule( |
5791 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
5792 | int wcc) |
5793 | { |
5794 | int component; |
5795 | isl_ctx *ctx; |
5796 | isl_union_set_list *filters; |
5797 | |
5798 | if (!node) |
5799 | return NULL; |
5800 | |
5801 | if (graph->weak && graph->scc == graph->n) { |
5802 | if (isl_sched_graph_compute_maxvar(graph) < 0) |
5803 | return isl_schedule_node_free(node); |
5804 | if (graph->n_row >= graph->maxvar) |
5805 | return node; |
5806 | } |
5807 | |
5808 | ctx = isl_schedule_node_get_ctx(node); |
5809 | filters = isl_sched_graph_extract_sccs(ctx, graph); |
5810 | if (graph->weak) |
5811 | node = isl_schedule_node_insert_set(node, filters); |
5812 | else |
5813 | node = isl_schedule_node_insert_sequence(node, filters); |
5814 | |
5815 | for (component = 0; component < graph->scc; ++component) { |
5816 | node = isl_schedule_node_grandchild(node, pos1: component, pos2: 0); |
5817 | node = compute_sub_schedule(node, ctx, graph, |
5818 | node_pred: &isl_sched_node_scc_exactly, |
5819 | edge_pred: &isl_sched_edge_scc_exactly, |
5820 | data: component, wcc); |
5821 | node = isl_schedule_node_grandparent(node); |
5822 | } |
5823 | |
5824 | return node; |
5825 | } |
5826 | |
5827 | /* Compute a schedule for the given dependence graph and insert it at "node". |
5828 | * Return the updated schedule node. |
5829 | * |
5830 | * We first check if the graph is connected (through validity and conditional |
5831 | * validity dependences) and, if not, compute a schedule |
5832 | * for each component separately. |
5833 | * If the schedule_serialize_sccs option is set, then we check for strongly |
5834 | * connected components instead and compute a separate schedule for |
5835 | * each such strongly connected component. |
5836 | */ |
5837 | static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node, |
5838 | struct isl_sched_graph *graph) |
5839 | { |
5840 | isl_ctx *ctx; |
5841 | |
5842 | if (!node) |
5843 | return NULL; |
5844 | |
5845 | ctx = isl_schedule_node_get_ctx(node); |
5846 | if (isl_options_get_schedule_serialize_sccs(ctx)) { |
5847 | if (detect_sccs(ctx, graph) < 0) |
5848 | return isl_schedule_node_free(node); |
5849 | } else { |
5850 | if (detect_wccs(ctx, graph) < 0) |
5851 | return isl_schedule_node_free(node); |
5852 | } |
5853 | |
5854 | if (graph->scc > 1) |
5855 | return compute_component_schedule(node, graph, wcc: 1); |
5856 | |
5857 | return compute_schedule_wcc(node, graph); |
5858 | } |
5859 | |
5860 | /* Compute a schedule on sc->domain that respects the given schedule |
5861 | * constraints. |
5862 | * |
5863 | * In particular, the schedule respects all the validity dependences. |
5864 | * If the default isl scheduling algorithm is used, it tries to minimize |
5865 | * the dependence distances over the proximity dependences. |
5866 | * If Feautrier's scheduling algorithm is used, the proximity dependence |
5867 | * distances are only minimized during the extension to a full-dimensional |
5868 | * schedule. |
5869 | * |
5870 | * If there are any condition and conditional validity dependences, |
5871 | * then the conditional validity dependences may be violated inside |
5872 | * a tilable band, provided they have no adjacent non-local |
5873 | * condition dependences. |
5874 | */ |
5875 | __isl_give isl_schedule *isl_schedule_constraints_compute_schedule( |
5876 | __isl_take isl_schedule_constraints *sc) |
5877 | { |
5878 | isl_ctx *ctx = isl_schedule_constraints_get_ctx(sc); |
5879 | struct isl_sched_graph graph = { 0 }; |
5880 | isl_schedule *sched; |
5881 | isl_schedule_node *node; |
5882 | isl_union_set *domain; |
5883 | isl_size n; |
5884 | |
5885 | sc = isl_schedule_constraints_align_params(sc); |
5886 | |
5887 | domain = isl_schedule_constraints_get_domain(sc); |
5888 | n = isl_union_set_n_set(uset: domain); |
5889 | if (n == 0) { |
5890 | isl_schedule_constraints_free(sc); |
5891 | return isl_schedule_from_domain(domain); |
5892 | } |
5893 | |
5894 | if (n < 0 || isl_sched_graph_init(graph: &graph, sc) < 0) |
5895 | domain = isl_union_set_free(uset: domain); |
5896 | |
5897 | node = isl_schedule_node_from_domain(domain); |
5898 | node = isl_schedule_node_child(node, pos: 0); |
5899 | if (graph.n > 0) |
5900 | node = compute_schedule(node, graph: &graph); |
5901 | sched = isl_schedule_node_get_schedule(node); |
5902 | isl_schedule_node_free(node); |
5903 | |
5904 | isl_sched_graph_free(ctx, graph: &graph); |
5905 | isl_schedule_constraints_free(sc); |
5906 | |
5907 | return sched; |
5908 | } |
5909 | |
5910 | /* Compute a schedule for the given union of domains that respects |
5911 | * all the validity dependences and minimizes |
5912 | * the dependence distances over the proximity dependences. |
5913 | * |
5914 | * This function is kept for backward compatibility. |
5915 | */ |
5916 | __isl_give isl_schedule *isl_union_set_compute_schedule( |
5917 | __isl_take isl_union_set *domain, |
5918 | __isl_take isl_union_map *validity, |
5919 | __isl_take isl_union_map *proximity) |
5920 | { |
5921 | isl_schedule_constraints *sc; |
5922 | |
5923 | sc = isl_schedule_constraints_on_domain(domain); |
5924 | sc = isl_schedule_constraints_set_validity(sc, validity); |
5925 | sc = isl_schedule_constraints_set_proximity(sc, proximity); |
5926 | |
5927 | return isl_schedule_constraints_compute_schedule(sc); |
5928 | } |
5929 | |