1 | /* |
2 | * Copyright 2015 Sven Verdoolaege |
3 | * |
4 | * Use of this software is governed by the MIT license |
5 | * |
6 | * Written by Sven Verdoolaege |
7 | */ |
8 | |
9 | #include "isl_map_private.h" |
10 | |
11 | #include <isl/id.h> |
12 | #include <isl/schedule_node.h> |
13 | #include <isl/union_set.h> |
14 | |
15 | #include "isl_mat_private.h" |
16 | #include "isl_scheduler_clustering.h" |
17 | #include "isl_scheduler_scc.h" |
18 | #include "isl_seq.h" |
19 | #include "isl_tarjan.h" |
20 | |
21 | /* Initialize the clustering data structure "c" from "graph". |
22 | * |
23 | * In particular, allocate memory, extract the SCCs from "graph" |
24 | * into c->scc, initialize scc_cluster and construct |
25 | * a band of schedule rows for each SCC. |
26 | * Within each SCC, there is only one SCC by definition. |
27 | * Each SCC initially belongs to a cluster containing only that SCC. |
28 | */ |
29 | static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c, |
30 | struct isl_sched_graph *graph) |
31 | { |
32 | int i; |
33 | |
34 | c->n = graph->scc; |
35 | c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n); |
36 | c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n); |
37 | c->scc_cluster = isl_calloc_array(ctx, int, c->n); |
38 | c->scc_node = isl_calloc_array(ctx, int, c->n); |
39 | c->scc_in_merge = isl_calloc_array(ctx, int, c->n); |
40 | if (!c->scc || !c->cluster || |
41 | !c->scc_cluster || !c->scc_node || !c->scc_in_merge) |
42 | return isl_stat_error; |
43 | |
44 | for (i = 0; i < c->n; ++i) { |
45 | if (isl_sched_graph_extract_sub_graph(ctx, graph, |
46 | node_pred: &isl_sched_node_scc_exactly, |
47 | edge_pred: &isl_sched_edge_scc_exactly, |
48 | data: i, sub: &c->scc[i]) < 0) |
49 | return isl_stat_error; |
50 | c->scc[i].scc = 1; |
51 | if (isl_sched_graph_compute_maxvar(graph: &c->scc[i]) < 0) |
52 | return isl_stat_error; |
53 | if (isl_schedule_node_compute_wcc_band(ctx, graph: &c->scc[i]) < 0) |
54 | return isl_stat_error; |
55 | c->scc_cluster[i] = i; |
56 | } |
57 | |
58 | return isl_stat_ok; |
59 | } |
60 | |
61 | /* Free all memory allocated for "c". |
62 | */ |
63 | static void clustering_free(isl_ctx *ctx, struct isl_clustering *c) |
64 | { |
65 | int i; |
66 | |
67 | if (c->scc) |
68 | for (i = 0; i < c->n; ++i) |
69 | isl_sched_graph_free(ctx, graph: &c->scc[i]); |
70 | free(ptr: c->scc); |
71 | if (c->cluster) |
72 | for (i = 0; i < c->n; ++i) |
73 | isl_sched_graph_free(ctx, graph: &c->cluster[i]); |
74 | free(ptr: c->cluster); |
75 | free(ptr: c->scc_cluster); |
76 | free(ptr: c->scc_node); |
77 | free(ptr: c->scc_in_merge); |
78 | } |
79 | |
80 | /* Should we refrain from merging the cluster in "graph" with |
81 | * any other cluster? |
82 | * In particular, is its current schedule band empty and incomplete. |
83 | */ |
84 | static int bad_cluster(struct isl_sched_graph *graph) |
85 | { |
86 | return graph->n_row < graph->maxvar && |
87 | graph->n_total_row == graph->band_start; |
88 | } |
89 | |
90 | /* Is "edge" a proximity edge with a non-empty dependence relation? |
91 | */ |
92 | static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge) |
93 | { |
94 | if (!isl_sched_edge_is_proximity(edge)) |
95 | return isl_bool_false; |
96 | return isl_bool_not(b: isl_map_plain_is_empty(map: edge->map)); |
97 | } |
98 | |
99 | /* Return the index of an edge in "graph" that can be used to merge |
100 | * two clusters in "c". |
101 | * Return graph->n_edge if no such edge can be found. |
102 | * Return -1 on error. |
103 | * |
104 | * In particular, return a proximity edge between two clusters |
105 | * that is not marked "no_merge" and such that neither of the |
106 | * two clusters has an incomplete, empty band. |
107 | * |
108 | * If there are multiple such edges, then try and find the most |
109 | * appropriate edge to use for merging. In particular, pick the edge |
110 | * with the greatest weight. If there are multiple of those, |
111 | * then pick one with the shortest distance between |
112 | * the two cluster representatives. |
113 | */ |
114 | static int find_proximity(struct isl_sched_graph *graph, |
115 | struct isl_clustering *c) |
116 | { |
117 | int i, best = graph->n_edge, best_dist, best_weight; |
118 | |
119 | for (i = 0; i < graph->n_edge; ++i) { |
120 | struct isl_sched_edge *edge = &graph->edge[i]; |
121 | int dist, weight; |
122 | isl_bool prox; |
123 | |
124 | prox = is_non_empty_proximity(edge); |
125 | if (prox < 0) |
126 | return -1; |
127 | if (!prox) |
128 | continue; |
129 | if (edge->no_merge) |
130 | continue; |
131 | if (bad_cluster(graph: &c->scc[edge->src->scc]) || |
132 | bad_cluster(graph: &c->scc[edge->dst->scc])) |
133 | continue; |
134 | dist = c->scc_cluster[edge->dst->scc] - |
135 | c->scc_cluster[edge->src->scc]; |
136 | if (dist == 0) |
137 | continue; |
138 | weight = edge->weight; |
139 | if (best < graph->n_edge) { |
140 | if (best_weight > weight) |
141 | continue; |
142 | if (best_weight == weight && best_dist <= dist) |
143 | continue; |
144 | } |
145 | best = i; |
146 | best_dist = dist; |
147 | best_weight = weight; |
148 | } |
149 | |
150 | return best; |
151 | } |
152 | |
153 | /* Internal data structure used in mark_merge_sccs. |
154 | * |
155 | * "graph" is the dependence graph in which a strongly connected |
156 | * component is constructed. |
157 | * "scc_cluster" maps each SCC index to the cluster to which it belongs. |
158 | * "src" and "dst" are the indices of the nodes that are being merged. |
159 | */ |
160 | struct isl_mark_merge_sccs_data { |
161 | struct isl_sched_graph *graph; |
162 | int *scc_cluster; |
163 | int src; |
164 | int dst; |
165 | }; |
166 | |
167 | /* Check whether the cluster containing node "i" depends on the cluster |
168 | * containing node "j". If "i" and "j" belong to the same cluster, |
169 | * then they are taken to depend on each other to ensure that |
170 | * the resulting strongly connected component consists of complete |
171 | * clusters. Furthermore, if "i" and "j" are the two nodes that |
172 | * are being merged, then they are taken to depend on each other as well. |
173 | * Otherwise, check if there is a (conditional) validity dependence |
174 | * from node[j] to node[i], forcing node[i] to follow node[j]. |
175 | */ |
176 | static isl_bool cluster_follows(int i, int j, void *user) |
177 | { |
178 | struct isl_mark_merge_sccs_data *data = user; |
179 | struct isl_sched_graph *graph = data->graph; |
180 | int *scc_cluster = data->scc_cluster; |
181 | |
182 | if (data->src == i && data->dst == j) |
183 | return isl_bool_true; |
184 | if (data->src == j && data->dst == i) |
185 | return isl_bool_true; |
186 | if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc]) |
187 | return isl_bool_true; |
188 | |
189 | return isl_sched_graph_has_validity_edge(graph, src: &graph->node[j], |
190 | dst: &graph->node[i]); |
191 | } |
192 | |
193 | /* Mark all SCCs that belong to either of the two clusters in "c" |
194 | * connected by the edge in "graph" with index "edge", or to any |
195 | * of the intermediate clusters. |
196 | * The marking is recorded in c->scc_in_merge. |
197 | * |
198 | * The given edge has been selected for merging two clusters, |
199 | * meaning that there is at least a proximity edge between the two nodes. |
200 | * However, there may also be (indirect) validity dependences |
201 | * between the two nodes. When merging the two clusters, all clusters |
202 | * containing one or more of the intermediate nodes along the |
203 | * indirect validity dependences need to be merged in as well. |
204 | * |
205 | * First collect all such nodes by computing the strongly connected |
206 | * component (SCC) containing the two nodes connected by the edge, where |
207 | * the two nodes are considered to depend on each other to make |
208 | * sure they end up in the same SCC. Similarly, each node is considered |
209 | * to depend on every other node in the same cluster to ensure |
210 | * that the SCC consists of complete clusters. |
211 | * |
212 | * Then the original SCCs that contain any of these nodes are marked |
213 | * in c->scc_in_merge. |
214 | */ |
215 | static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph, |
216 | int edge, struct isl_clustering *c) |
217 | { |
218 | struct isl_mark_merge_sccs_data data; |
219 | struct isl_tarjan_graph *g; |
220 | int i; |
221 | |
222 | for (i = 0; i < c->n; ++i) |
223 | c->scc_in_merge[i] = 0; |
224 | |
225 | data.graph = graph; |
226 | data.scc_cluster = c->scc_cluster; |
227 | data.src = graph->edge[edge].src - graph->node; |
228 | data.dst = graph->edge[edge].dst - graph->node; |
229 | |
230 | g = isl_tarjan_graph_component(ctx, len: graph->n, node: data.dst, |
231 | follows: &cluster_follows, user: &data); |
232 | if (!g) |
233 | goto error; |
234 | |
235 | i = g->op; |
236 | if (i < 3) |
237 | isl_die(ctx, isl_error_internal, |
238 | "expecting at least two nodes in component" , |
239 | goto error); |
240 | if (g->order[--i] != -1) |
241 | isl_die(ctx, isl_error_internal, |
242 | "expecting end of component marker" , goto error); |
243 | |
244 | for (--i; i >= 0 && g->order[i] != -1; --i) { |
245 | int scc = graph->node[g->order[i]].scc; |
246 | c->scc_in_merge[scc] = 1; |
247 | } |
248 | |
249 | isl_tarjan_graph_free(g); |
250 | return isl_stat_ok; |
251 | error: |
252 | isl_tarjan_graph_free(g); |
253 | return isl_stat_error; |
254 | } |
255 | |
256 | /* Construct the identifier "cluster_i". |
257 | */ |
258 | static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i) |
259 | { |
260 | char name[40]; |
261 | |
262 | snprintf(s: name, maxlen: sizeof(name), format: "cluster_%d" , i); |
263 | return isl_id_alloc(ctx, name, NULL); |
264 | } |
265 | |
266 | /* Construct the space of the cluster with index "i" containing |
267 | * the strongly connected component "scc". |
268 | * |
269 | * In particular, construct a space called cluster_i with dimension equal |
270 | * to the number of schedule rows in the current band of "scc". |
271 | */ |
272 | static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i) |
273 | { |
274 | int nvar; |
275 | isl_space *space; |
276 | isl_id *id; |
277 | |
278 | nvar = scc->n_total_row - scc->band_start; |
279 | space = isl_space_copy(space: scc->node[0].space); |
280 | space = isl_space_params(space); |
281 | space = isl_space_set_from_params(space); |
282 | space = isl_space_add_dims(space, type: isl_dim_set, n: nvar); |
283 | id = cluster_id(ctx: isl_space_get_ctx(space), i); |
284 | space = isl_space_set_tuple_id(space, type: isl_dim_set, id); |
285 | |
286 | return space; |
287 | } |
288 | |
289 | /* Collect the domain of the graph for merging clusters. |
290 | * |
291 | * In particular, for each cluster with first SCC "i", construct |
292 | * a set in the space called cluster_i with dimension equal |
293 | * to the number of schedule rows in the current band of the cluster. |
294 | */ |
295 | static __isl_give isl_union_set *collect_domain(isl_ctx *ctx, |
296 | struct isl_sched_graph *graph, struct isl_clustering *c) |
297 | { |
298 | int i; |
299 | isl_space *space; |
300 | isl_union_set *domain; |
301 | |
302 | space = isl_space_params_alloc(ctx, nparam: 0); |
303 | domain = isl_union_set_empty(space); |
304 | |
305 | for (i = 0; i < graph->scc; ++i) { |
306 | isl_space *space; |
307 | |
308 | if (!c->scc_in_merge[i]) |
309 | continue; |
310 | if (c->scc_cluster[i] != i) |
311 | continue; |
312 | space = cluster_space(scc: &c->scc[i], i); |
313 | domain = isl_union_set_add_set(uset: domain, set: isl_set_universe(space)); |
314 | } |
315 | |
316 | return domain; |
317 | } |
318 | |
319 | /* Construct a map from the original instances to the corresponding |
320 | * cluster instance in the current bands of the clusters in "c". |
321 | */ |
322 | static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx, |
323 | struct isl_sched_graph *graph, struct isl_clustering *c) |
324 | { |
325 | int i, j; |
326 | isl_space *space; |
327 | isl_union_map *cluster_map; |
328 | |
329 | space = isl_space_params_alloc(ctx, nparam: 0); |
330 | cluster_map = isl_union_map_empty(space); |
331 | for (i = 0; i < graph->scc; ++i) { |
332 | int start, n; |
333 | isl_id *id; |
334 | |
335 | if (!c->scc_in_merge[i]) |
336 | continue; |
337 | |
338 | id = cluster_id(ctx, i: c->scc_cluster[i]); |
339 | start = c->scc[i].band_start; |
340 | n = c->scc[i].n_total_row - start; |
341 | for (j = 0; j < c->scc[i].n; ++j) { |
342 | isl_multi_aff *ma; |
343 | isl_map *map; |
344 | struct isl_sched_node *node = &c->scc[i].node[j]; |
345 | |
346 | ma = isl_sched_node_extract_partial_schedule_multi_aff( |
347 | node, first: start, n); |
348 | ma = isl_multi_aff_set_tuple_id(multi: ma, type: isl_dim_out, |
349 | id: isl_id_copy(id)); |
350 | map = isl_map_from_multi_aff(maff: ma); |
351 | cluster_map = isl_union_map_add_map(umap: cluster_map, map); |
352 | } |
353 | isl_id_free(id); |
354 | } |
355 | |
356 | return cluster_map; |
357 | } |
358 | |
359 | /* Add "umap" to the schedule constraints "sc" of all types of "edge" |
360 | * that are not isl_edge_condition or isl_edge_conditional_validity. |
361 | */ |
362 | static __isl_give isl_schedule_constraints *add_non_conditional_constraints( |
363 | struct isl_sched_edge *edge, __isl_keep isl_union_map *umap, |
364 | __isl_take isl_schedule_constraints *sc) |
365 | { |
366 | enum isl_edge_type t; |
367 | |
368 | if (!sc) |
369 | return NULL; |
370 | |
371 | for (t = isl_edge_first; t <= isl_edge_last; ++t) { |
372 | if (t == isl_edge_condition || |
373 | t == isl_edge_conditional_validity) |
374 | continue; |
375 | if (!isl_sched_edge_has_type(edge, type: t)) |
376 | continue; |
377 | sc = isl_schedule_constraints_add(sc, type: t, |
378 | c: isl_union_map_copy(umap)); |
379 | } |
380 | |
381 | return sc; |
382 | } |
383 | |
384 | /* Add schedule constraints of types isl_edge_condition and |
385 | * isl_edge_conditional_validity to "sc" by applying "umap" to |
386 | * the domains of the wrapped relations in domain and range |
387 | * of the corresponding tagged constraints of "edge". |
388 | */ |
389 | static __isl_give isl_schedule_constraints *add_conditional_constraints( |
390 | struct isl_sched_edge *edge, __isl_keep isl_union_map *umap, |
391 | __isl_take isl_schedule_constraints *sc) |
392 | { |
393 | enum isl_edge_type t; |
394 | isl_union_map *tagged; |
395 | |
396 | for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) { |
397 | if (!isl_sched_edge_has_type(edge, type: t)) |
398 | continue; |
399 | if (t == isl_edge_condition) |
400 | tagged = isl_union_map_copy(umap: edge->tagged_condition); |
401 | else |
402 | tagged = isl_union_map_copy(umap: edge->tagged_validity); |
403 | tagged = isl_union_map_zip(umap: tagged); |
404 | tagged = isl_union_map_apply_domain(umap1: tagged, |
405 | umap2: isl_union_map_copy(umap)); |
406 | tagged = isl_union_map_zip(umap: tagged); |
407 | sc = isl_schedule_constraints_add(sc, type: t, c: tagged); |
408 | if (!sc) |
409 | return NULL; |
410 | } |
411 | |
412 | return sc; |
413 | } |
414 | |
415 | /* Given a mapping "cluster_map" from the original instances to |
416 | * the cluster instances, add schedule constraints on the clusters |
417 | * to "sc" corresponding to the original constraints represented by "edge". |
418 | * |
419 | * For non-tagged dependence constraints, the cluster constraints |
420 | * are obtained by applying "cluster_map" to the edge->map. |
421 | * |
422 | * For tagged dependence constraints, "cluster_map" needs to be applied |
423 | * to the domains of the wrapped relations in domain and range |
424 | * of the tagged dependence constraints. Pick out the mappings |
425 | * from these domains from "cluster_map" and construct their product. |
426 | * This mapping can then be applied to the pair of domains. |
427 | */ |
428 | static __isl_give isl_schedule_constraints *collect_edge_constraints( |
429 | struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map, |
430 | __isl_take isl_schedule_constraints *sc) |
431 | { |
432 | isl_union_map *umap; |
433 | isl_space *space; |
434 | isl_union_set *uset; |
435 | isl_union_map *umap1, *umap2; |
436 | |
437 | if (!sc) |
438 | return NULL; |
439 | |
440 | umap = isl_union_map_from_map(map: isl_map_copy(map: edge->map)); |
441 | umap = isl_union_map_apply_domain(umap1: umap, |
442 | umap2: isl_union_map_copy(umap: cluster_map)); |
443 | umap = isl_union_map_apply_range(umap1: umap, |
444 | umap2: isl_union_map_copy(umap: cluster_map)); |
445 | sc = add_non_conditional_constraints(edge, umap, sc); |
446 | isl_union_map_free(umap); |
447 | |
448 | if (!sc || |
449 | (!isl_sched_edge_is_condition(edge) && |
450 | !isl_sched_edge_is_conditional_validity(edge))) |
451 | return sc; |
452 | |
453 | space = isl_space_domain(space: isl_map_get_space(map: edge->map)); |
454 | uset = isl_union_set_from_set(set: isl_set_universe(space)); |
455 | umap1 = isl_union_map_copy(umap: cluster_map); |
456 | umap1 = isl_union_map_intersect_domain(umap: umap1, uset); |
457 | space = isl_space_range(space: isl_map_get_space(map: edge->map)); |
458 | uset = isl_union_set_from_set(set: isl_set_universe(space)); |
459 | umap2 = isl_union_map_copy(umap: cluster_map); |
460 | umap2 = isl_union_map_intersect_domain(umap: umap2, uset); |
461 | umap = isl_union_map_product(umap1, umap2); |
462 | |
463 | sc = add_conditional_constraints(edge, umap, sc); |
464 | |
465 | isl_union_map_free(umap); |
466 | return sc; |
467 | } |
468 | |
469 | /* Given a mapping "cluster_map" from the original instances to |
470 | * the cluster instances, add schedule constraints on the clusters |
471 | * to "sc" corresponding to all edges in "graph" between nodes that |
472 | * belong to SCCs that are marked for merging in "scc_in_merge". |
473 | */ |
474 | static __isl_give isl_schedule_constraints *collect_constraints( |
475 | struct isl_sched_graph *graph, int *scc_in_merge, |
476 | __isl_keep isl_union_map *cluster_map, |
477 | __isl_take isl_schedule_constraints *sc) |
478 | { |
479 | int i; |
480 | |
481 | for (i = 0; i < graph->n_edge; ++i) { |
482 | struct isl_sched_edge *edge = &graph->edge[i]; |
483 | |
484 | if (!scc_in_merge[edge->src->scc]) |
485 | continue; |
486 | if (!scc_in_merge[edge->dst->scc]) |
487 | continue; |
488 | sc = collect_edge_constraints(edge, cluster_map, sc); |
489 | } |
490 | |
491 | return sc; |
492 | } |
493 | |
494 | /* Construct a dependence graph for scheduling clusters with respect |
495 | * to each other and store the result in "merge_graph". |
496 | * In particular, the nodes of the graph correspond to the schedule |
497 | * dimensions of the current bands of those clusters that have been |
498 | * marked for merging in "c". |
499 | * |
500 | * First construct an isl_schedule_constraints object for this domain |
501 | * by transforming the edges in "graph" to the domain. |
502 | * Then initialize a dependence graph for scheduling from these |
503 | * constraints. |
504 | */ |
505 | static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph, |
506 | struct isl_clustering *c, struct isl_sched_graph *merge_graph) |
507 | { |
508 | isl_union_set *domain; |
509 | isl_union_map *cluster_map; |
510 | isl_schedule_constraints *sc; |
511 | isl_stat r; |
512 | |
513 | domain = collect_domain(ctx, graph, c); |
514 | sc = isl_schedule_constraints_on_domain(domain); |
515 | if (!sc) |
516 | return isl_stat_error; |
517 | cluster_map = collect_cluster_map(ctx, graph, c); |
518 | sc = collect_constraints(graph, scc_in_merge: c->scc_in_merge, cluster_map, sc); |
519 | isl_union_map_free(umap: cluster_map); |
520 | |
521 | r = isl_sched_graph_init(graph: merge_graph, sc); |
522 | |
523 | isl_schedule_constraints_free(sc); |
524 | |
525 | return r; |
526 | } |
527 | |
528 | /* Compute the maximal number of remaining schedule rows that still need |
529 | * to be computed for the nodes that belong to clusters with the maximal |
530 | * dimension for the current band (i.e., the band that is to be merged). |
531 | * Only clusters that are about to be merged are considered. |
532 | * "maxvar" is the maximal dimension for the current band. |
533 | * "c" contains information about the clusters. |
534 | * |
535 | * Return the maximal number of remaining schedule rows or |
536 | * isl_size_error on error. |
537 | */ |
538 | static isl_size compute_maxvar_max_slack(int maxvar, struct isl_clustering *c) |
539 | { |
540 | int i, j; |
541 | int max_slack; |
542 | |
543 | max_slack = 0; |
544 | for (i = 0; i < c->n; ++i) { |
545 | int nvar; |
546 | struct isl_sched_graph *scc; |
547 | |
548 | if (!c->scc_in_merge[i]) |
549 | continue; |
550 | scc = &c->scc[i]; |
551 | nvar = scc->n_total_row - scc->band_start; |
552 | if (nvar != maxvar) |
553 | continue; |
554 | for (j = 0; j < scc->n; ++j) { |
555 | struct isl_sched_node *node = &scc->node[j]; |
556 | int slack; |
557 | |
558 | if (isl_sched_node_update_vmap(node) < 0) |
559 | return isl_size_error; |
560 | slack = node->nvar - node->rank; |
561 | if (slack > max_slack) |
562 | max_slack = slack; |
563 | } |
564 | } |
565 | |
566 | return max_slack; |
567 | } |
568 | |
569 | /* If there are any clusters where the dimension of the current band |
570 | * (i.e., the band that is to be merged) is smaller than "maxvar" and |
571 | * if there are any nodes in such a cluster where the number |
572 | * of remaining schedule rows that still need to be computed |
573 | * is greater than "max_slack", then return the smallest current band |
574 | * dimension of all these clusters. Otherwise return the original value |
575 | * of "maxvar". Return isl_size_error in case of any error. |
576 | * Only clusters that are about to be merged are considered. |
577 | * "c" contains information about the clusters. |
578 | */ |
579 | static isl_size limit_maxvar_to_slack(int maxvar, int max_slack, |
580 | struct isl_clustering *c) |
581 | { |
582 | int i, j; |
583 | |
584 | for (i = 0; i < c->n; ++i) { |
585 | int nvar; |
586 | struct isl_sched_graph *scc; |
587 | |
588 | if (!c->scc_in_merge[i]) |
589 | continue; |
590 | scc = &c->scc[i]; |
591 | nvar = scc->n_total_row - scc->band_start; |
592 | if (nvar >= maxvar) |
593 | continue; |
594 | for (j = 0; j < scc->n; ++j) { |
595 | struct isl_sched_node *node = &scc->node[j]; |
596 | int slack; |
597 | |
598 | if (isl_sched_node_update_vmap(node) < 0) |
599 | return isl_size_error; |
600 | slack = node->nvar - node->rank; |
601 | if (slack > max_slack) { |
602 | maxvar = nvar; |
603 | break; |
604 | } |
605 | } |
606 | } |
607 | |
608 | return maxvar; |
609 | } |
610 | |
611 | /* Adjust merge_graph->maxvar based on the number of remaining schedule rows |
612 | * that still need to be computed. In particular, if there is a node |
613 | * in a cluster where the dimension of the current band is smaller |
614 | * than merge_graph->maxvar, but the number of remaining schedule rows |
615 | * is greater than that of any node in a cluster with the maximal |
616 | * dimension for the current band (i.e., merge_graph->maxvar), |
617 | * then adjust merge_graph->maxvar to the (smallest) current band dimension |
618 | * of those clusters. Without this adjustment, the total number of |
619 | * schedule dimensions would be increased, resulting in a skewed view |
620 | * of the number of coincident dimensions. |
621 | * "c" contains information about the clusters. |
622 | * |
623 | * If the maximize_band_depth option is set and merge_graph->maxvar is reduced, |
624 | * then there is no point in attempting any merge since it will be rejected |
625 | * anyway. Set merge_graph->maxvar to zero in such cases. |
626 | */ |
627 | static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx, |
628 | struct isl_sched_graph *merge_graph, struct isl_clustering *c) |
629 | { |
630 | isl_size max_slack, maxvar; |
631 | |
632 | max_slack = compute_maxvar_max_slack(maxvar: merge_graph->maxvar, c); |
633 | if (max_slack < 0) |
634 | return isl_stat_error; |
635 | maxvar = limit_maxvar_to_slack(maxvar: merge_graph->maxvar, max_slack, c); |
636 | if (maxvar < 0) |
637 | return isl_stat_error; |
638 | |
639 | if (maxvar < merge_graph->maxvar) { |
640 | if (isl_options_get_schedule_maximize_band_depth(ctx)) |
641 | merge_graph->maxvar = 0; |
642 | else |
643 | merge_graph->maxvar = maxvar; |
644 | } |
645 | |
646 | return isl_stat_ok; |
647 | } |
648 | |
649 | /* Return the number of coincident dimensions in the current band of "graph", |
650 | * where the nodes of "graph" are assumed to be scheduled by a single band. |
651 | */ |
652 | static int get_n_coincident(struct isl_sched_graph *graph) |
653 | { |
654 | int i; |
655 | |
656 | for (i = graph->band_start; i < graph->n_total_row; ++i) |
657 | if (!graph->node[0].coincident[i]) |
658 | break; |
659 | |
660 | return i - graph->band_start; |
661 | } |
662 | |
663 | /* Should the clusters be merged based on the cluster schedule |
664 | * in the current (and only) band of "merge_graph", given that |
665 | * coincidence should be maximized? |
666 | * |
667 | * If the number of coincident schedule dimensions in the merged band |
668 | * would be less than the maximal number of coincident schedule dimensions |
669 | * in any of the merged clusters, then the clusters should not be merged. |
670 | */ |
671 | static isl_bool ok_to_merge_coincident(struct isl_clustering *c, |
672 | struct isl_sched_graph *merge_graph) |
673 | { |
674 | int i; |
675 | int n_coincident; |
676 | int max_coincident; |
677 | |
678 | max_coincident = 0; |
679 | for (i = 0; i < c->n; ++i) { |
680 | if (!c->scc_in_merge[i]) |
681 | continue; |
682 | n_coincident = get_n_coincident(graph: &c->scc[i]); |
683 | if (n_coincident > max_coincident) |
684 | max_coincident = n_coincident; |
685 | } |
686 | |
687 | n_coincident = get_n_coincident(graph: merge_graph); |
688 | |
689 | return isl_bool_ok(b: n_coincident >= max_coincident); |
690 | } |
691 | |
692 | /* Return the transformation on "node" expressed by the current (and only) |
693 | * band of "merge_graph" applied to the clusters in "c". |
694 | * |
695 | * First find the representation of "node" in its SCC in "c" and |
696 | * extract the transformation expressed by the current band. |
697 | * Then extract the transformation applied by "merge_graph" |
698 | * to the cluster to which this SCC belongs. |
699 | * Combine the two to obtain the complete transformation on the node. |
700 | * |
701 | * Note that the range of the first transformation is an anonymous space, |
702 | * while the domain of the second is named "cluster_X". The range |
703 | * of the former therefore needs to be adjusted before the two |
704 | * can be combined. |
705 | */ |
706 | static __isl_give isl_map *(isl_ctx *ctx, |
707 | struct isl_sched_node *node, struct isl_clustering *c, |
708 | struct isl_sched_graph *merge_graph) |
709 | { |
710 | struct isl_sched_node *scc_node, *cluster_node; |
711 | int start, n; |
712 | isl_id *id; |
713 | isl_space *space; |
714 | isl_multi_aff *ma, *ma2; |
715 | |
716 | scc_node = isl_sched_graph_find_node(ctx, graph: &c->scc[node->scc], |
717 | space: node->space); |
718 | if (scc_node && !isl_sched_graph_is_node(graph: &c->scc[node->scc], node: scc_node)) |
719 | isl_die(ctx, isl_error_internal, "unable to find node" , |
720 | return NULL); |
721 | start = c->scc[node->scc].band_start; |
722 | n = c->scc[node->scc].n_total_row - start; |
723 | ma = isl_sched_node_extract_partial_schedule_multi_aff(node: scc_node, |
724 | first: start, n); |
725 | space = cluster_space(scc: &c->scc[node->scc], i: c->scc_cluster[node->scc]); |
726 | cluster_node = isl_sched_graph_find_node(ctx, graph: merge_graph, space); |
727 | if (cluster_node && !isl_sched_graph_is_node(graph: merge_graph, node: cluster_node)) |
728 | isl_die(ctx, isl_error_internal, "unable to find cluster" , |
729 | space = isl_space_free(space)); |
730 | id = isl_space_get_tuple_id(space, type: isl_dim_set); |
731 | ma = isl_multi_aff_set_tuple_id(multi: ma, type: isl_dim_out, id); |
732 | isl_space_free(space); |
733 | n = merge_graph->n_total_row; |
734 | ma2 = isl_sched_node_extract_partial_schedule_multi_aff(node: cluster_node, |
735 | first: 0, n); |
736 | ma = isl_multi_aff_pullback_multi_aff(ma1: ma2, ma2: ma); |
737 | |
738 | return isl_map_from_multi_aff(maff: ma); |
739 | } |
740 | |
741 | /* Give a set of distances "set", are they bounded by a small constant |
742 | * in direction "pos"? |
743 | * In practice, check if they are bounded by 2 by checking that there |
744 | * are no elements with a value greater than or equal to 3 or |
745 | * smaller than or equal to -3. |
746 | */ |
747 | static isl_bool distance_is_bounded(__isl_keep isl_set *set, int pos) |
748 | { |
749 | isl_bool bounded; |
750 | isl_set *test; |
751 | |
752 | if (!set) |
753 | return isl_bool_error; |
754 | |
755 | test = isl_set_copy(set); |
756 | test = isl_set_lower_bound_si(set: test, type: isl_dim_set, pos, value: 3); |
757 | bounded = isl_set_is_empty(set: test); |
758 | isl_set_free(set: test); |
759 | |
760 | if (bounded < 0 || !bounded) |
761 | return bounded; |
762 | |
763 | test = isl_set_copy(set); |
764 | test = isl_set_upper_bound_si(set: test, type: isl_dim_set, pos, value: -3); |
765 | bounded = isl_set_is_empty(set: test); |
766 | isl_set_free(set: test); |
767 | |
768 | return bounded; |
769 | } |
770 | |
771 | /* Does the set "set" have a fixed (but possible parametric) value |
772 | * at dimension "pos"? |
773 | */ |
774 | static isl_bool has_single_value(__isl_keep isl_set *set, int pos) |
775 | { |
776 | isl_size n; |
777 | isl_bool single; |
778 | |
779 | n = isl_set_dim(set, type: isl_dim_set); |
780 | if (n < 0) |
781 | return isl_bool_error; |
782 | set = isl_set_copy(set); |
783 | set = isl_set_project_out(set, type: isl_dim_set, first: pos + 1, n: n - (pos + 1)); |
784 | set = isl_set_project_out(set, type: isl_dim_set, first: 0, n: pos); |
785 | single = isl_set_is_singleton(set); |
786 | isl_set_free(set); |
787 | |
788 | return single; |
789 | } |
790 | |
791 | /* Does "map" have a fixed (but possible parametric) value |
792 | * at dimension "pos" of either its domain or its range? |
793 | */ |
794 | static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos) |
795 | { |
796 | isl_set *set; |
797 | isl_bool single; |
798 | |
799 | set = isl_map_domain(bmap: isl_map_copy(map)); |
800 | single = has_single_value(set, pos); |
801 | isl_set_free(set); |
802 | |
803 | if (single < 0 || single) |
804 | return single; |
805 | |
806 | set = isl_map_range(map: isl_map_copy(map)); |
807 | single = has_single_value(set, pos); |
808 | isl_set_free(set); |
809 | |
810 | return single; |
811 | } |
812 | |
813 | /* Does the edge "edge" from "graph" have bounded dependence distances |
814 | * in the merged graph "merge_graph" of a selection of clusters in "c"? |
815 | * |
816 | * Extract the complete transformations of the source and destination |
817 | * nodes of the edge, apply them to the edge constraints and |
818 | * compute the differences. Finally, check if these differences are bounded |
819 | * in each direction. |
820 | * |
821 | * If the dimension of the band is greater than the number of |
822 | * dimensions that can be expected to be optimized by the edge |
823 | * (based on its weight), then also allow the differences to be unbounded |
824 | * in the remaining dimensions, but only if either the source or |
825 | * the destination has a fixed value in that direction. |
826 | * This allows a statement that produces values that are used by |
827 | * several instances of another statement to be merged with that |
828 | * other statement. |
829 | * However, merging such clusters will introduce an inherently |
830 | * large proximity distance inside the merged cluster, meaning |
831 | * that proximity distances will no longer be optimized in |
832 | * subsequent merges. These merges are therefore only allowed |
833 | * after all other possible merges have been tried. |
834 | * The first time such a merge is encountered, the weight of the edge |
835 | * is replaced by a negative weight. The second time (i.e., after |
836 | * all merges over edges with a non-negative weight have been tried), |
837 | * the merge is allowed. |
838 | */ |
839 | static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge, |
840 | struct isl_sched_graph *graph, struct isl_clustering *c, |
841 | struct isl_sched_graph *merge_graph) |
842 | { |
843 | int i, n_slack; |
844 | isl_size n; |
845 | isl_bool bounded; |
846 | isl_map *map, *t; |
847 | isl_set *dist; |
848 | |
849 | map = isl_map_copy(map: edge->map); |
850 | t = extract_node_transformation(ctx, node: edge->src, c, merge_graph); |
851 | map = isl_map_apply_domain(map1: map, map2: t); |
852 | t = extract_node_transformation(ctx, node: edge->dst, c, merge_graph); |
853 | map = isl_map_apply_range(map1: map, map2: t); |
854 | dist = isl_map_deltas(map: isl_map_copy(map)); |
855 | |
856 | bounded = isl_bool_true; |
857 | n = isl_set_dim(set: dist, type: isl_dim_set); |
858 | if (n < 0) |
859 | goto error; |
860 | n_slack = n - edge->weight; |
861 | if (edge->weight < 0) |
862 | n_slack -= graph->max_weight + 1; |
863 | for (i = 0; i < n; ++i) { |
864 | isl_bool bounded_i, singular_i; |
865 | |
866 | bounded_i = distance_is_bounded(set: dist, pos: i); |
867 | if (bounded_i < 0) |
868 | goto error; |
869 | if (bounded_i) |
870 | continue; |
871 | if (edge->weight >= 0) |
872 | bounded = isl_bool_false; |
873 | n_slack--; |
874 | if (n_slack < 0) |
875 | break; |
876 | singular_i = has_singular_src_or_dst(map, pos: i); |
877 | if (singular_i < 0) |
878 | goto error; |
879 | if (singular_i) |
880 | continue; |
881 | bounded = isl_bool_false; |
882 | break; |
883 | } |
884 | if (!bounded && i >= n && edge->weight >= 0) |
885 | edge->weight -= graph->max_weight + 1; |
886 | isl_map_free(map); |
887 | isl_set_free(set: dist); |
888 | |
889 | return bounded; |
890 | error: |
891 | isl_map_free(map); |
892 | isl_set_free(set: dist); |
893 | return isl_bool_error; |
894 | } |
895 | |
896 | /* Should the clusters be merged based on the cluster schedule |
897 | * in the current (and only) band of "merge_graph"? |
898 | * "graph" is the original dependence graph, while "c" records |
899 | * which SCCs are involved in the latest merge. |
900 | * |
901 | * In particular, is there at least one proximity constraint |
902 | * that is optimized by the merge? |
903 | * |
904 | * A proximity constraint is considered to be optimized |
905 | * if the dependence distances are small. |
906 | */ |
907 | static isl_bool ok_to_merge_proximity(isl_ctx *ctx, |
908 | struct isl_sched_graph *graph, struct isl_clustering *c, |
909 | struct isl_sched_graph *merge_graph) |
910 | { |
911 | int i; |
912 | |
913 | for (i = 0; i < graph->n_edge; ++i) { |
914 | struct isl_sched_edge *edge = &graph->edge[i]; |
915 | isl_bool bounded; |
916 | |
917 | if (!isl_sched_edge_is_proximity(edge)) |
918 | continue; |
919 | if (!c->scc_in_merge[edge->src->scc]) |
920 | continue; |
921 | if (!c->scc_in_merge[edge->dst->scc]) |
922 | continue; |
923 | if (c->scc_cluster[edge->dst->scc] == |
924 | c->scc_cluster[edge->src->scc]) |
925 | continue; |
926 | bounded = has_bounded_distances(ctx, edge, graph, c, |
927 | merge_graph); |
928 | if (bounded < 0 || bounded) |
929 | return bounded; |
930 | } |
931 | |
932 | return isl_bool_false; |
933 | } |
934 | |
935 | /* Should the clusters be merged based on the cluster schedule |
936 | * in the current (and only) band of "merge_graph"? |
937 | * "graph" is the original dependence graph, while "c" records |
938 | * which SCCs are involved in the latest merge. |
939 | * |
940 | * If the current band is empty, then the clusters should not be merged. |
941 | * |
942 | * If the band depth should be maximized and the merge schedule |
943 | * is incomplete (meaning that the dimension of some of the schedule |
944 | * bands in the original schedule will be reduced), then the clusters |
945 | * should not be merged. |
946 | * |
947 | * If the schedule_maximize_coincidence option is set, then check that |
948 | * the number of coincident schedule dimensions is not reduced. |
949 | * |
950 | * Finally, only allow the merge if at least one proximity |
951 | * constraint is optimized. |
952 | */ |
953 | static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph, |
954 | struct isl_clustering *c, struct isl_sched_graph *merge_graph) |
955 | { |
956 | if (merge_graph->n_total_row == merge_graph->band_start) |
957 | return isl_bool_false; |
958 | |
959 | if (isl_options_get_schedule_maximize_band_depth(ctx) && |
960 | merge_graph->n_total_row < merge_graph->maxvar) |
961 | return isl_bool_false; |
962 | |
963 | if (isl_options_get_schedule_maximize_coincidence(ctx)) { |
964 | isl_bool ok; |
965 | |
966 | ok = ok_to_merge_coincident(c, merge_graph); |
967 | if (ok < 0 || !ok) |
968 | return ok; |
969 | } |
970 | |
971 | return ok_to_merge_proximity(ctx, graph, c, merge_graph); |
972 | } |
973 | |
974 | /* Apply the schedule in "t_node" to the "n" rows starting at "first" |
975 | * of the schedule in "node" and return the result. |
976 | * |
977 | * That is, essentially compute |
978 | * |
979 | * T * N(first:first+n-1) |
980 | * |
981 | * taking into account the constant term and the parameter coefficients |
982 | * in "t_node". |
983 | */ |
984 | static __isl_give isl_mat *node_transformation(isl_ctx *ctx, |
985 | struct isl_sched_node *t_node, struct isl_sched_node *node, |
986 | int first, int n) |
987 | { |
988 | int i, j; |
989 | isl_mat *t; |
990 | isl_size n_row, n_col; |
991 | int n_param, n_var; |
992 | |
993 | n_param = node->nparam; |
994 | n_var = node->nvar; |
995 | n_row = isl_mat_rows(mat: t_node->sched); |
996 | n_col = isl_mat_cols(mat: node->sched); |
997 | if (n_row < 0 || n_col < 0) |
998 | return NULL; |
999 | t = isl_mat_alloc(ctx, n_row, n_col); |
1000 | if (!t) |
1001 | return NULL; |
1002 | for (i = 0; i < n_row; ++i) { |
1003 | isl_seq_cpy(dst: t->row[i], src: t_node->sched->row[i], len: 1 + n_param); |
1004 | isl_seq_clr(p: t->row[i] + 1 + n_param, len: n_var); |
1005 | for (j = 0; j < n; ++j) |
1006 | isl_seq_addmul(dst: t->row[i], |
1007 | f: t_node->sched->row[i][1 + n_param + j], |
1008 | src: node->sched->row[first + j], |
1009 | len: 1 + n_param + n_var); |
1010 | } |
1011 | return t; |
1012 | } |
1013 | |
1014 | /* Apply the cluster schedule in "t_node" to the current band |
1015 | * schedule of the nodes in "graph". |
1016 | * |
1017 | * In particular, replace the rows starting at band_start |
1018 | * by the result of applying the cluster schedule in "t_node" |
1019 | * to the original rows. |
1020 | * |
1021 | * The coincidence of the schedule is determined by the coincidence |
1022 | * of the cluster schedule. |
1023 | */ |
1024 | static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph, |
1025 | struct isl_sched_node *t_node) |
1026 | { |
1027 | int i, j; |
1028 | isl_size n_new; |
1029 | int start, n; |
1030 | |
1031 | start = graph->band_start; |
1032 | n = graph->n_total_row - start; |
1033 | |
1034 | n_new = isl_mat_rows(mat: t_node->sched); |
1035 | if (n_new < 0) |
1036 | return isl_stat_error; |
1037 | for (i = 0; i < graph->n; ++i) { |
1038 | struct isl_sched_node *node = &graph->node[i]; |
1039 | isl_mat *t; |
1040 | |
1041 | t = node_transformation(ctx, t_node, node, first: start, n); |
1042 | node->sched = isl_mat_drop_rows(mat: node->sched, row: start, n); |
1043 | node->sched = isl_mat_concat(top: node->sched, bot: t); |
1044 | node->sched_map = isl_map_free(map: node->sched_map); |
1045 | if (!node->sched) |
1046 | return isl_stat_error; |
1047 | for (j = 0; j < n_new; ++j) |
1048 | node->coincident[start + j] = t_node->coincident[j]; |
1049 | } |
1050 | graph->n_total_row -= n; |
1051 | graph->n_row -= n; |
1052 | graph->n_total_row += n_new; |
1053 | graph->n_row += n_new; |
1054 | |
1055 | return isl_stat_ok; |
1056 | } |
1057 | |
1058 | /* Merge the clusters marked for merging in "c" into a single |
1059 | * cluster using the cluster schedule in the current band of "merge_graph". |
1060 | * The representative SCC for the new cluster is the SCC with |
1061 | * the smallest index. |
1062 | * |
1063 | * The current band schedule of each SCC in the new cluster is obtained |
1064 | * by applying the schedule of the corresponding original cluster |
1065 | * to the original band schedule. |
1066 | * All SCCs in the new cluster have the same number of schedule rows. |
1067 | */ |
1068 | static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c, |
1069 | struct isl_sched_graph *merge_graph) |
1070 | { |
1071 | int i; |
1072 | int cluster = -1; |
1073 | isl_space *space; |
1074 | |
1075 | for (i = 0; i < c->n; ++i) { |
1076 | struct isl_sched_node *node; |
1077 | |
1078 | if (!c->scc_in_merge[i]) |
1079 | continue; |
1080 | if (cluster < 0) |
1081 | cluster = i; |
1082 | space = cluster_space(scc: &c->scc[i], i: c->scc_cluster[i]); |
1083 | node = isl_sched_graph_find_node(ctx, graph: merge_graph, space); |
1084 | isl_space_free(space); |
1085 | if (!node) |
1086 | return isl_stat_error; |
1087 | if (!isl_sched_graph_is_node(graph: merge_graph, node)) |
1088 | isl_die(ctx, isl_error_internal, |
1089 | "unable to find cluster" , |
1090 | return isl_stat_error); |
1091 | if (transform(ctx, graph: &c->scc[i], t_node: node) < 0) |
1092 | return isl_stat_error; |
1093 | c->scc_cluster[i] = cluster; |
1094 | } |
1095 | |
1096 | return isl_stat_ok; |
1097 | } |
1098 | |
1099 | /* Try and merge the clusters of SCCs marked in c->scc_in_merge |
1100 | * by scheduling the current cluster bands with respect to each other. |
1101 | * |
1102 | * Construct a dependence graph with a space for each cluster and |
1103 | * with the coordinates of each space corresponding to the schedule |
1104 | * dimensions of the current band of that cluster. |
1105 | * Construct a cluster schedule in this cluster dependence graph and |
1106 | * apply it to the current cluster bands if it is applicable |
1107 | * according to ok_to_merge. |
1108 | * |
1109 | * If the number of remaining schedule dimensions in a cluster |
1110 | * with a non-maximal current schedule dimension is greater than |
1111 | * the number of remaining schedule dimensions in clusters |
1112 | * with a maximal current schedule dimension, then restrict |
1113 | * the number of rows to be computed in the cluster schedule |
1114 | * to the minimal such non-maximal current schedule dimension. |
1115 | * Do this by adjusting merge_graph.maxvar. |
1116 | * |
1117 | * Return isl_bool_true if the clusters have effectively been merged |
1118 | * into a single cluster. |
1119 | * |
1120 | * Note that since the standard scheduling algorithm minimizes the maximal |
1121 | * distance over proximity constraints, the proximity constraints between |
1122 | * the merged clusters may not be optimized any further than what is |
1123 | * sufficient to bring the distances within the limits of the internal |
1124 | * proximity constraints inside the individual clusters. |
1125 | * It may therefore make sense to perform an additional translation step |
1126 | * to bring the clusters closer to each other, while maintaining |
1127 | * the linear part of the merging schedule found using the standard |
1128 | * scheduling algorithm. |
1129 | */ |
1130 | static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph, |
1131 | struct isl_clustering *c) |
1132 | { |
1133 | struct isl_sched_graph merge_graph = { 0 }; |
1134 | isl_bool merged; |
1135 | |
1136 | if (init_merge_graph(ctx, graph, c, merge_graph: &merge_graph) < 0) |
1137 | goto error; |
1138 | |
1139 | if (isl_sched_graph_compute_maxvar(graph: &merge_graph) < 0) |
1140 | goto error; |
1141 | if (adjust_maxvar_to_slack(ctx, merge_graph: &merge_graph,c) < 0) |
1142 | goto error; |
1143 | if (isl_schedule_node_compute_wcc_band(ctx, graph: &merge_graph) < 0) |
1144 | goto error; |
1145 | merged = ok_to_merge(ctx, graph, c, merge_graph: &merge_graph); |
1146 | if (merged && merge(ctx, c, merge_graph: &merge_graph) < 0) |
1147 | goto error; |
1148 | |
1149 | isl_sched_graph_free(ctx, graph: &merge_graph); |
1150 | return merged; |
1151 | error: |
1152 | isl_sched_graph_free(ctx, graph: &merge_graph); |
1153 | return isl_bool_error; |
1154 | } |
1155 | |
1156 | /* Is there any edge marked "no_merge" between two SCCs that are |
1157 | * about to be merged (i.e., that are set in "scc_in_merge")? |
1158 | * "merge_edge" is the proximity edge along which the clusters of SCCs |
1159 | * are going to be merged. |
1160 | * |
1161 | * If there is any edge between two SCCs with a negative weight, |
1162 | * while the weight of "merge_edge" is non-negative, then this |
1163 | * means that the edge was postponed. "merge_edge" should then |
1164 | * also be postponed since merging along the edge with negative weight should |
1165 | * be postponed until all edges with non-negative weight have been tried. |
1166 | * Replace the weight of "merge_edge" by a negative weight as well and |
1167 | * tell the caller not to attempt a merge. |
1168 | */ |
1169 | static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge, |
1170 | struct isl_sched_edge *merge_edge) |
1171 | { |
1172 | int i; |
1173 | |
1174 | for (i = 0; i < graph->n_edge; ++i) { |
1175 | struct isl_sched_edge *edge = &graph->edge[i]; |
1176 | |
1177 | if (!scc_in_merge[edge->src->scc]) |
1178 | continue; |
1179 | if (!scc_in_merge[edge->dst->scc]) |
1180 | continue; |
1181 | if (edge->no_merge) |
1182 | return 1; |
1183 | if (merge_edge->weight >= 0 && edge->weight < 0) { |
1184 | merge_edge->weight -= graph->max_weight + 1; |
1185 | return 1; |
1186 | } |
1187 | } |
1188 | |
1189 | return 0; |
1190 | } |
1191 | |
1192 | /* Merge the two clusters in "c" connected by the edge in "graph" |
1193 | * with index "edge" into a single cluster. |
1194 | * If it turns out to be impossible to merge these two clusters, |
1195 | * then mark the edge as "no_merge" such that it will not be |
1196 | * considered again. |
1197 | * |
1198 | * First mark all SCCs that need to be merged. This includes the SCCs |
1199 | * in the two clusters, but it may also include the SCCs |
1200 | * of intermediate clusters. |
1201 | * If there is already a no_merge edge between any pair of such SCCs, |
1202 | * then simply mark the current edge as no_merge as well. |
1203 | * Likewise, if any of those edges was postponed by has_bounded_distances, |
1204 | * then postpone the current edge as well. |
1205 | * Otherwise, try and merge the clusters and mark "edge" as "no_merge" |
1206 | * if the clusters did not end up getting merged, unless the non-merge |
1207 | * is due to the fact that the edge was postponed. This postponement |
1208 | * can be recognized by a change in weight (from non-negative to negative). |
1209 | */ |
1210 | static isl_stat merge_clusters_along_edge(isl_ctx *ctx, |
1211 | struct isl_sched_graph *graph, int edge, struct isl_clustering *c) |
1212 | { |
1213 | isl_bool merged; |
1214 | int edge_weight = graph->edge[edge].weight; |
1215 | |
1216 | if (mark_merge_sccs(ctx, graph, edge, c) < 0) |
1217 | return isl_stat_error; |
1218 | |
1219 | if (any_no_merge(graph, scc_in_merge: c->scc_in_merge, merge_edge: &graph->edge[edge])) |
1220 | merged = isl_bool_false; |
1221 | else |
1222 | merged = try_merge(ctx, graph, c); |
1223 | if (merged < 0) |
1224 | return isl_stat_error; |
1225 | if (!merged && edge_weight == graph->edge[edge].weight) |
1226 | graph->edge[edge].no_merge = 1; |
1227 | |
1228 | return isl_stat_ok; |
1229 | } |
1230 | |
1231 | /* Does "node" belong to the cluster identified by "cluster"? |
1232 | */ |
1233 | static int node_cluster_exactly(struct isl_sched_node *node, int cluster) |
1234 | { |
1235 | return node->cluster == cluster; |
1236 | } |
1237 | |
1238 | /* Does "edge" connect two nodes belonging to the cluster |
1239 | * identified by "cluster"? |
1240 | */ |
1241 | static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster) |
1242 | { |
1243 | return edge->src->cluster == cluster && edge->dst->cluster == cluster; |
1244 | } |
1245 | |
1246 | /* Swap the schedule of "node1" and "node2". |
1247 | * Both nodes have been derived from the same node in a common parent graph. |
1248 | * Since the "coincident" field is shared with that node |
1249 | * in the parent graph, there is no need to also swap this field. |
1250 | */ |
1251 | static void swap_sched(struct isl_sched_node *node1, |
1252 | struct isl_sched_node *node2) |
1253 | { |
1254 | isl_mat *sched; |
1255 | isl_map *sched_map; |
1256 | |
1257 | sched = node1->sched; |
1258 | node1->sched = node2->sched; |
1259 | node2->sched = sched; |
1260 | |
1261 | sched_map = node1->sched_map; |
1262 | node1->sched_map = node2->sched_map; |
1263 | node2->sched_map = sched_map; |
1264 | } |
1265 | |
1266 | /* Copy the current band schedule from the SCCs that form the cluster |
1267 | * with index "pos" to the actual cluster at position "pos". |
1268 | * By construction, the index of the first SCC that belongs to the cluster |
1269 | * is also "pos". |
1270 | * |
1271 | * The order of the nodes inside both the SCCs and the cluster |
1272 | * is assumed to be same as the order in the original "graph". |
1273 | * |
1274 | * Since the SCC graphs will no longer be used after this function, |
1275 | * the schedules are actually swapped rather than copied. |
1276 | */ |
1277 | static isl_stat copy_partial(struct isl_sched_graph *graph, |
1278 | struct isl_clustering *c, int pos) |
1279 | { |
1280 | int i, j; |
1281 | |
1282 | c->cluster[pos].n_total_row = c->scc[pos].n_total_row; |
1283 | c->cluster[pos].n_row = c->scc[pos].n_row; |
1284 | c->cluster[pos].maxvar = c->scc[pos].maxvar; |
1285 | j = 0; |
1286 | for (i = 0; i < graph->n; ++i) { |
1287 | int k; |
1288 | int s; |
1289 | |
1290 | if (graph->node[i].cluster != pos) |
1291 | continue; |
1292 | s = graph->node[i].scc; |
1293 | k = c->scc_node[s]++; |
1294 | swap_sched(node1: &c->cluster[pos].node[j], node2: &c->scc[s].node[k]); |
1295 | if (c->scc[s].maxvar > c->cluster[pos].maxvar) |
1296 | c->cluster[pos].maxvar = c->scc[s].maxvar; |
1297 | ++j; |
1298 | } |
1299 | |
1300 | return isl_stat_ok; |
1301 | } |
1302 | |
1303 | /* Is there a (conditional) validity dependence from node[j] to node[i], |
1304 | * forcing node[i] to follow node[j] or do the nodes belong to the same |
1305 | * cluster? |
1306 | */ |
1307 | static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user) |
1308 | { |
1309 | struct isl_sched_graph *graph = user; |
1310 | |
1311 | if (graph->node[i].cluster == graph->node[j].cluster) |
1312 | return isl_bool_true; |
1313 | return isl_sched_graph_has_validity_edge(graph, src: &graph->node[j], |
1314 | dst: &graph->node[i]); |
1315 | } |
1316 | |
1317 | /* Extract the merged clusters of SCCs in "graph", sort them, and |
1318 | * store them in c->clusters. Update c->scc_cluster accordingly. |
1319 | * |
1320 | * First keep track of the cluster containing the SCC to which a node |
1321 | * belongs in the node itself. |
1322 | * Then extract the clusters into c->clusters, copying the current |
1323 | * band schedule from the SCCs that belong to the cluster. |
1324 | * Do this only once per cluster. |
1325 | * |
1326 | * Finally, topologically sort the clusters and update c->scc_cluster |
1327 | * to match the new scc numbering. While the SCCs were originally |
1328 | * sorted already, some SCCs that depend on some other SCCs may |
1329 | * have been merged with SCCs that appear before these other SCCs. |
1330 | * A reordering may therefore be required. |
1331 | */ |
1332 | static isl_stat (isl_ctx *ctx, struct isl_sched_graph *graph, |
1333 | struct isl_clustering *c) |
1334 | { |
1335 | int i; |
1336 | |
1337 | for (i = 0; i < graph->n; ++i) |
1338 | graph->node[i].cluster = c->scc_cluster[graph->node[i].scc]; |
1339 | |
1340 | for (i = 0; i < graph->scc; ++i) { |
1341 | if (c->scc_cluster[i] != i) |
1342 | continue; |
1343 | if (isl_sched_graph_extract_sub_graph(ctx, graph, |
1344 | node_pred: &node_cluster_exactly, |
1345 | edge_pred: &edge_cluster_exactly, data: i, sub: &c->cluster[i]) < 0) |
1346 | return isl_stat_error; |
1347 | c->cluster[i].src_scc = -1; |
1348 | c->cluster[i].dst_scc = -1; |
1349 | if (copy_partial(graph, c, pos: i) < 0) |
1350 | return isl_stat_error; |
1351 | } |
1352 | |
1353 | if (isl_sched_graph_detect_ccs(ctx, graph, |
1354 | follows: &node_follows_strong_or_same_cluster) < 0) |
1355 | return isl_stat_error; |
1356 | for (i = 0; i < graph->n; ++i) |
1357 | c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster; |
1358 | |
1359 | return isl_stat_ok; |
1360 | } |
1361 | |
1362 | /* Compute weights on the proximity edges of "graph" that can |
1363 | * be used by find_proximity to find the most appropriate |
1364 | * proximity edge to use to merge two clusters in "c". |
1365 | * The weights are also used by has_bounded_distances to determine |
1366 | * whether the merge should be allowed. |
1367 | * Store the maximum of the computed weights in graph->max_weight. |
1368 | * |
1369 | * The computed weight is a measure for the number of remaining schedule |
1370 | * dimensions that can still be completely aligned. |
1371 | * In particular, compute the number of equalities between |
1372 | * input dimensions and output dimensions in the proximity constraints. |
1373 | * The directions that are already handled by outer schedule bands |
1374 | * are projected out prior to determining this number. |
1375 | * |
1376 | * Edges that will never be considered by find_proximity are ignored. |
1377 | */ |
1378 | static isl_stat compute_weights(struct isl_sched_graph *graph, |
1379 | struct isl_clustering *c) |
1380 | { |
1381 | int i; |
1382 | |
1383 | graph->max_weight = 0; |
1384 | |
1385 | for (i = 0; i < graph->n_edge; ++i) { |
1386 | struct isl_sched_edge *edge = &graph->edge[i]; |
1387 | struct isl_sched_node *src = edge->src; |
1388 | struct isl_sched_node *dst = edge->dst; |
1389 | isl_basic_map *hull; |
1390 | isl_bool prox; |
1391 | isl_size n_in, n_out, n; |
1392 | |
1393 | prox = is_non_empty_proximity(edge); |
1394 | if (prox < 0) |
1395 | return isl_stat_error; |
1396 | if (!prox) |
1397 | continue; |
1398 | if (bad_cluster(graph: &c->scc[edge->src->scc]) || |
1399 | bad_cluster(graph: &c->scc[edge->dst->scc])) |
1400 | continue; |
1401 | if (c->scc_cluster[edge->dst->scc] == |
1402 | c->scc_cluster[edge->src->scc]) |
1403 | continue; |
1404 | |
1405 | hull = isl_map_affine_hull(map: isl_map_copy(map: edge->map)); |
1406 | hull = isl_basic_map_transform_dims(bmap: hull, type: isl_dim_in, first: 0, |
1407 | trans: isl_mat_copy(mat: src->vmap)); |
1408 | hull = isl_basic_map_transform_dims(bmap: hull, type: isl_dim_out, first: 0, |
1409 | trans: isl_mat_copy(mat: dst->vmap)); |
1410 | hull = isl_basic_map_project_out(bmap: hull, |
1411 | type: isl_dim_in, first: 0, n: src->rank); |
1412 | hull = isl_basic_map_project_out(bmap: hull, |
1413 | type: isl_dim_out, first: 0, n: dst->rank); |
1414 | hull = isl_basic_map_remove_divs(bmap: hull); |
1415 | n_in = isl_basic_map_dim(bmap: hull, type: isl_dim_in); |
1416 | n_out = isl_basic_map_dim(bmap: hull, type: isl_dim_out); |
1417 | if (n_in < 0 || n_out < 0) |
1418 | hull = isl_basic_map_free(bmap: hull); |
1419 | hull = isl_basic_map_drop_constraints_not_involving_dims(bmap: hull, |
1420 | type: isl_dim_in, first: 0, n: n_in); |
1421 | hull = isl_basic_map_drop_constraints_not_involving_dims(bmap: hull, |
1422 | type: isl_dim_out, first: 0, n: n_out); |
1423 | n = isl_basic_map_n_equality(bmap: hull); |
1424 | isl_basic_map_free(bmap: hull); |
1425 | if (n < 0) |
1426 | return isl_stat_error; |
1427 | edge->weight = n; |
1428 | |
1429 | if (edge->weight > graph->max_weight) |
1430 | graph->max_weight = edge->weight; |
1431 | } |
1432 | |
1433 | return isl_stat_ok; |
1434 | } |
1435 | |
1436 | /* Call isl_schedule_node_compute_finish_band on each of the clusters in "c" and |
1437 | * update "node" to arrange for them to be executed in an order |
1438 | * possibly involving set nodes that generalizes the topological order |
1439 | * determined by the scc fields of the nodes in "graph". |
1440 | * |
1441 | * Note that at this stage, there are graph->scc clusters and |
1442 | * their positions in c->cluster are determined by the values |
1443 | * of c->scc_cluster. |
1444 | * |
1445 | * Construct an isl_scc_graph and perform the decomposition |
1446 | * using this graph. |
1447 | */ |
1448 | static __isl_give isl_schedule_node *finish_bands_decompose( |
1449 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
1450 | struct isl_clustering *c) |
1451 | { |
1452 | isl_ctx *ctx; |
1453 | struct isl_scc_graph *scc_graph; |
1454 | |
1455 | ctx = isl_schedule_node_get_ctx(node); |
1456 | |
1457 | scc_graph = isl_scc_graph_from_sched_graph(ctx, graph, c); |
1458 | node = isl_scc_graph_decompose(scc_graph, node); |
1459 | isl_scc_graph_free(scc_graph); |
1460 | |
1461 | return node; |
1462 | } |
1463 | |
1464 | /* Call isl_schedule_node_compute_finish_band on each of the clusters in "c" |
1465 | * in their topological order. This order is determined by the scc |
1466 | * fields of the nodes in "graph". |
1467 | * Combine the results in a sequence expressing the topological order. |
1468 | * |
1469 | * If there is only one cluster left, then there is no need to introduce |
1470 | * a sequence node. Also, in this case, the cluster necessarily contains |
1471 | * the SCC at position 0 in the original graph and is therefore also |
1472 | * stored in the first cluster of "c". |
1473 | * |
1474 | * If there are more than two clusters left, then some subsets of the clusters |
1475 | * may still be independent of each other. These could then still |
1476 | * be reordered with respect to each other. Call finish_bands_decompose |
1477 | * to try and construct an ordering involving set and sequence nodes |
1478 | * that generalizes the topological order. |
1479 | * Note that at the outermost level there can be no independent components |
1480 | * because isl_schedule_node_compute_wcc_clustering is called |
1481 | * on a (weakly) connected component. |
1482 | */ |
1483 | static __isl_give isl_schedule_node *finish_bands_clustering( |
1484 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
1485 | struct isl_clustering *c) |
1486 | { |
1487 | int i; |
1488 | isl_ctx *ctx; |
1489 | isl_union_set_list *filters; |
1490 | |
1491 | if (graph->scc == 1) |
1492 | return isl_schedule_node_compute_finish_band(node, |
1493 | graph: &c->cluster[0], initialized: 0); |
1494 | if (graph->scc > 2) |
1495 | return finish_bands_decompose(node, graph, c); |
1496 | |
1497 | ctx = isl_schedule_node_get_ctx(node); |
1498 | |
1499 | filters = isl_sched_graph_extract_sccs(ctx, graph); |
1500 | node = isl_schedule_node_insert_sequence(node, filters); |
1501 | |
1502 | for (i = 0; i < graph->scc; ++i) { |
1503 | int j = c->scc_cluster[i]; |
1504 | node = isl_schedule_node_grandchild(node, pos1: i, pos2: 0); |
1505 | node = isl_schedule_node_compute_finish_band(node, |
1506 | graph: &c->cluster[j], initialized: 0); |
1507 | node = isl_schedule_node_grandparent(node); |
1508 | } |
1509 | |
1510 | return node; |
1511 | } |
1512 | |
1513 | /* Compute a schedule for a connected dependence graph by first considering |
1514 | * each strongly connected component (SCC) in the graph separately and then |
1515 | * incrementally combining them into clusters. |
1516 | * Return the updated schedule node. |
1517 | * |
1518 | * Initially, each cluster consists of a single SCC, each with its |
1519 | * own band schedule. The algorithm then tries to merge pairs |
1520 | * of clusters along a proximity edge until no more suitable |
1521 | * proximity edges can be found. During this merging, the schedule |
1522 | * is maintained in the individual SCCs. |
1523 | * After the merging is completed, the full resulting clusters |
1524 | * are extracted and in finish_bands_clustering, |
1525 | * isl_schedule_node_compute_finish_band is called on each of them to integrate |
1526 | * the band into "node" and to continue the computation. |
1527 | * |
1528 | * compute_weights initializes the weights that are used by find_proximity. |
1529 | */ |
1530 | __isl_give isl_schedule_node *isl_schedule_node_compute_wcc_clustering( |
1531 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
1532 | { |
1533 | isl_ctx *ctx; |
1534 | struct isl_clustering c; |
1535 | int i; |
1536 | |
1537 | ctx = isl_schedule_node_get_ctx(node); |
1538 | |
1539 | if (clustering_init(ctx, c: &c, graph) < 0) |
1540 | goto error; |
1541 | |
1542 | if (compute_weights(graph, c: &c) < 0) |
1543 | goto error; |
1544 | |
1545 | for (;;) { |
1546 | i = find_proximity(graph, c: &c); |
1547 | if (i < 0) |
1548 | goto error; |
1549 | if (i >= graph->n_edge) |
1550 | break; |
1551 | if (merge_clusters_along_edge(ctx, graph, edge: i, c: &c) < 0) |
1552 | goto error; |
1553 | } |
1554 | |
1555 | if (extract_clusters(ctx, graph, c: &c) < 0) |
1556 | goto error; |
1557 | |
1558 | node = finish_bands_clustering(node, graph, c: &c); |
1559 | |
1560 | clustering_free(ctx, c: &c); |
1561 | return node; |
1562 | error: |
1563 | clustering_free(ctx, c: &c); |
1564 | return isl_schedule_node_free(node); |
1565 | } |
1566 | |