1/*
2 * Copyright 2015 Sven Verdoolaege
3 *
4 * Use of this software is governed by the MIT license
5 *
6 * Written by Sven Verdoolaege
7 */
8
9#include "isl_map_private.h"
10
11#include <isl/id.h>
12#include <isl/schedule_node.h>
13#include <isl/union_set.h>
14
15#include "isl_mat_private.h"
16#include "isl_scheduler_clustering.h"
17#include "isl_scheduler_scc.h"
18#include "isl_seq.h"
19#include "isl_tarjan.h"
20
21/* Initialize the clustering data structure "c" from "graph".
22 *
23 * In particular, allocate memory, extract the SCCs from "graph"
24 * into c->scc, initialize scc_cluster and construct
25 * a band of schedule rows for each SCC.
26 * Within each SCC, there is only one SCC by definition.
27 * Each SCC initially belongs to a cluster containing only that SCC.
28 */
29static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c,
30 struct isl_sched_graph *graph)
31{
32 int i;
33
34 c->n = graph->scc;
35 c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
36 c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
37 c->scc_cluster = isl_calloc_array(ctx, int, c->n);
38 c->scc_node = isl_calloc_array(ctx, int, c->n);
39 c->scc_in_merge = isl_calloc_array(ctx, int, c->n);
40 if (!c->scc || !c->cluster ||
41 !c->scc_cluster || !c->scc_node || !c->scc_in_merge)
42 return isl_stat_error;
43
44 for (i = 0; i < c->n; ++i) {
45 if (isl_sched_graph_extract_sub_graph(ctx, graph,
46 node_pred: &isl_sched_node_scc_exactly,
47 edge_pred: &isl_sched_edge_scc_exactly,
48 data: i, sub: &c->scc[i]) < 0)
49 return isl_stat_error;
50 c->scc[i].scc = 1;
51 if (isl_sched_graph_compute_maxvar(graph: &c->scc[i]) < 0)
52 return isl_stat_error;
53 if (isl_schedule_node_compute_wcc_band(ctx, graph: &c->scc[i]) < 0)
54 return isl_stat_error;
55 c->scc_cluster[i] = i;
56 }
57
58 return isl_stat_ok;
59}
60
61/* Free all memory allocated for "c".
62 */
63static void clustering_free(isl_ctx *ctx, struct isl_clustering *c)
64{
65 int i;
66
67 if (c->scc)
68 for (i = 0; i < c->n; ++i)
69 isl_sched_graph_free(ctx, graph: &c->scc[i]);
70 free(ptr: c->scc);
71 if (c->cluster)
72 for (i = 0; i < c->n; ++i)
73 isl_sched_graph_free(ctx, graph: &c->cluster[i]);
74 free(ptr: c->cluster);
75 free(ptr: c->scc_cluster);
76 free(ptr: c->scc_node);
77 free(ptr: c->scc_in_merge);
78}
79
80/* Should we refrain from merging the cluster in "graph" with
81 * any other cluster?
82 * In particular, is its current schedule band empty and incomplete.
83 */
84static int bad_cluster(struct isl_sched_graph *graph)
85{
86 return graph->n_row < graph->maxvar &&
87 graph->n_total_row == graph->band_start;
88}
89
90/* Is "edge" a proximity edge with a non-empty dependence relation?
91 */
92static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge)
93{
94 if (!isl_sched_edge_is_proximity(edge))
95 return isl_bool_false;
96 return isl_bool_not(b: isl_map_plain_is_empty(map: edge->map));
97}
98
99/* Return the index of an edge in "graph" that can be used to merge
100 * two clusters in "c".
101 * Return graph->n_edge if no such edge can be found.
102 * Return -1 on error.
103 *
104 * In particular, return a proximity edge between two clusters
105 * that is not marked "no_merge" and such that neither of the
106 * two clusters has an incomplete, empty band.
107 *
108 * If there are multiple such edges, then try and find the most
109 * appropriate edge to use for merging. In particular, pick the edge
110 * with the greatest weight. If there are multiple of those,
111 * then pick one with the shortest distance between
112 * the two cluster representatives.
113 */
114static int find_proximity(struct isl_sched_graph *graph,
115 struct isl_clustering *c)
116{
117 int i, best = graph->n_edge, best_dist, best_weight;
118
119 for (i = 0; i < graph->n_edge; ++i) {
120 struct isl_sched_edge *edge = &graph->edge[i];
121 int dist, weight;
122 isl_bool prox;
123
124 prox = is_non_empty_proximity(edge);
125 if (prox < 0)
126 return -1;
127 if (!prox)
128 continue;
129 if (edge->no_merge)
130 continue;
131 if (bad_cluster(graph: &c->scc[edge->src->scc]) ||
132 bad_cluster(graph: &c->scc[edge->dst->scc]))
133 continue;
134 dist = c->scc_cluster[edge->dst->scc] -
135 c->scc_cluster[edge->src->scc];
136 if (dist == 0)
137 continue;
138 weight = edge->weight;
139 if (best < graph->n_edge) {
140 if (best_weight > weight)
141 continue;
142 if (best_weight == weight && best_dist <= dist)
143 continue;
144 }
145 best = i;
146 best_dist = dist;
147 best_weight = weight;
148 }
149
150 return best;
151}
152
/* User data passed to cluster_follows by mark_merge_sccs.
 *
 * "graph" is the dependence graph in which the strongly connected
 * component is being computed.
 * "scc_cluster" maps the index of each SCC to the cluster
 * it currently belongs to.
 * "src" and "dst" are the positions in graph->node of the two nodes
 * connected by the edge that was selected for merging.
 */
struct isl_mark_merge_sccs_data {
	struct isl_sched_graph *graph;
	int *scc_cluster;
	int src;
	int dst;
};
166
167/* Check whether the cluster containing node "i" depends on the cluster
168 * containing node "j". If "i" and "j" belong to the same cluster,
169 * then they are taken to depend on each other to ensure that
170 * the resulting strongly connected component consists of complete
171 * clusters. Furthermore, if "i" and "j" are the two nodes that
172 * are being merged, then they are taken to depend on each other as well.
173 * Otherwise, check if there is a (conditional) validity dependence
174 * from node[j] to node[i], forcing node[i] to follow node[j].
175 */
176static isl_bool cluster_follows(int i, int j, void *user)
177{
178 struct isl_mark_merge_sccs_data *data = user;
179 struct isl_sched_graph *graph = data->graph;
180 int *scc_cluster = data->scc_cluster;
181
182 if (data->src == i && data->dst == j)
183 return isl_bool_true;
184 if (data->src == j && data->dst == i)
185 return isl_bool_true;
186 if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc])
187 return isl_bool_true;
188
189 return isl_sched_graph_has_validity_edge(graph, src: &graph->node[j],
190 dst: &graph->node[i]);
191}
192
193/* Mark all SCCs that belong to either of the two clusters in "c"
194 * connected by the edge in "graph" with index "edge", or to any
195 * of the intermediate clusters.
196 * The marking is recorded in c->scc_in_merge.
197 *
198 * The given edge has been selected for merging two clusters,
199 * meaning that there is at least a proximity edge between the two nodes.
200 * However, there may also be (indirect) validity dependences
201 * between the two nodes. When merging the two clusters, all clusters
202 * containing one or more of the intermediate nodes along the
203 * indirect validity dependences need to be merged in as well.
204 *
205 * First collect all such nodes by computing the strongly connected
206 * component (SCC) containing the two nodes connected by the edge, where
207 * the two nodes are considered to depend on each other to make
208 * sure they end up in the same SCC. Similarly, each node is considered
209 * to depend on every other node in the same cluster to ensure
210 * that the SCC consists of complete clusters.
211 *
212 * Then the original SCCs that contain any of these nodes are marked
213 * in c->scc_in_merge.
214 */
215static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph,
216 int edge, struct isl_clustering *c)
217{
218 struct isl_mark_merge_sccs_data data;
219 struct isl_tarjan_graph *g;
220 int i;
221
222 for (i = 0; i < c->n; ++i)
223 c->scc_in_merge[i] = 0;
224
225 data.graph = graph;
226 data.scc_cluster = c->scc_cluster;
227 data.src = graph->edge[edge].src - graph->node;
228 data.dst = graph->edge[edge].dst - graph->node;
229
230 g = isl_tarjan_graph_component(ctx, len: graph->n, node: data.dst,
231 follows: &cluster_follows, user: &data);
232 if (!g)
233 goto error;
234
235 i = g->op;
236 if (i < 3)
237 isl_die(ctx, isl_error_internal,
238 "expecting at least two nodes in component",
239 goto error);
240 if (g->order[--i] != -1)
241 isl_die(ctx, isl_error_internal,
242 "expecting end of component marker", goto error);
243
244 for (--i; i >= 0 && g->order[i] != -1; --i) {
245 int scc = graph->node[g->order[i]].scc;
246 c->scc_in_merge[scc] = 1;
247 }
248
249 isl_tarjan_graph_free(g);
250 return isl_stat_ok;
251error:
252 isl_tarjan_graph_free(g);
253 return isl_stat_error;
254}
255
256/* Construct the identifier "cluster_i".
257 */
258static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i)
259{
260 char name[40];
261
262 snprintf(s: name, maxlen: sizeof(name), format: "cluster_%d", i);
263 return isl_id_alloc(ctx, name, NULL);
264}
265
266/* Construct the space of the cluster with index "i" containing
267 * the strongly connected component "scc".
268 *
269 * In particular, construct a space called cluster_i with dimension equal
270 * to the number of schedule rows in the current band of "scc".
271 */
272static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i)
273{
274 int nvar;
275 isl_space *space;
276 isl_id *id;
277
278 nvar = scc->n_total_row - scc->band_start;
279 space = isl_space_copy(space: scc->node[0].space);
280 space = isl_space_params(space);
281 space = isl_space_set_from_params(space);
282 space = isl_space_add_dims(space, type: isl_dim_set, n: nvar);
283 id = cluster_id(ctx: isl_space_get_ctx(space), i);
284 space = isl_space_set_tuple_id(space, type: isl_dim_set, id);
285
286 return space;
287}
288
289/* Collect the domain of the graph for merging clusters.
290 *
291 * In particular, for each cluster with first SCC "i", construct
292 * a set in the space called cluster_i with dimension equal
293 * to the number of schedule rows in the current band of the cluster.
294 */
295static __isl_give isl_union_set *collect_domain(isl_ctx *ctx,
296 struct isl_sched_graph *graph, struct isl_clustering *c)
297{
298 int i;
299 isl_space *space;
300 isl_union_set *domain;
301
302 space = isl_space_params_alloc(ctx, nparam: 0);
303 domain = isl_union_set_empty(space);
304
305 for (i = 0; i < graph->scc; ++i) {
306 isl_space *space;
307
308 if (!c->scc_in_merge[i])
309 continue;
310 if (c->scc_cluster[i] != i)
311 continue;
312 space = cluster_space(scc: &c->scc[i], i);
313 domain = isl_union_set_add_set(uset: domain, set: isl_set_universe(space));
314 }
315
316 return domain;
317}
318
319/* Construct a map from the original instances to the corresponding
320 * cluster instance in the current bands of the clusters in "c".
321 */
322static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx,
323 struct isl_sched_graph *graph, struct isl_clustering *c)
324{
325 int i, j;
326 isl_space *space;
327 isl_union_map *cluster_map;
328
329 space = isl_space_params_alloc(ctx, nparam: 0);
330 cluster_map = isl_union_map_empty(space);
331 for (i = 0; i < graph->scc; ++i) {
332 int start, n;
333 isl_id *id;
334
335 if (!c->scc_in_merge[i])
336 continue;
337
338 id = cluster_id(ctx, i: c->scc_cluster[i]);
339 start = c->scc[i].band_start;
340 n = c->scc[i].n_total_row - start;
341 for (j = 0; j < c->scc[i].n; ++j) {
342 isl_multi_aff *ma;
343 isl_map *map;
344 struct isl_sched_node *node = &c->scc[i].node[j];
345
346 ma = isl_sched_node_extract_partial_schedule_multi_aff(
347 node, first: start, n);
348 ma = isl_multi_aff_set_tuple_id(multi: ma, type: isl_dim_out,
349 id: isl_id_copy(id));
350 map = isl_map_from_multi_aff(maff: ma);
351 cluster_map = isl_union_map_add_map(umap: cluster_map, map);
352 }
353 isl_id_free(id);
354 }
355
356 return cluster_map;
357}
358
359/* Add "umap" to the schedule constraints "sc" of all types of "edge"
360 * that are not isl_edge_condition or isl_edge_conditional_validity.
361 */
362static __isl_give isl_schedule_constraints *add_non_conditional_constraints(
363 struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
364 __isl_take isl_schedule_constraints *sc)
365{
366 enum isl_edge_type t;
367
368 if (!sc)
369 return NULL;
370
371 for (t = isl_edge_first; t <= isl_edge_last; ++t) {
372 if (t == isl_edge_condition ||
373 t == isl_edge_conditional_validity)
374 continue;
375 if (!isl_sched_edge_has_type(edge, type: t))
376 continue;
377 sc = isl_schedule_constraints_add(sc, type: t,
378 c: isl_union_map_copy(umap));
379 }
380
381 return sc;
382}
383
384/* Add schedule constraints of types isl_edge_condition and
385 * isl_edge_conditional_validity to "sc" by applying "umap" to
386 * the domains of the wrapped relations in domain and range
387 * of the corresponding tagged constraints of "edge".
388 */
389static __isl_give isl_schedule_constraints *add_conditional_constraints(
390 struct isl_sched_edge *edge, __isl_keep isl_union_map *umap,
391 __isl_take isl_schedule_constraints *sc)
392{
393 enum isl_edge_type t;
394 isl_union_map *tagged;
395
396 for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) {
397 if (!isl_sched_edge_has_type(edge, type: t))
398 continue;
399 if (t == isl_edge_condition)
400 tagged = isl_union_map_copy(umap: edge->tagged_condition);
401 else
402 tagged = isl_union_map_copy(umap: edge->tagged_validity);
403 tagged = isl_union_map_zip(umap: tagged);
404 tagged = isl_union_map_apply_domain(umap1: tagged,
405 umap2: isl_union_map_copy(umap));
406 tagged = isl_union_map_zip(umap: tagged);
407 sc = isl_schedule_constraints_add(sc, type: t, c: tagged);
408 if (!sc)
409 return NULL;
410 }
411
412 return sc;
413}
414
415/* Given a mapping "cluster_map" from the original instances to
416 * the cluster instances, add schedule constraints on the clusters
417 * to "sc" corresponding to the original constraints represented by "edge".
418 *
419 * For non-tagged dependence constraints, the cluster constraints
420 * are obtained by applying "cluster_map" to the edge->map.
421 *
422 * For tagged dependence constraints, "cluster_map" needs to be applied
423 * to the domains of the wrapped relations in domain and range
424 * of the tagged dependence constraints. Pick out the mappings
425 * from these domains from "cluster_map" and construct their product.
426 * This mapping can then be applied to the pair of domains.
427 */
428static __isl_give isl_schedule_constraints *collect_edge_constraints(
429 struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map,
430 __isl_take isl_schedule_constraints *sc)
431{
432 isl_union_map *umap;
433 isl_space *space;
434 isl_union_set *uset;
435 isl_union_map *umap1, *umap2;
436
437 if (!sc)
438 return NULL;
439
440 umap = isl_union_map_from_map(map: isl_map_copy(map: edge->map));
441 umap = isl_union_map_apply_domain(umap1: umap,
442 umap2: isl_union_map_copy(umap: cluster_map));
443 umap = isl_union_map_apply_range(umap1: umap,
444 umap2: isl_union_map_copy(umap: cluster_map));
445 sc = add_non_conditional_constraints(edge, umap, sc);
446 isl_union_map_free(umap);
447
448 if (!sc ||
449 (!isl_sched_edge_is_condition(edge) &&
450 !isl_sched_edge_is_conditional_validity(edge)))
451 return sc;
452
453 space = isl_space_domain(space: isl_map_get_space(map: edge->map));
454 uset = isl_union_set_from_set(set: isl_set_universe(space));
455 umap1 = isl_union_map_copy(umap: cluster_map);
456 umap1 = isl_union_map_intersect_domain(umap: umap1, uset);
457 space = isl_space_range(space: isl_map_get_space(map: edge->map));
458 uset = isl_union_set_from_set(set: isl_set_universe(space));
459 umap2 = isl_union_map_copy(umap: cluster_map);
460 umap2 = isl_union_map_intersect_domain(umap: umap2, uset);
461 umap = isl_union_map_product(umap1, umap2);
462
463 sc = add_conditional_constraints(edge, umap, sc);
464
465 isl_union_map_free(umap);
466 return sc;
467}
468
469/* Given a mapping "cluster_map" from the original instances to
470 * the cluster instances, add schedule constraints on the clusters
471 * to "sc" corresponding to all edges in "graph" between nodes that
472 * belong to SCCs that are marked for merging in "scc_in_merge".
473 */
474static __isl_give isl_schedule_constraints *collect_constraints(
475 struct isl_sched_graph *graph, int *scc_in_merge,
476 __isl_keep isl_union_map *cluster_map,
477 __isl_take isl_schedule_constraints *sc)
478{
479 int i;
480
481 for (i = 0; i < graph->n_edge; ++i) {
482 struct isl_sched_edge *edge = &graph->edge[i];
483
484 if (!scc_in_merge[edge->src->scc])
485 continue;
486 if (!scc_in_merge[edge->dst->scc])
487 continue;
488 sc = collect_edge_constraints(edge, cluster_map, sc);
489 }
490
491 return sc;
492}
493
494/* Construct a dependence graph for scheduling clusters with respect
495 * to each other and store the result in "merge_graph".
496 * In particular, the nodes of the graph correspond to the schedule
497 * dimensions of the current bands of those clusters that have been
498 * marked for merging in "c".
499 *
500 * First construct an isl_schedule_constraints object for this domain
501 * by transforming the edges in "graph" to the domain.
502 * Then initialize a dependence graph for scheduling from these
503 * constraints.
504 */
505static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph,
506 struct isl_clustering *c, struct isl_sched_graph *merge_graph)
507{
508 isl_union_set *domain;
509 isl_union_map *cluster_map;
510 isl_schedule_constraints *sc;
511 isl_stat r;
512
513 domain = collect_domain(ctx, graph, c);
514 sc = isl_schedule_constraints_on_domain(domain);
515 if (!sc)
516 return isl_stat_error;
517 cluster_map = collect_cluster_map(ctx, graph, c);
518 sc = collect_constraints(graph, scc_in_merge: c->scc_in_merge, cluster_map, sc);
519 isl_union_map_free(umap: cluster_map);
520
521 r = isl_sched_graph_init(graph: merge_graph, sc);
522
523 isl_schedule_constraints_free(sc);
524
525 return r;
526}
527
528/* Compute the maximal number of remaining schedule rows that still need
529 * to be computed for the nodes that belong to clusters with the maximal
530 * dimension for the current band (i.e., the band that is to be merged).
531 * Only clusters that are about to be merged are considered.
532 * "maxvar" is the maximal dimension for the current band.
533 * "c" contains information about the clusters.
534 *
535 * Return the maximal number of remaining schedule rows or
536 * isl_size_error on error.
537 */
538static isl_size compute_maxvar_max_slack(int maxvar, struct isl_clustering *c)
539{
540 int i, j;
541 int max_slack;
542
543 max_slack = 0;
544 for (i = 0; i < c->n; ++i) {
545 int nvar;
546 struct isl_sched_graph *scc;
547
548 if (!c->scc_in_merge[i])
549 continue;
550 scc = &c->scc[i];
551 nvar = scc->n_total_row - scc->band_start;
552 if (nvar != maxvar)
553 continue;
554 for (j = 0; j < scc->n; ++j) {
555 struct isl_sched_node *node = &scc->node[j];
556 int slack;
557
558 if (isl_sched_node_update_vmap(node) < 0)
559 return isl_size_error;
560 slack = node->nvar - node->rank;
561 if (slack > max_slack)
562 max_slack = slack;
563 }
564 }
565
566 return max_slack;
567}
568
569/* If there are any clusters where the dimension of the current band
570 * (i.e., the band that is to be merged) is smaller than "maxvar" and
571 * if there are any nodes in such a cluster where the number
572 * of remaining schedule rows that still need to be computed
573 * is greater than "max_slack", then return the smallest current band
574 * dimension of all these clusters. Otherwise return the original value
575 * of "maxvar". Return isl_size_error in case of any error.
576 * Only clusters that are about to be merged are considered.
577 * "c" contains information about the clusters.
578 */
579static isl_size limit_maxvar_to_slack(int maxvar, int max_slack,
580 struct isl_clustering *c)
581{
582 int i, j;
583
584 for (i = 0; i < c->n; ++i) {
585 int nvar;
586 struct isl_sched_graph *scc;
587
588 if (!c->scc_in_merge[i])
589 continue;
590 scc = &c->scc[i];
591 nvar = scc->n_total_row - scc->band_start;
592 if (nvar >= maxvar)
593 continue;
594 for (j = 0; j < scc->n; ++j) {
595 struct isl_sched_node *node = &scc->node[j];
596 int slack;
597
598 if (isl_sched_node_update_vmap(node) < 0)
599 return isl_size_error;
600 slack = node->nvar - node->rank;
601 if (slack > max_slack) {
602 maxvar = nvar;
603 break;
604 }
605 }
606 }
607
608 return maxvar;
609}
610
611/* Adjust merge_graph->maxvar based on the number of remaining schedule rows
612 * that still need to be computed. In particular, if there is a node
613 * in a cluster where the dimension of the current band is smaller
614 * than merge_graph->maxvar, but the number of remaining schedule rows
615 * is greater than that of any node in a cluster with the maximal
616 * dimension for the current band (i.e., merge_graph->maxvar),
617 * then adjust merge_graph->maxvar to the (smallest) current band dimension
618 * of those clusters. Without this adjustment, the total number of
619 * schedule dimensions would be increased, resulting in a skewed view
620 * of the number of coincident dimensions.
621 * "c" contains information about the clusters.
622 *
623 * If the maximize_band_depth option is set and merge_graph->maxvar is reduced,
624 * then there is no point in attempting any merge since it will be rejected
625 * anyway. Set merge_graph->maxvar to zero in such cases.
626 */
627static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx,
628 struct isl_sched_graph *merge_graph, struct isl_clustering *c)
629{
630 isl_size max_slack, maxvar;
631
632 max_slack = compute_maxvar_max_slack(maxvar: merge_graph->maxvar, c);
633 if (max_slack < 0)
634 return isl_stat_error;
635 maxvar = limit_maxvar_to_slack(maxvar: merge_graph->maxvar, max_slack, c);
636 if (maxvar < 0)
637 return isl_stat_error;
638
639 if (maxvar < merge_graph->maxvar) {
640 if (isl_options_get_schedule_maximize_band_depth(ctx))
641 merge_graph->maxvar = 0;
642 else
643 merge_graph->maxvar = maxvar;
644 }
645
646 return isl_stat_ok;
647}
648
649/* Return the number of coincident dimensions in the current band of "graph",
650 * where the nodes of "graph" are assumed to be scheduled by a single band.
651 */
652static int get_n_coincident(struct isl_sched_graph *graph)
653{
654 int i;
655
656 for (i = graph->band_start; i < graph->n_total_row; ++i)
657 if (!graph->node[0].coincident[i])
658 break;
659
660 return i - graph->band_start;
661}
662
663/* Should the clusters be merged based on the cluster schedule
664 * in the current (and only) band of "merge_graph", given that
665 * coincidence should be maximized?
666 *
667 * If the number of coincident schedule dimensions in the merged band
668 * would be less than the maximal number of coincident schedule dimensions
669 * in any of the merged clusters, then the clusters should not be merged.
670 */
671static isl_bool ok_to_merge_coincident(struct isl_clustering *c,
672 struct isl_sched_graph *merge_graph)
673{
674 int i;
675 int n_coincident;
676 int max_coincident;
677
678 max_coincident = 0;
679 for (i = 0; i < c->n; ++i) {
680 if (!c->scc_in_merge[i])
681 continue;
682 n_coincident = get_n_coincident(graph: &c->scc[i]);
683 if (n_coincident > max_coincident)
684 max_coincident = n_coincident;
685 }
686
687 n_coincident = get_n_coincident(graph: merge_graph);
688
689 return isl_bool_ok(b: n_coincident >= max_coincident);
690}
691
692/* Return the transformation on "node" expressed by the current (and only)
693 * band of "merge_graph" applied to the clusters in "c".
694 *
695 * First find the representation of "node" in its SCC in "c" and
696 * extract the transformation expressed by the current band.
697 * Then extract the transformation applied by "merge_graph"
698 * to the cluster to which this SCC belongs.
699 * Combine the two to obtain the complete transformation on the node.
700 *
701 * Note that the range of the first transformation is an anonymous space,
702 * while the domain of the second is named "cluster_X". The range
703 * of the former therefore needs to be adjusted before the two
704 * can be combined.
705 */
706static __isl_give isl_map *extract_node_transformation(isl_ctx *ctx,
707 struct isl_sched_node *node, struct isl_clustering *c,
708 struct isl_sched_graph *merge_graph)
709{
710 struct isl_sched_node *scc_node, *cluster_node;
711 int start, n;
712 isl_id *id;
713 isl_space *space;
714 isl_multi_aff *ma, *ma2;
715
716 scc_node = isl_sched_graph_find_node(ctx, graph: &c->scc[node->scc],
717 space: node->space);
718 if (scc_node && !isl_sched_graph_is_node(graph: &c->scc[node->scc], node: scc_node))
719 isl_die(ctx, isl_error_internal, "unable to find node",
720 return NULL);
721 start = c->scc[node->scc].band_start;
722 n = c->scc[node->scc].n_total_row - start;
723 ma = isl_sched_node_extract_partial_schedule_multi_aff(node: scc_node,
724 first: start, n);
725 space = cluster_space(scc: &c->scc[node->scc], i: c->scc_cluster[node->scc]);
726 cluster_node = isl_sched_graph_find_node(ctx, graph: merge_graph, space);
727 if (cluster_node && !isl_sched_graph_is_node(graph: merge_graph, node: cluster_node))
728 isl_die(ctx, isl_error_internal, "unable to find cluster",
729 space = isl_space_free(space));
730 id = isl_space_get_tuple_id(space, type: isl_dim_set);
731 ma = isl_multi_aff_set_tuple_id(multi: ma, type: isl_dim_out, id);
732 isl_space_free(space);
733 n = merge_graph->n_total_row;
734 ma2 = isl_sched_node_extract_partial_schedule_multi_aff(node: cluster_node,
735 first: 0, n);
736 ma = isl_multi_aff_pullback_multi_aff(ma1: ma2, ma2: ma);
737
738 return isl_map_from_multi_aff(maff: ma);
739}
740
741/* Give a set of distances "set", are they bounded by a small constant
742 * in direction "pos"?
743 * In practice, check if they are bounded by 2 by checking that there
744 * are no elements with a value greater than or equal to 3 or
745 * smaller than or equal to -3.
746 */
747static isl_bool distance_is_bounded(__isl_keep isl_set *set, int pos)
748{
749 isl_bool bounded;
750 isl_set *test;
751
752 if (!set)
753 return isl_bool_error;
754
755 test = isl_set_copy(set);
756 test = isl_set_lower_bound_si(set: test, type: isl_dim_set, pos, value: 3);
757 bounded = isl_set_is_empty(set: test);
758 isl_set_free(set: test);
759
760 if (bounded < 0 || !bounded)
761 return bounded;
762
763 test = isl_set_copy(set);
764 test = isl_set_upper_bound_si(set: test, type: isl_dim_set, pos, value: -3);
765 bounded = isl_set_is_empty(set: test);
766 isl_set_free(set: test);
767
768 return bounded;
769}
770
771/* Does the set "set" have a fixed (but possible parametric) value
772 * at dimension "pos"?
773 */
774static isl_bool has_single_value(__isl_keep isl_set *set, int pos)
775{
776 isl_size n;
777 isl_bool single;
778
779 n = isl_set_dim(set, type: isl_dim_set);
780 if (n < 0)
781 return isl_bool_error;
782 set = isl_set_copy(set);
783 set = isl_set_project_out(set, type: isl_dim_set, first: pos + 1, n: n - (pos + 1));
784 set = isl_set_project_out(set, type: isl_dim_set, first: 0, n: pos);
785 single = isl_set_is_singleton(set);
786 isl_set_free(set);
787
788 return single;
789}
790
791/* Does "map" have a fixed (but possible parametric) value
792 * at dimension "pos" of either its domain or its range?
793 */
794static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos)
795{
796 isl_set *set;
797 isl_bool single;
798
799 set = isl_map_domain(bmap: isl_map_copy(map));
800 single = has_single_value(set, pos);
801 isl_set_free(set);
802
803 if (single < 0 || single)
804 return single;
805
806 set = isl_map_range(map: isl_map_copy(map));
807 single = has_single_value(set, pos);
808 isl_set_free(set);
809
810 return single;
811}
812
813/* Does the edge "edge" from "graph" have bounded dependence distances
814 * in the merged graph "merge_graph" of a selection of clusters in "c"?
815 *
816 * Extract the complete transformations of the source and destination
817 * nodes of the edge, apply them to the edge constraints and
818 * compute the differences. Finally, check if these differences are bounded
819 * in each direction.
820 *
821 * If the dimension of the band is greater than the number of
822 * dimensions that can be expected to be optimized by the edge
823 * (based on its weight), then also allow the differences to be unbounded
824 * in the remaining dimensions, but only if either the source or
825 * the destination has a fixed value in that direction.
826 * This allows a statement that produces values that are used by
827 * several instances of another statement to be merged with that
828 * other statement.
829 * However, merging such clusters will introduce an inherently
830 * large proximity distance inside the merged cluster, meaning
831 * that proximity distances will no longer be optimized in
832 * subsequent merges. These merges are therefore only allowed
833 * after all other possible merges have been tried.
834 * The first time such a merge is encountered, the weight of the edge
835 * is replaced by a negative weight. The second time (i.e., after
836 * all merges over edges with a non-negative weight have been tried),
837 * the merge is allowed.
838 */
839static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge,
840 struct isl_sched_graph *graph, struct isl_clustering *c,
841 struct isl_sched_graph *merge_graph)
842{
843 int i, n_slack;
844 isl_size n;
845 isl_bool bounded;
846 isl_map *map, *t;
847 isl_set *dist;
848
849 map = isl_map_copy(map: edge->map);
850 t = extract_node_transformation(ctx, node: edge->src, c, merge_graph);
851 map = isl_map_apply_domain(map1: map, map2: t);
852 t = extract_node_transformation(ctx, node: edge->dst, c, merge_graph);
853 map = isl_map_apply_range(map1: map, map2: t);
854 dist = isl_map_deltas(map: isl_map_copy(map));
855
856 bounded = isl_bool_true;
857 n = isl_set_dim(set: dist, type: isl_dim_set);
858 if (n < 0)
859 goto error;
860 n_slack = n - edge->weight;
861 if (edge->weight < 0)
862 n_slack -= graph->max_weight + 1;
863 for (i = 0; i < n; ++i) {
864 isl_bool bounded_i, singular_i;
865
866 bounded_i = distance_is_bounded(set: dist, pos: i);
867 if (bounded_i < 0)
868 goto error;
869 if (bounded_i)
870 continue;
871 if (edge->weight >= 0)
872 bounded = isl_bool_false;
873 n_slack--;
874 if (n_slack < 0)
875 break;
876 singular_i = has_singular_src_or_dst(map, pos: i);
877 if (singular_i < 0)
878 goto error;
879 if (singular_i)
880 continue;
881 bounded = isl_bool_false;
882 break;
883 }
884 if (!bounded && i >= n && edge->weight >= 0)
885 edge->weight -= graph->max_weight + 1;
886 isl_map_free(map);
887 isl_set_free(set: dist);
888
889 return bounded;
890error:
891 isl_map_free(map);
892 isl_set_free(set: dist);
893 return isl_bool_error;
894}
895
896/* Should the clusters be merged based on the cluster schedule
897 * in the current (and only) band of "merge_graph"?
898 * "graph" is the original dependence graph, while "c" records
899 * which SCCs are involved in the latest merge.
900 *
901 * In particular, is there at least one proximity constraint
902 * that is optimized by the merge?
903 *
904 * A proximity constraint is considered to be optimized
905 * if the dependence distances are small.
906 */
907static isl_bool ok_to_merge_proximity(isl_ctx *ctx,
908 struct isl_sched_graph *graph, struct isl_clustering *c,
909 struct isl_sched_graph *merge_graph)
910{
911 int i;
912
913 for (i = 0; i < graph->n_edge; ++i) {
914 struct isl_sched_edge *edge = &graph->edge[i];
915 isl_bool bounded;
916
917 if (!isl_sched_edge_is_proximity(edge))
918 continue;
919 if (!c->scc_in_merge[edge->src->scc])
920 continue;
921 if (!c->scc_in_merge[edge->dst->scc])
922 continue;
923 if (c->scc_cluster[edge->dst->scc] ==
924 c->scc_cluster[edge->src->scc])
925 continue;
926 bounded = has_bounded_distances(ctx, edge, graph, c,
927 merge_graph);
928 if (bounded < 0 || bounded)
929 return bounded;
930 }
931
932 return isl_bool_false;
933}
934
935/* Should the clusters be merged based on the cluster schedule
936 * in the current (and only) band of "merge_graph"?
937 * "graph" is the original dependence graph, while "c" records
938 * which SCCs are involved in the latest merge.
939 *
940 * If the current band is empty, then the clusters should not be merged.
941 *
942 * If the band depth should be maximized and the merge schedule
943 * is incomplete (meaning that the dimension of some of the schedule
944 * bands in the original schedule will be reduced), then the clusters
945 * should not be merged.
946 *
947 * If the schedule_maximize_coincidence option is set, then check that
948 * the number of coincident schedule dimensions is not reduced.
949 *
950 * Finally, only allow the merge if at least one proximity
951 * constraint is optimized.
952 */
953static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
954 struct isl_clustering *c, struct isl_sched_graph *merge_graph)
955{
956 if (merge_graph->n_total_row == merge_graph->band_start)
957 return isl_bool_false;
958
959 if (isl_options_get_schedule_maximize_band_depth(ctx) &&
960 merge_graph->n_total_row < merge_graph->maxvar)
961 return isl_bool_false;
962
963 if (isl_options_get_schedule_maximize_coincidence(ctx)) {
964 isl_bool ok;
965
966 ok = ok_to_merge_coincident(c, merge_graph);
967 if (ok < 0 || !ok)
968 return ok;
969 }
970
971 return ok_to_merge_proximity(ctx, graph, c, merge_graph);
972}
973
974/* Apply the schedule in "t_node" to the "n" rows starting at "first"
975 * of the schedule in "node" and return the result.
976 *
977 * That is, essentially compute
978 *
979 * T * N(first:first+n-1)
980 *
981 * taking into account the constant term and the parameter coefficients
982 * in "t_node".
983 */
984static __isl_give isl_mat *node_transformation(isl_ctx *ctx,
985 struct isl_sched_node *t_node, struct isl_sched_node *node,
986 int first, int n)
987{
988 int i, j;
989 isl_mat *t;
990 isl_size n_row, n_col;
991 int n_param, n_var;
992
993 n_param = node->nparam;
994 n_var = node->nvar;
995 n_row = isl_mat_rows(mat: t_node->sched);
996 n_col = isl_mat_cols(mat: node->sched);
997 if (n_row < 0 || n_col < 0)
998 return NULL;
999 t = isl_mat_alloc(ctx, n_row, n_col);
1000 if (!t)
1001 return NULL;
1002 for (i = 0; i < n_row; ++i) {
1003 isl_seq_cpy(dst: t->row[i], src: t_node->sched->row[i], len: 1 + n_param);
1004 isl_seq_clr(p: t->row[i] + 1 + n_param, len: n_var);
1005 for (j = 0; j < n; ++j)
1006 isl_seq_addmul(dst: t->row[i],
1007 f: t_node->sched->row[i][1 + n_param + j],
1008 src: node->sched->row[first + j],
1009 len: 1 + n_param + n_var);
1010 }
1011 return t;
1012}
1013
1014/* Apply the cluster schedule in "t_node" to the current band
1015 * schedule of the nodes in "graph".
1016 *
1017 * In particular, replace the rows starting at band_start
1018 * by the result of applying the cluster schedule in "t_node"
1019 * to the original rows.
1020 *
1021 * The coincidence of the schedule is determined by the coincidence
1022 * of the cluster schedule.
1023 */
1024static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph,
1025 struct isl_sched_node *t_node)
1026{
1027 int i, j;
1028 isl_size n_new;
1029 int start, n;
1030
1031 start = graph->band_start;
1032 n = graph->n_total_row - start;
1033
1034 n_new = isl_mat_rows(mat: t_node->sched);
1035 if (n_new < 0)
1036 return isl_stat_error;
1037 for (i = 0; i < graph->n; ++i) {
1038 struct isl_sched_node *node = &graph->node[i];
1039 isl_mat *t;
1040
1041 t = node_transformation(ctx, t_node, node, first: start, n);
1042 node->sched = isl_mat_drop_rows(mat: node->sched, row: start, n);
1043 node->sched = isl_mat_concat(top: node->sched, bot: t);
1044 node->sched_map = isl_map_free(map: node->sched_map);
1045 if (!node->sched)
1046 return isl_stat_error;
1047 for (j = 0; j < n_new; ++j)
1048 node->coincident[start + j] = t_node->coincident[j];
1049 }
1050 graph->n_total_row -= n;
1051 graph->n_row -= n;
1052 graph->n_total_row += n_new;
1053 graph->n_row += n_new;
1054
1055 return isl_stat_ok;
1056}
1057
1058/* Merge the clusters marked for merging in "c" into a single
1059 * cluster using the cluster schedule in the current band of "merge_graph".
1060 * The representative SCC for the new cluster is the SCC with
1061 * the smallest index.
1062 *
1063 * The current band schedule of each SCC in the new cluster is obtained
1064 * by applying the schedule of the corresponding original cluster
1065 * to the original band schedule.
1066 * All SCCs in the new cluster have the same number of schedule rows.
1067 */
1068static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c,
1069 struct isl_sched_graph *merge_graph)
1070{
1071 int i;
1072 int cluster = -1;
1073 isl_space *space;
1074
1075 for (i = 0; i < c->n; ++i) {
1076 struct isl_sched_node *node;
1077
1078 if (!c->scc_in_merge[i])
1079 continue;
1080 if (cluster < 0)
1081 cluster = i;
1082 space = cluster_space(scc: &c->scc[i], i: c->scc_cluster[i]);
1083 node = isl_sched_graph_find_node(ctx, graph: merge_graph, space);
1084 isl_space_free(space);
1085 if (!node)
1086 return isl_stat_error;
1087 if (!isl_sched_graph_is_node(graph: merge_graph, node))
1088 isl_die(ctx, isl_error_internal,
1089 "unable to find cluster",
1090 return isl_stat_error);
1091 if (transform(ctx, graph: &c->scc[i], t_node: node) < 0)
1092 return isl_stat_error;
1093 c->scc_cluster[i] = cluster;
1094 }
1095
1096 return isl_stat_ok;
1097}
1098
1099/* Try and merge the clusters of SCCs marked in c->scc_in_merge
1100 * by scheduling the current cluster bands with respect to each other.
1101 *
1102 * Construct a dependence graph with a space for each cluster and
1103 * with the coordinates of each space corresponding to the schedule
1104 * dimensions of the current band of that cluster.
1105 * Construct a cluster schedule in this cluster dependence graph and
1106 * apply it to the current cluster bands if it is applicable
1107 * according to ok_to_merge.
1108 *
1109 * If the number of remaining schedule dimensions in a cluster
1110 * with a non-maximal current schedule dimension is greater than
1111 * the number of remaining schedule dimensions in clusters
1112 * with a maximal current schedule dimension, then restrict
1113 * the number of rows to be computed in the cluster schedule
1114 * to the minimal such non-maximal current schedule dimension.
1115 * Do this by adjusting merge_graph.maxvar.
1116 *
1117 * Return isl_bool_true if the clusters have effectively been merged
1118 * into a single cluster.
1119 *
1120 * Note that since the standard scheduling algorithm minimizes the maximal
1121 * distance over proximity constraints, the proximity constraints between
1122 * the merged clusters may not be optimized any further than what is
1123 * sufficient to bring the distances within the limits of the internal
1124 * proximity constraints inside the individual clusters.
1125 * It may therefore make sense to perform an additional translation step
1126 * to bring the clusters closer to each other, while maintaining
1127 * the linear part of the merging schedule found using the standard
1128 * scheduling algorithm.
1129 */
1130static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph,
1131 struct isl_clustering *c)
1132{
1133 struct isl_sched_graph merge_graph = { 0 };
1134 isl_bool merged;
1135
1136 if (init_merge_graph(ctx, graph, c, merge_graph: &merge_graph) < 0)
1137 goto error;
1138
1139 if (isl_sched_graph_compute_maxvar(graph: &merge_graph) < 0)
1140 goto error;
1141 if (adjust_maxvar_to_slack(ctx, merge_graph: &merge_graph,c) < 0)
1142 goto error;
1143 if (isl_schedule_node_compute_wcc_band(ctx, graph: &merge_graph) < 0)
1144 goto error;
1145 merged = ok_to_merge(ctx, graph, c, merge_graph: &merge_graph);
1146 if (merged && merge(ctx, c, merge_graph: &merge_graph) < 0)
1147 goto error;
1148
1149 isl_sched_graph_free(ctx, graph: &merge_graph);
1150 return merged;
1151error:
1152 isl_sched_graph_free(ctx, graph: &merge_graph);
1153 return isl_bool_error;
1154}
1155
1156/* Is there any edge marked "no_merge" between two SCCs that are
1157 * about to be merged (i.e., that are set in "scc_in_merge")?
1158 * "merge_edge" is the proximity edge along which the clusters of SCCs
1159 * are going to be merged.
1160 *
1161 * If there is any edge between two SCCs with a negative weight,
1162 * while the weight of "merge_edge" is non-negative, then this
1163 * means that the edge was postponed. "merge_edge" should then
1164 * also be postponed since merging along the edge with negative weight should
1165 * be postponed until all edges with non-negative weight have been tried.
1166 * Replace the weight of "merge_edge" by a negative weight as well and
1167 * tell the caller not to attempt a merge.
1168 */
1169static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge,
1170 struct isl_sched_edge *merge_edge)
1171{
1172 int i;
1173
1174 for (i = 0; i < graph->n_edge; ++i) {
1175 struct isl_sched_edge *edge = &graph->edge[i];
1176
1177 if (!scc_in_merge[edge->src->scc])
1178 continue;
1179 if (!scc_in_merge[edge->dst->scc])
1180 continue;
1181 if (edge->no_merge)
1182 return 1;
1183 if (merge_edge->weight >= 0 && edge->weight < 0) {
1184 merge_edge->weight -= graph->max_weight + 1;
1185 return 1;
1186 }
1187 }
1188
1189 return 0;
1190}
1191
1192/* Merge the two clusters in "c" connected by the edge in "graph"
1193 * with index "edge" into a single cluster.
1194 * If it turns out to be impossible to merge these two clusters,
1195 * then mark the edge as "no_merge" such that it will not be
1196 * considered again.
1197 *
1198 * First mark all SCCs that need to be merged. This includes the SCCs
1199 * in the two clusters, but it may also include the SCCs
1200 * of intermediate clusters.
1201 * If there is already a no_merge edge between any pair of such SCCs,
1202 * then simply mark the current edge as no_merge as well.
1203 * Likewise, if any of those edges was postponed by has_bounded_distances,
1204 * then postpone the current edge as well.
1205 * Otherwise, try and merge the clusters and mark "edge" as "no_merge"
1206 * if the clusters did not end up getting merged, unless the non-merge
1207 * is due to the fact that the edge was postponed. This postponement
1208 * can be recognized by a change in weight (from non-negative to negative).
1209 */
1210static isl_stat merge_clusters_along_edge(isl_ctx *ctx,
1211 struct isl_sched_graph *graph, int edge, struct isl_clustering *c)
1212{
1213 isl_bool merged;
1214 int edge_weight = graph->edge[edge].weight;
1215
1216 if (mark_merge_sccs(ctx, graph, edge, c) < 0)
1217 return isl_stat_error;
1218
1219 if (any_no_merge(graph, scc_in_merge: c->scc_in_merge, merge_edge: &graph->edge[edge]))
1220 merged = isl_bool_false;
1221 else
1222 merged = try_merge(ctx, graph, c);
1223 if (merged < 0)
1224 return isl_stat_error;
1225 if (!merged && edge_weight == graph->edge[edge].weight)
1226 graph->edge[edge].no_merge = 1;
1227
1228 return isl_stat_ok;
1229}
1230
1231/* Does "node" belong to the cluster identified by "cluster"?
1232 */
1233static int node_cluster_exactly(struct isl_sched_node *node, int cluster)
1234{
1235 return node->cluster == cluster;
1236}
1237
1238/* Does "edge" connect two nodes belonging to the cluster
1239 * identified by "cluster"?
1240 */
1241static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster)
1242{
1243 return edge->src->cluster == cluster && edge->dst->cluster == cluster;
1244}
1245
1246/* Swap the schedule of "node1" and "node2".
1247 * Both nodes have been derived from the same node in a common parent graph.
1248 * Since the "coincident" field is shared with that node
1249 * in the parent graph, there is no need to also swap this field.
1250 */
1251static void swap_sched(struct isl_sched_node *node1,
1252 struct isl_sched_node *node2)
1253{
1254 isl_mat *sched;
1255 isl_map *sched_map;
1256
1257 sched = node1->sched;
1258 node1->sched = node2->sched;
1259 node2->sched = sched;
1260
1261 sched_map = node1->sched_map;
1262 node1->sched_map = node2->sched_map;
1263 node2->sched_map = sched_map;
1264}
1265
1266/* Copy the current band schedule from the SCCs that form the cluster
1267 * with index "pos" to the actual cluster at position "pos".
1268 * By construction, the index of the first SCC that belongs to the cluster
1269 * is also "pos".
1270 *
1271 * The order of the nodes inside both the SCCs and the cluster
1272 * is assumed to be same as the order in the original "graph".
1273 *
1274 * Since the SCC graphs will no longer be used after this function,
1275 * the schedules are actually swapped rather than copied.
1276 */
1277static isl_stat copy_partial(struct isl_sched_graph *graph,
1278 struct isl_clustering *c, int pos)
1279{
1280 int i, j;
1281
1282 c->cluster[pos].n_total_row = c->scc[pos].n_total_row;
1283 c->cluster[pos].n_row = c->scc[pos].n_row;
1284 c->cluster[pos].maxvar = c->scc[pos].maxvar;
1285 j = 0;
1286 for (i = 0; i < graph->n; ++i) {
1287 int k;
1288 int s;
1289
1290 if (graph->node[i].cluster != pos)
1291 continue;
1292 s = graph->node[i].scc;
1293 k = c->scc_node[s]++;
1294 swap_sched(node1: &c->cluster[pos].node[j], node2: &c->scc[s].node[k]);
1295 if (c->scc[s].maxvar > c->cluster[pos].maxvar)
1296 c->cluster[pos].maxvar = c->scc[s].maxvar;
1297 ++j;
1298 }
1299
1300 return isl_stat_ok;
1301}
1302
1303/* Is there a (conditional) validity dependence from node[j] to node[i],
1304 * forcing node[i] to follow node[j] or do the nodes belong to the same
1305 * cluster?
1306 */
1307static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user)
1308{
1309 struct isl_sched_graph *graph = user;
1310
1311 if (graph->node[i].cluster == graph->node[j].cluster)
1312 return isl_bool_true;
1313 return isl_sched_graph_has_validity_edge(graph, src: &graph->node[j],
1314 dst: &graph->node[i]);
1315}
1316
1317/* Extract the merged clusters of SCCs in "graph", sort them, and
1318 * store them in c->clusters. Update c->scc_cluster accordingly.
1319 *
1320 * First keep track of the cluster containing the SCC to which a node
1321 * belongs in the node itself.
1322 * Then extract the clusters into c->clusters, copying the current
1323 * band schedule from the SCCs that belong to the cluster.
1324 * Do this only once per cluster.
1325 *
1326 * Finally, topologically sort the clusters and update c->scc_cluster
1327 * to match the new scc numbering. While the SCCs were originally
1328 * sorted already, some SCCs that depend on some other SCCs may
1329 * have been merged with SCCs that appear before these other SCCs.
1330 * A reordering may therefore be required.
1331 */
1332static isl_stat extract_clusters(isl_ctx *ctx, struct isl_sched_graph *graph,
1333 struct isl_clustering *c)
1334{
1335 int i;
1336
1337 for (i = 0; i < graph->n; ++i)
1338 graph->node[i].cluster = c->scc_cluster[graph->node[i].scc];
1339
1340 for (i = 0; i < graph->scc; ++i) {
1341 if (c->scc_cluster[i] != i)
1342 continue;
1343 if (isl_sched_graph_extract_sub_graph(ctx, graph,
1344 node_pred: &node_cluster_exactly,
1345 edge_pred: &edge_cluster_exactly, data: i, sub: &c->cluster[i]) < 0)
1346 return isl_stat_error;
1347 c->cluster[i].src_scc = -1;
1348 c->cluster[i].dst_scc = -1;
1349 if (copy_partial(graph, c, pos: i) < 0)
1350 return isl_stat_error;
1351 }
1352
1353 if (isl_sched_graph_detect_ccs(ctx, graph,
1354 follows: &node_follows_strong_or_same_cluster) < 0)
1355 return isl_stat_error;
1356 for (i = 0; i < graph->n; ++i)
1357 c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster;
1358
1359 return isl_stat_ok;
1360}
1361
1362/* Compute weights on the proximity edges of "graph" that can
1363 * be used by find_proximity to find the most appropriate
1364 * proximity edge to use to merge two clusters in "c".
1365 * The weights are also used by has_bounded_distances to determine
1366 * whether the merge should be allowed.
1367 * Store the maximum of the computed weights in graph->max_weight.
1368 *
1369 * The computed weight is a measure for the number of remaining schedule
1370 * dimensions that can still be completely aligned.
1371 * In particular, compute the number of equalities between
1372 * input dimensions and output dimensions in the proximity constraints.
1373 * The directions that are already handled by outer schedule bands
1374 * are projected out prior to determining this number.
1375 *
1376 * Edges that will never be considered by find_proximity are ignored.
1377 */
1378static isl_stat compute_weights(struct isl_sched_graph *graph,
1379 struct isl_clustering *c)
1380{
1381 int i;
1382
1383 graph->max_weight = 0;
1384
1385 for (i = 0; i < graph->n_edge; ++i) {
1386 struct isl_sched_edge *edge = &graph->edge[i];
1387 struct isl_sched_node *src = edge->src;
1388 struct isl_sched_node *dst = edge->dst;
1389 isl_basic_map *hull;
1390 isl_bool prox;
1391 isl_size n_in, n_out, n;
1392
1393 prox = is_non_empty_proximity(edge);
1394 if (prox < 0)
1395 return isl_stat_error;
1396 if (!prox)
1397 continue;
1398 if (bad_cluster(graph: &c->scc[edge->src->scc]) ||
1399 bad_cluster(graph: &c->scc[edge->dst->scc]))
1400 continue;
1401 if (c->scc_cluster[edge->dst->scc] ==
1402 c->scc_cluster[edge->src->scc])
1403 continue;
1404
1405 hull = isl_map_affine_hull(map: isl_map_copy(map: edge->map));
1406 hull = isl_basic_map_transform_dims(bmap: hull, type: isl_dim_in, first: 0,
1407 trans: isl_mat_copy(mat: src->vmap));
1408 hull = isl_basic_map_transform_dims(bmap: hull, type: isl_dim_out, first: 0,
1409 trans: isl_mat_copy(mat: dst->vmap));
1410 hull = isl_basic_map_project_out(bmap: hull,
1411 type: isl_dim_in, first: 0, n: src->rank);
1412 hull = isl_basic_map_project_out(bmap: hull,
1413 type: isl_dim_out, first: 0, n: dst->rank);
1414 hull = isl_basic_map_remove_divs(bmap: hull);
1415 n_in = isl_basic_map_dim(bmap: hull, type: isl_dim_in);
1416 n_out = isl_basic_map_dim(bmap: hull, type: isl_dim_out);
1417 if (n_in < 0 || n_out < 0)
1418 hull = isl_basic_map_free(bmap: hull);
1419 hull = isl_basic_map_drop_constraints_not_involving_dims(bmap: hull,
1420 type: isl_dim_in, first: 0, n: n_in);
1421 hull = isl_basic_map_drop_constraints_not_involving_dims(bmap: hull,
1422 type: isl_dim_out, first: 0, n: n_out);
1423 n = isl_basic_map_n_equality(bmap: hull);
1424 isl_basic_map_free(bmap: hull);
1425 if (n < 0)
1426 return isl_stat_error;
1427 edge->weight = n;
1428
1429 if (edge->weight > graph->max_weight)
1430 graph->max_weight = edge->weight;
1431 }
1432
1433 return isl_stat_ok;
1434}
1435
1436/* Call isl_schedule_node_compute_finish_band on each of the clusters in "c" and
1437 * update "node" to arrange for them to be executed in an order
1438 * possibly involving set nodes that generalizes the topological order
1439 * determined by the scc fields of the nodes in "graph".
1440 *
1441 * Note that at this stage, there are graph->scc clusters and
1442 * their positions in c->cluster are determined by the values
1443 * of c->scc_cluster.
1444 *
1445 * Construct an isl_scc_graph and perform the decomposition
1446 * using this graph.
1447 */
1448static __isl_give isl_schedule_node *finish_bands_decompose(
1449 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
1450 struct isl_clustering *c)
1451{
1452 isl_ctx *ctx;
1453 struct isl_scc_graph *scc_graph;
1454
1455 ctx = isl_schedule_node_get_ctx(node);
1456
1457 scc_graph = isl_scc_graph_from_sched_graph(ctx, graph, c);
1458 node = isl_scc_graph_decompose(scc_graph, node);
1459 isl_scc_graph_free(scc_graph);
1460
1461 return node;
1462}
1463
1464/* Call isl_schedule_node_compute_finish_band on each of the clusters in "c"
1465 * in their topological order. This order is determined by the scc
1466 * fields of the nodes in "graph".
1467 * Combine the results in a sequence expressing the topological order.
1468 *
1469 * If there is only one cluster left, then there is no need to introduce
1470 * a sequence node. Also, in this case, the cluster necessarily contains
1471 * the SCC at position 0 in the original graph and is therefore also
1472 * stored in the first cluster of "c".
1473 *
1474 * If there are more than two clusters left, then some subsets of the clusters
1475 * may still be independent of each other. These could then still
1476 * be reordered with respect to each other. Call finish_bands_decompose
1477 * to try and construct an ordering involving set and sequence nodes
1478 * that generalizes the topological order.
1479 * Note that at the outermost level there can be no independent components
1480 * because isl_schedule_node_compute_wcc_clustering is called
1481 * on a (weakly) connected component.
1482 */
1483static __isl_give isl_schedule_node *finish_bands_clustering(
1484 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph,
1485 struct isl_clustering *c)
1486{
1487 int i;
1488 isl_ctx *ctx;
1489 isl_union_set_list *filters;
1490
1491 if (graph->scc == 1)
1492 return isl_schedule_node_compute_finish_band(node,
1493 graph: &c->cluster[0], initialized: 0);
1494 if (graph->scc > 2)
1495 return finish_bands_decompose(node, graph, c);
1496
1497 ctx = isl_schedule_node_get_ctx(node);
1498
1499 filters = isl_sched_graph_extract_sccs(ctx, graph);
1500 node = isl_schedule_node_insert_sequence(node, filters);
1501
1502 for (i = 0; i < graph->scc; ++i) {
1503 int j = c->scc_cluster[i];
1504 node = isl_schedule_node_grandchild(node, pos1: i, pos2: 0);
1505 node = isl_schedule_node_compute_finish_band(node,
1506 graph: &c->cluster[j], initialized: 0);
1507 node = isl_schedule_node_grandparent(node);
1508 }
1509
1510 return node;
1511}
1512
1513/* Compute a schedule for a connected dependence graph by first considering
1514 * each strongly connected component (SCC) in the graph separately and then
1515 * incrementally combining them into clusters.
1516 * Return the updated schedule node.
1517 *
1518 * Initially, each cluster consists of a single SCC, each with its
1519 * own band schedule. The algorithm then tries to merge pairs
1520 * of clusters along a proximity edge until no more suitable
1521 * proximity edges can be found. During this merging, the schedule
1522 * is maintained in the individual SCCs.
1523 * After the merging is completed, the full resulting clusters
1524 * are extracted and in finish_bands_clustering,
1525 * isl_schedule_node_compute_finish_band is called on each of them to integrate
1526 * the band into "node" and to continue the computation.
1527 *
1528 * compute_weights initializes the weights that are used by find_proximity.
1529 */
1530__isl_give isl_schedule_node *isl_schedule_node_compute_wcc_clustering(
1531 __isl_take isl_schedule_node *node, struct isl_sched_graph *graph)
1532{
1533 isl_ctx *ctx;
1534 struct isl_clustering c;
1535 int i;
1536
1537 ctx = isl_schedule_node_get_ctx(node);
1538
1539 if (clustering_init(ctx, c: &c, graph) < 0)
1540 goto error;
1541
1542 if (compute_weights(graph, c: &c) < 0)
1543 goto error;
1544
1545 for (;;) {
1546 i = find_proximity(graph, c: &c);
1547 if (i < 0)
1548 goto error;
1549 if (i >= graph->n_edge)
1550 break;
1551 if (merge_clusters_along_edge(ctx, graph, edge: i, c: &c) < 0)
1552 goto error;
1553 }
1554
1555 if (extract_clusters(ctx, graph, c: &c) < 0)
1556 goto error;
1557
1558 node = finish_bands_clustering(node, graph, c: &c);
1559
1560 clustering_free(ctx, c: &c);
1561 return node;
1562error:
1563 clustering_free(ctx, c: &c);
1564 return isl_schedule_node_free(node);
1565}
1566

source code of polly/lib/External/isl/isl_scheduler_clustering.c