isl_scheduler_clustering.c source code [polly/lib/External/isl/isl_scheduler_clustering.c]

1	/*
2	* Copyright 2015 Sven Verdoolaege
3	*
4	* Use of this software is governed by the MIT license
5	*
6	* Written by Sven Verdoolaege
7	*/
8
9	#include "isl_map_private.h"
10
11	#include <isl/id.h>
12	#include <isl/schedule_node.h>
13	#include <isl/union_set.h>
14
15	#include "isl_mat_private.h"
16	#include "isl_scheduler_clustering.h"
17	#include "isl_scheduler_scc.h"
18	#include "isl_seq.h"
19	#include "isl_tarjan.h"
20
21	/ Initialize the clustering data structure "c" from "graph".*
22	*
23	* In particular, allocate memory, extract the SCCs from "graph"
24	* into c->scc, initialize scc_cluster and construct
25	* a band of schedule rows for each SCC.
26	* Within each SCC, there is only one SCC by definition.
27	* Each SCC initially belongs to a cluster containing only that SCC.
28	*/
29	static isl_stat clustering_init(isl_ctx ctx, struct* isl_clustering *c,
30	struct isl_sched_graph *graph)
31	{
32	int i;
33
34	c->n = graph->scc;
35	c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
36	c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n);
37	c->scc_cluster = isl_calloc_array(ctx, int, c->n);
38	c->scc_node = isl_calloc_array(ctx, int, c->n);
39	c->scc_in_merge = isl_calloc_array(ctx, int, c->n);
40	if (!c->scc \|\| !c->cluster \|\|
41	!c->scc_cluster \|\| !c->scc_node \|\| !c->scc_in_merge)
42	return isl_stat_error;
43
44	for (i = `0`; i < c->n; ++i) {
45	if (isl_sched_graph_extract_sub_graph(ctx, graph,
46	node_pred: &isl_sched_node_scc_exactly,
47	edge_pred: &isl_sched_edge_scc_exactly,
48	data: i, sub: &c->scc[i]) < `0`)
49	return isl_stat_error;
50	c->scc[i].scc = `1`;
51	if (isl_sched_graph_compute_maxvar(graph: &c->scc[i]) < `0`)
52	return isl_stat_error;
53	if (isl_schedule_node_compute_wcc_band(ctx, graph: &c->scc[i]) < `0`)
54	return isl_stat_error;
55	c->scc_cluster[i] = i;
56	}
57
58	return isl_stat_ok;
59	}
60
61	/ Free all memory allocated for "c".*
62	*/
63	static void clustering_free(isl_ctx ctx, struct* isl_clustering *c)
64	{
65	int i;
66
67	if (c->scc)
68	for (i = `0`; i < c->n; ++i)
69	isl_sched_graph_free(ctx, graph: &c->scc[i]);
70	free(ptr: c->scc);
71	if (c->cluster)
72	for (i = `0`; i < c->n; ++i)
73	isl_sched_graph_free(ctx, graph: &c->cluster[i]);
74	free(ptr: c->cluster);
75	free(ptr: c->scc_cluster);
76	free(ptr: c->scc_node);
77	free(ptr: c->scc_in_merge);
78	}
79
80	/ Should we refrain from merging the cluster in "graph" with*
81	* any other cluster?
82	* In particular, is its current schedule band empty and incomplete.
83	*/
84	static int bad_cluster(struct isl_sched_graph *graph)
85	{
86	return graph->n_row < graph->maxvar &&
87	graph->n_total_row == graph->band_start;
88	}
89
90	/ Is "edge" a proximity edge with a non-empty dependence relation?*
91	*/
92	static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge)
93	{
94	if (!isl_sched_edge_is_proximity(edge))
95	return isl_bool_false;
96	return isl_bool_not(b: isl_map_plain_is_empty(map: edge->map));
97	}
98
99	/ Return the index of an edge in "graph" that can be used to merge*
100	* two clusters in "c".
101	* Return graph->n_edge if no such edge can be found.
102	* Return -1 on error.
103	*
104	* In particular, return a proximity edge between two clusters
105	* that is not marked "no_merge" and such that neither of the
106	* two clusters has an incomplete, empty band.
107	*
108	* If there are multiple such edges, then try and find the most
109	* appropriate edge to use for merging. In particular, pick the edge
110	* with the greatest weight. If there are multiple of those,
111	* then pick one with the shortest distance between
112	* the two cluster representatives.
113	*/
114	static int find_proximity(struct isl_sched_graph *graph,
115	struct isl_clustering *c)
116	{
117	int i, best = graph->n_edge, best_dist, best_weight;
118
119	for (i = `0`; i < graph->n_edge; ++i) {
120	struct isl_sched_edge *edge = &graph->edge[i];
121	int dist, weight;
122	isl_bool prox;
123
124	prox = is_non_empty_proximity(edge);
125	if (prox < `0`)
126	return -`1`;
127	if (!prox)
128	continue;
129	if (edge->no_merge)
130	continue;
131	if (bad_cluster(graph: &c->scc[edge->src->scc]) \|\|
132	bad_cluster(graph: &c->scc[edge->dst->scc]))
133	continue;
134	dist = c->scc_cluster[edge->dst->scc] -
135	c->scc_cluster[edge->src->scc];
136	if (dist == `0`)
137	continue;
138	weight = edge->weight;
139	if (best < graph->n_edge) {
140	if (best_weight > weight)
141	continue;
142	if (best_weight == weight && best_dist <= dist)
143	continue;
144	}
145	best = i;
146	best_dist = dist;
147	best_weight = weight;
148	}
149
150	return best;
151	}
152
153	/ Internal data structure used in mark_merge_sccs.*
154	*
155	* "graph" is the dependence graph in which a strongly connected
156	* component is constructed.
157	* "scc_cluster" maps each SCC index to the cluster to which it belongs.
158	* "src" and "dst" are the indices of the nodes that are being merged.
159	*/
160	struct isl_mark_merge_sccs_data {
161	struct isl_sched_graph *graph;
162	int *scc_cluster;
163	int src;
164	int dst;
165	};
166
167	/ Check whether the cluster containing node "i" depends on the cluster*
168	* containing node "j". If "i" and "j" belong to the same cluster,
169	* then they are taken to depend on each other to ensure that
170	* the resulting strongly connected component consists of complete
171	* clusters. Furthermore, if "i" and "j" are the two nodes that
172	* are being merged, then they are taken to depend on each other as well.
173	* Otherwise, check if there is a (conditional) validity dependence
174	* from node[j] to node[i], forcing node[i] to follow node[j].
175	*/
176	static isl_bool cluster_follows(int i, int j, void *user)
177	{
178	struct isl_mark_merge_sccs_data *data = user;
179	struct isl_sched_graph *graph = data->graph;
180	int *scc_cluster = data->scc_cluster;
181
182	if (data->src == i && data->dst == j)
183	return isl_bool_true;
184	if (data->src == j && data->dst == i)
185	return isl_bool_true;
186	if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc])
187	return isl_bool_true;
188
189	return isl_sched_graph_has_validity_edge(graph, src: &graph->node[j],
190	dst: &graph->node[i]);
191	}
192
193	/ Mark all SCCs that belong to either of the two clusters in "c"*
194	* connected by the edge in "graph" with index "edge", or to any
195	* of the intermediate clusters.
196	* The marking is recorded in c->scc_in_merge.
197	*
198	* The given edge has been selected for merging two clusters,
199	* meaning that there is at least a proximity edge between the two nodes.
200	* However, there may also be (indirect) validity dependences
201	* between the two nodes. When merging the two clusters, all clusters
202	* containing one or more of the intermediate nodes along the
203	* indirect validity dependences need to be merged in as well.
204	*
205	* First collect all such nodes by computing the strongly connected
206	* component (SCC) containing the two nodes connected by the edge, where
207	* the two nodes are considered to depend on each other to make
208	* sure they end up in the same SCC. Similarly, each node is considered
209	* to depend on every other node in the same cluster to ensure
210	* that the SCC consists of complete clusters.
211	*
212	* Then the original SCCs that contain any of these nodes are marked
213	* in c->scc_in_merge.
214	*/
215	static isl_stat mark_merge_sccs(isl_ctx ctx, struct* isl_sched_graph *graph,
216	int edge, struct isl_clustering *c)
217	{
218	struct isl_mark_merge_sccs_data data;
219	struct isl_tarjan_graph *g;
220	int i;
221
222	for (i = `0`; i < c->n; ++i)
223	c->scc_in_merge[i] = `0`;
224
225	data.graph = graph;
226	data.scc_cluster = c->scc_cluster;
227	data.src = graph->edge[edge].src - graph->node;
228	data.dst = graph->edge[edge].dst - graph->node;
229
230	g = isl_tarjan_graph_component(ctx, len: graph->n, node: data.dst,
231	follows: &cluster_follows, user: &data);
232	if (!g)
233	goto error;
234
235	i = g->op;
236	if (i < `3`)
237	isl_die(ctx, isl_error_internal,
238	"expecting at least two nodes in component",
239	goto error);
240	if (g->order[--i] != -`1`)
241	isl_die(ctx, isl_error_internal,
242	"expecting end of component marker", goto error);
243
244	for (--i; i >= `0` && g->order[i] != -`1`; --i) {
245	int scc = graph->node[g->order[i]].scc;
246	c->scc_in_merge[scc] = `1`;
247	}
248
249	isl_tarjan_graph_free(g);
250	return isl_stat_ok;
251	error:
252	isl_tarjan_graph_free(g);
253	return isl_stat_error;
254	}
255
256	/ Construct the identifier "cluster_i".*
257	*/
258	static __isl_give isl_id cluster_id(isl_ctx ctx, int i)
259	{
260	char name[`40`];
261
262	snprintf(s: name, maxlen: sizeof(name), format: "cluster_%d", i);
263	return isl_id_alloc(ctx, name, NULL);
264	}
265
266	/ Construct the space of the cluster with index "i" containing*
267	* the strongly connected component "scc".
268	*
269	* In particular, construct a space called cluster_i with dimension equal
270	* to the number of schedule rows in the current band of "scc".
271	*/
272	static __isl_give isl_space cluster_space(struct* isl_sched_graph scc, int* i)
273	{
274	int nvar;
275	isl_space *space;
276	isl_id *id;
277
278	nvar = scc->n_total_row - scc->band_start;
279	space = isl_space_copy(space: scc->node[`0`].space);
280	space = isl_space_params(space);
281	space = isl_space_set_from_params(space);
282	space = isl_space_add_dims(space, type: isl_dim_set, n: nvar);
283	id = cluster_id(ctx: isl_space_get_ctx(space), i);
284	space = isl_space_set_tuple_id(space, type: isl_dim_set, id);
285
286	return space;
287	}
288
289	/ Collect the domain of the graph for merging clusters.*
290	*
291	* In particular, for each cluster with first SCC "i", construct
292	* a set in the space called cluster_i with dimension equal
293	* to the number of schedule rows in the current band of the cluster.
294	*/
295	static __isl_give isl_union_set collect_domain(isl_ctx ctx,
296	struct isl_sched_graph graph, struct* isl_clustering *c)
297	{
298	int i;
299	isl_space *space;
300	isl_union_set *domain;
301
302	space = isl_space_params_alloc(ctx, nparam: `0`);
303	domain = isl_union_set_empty(space);
304
305	for (i = `0`; i < graph->scc; ++i) {
306	isl_space *space;
307
308	if (!c->scc_in_merge[i])
309	continue;
310	if (c->scc_cluster[i] != i)
311	continue;
312	space = cluster_space(scc: &c->scc[i], i);
313	domain = isl_union_set_add_set(uset: domain, set: isl_set_universe(space));
314	}
315
316	return domain;
317	}
318
319	/ Construct a map from the original instances to the corresponding*
320	* cluster instance in the current bands of the clusters in "c".
321	*/
322	static __isl_give isl_union_map collect_cluster_map(isl_ctx ctx,
323	struct isl_sched_graph graph, struct* isl_clustering *c)
324	{
325	int i, j;
326	isl_space *space;
327	isl_union_map *cluster_map;
328
329	space = isl_space_params_alloc(ctx, nparam: `0`);
330	cluster_map = isl_union_map_empty(space);
331	for (i = `0`; i < graph->scc; ++i) {
332	int start, n;
333	isl_id *id;
334
335	if (!c->scc_in_merge[i])
336	continue;
337
338	id = cluster_id(ctx, i: c->scc_cluster[i]);
339	start = c->scc[i].band_start;
340	n = c->scc[i].n_total_row - start;
341	for (j = `0`; j < c->scc[i].n; ++j) {
342	isl_multi_aff *ma;
343	isl_map *map;
344	struct isl_sched_node *node = &c->scc[i].node[j];
345
346	ma = isl_sched_node_extract_partial_schedule_multi_aff(
347	node, first: start, n);
348	ma = isl_multi_aff_set_tuple_id(multi: ma, type: isl_dim_out,
349	id: isl_id_copy(id));
350	map = isl_map_from_multi_aff(maff: ma);
351	cluster_map = isl_union_map_add_map(umap: cluster_map, map);
352	}
353	isl_id_free(id);
354	}
355
356	return cluster_map;
357	}
358
359	/ Add "umap" to the schedule constraints "sc" of all types of "edge"*
360	* that are not isl_edge_condition or isl_edge_conditional_validity.
361	*/
362	static __isl_give isl_schedule_constraints *add_non_conditional_constraints(
363	struct isl_sched_edge edge, __isl_keep isl_union_map umap,
364	__isl_take isl_schedule_constraints *sc)
365	{
366	enum isl_edge_type t;
367
368	if (!sc)
369	return NULL;
370
371	for (t = isl_edge_first; t <= isl_edge_last; ++t) {
372	if (t == isl_edge_condition \|\|
373	t == isl_edge_conditional_validity)
374	continue;
375	if (!isl_sched_edge_has_type(edge, type: t))
376	continue;
377	sc = isl_schedule_constraints_add(sc, type: t,
378	c: isl_union_map_copy(umap));
379	}
380
381	return sc;
382	}
383
384	/ Add schedule constraints of types isl_edge_condition and*
385	* isl_edge_conditional_validity to "sc" by applying "umap" to
386	* the domains of the wrapped relations in domain and range
387	* of the corresponding tagged constraints of "edge".
388	*/
389	static __isl_give isl_schedule_constraints *add_conditional_constraints(
390	struct isl_sched_edge edge, __isl_keep isl_union_map umap,
391	__isl_take isl_schedule_constraints *sc)
392	{
393	enum isl_edge_type t;
394	isl_union_map *tagged;
395
396	for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) {
397	if (!isl_sched_edge_has_type(edge, type: t))
398	continue;
399	if (t == isl_edge_condition)
400	tagged = isl_union_map_copy(umap: edge->tagged_condition);
401	else
402	tagged = isl_union_map_copy(umap: edge->tagged_validity);
403	tagged = isl_union_map_zip(umap: tagged);
404	tagged = isl_union_map_apply_domain(umap1: tagged,
405	umap2: isl_union_map_copy(umap));
406	tagged = isl_union_map_zip(umap: tagged);
407	sc = isl_schedule_constraints_add(sc, type: t, c: tagged);
408	if (!sc)
409	return NULL;
410	}
411
412	return sc;
413	}
414
415	/ Given a mapping "cluster_map" from the original instances to*
416	* the cluster instances, add schedule constraints on the clusters
417	* to "sc" corresponding to the original constraints represented by "edge".
418	*
419	* For non-tagged dependence constraints, the cluster constraints
420	* are obtained by applying "cluster_map" to the edge->map.
421	*
422	* For tagged dependence constraints, "cluster_map" needs to be applied
423	* to the domains of the wrapped relations in domain and range
424	* of the tagged dependence constraints. Pick out the mappings
425	* from these domains from "cluster_map" and construct their product.
426	* This mapping can then be applied to the pair of domains.
427	*/
428	static __isl_give isl_schedule_constraints *collect_edge_constraints(
429	struct isl_sched_edge edge, __isl_keep isl_union_map cluster_map,
430	__isl_take isl_schedule_constraints *sc)
431	{
432	isl_union_map *umap;
433	isl_space *space;
434	isl_union_set *uset;
435	isl_union_map umap1, umap2;
436
437	if (!sc)
438	return NULL;
439
440	umap = isl_union_map_from_map(map: isl_map_copy(map: edge->map));
441	umap = isl_union_map_apply_domain(umap1: umap,
442	umap2: isl_union_map_copy(umap: cluster_map));
443	umap = isl_union_map_apply_range(umap1: umap,
444	umap2: isl_union_map_copy(umap: cluster_map));
445	sc = add_non_conditional_constraints(edge, umap, sc);
446	isl_union_map_free(umap);
447
448	if (!sc \|\|
449	(!isl_sched_edge_is_condition(edge) &&
450	!isl_sched_edge_is_conditional_validity(edge)))
451	return sc;
452
453	space = isl_space_domain(space: isl_map_get_space(map: edge->map));
454	uset = isl_union_set_from_set(set: isl_set_universe(space));
455	umap1 = isl_union_map_copy(umap: cluster_map);
456	umap1 = isl_union_map_intersect_domain(umap: umap1, uset);
457	space = isl_space_range(space: isl_map_get_space(map: edge->map));
458	uset = isl_union_set_from_set(set: isl_set_universe(space));
459	umap2 = isl_union_map_copy(umap: cluster_map);
460	umap2 = isl_union_map_intersect_domain(umap: umap2, uset);
461	umap = isl_union_map_product(umap1, umap2);
462
463	sc = add_conditional_constraints(edge, umap, sc);
464
465	isl_union_map_free(umap);
466	return sc;
467	}
468
469	/ Given a mapping "cluster_map" from the original instances to*
470	* the cluster instances, add schedule constraints on the clusters
471	* to "sc" corresponding to all edges in "graph" between nodes that
472	* belong to SCCs that are marked for merging in "scc_in_merge".
473	*/
474	static __isl_give isl_schedule_constraints *collect_constraints(
475	struct isl_sched_graph graph, int* *scc_in_merge,
476	__isl_keep isl_union_map *cluster_map,
477	__isl_take isl_schedule_constraints *sc)
478	{
479	int i;
480
481	for (i = `0`; i < graph->n_edge; ++i) {
482	struct isl_sched_edge *edge = &graph->edge[i];
483
484	if (!scc_in_merge[edge->src->scc])
485	continue;
486	if (!scc_in_merge[edge->dst->scc])
487	continue;
488	sc = collect_edge_constraints(edge, cluster_map, sc);
489	}
490
491	return sc;
492	}
493
494	/ Construct a dependence graph for scheduling clusters with respect*
495	* to each other and store the result in "merge_graph".
496	* In particular, the nodes of the graph correspond to the schedule
497	* dimensions of the current bands of those clusters that have been
498	* marked for merging in "c".
499	*
500	* First construct an isl_schedule_constraints object for this domain
501	* by transforming the edges in "graph" to the domain.
502	* Then initialize a dependence graph for scheduling from these
503	* constraints.
504	*/
505	static isl_stat init_merge_graph(isl_ctx ctx, struct* isl_sched_graph *graph,
506	struct isl_clustering c, struct* isl_sched_graph *merge_graph)
507	{
508	isl_union_set *domain;
509	isl_union_map *cluster_map;
510	isl_schedule_constraints *sc;
511	isl_stat r;
512
513	domain = collect_domain(ctx, graph, c);
514	sc = isl_schedule_constraints_on_domain(domain);
515	if (!sc)
516	return isl_stat_error;
517	cluster_map = collect_cluster_map(ctx, graph, c);
518	sc = collect_constraints(graph, scc_in_merge: c->scc_in_merge, cluster_map, sc);
519	isl_union_map_free(umap: cluster_map);
520
521	r = isl_sched_graph_init(graph: merge_graph, sc);
522
523	isl_schedule_constraints_free(sc);
524
525	return r;
526	}
527
528	/ Compute the maximal number of remaining schedule rows that still need*
529	* to be computed for the nodes that belong to clusters with the maximal
530	* dimension for the current band (i.e., the band that is to be merged).
531	* Only clusters that are about to be merged are considered.
532	* "maxvar" is the maximal dimension for the current band.
533	* "c" contains information about the clusters.
534	*
535	* Return the maximal number of remaining schedule rows or
536	* isl_size_error on error.
537	*/
538	static isl_size compute_maxvar_max_slack(int maxvar, struct isl_clustering *c)
539	{
540	int i, j;
541	int max_slack;
542
543	max_slack = `0`;
544	for (i = `0`; i < c->n; ++i) {
545	int nvar;
546	struct isl_sched_graph *scc;
547
548	if (!c->scc_in_merge[i])
549	continue;
550	scc = &c->scc[i];
551	nvar = scc->n_total_row - scc->band_start;
552	if (nvar != maxvar)
553	continue;
554	for (j = `0`; j < scc->n; ++j) {
555	struct isl_sched_node *node = &scc->node[j];
556	int slack;
557
558	if (isl_sched_node_update_vmap(node) < `0`)
559	return isl_size_error;
560	slack = node->nvar - node->rank;
561	if (slack > max_slack)
562	max_slack = slack;
563	}
564	}
565
566	return max_slack;
567	}
568
569	/ If there are any clusters where the dimension of the current band*
570	* (i.e., the band that is to be merged) is smaller than "maxvar" and
571	* if there are any nodes in such a cluster where the number
572	* of remaining schedule rows that still need to be computed
573	* is greater than "max_slack", then return the smallest current band
574	* dimension of all these clusters. Otherwise return the original value
575	* of "maxvar". Return isl_size_error in case of any error.
576	* Only clusters that are about to be merged are considered.
577	* "c" contains information about the clusters.
578	*/
579	static isl_size limit_maxvar_to_slack(int maxvar, int max_slack,
580	struct isl_clustering *c)
581	{
582	int i, j;
583
584	for (i = `0`; i < c->n; ++i) {
585	int nvar;
586	struct isl_sched_graph *scc;
587
588	if (!c->scc_in_merge[i])
589	continue;
590	scc = &c->scc[i];
591	nvar = scc->n_total_row - scc->band_start;
592	if (nvar >= maxvar)
593	continue;
594	for (j = `0`; j < scc->n; ++j) {
595	struct isl_sched_node *node = &scc->node[j];
596	int slack;
597
598	if (isl_sched_node_update_vmap(node) < `0`)
599	return isl_size_error;
600	slack = node->nvar - node->rank;
601	if (slack > max_slack) {
602	maxvar = nvar;
603	break;
604	}
605	}
606	}
607
608	return maxvar;
609	}
610
611	/ Adjust merge_graph->maxvar based on the number of remaining schedule rows*
612	* that still need to be computed. In particular, if there is a node
613	* in a cluster where the dimension of the current band is smaller
614	* than merge_graph->maxvar, but the number of remaining schedule rows
615	* is greater than that of any node in a cluster with the maximal
616	* dimension for the current band (i.e., merge_graph->maxvar),
617	* then adjust merge_graph->maxvar to the (smallest) current band dimension
618	* of those clusters. Without this adjustment, the total number of
619	* schedule dimensions would be increased, resulting in a skewed view
620	* of the number of coincident dimensions.
621	* "c" contains information about the clusters.
622	*
623	* If the maximize_band_depth option is set and merge_graph->maxvar is reduced,
624	* then there is no point in attempting any merge since it will be rejected
625	* anyway. Set merge_graph->maxvar to zero in such cases.
626	*/
627	static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx,
628	struct isl_sched_graph merge_graph, struct* isl_clustering *c)
629	{
630	isl_size max_slack, maxvar;
631
632	max_slack = compute_maxvar_max_slack(maxvar: merge_graph->maxvar, c);
633	if (max_slack < `0`)
634	return isl_stat_error;
635	maxvar = limit_maxvar_to_slack(maxvar: merge_graph->maxvar, max_slack, c);
636	if (maxvar < `0`)
637	return isl_stat_error;
638
639	if (maxvar < merge_graph->maxvar) {
640	if (isl_options_get_schedule_maximize_band_depth(ctx))
641	merge_graph->maxvar = `0`;
642	else
643	merge_graph->maxvar = maxvar;
644	}
645
646	return isl_stat_ok;
647	}
648
649	/ Return the number of coincident dimensions in the current band of "graph",*
650	* where the nodes of "graph" are assumed to be scheduled by a single band.
651	*/
652	static int get_n_coincident(struct isl_sched_graph *graph)
653	{
654	int i;
655
656	for (i = graph->band_start; i < graph->n_total_row; ++i)
657	if (!graph->node[`0`].coincident[i])
658	break;
659
660	return i - graph->band_start;
661	}
662
663	/ Should the clusters be merged based on the cluster schedule*
664	* in the current (and only) band of "merge_graph", given that
665	* coincidence should be maximized?
666	*
667	* If the number of coincident schedule dimensions in the merged band
668	* would be less than the maximal number of coincident schedule dimensions
669	* in any of the merged clusters, then the clusters should not be merged.
670	*/
671	static isl_bool ok_to_merge_coincident(struct isl_clustering *c,
672	struct isl_sched_graph *merge_graph)
673	{
674	int i;
675	int n_coincident;
676	int max_coincident;
677
678	max_coincident = `0`;
679	for (i = `0`; i < c->n; ++i) {
680	if (!c->scc_in_merge[i])
681	continue;
682	n_coincident = get_n_coincident(graph: &c->scc[i]);
683	if (n_coincident > max_coincident)
684	max_coincident = n_coincident;
685	}
686
687	n_coincident = get_n_coincident(graph: merge_graph);
688
689	return isl_bool_ok(b: n_coincident >= max_coincident);
690	}
691
692	/ Return the transformation on "node" expressed by the current (and only)*
693	* band of "merge_graph" applied to the clusters in "c".
694	*
695	* First find the representation of "node" in its SCC in "c" and
696	* extract the transformation expressed by the current band.
697	* Then extract the transformation applied by "merge_graph"
698	* to the cluster to which this SCC belongs.
699	* Combine the two to obtain the complete transformation on the node.
700	*
701	* Note that the range of the first transformation is an anonymous space,
702	* while the domain of the second is named "cluster_X". The range
703	* of the former therefore needs to be adjusted before the two
704	* can be combined.
705	*/
706	static __isl_give isl_map extract_node_transformation(isl_ctx ctx,
707	struct isl_sched_node node, struct* isl_clustering *c,
708	struct isl_sched_graph *merge_graph)
709	{
710	struct isl_sched_node scc_node, cluster_node;
711	int start, n;
712	isl_id *id;
713	isl_space *space;
714	isl_multi_aff ma, ma2;
715
716	scc_node = isl_sched_graph_find_node(ctx, graph: &c->scc[node->scc],
717	space: node->space);
718	if (scc_node && !isl_sched_graph_is_node(graph: &c->scc[node->scc], node: scc_node))
719	isl_die(ctx, isl_error_internal, "unable to find node",
720	return NULL);
721	start = c->scc[node->scc].band_start;
722	n = c->scc[node->scc].n_total_row - start;
723	ma = isl_sched_node_extract_partial_schedule_multi_aff(node: scc_node,
724	first: start, n);
725	space = cluster_space(scc: &c->scc[node->scc], i: c->scc_cluster[node->scc]);
726	cluster_node = isl_sched_graph_find_node(ctx, graph: merge_graph, space);
727	if (cluster_node && !isl_sched_graph_is_node(graph: merge_graph, node: cluster_node))
728	isl_die(ctx, isl_error_internal, "unable to find cluster",
729	space = isl_space_free(space));
730	id = isl_space_get_tuple_id(space, type: isl_dim_set);
731	ma = isl_multi_aff_set_tuple_id(multi: ma, type: isl_dim_out, id);
732	isl_space_free(space);
733	n = merge_graph->n_total_row;
734	ma2 = isl_sched_node_extract_partial_schedule_multi_aff(node: cluster_node,
735	first: `0`, n);
736	ma = isl_multi_aff_pullback_multi_aff(ma1: ma2, ma2: ma);
737
738	return isl_map_from_multi_aff(maff: ma);
739	}
740
741	/ Give a set of distances "set", are they bounded by a small constant*
742	* in direction "pos"?
743	* In practice, check if they are bounded by 2 by checking that there
744	* are no elements with a value greater than or equal to 3 or
745	* smaller than or equal to -3.
746	*/
747	static isl_bool distance_is_bounded(__isl_keep isl_set set, int* pos)
748	{
749	isl_bool bounded;
750	isl_set *test;
751
752	if (!set)
753	return isl_bool_error;
754
755	test = isl_set_copy(set);
756	test = isl_set_lower_bound_si(set: test, type: isl_dim_set, pos, value: `3`);
757	bounded = isl_set_is_empty(set: test);
758	isl_set_free(set: test);
759
760	if (bounded < `0` \|\| !bounded)
761	return bounded;
762
763	test = isl_set_copy(set);
764	test = isl_set_upper_bound_si(set: test, type: isl_dim_set, pos, value: -`3`);
765	bounded = isl_set_is_empty(set: test);
766	isl_set_free(set: test);
767
768	return bounded;
769	}
770
771	/ Does the set "set" have a fixed (but possible parametric) value*
772	* at dimension "pos"?
773	*/
774	static isl_bool has_single_value(__isl_keep isl_set set, int* pos)
775	{
776	isl_size n;
777	isl_bool single;
778
779	n = isl_set_dim(set, type: isl_dim_set);
780	if (n < `0`)
781	return isl_bool_error;
782	set = isl_set_copy(set);
783	set = isl_set_project_out(set, type: isl_dim_set, first: pos + `1`, n: n - (pos + `1`));
784	set = isl_set_project_out(set, type: isl_dim_set, first: `0`, n: pos);
785	single = isl_set_is_singleton(set);
786	isl_set_free(set);
787
788	return single;
789	}
790
791	/ Does "map" have a fixed (but possible parametric) value*
792	* at dimension "pos" of either its domain or its range?
793	*/
794	static isl_bool has_singular_src_or_dst(__isl_keep isl_map map, int* pos)
795	{
796	isl_set *set;
797	isl_bool single;
798
799	set = isl_map_domain(bmap: isl_map_copy(map));
800	single = has_single_value(set, pos);
801	isl_set_free(set);
802
803	if (single < `0` \|\| single)
804	return single;
805
806	set = isl_map_range(map: isl_map_copy(map));
807	single = has_single_value(set, pos);
808	isl_set_free(set);
809
810	return single;
811	}
812
813	/ Does the edge "edge" from "graph" have bounded dependence distances*
814	* in the merged graph "merge_graph" of a selection of clusters in "c"?
815	*
816	* Extract the complete transformations of the source and destination
817	* nodes of the edge, apply them to the edge constraints and
818	* compute the differences. Finally, check if these differences are bounded
819	* in each direction.
820	*
821	* If the dimension of the band is greater than the number of
822	* dimensions that can be expected to be optimized by the edge
823	* (based on its weight), then also allow the differences to be unbounded
824	* in the remaining dimensions, but only if either the source or
825	* the destination has a fixed value in that direction.
826	* This allows a statement that produces values that are used by
827	* several instances of another statement to be merged with that
828	* other statement.
829	* However, merging such clusters will introduce an inherently
830	* large proximity distance inside the merged cluster, meaning
831	* that proximity distances will no longer be optimized in
832	* subsequent merges. These merges are therefore only allowed
833	* after all other possible merges have been tried.
834	* The first time such a merge is encountered, the weight of the edge
835	* is replaced by a negative weight. The second time (i.e., after
836	* all merges over edges with a non-negative weight have been tried),
837	* the merge is allowed.
838	*/
839	static isl_bool has_bounded_distances(isl_ctx ctx, struct* isl_sched_edge *edge,
840	struct isl_sched_graph graph, struct* isl_clustering *c,
841	struct isl_sched_graph *merge_graph)
842	{
843	int i, n_slack;
844	isl_size n;
845	isl_bool bounded;
846	isl_map map, t;
847	isl_set *dist;
848
849	map = isl_map_copy(map: edge->map);
850	t = extract_node_transformation(ctx, node: edge->src, c, merge_graph);
851	map = isl_map_apply_domain(map1: map, map2: t);
852	t = extract_node_transformation(ctx, node: edge->dst, c, merge_graph);
853	map = isl_map_apply_range(map1: map, map2: t);
854	dist = isl_map_deltas(map: isl_map_copy(map));
855
856	bounded = isl_bool_true;
857	n = isl_set_dim(set: dist, type: isl_dim_set);
858	if (n < `0`)
859	goto error;
860	n_slack = n - edge->weight;
861	if (edge->weight < `0`)
862	n_slack -= graph->max_weight + `1`;
863	for (i = `0`; i < n; ++i) {
864	isl_bool bounded_i, singular_i;
865
866	bounded_i = distance_is_bounded(set: dist, pos: i);
867	if (bounded_i < `0`)
868	goto error;
869	if (bounded_i)
870	continue;
871	if (edge->weight >= `0`)
872	bounded = isl_bool_false;
873	n_slack--;
874	if (n_slack < `0`)
875	break;
876	singular_i = has_singular_src_or_dst(map, pos: i);
877	if (singular_i < `0`)
878	goto error;
879	if (singular_i)
880	continue;
881	bounded = isl_bool_false;
882	break;
883	}
884	if (!bounded && i >= n && edge->weight >= `0`)
885	edge->weight -= graph->max_weight + `1`;
886	isl_map_free(map);
887	isl_set_free(set: dist);
888
889	return bounded;
890	error:
891	isl_map_free(map);
892	isl_set_free(set: dist);
893	return isl_bool_error;
894	}
895
896	/ Should the clusters be merged based on the cluster schedule*
897	* in the current (and only) band of "merge_graph"?
898	* "graph" is the original dependence graph, while "c" records
899	* which SCCs are involved in the latest merge.
900	*
901	* In particular, is there at least one proximity constraint
902	* that is optimized by the merge?
903	*
904	* A proximity constraint is considered to be optimized
905	* if the dependence distances are small.
906	*/
907	static isl_bool ok_to_merge_proximity(isl_ctx *ctx,
908	struct isl_sched_graph graph, struct* isl_clustering *c,
909	struct isl_sched_graph *merge_graph)
910	{
911	int i;
912
913	for (i = `0`; i < graph->n_edge; ++i) {
914	struct isl_sched_edge *edge = &graph->edge[i];
915	isl_bool bounded;
916
917	if (!isl_sched_edge_is_proximity(edge))
918	continue;
919	if (!c->scc_in_merge[edge->src->scc])
920	continue;
921	if (!c->scc_in_merge[edge->dst->scc])
922	continue;
923	if (c->scc_cluster[edge->dst->scc] ==
924	c->scc_cluster[edge->src->scc])
925	continue;
926	bounded = has_bounded_distances(ctx, edge, graph, c,
927	merge_graph);
928	if (bounded < `0` \|\| bounded)
929	return bounded;
930	}
931
932	return isl_bool_false;
933	}
934
935	/ Should the clusters be merged based on the cluster schedule*
936	* in the current (and only) band of "merge_graph"?
937	* "graph" is the original dependence graph, while "c" records
938	* which SCCs are involved in the latest merge.
939	*
940	* If the current band is empty, then the clusters should not be merged.
941	*
942	* If the band depth should be maximized and the merge schedule
943	* is incomplete (meaning that the dimension of some of the schedule
944	* bands in the original schedule will be reduced), then the clusters
945	* should not be merged.
946	*
947	* If the schedule_maximize_coincidence option is set, then check that
948	* the number of coincident schedule dimensions is not reduced.
949	*
950	* Finally, only allow the merge if at least one proximity
951	* constraint is optimized.
952	*/
953	static isl_bool ok_to_merge(isl_ctx ctx, struct* isl_sched_graph *graph,
954	struct isl_clustering c, struct* isl_sched_graph *merge_graph)
955	{
956	if (merge_graph->n_total_row == merge_graph->band_start)
957	return isl_bool_false;
958
959	if (isl_options_get_schedule_maximize_band_depth(ctx) &&
960	merge_graph->n_total_row < merge_graph->maxvar)
961	return isl_bool_false;
962
963	if (isl_options_get_schedule_maximize_coincidence(ctx)) {
964	isl_bool ok;
965
966	ok = ok_to_merge_coincident(c, merge_graph);
967	if (ok < `0` \|\| !ok)
968	return ok;
969	}
970
971	return ok_to_merge_proximity(ctx, graph, c, merge_graph);
972	}
973
974	/ Apply the schedule in "t_node" to the "n" rows starting at "first"*
975	* of the schedule in "node" and return the result.
976	*
977	* That is, essentially compute
978	*
979	* T * N(first:first+n-1)
980	*
981	* taking into account the constant term and the parameter coefficients
982	* in "t_node".
983	*/
984	static __isl_give isl_mat node_transformation(isl_ctx ctx,
985	struct isl_sched_node t_node, struct* isl_sched_node *node,
986	int first, int n)
987	{
988	int i, j;
989	isl_mat *t;
990	isl_size n_row, n_col;
991	int n_param, n_var;
992
993	n_param = node->nparam;
994	n_var = node->nvar;
995	n_row = isl_mat_rows(mat: t_node->sched);
996	n_col = isl_mat_cols(mat: node->sched);
997	if (n_row < `0` \|\| n_col < `0`)
998	return NULL;
999	t = isl_mat_alloc(ctx, n_row, n_col);
1000	if (!t)
1001	return NULL;
1002	for (i = `0`; i < n_row; ++i) {
1003	isl_seq_cpy(dst: t->row[i], src: t_node->sched->row[i], len: `1` + n_param);
1004	isl_seq_clr(p: t->row[i] + `1` + n_param, len: n_var);
1005	for (j = `0`; j < n; ++j)
1006	isl_seq_addmul(dst: t->row[i],
1007	f: t_node->sched->row[i][`1` + n_param + j],
1008	src: node->sched->row[first + j],
1009	len: `1` + n_param + n_var);
1010	}
1011	return t;
1012	}
1013
1014	/ Apply the cluster schedule in "t_node" to the current band*
1015	* schedule of the nodes in "graph".
1016	*
1017	* In particular, replace the rows starting at band_start
1018	* by the result of applying the cluster schedule in "t_node"
1019	* to the original rows.
1020	*
1021	* The coincidence of the schedule is determined by the coincidence
1022	* of the cluster schedule.
1023	*/
1024	static isl_stat transform(isl_ctx ctx, struct* isl_sched_graph *graph,
1025	struct isl_sched_node *t_node)
1026	{
1027	int i, j;
1028	isl_size n_new;
1029	int start, n;
1030
1031	start = graph->band_start;
1032	n = graph->n_total_row - start;
1033
1034	n_new = isl_mat_rows(mat: t_node->sched);
1035	if (n_new < `0`)
1036	return isl_stat_error;
1037	for (i = `0`; i < graph->n; ++i) {
1038	struct isl_sched_node *node = &graph->node[i];
1039	isl_mat *t;
1040
1041	t = node_transformation(ctx, t_node, node, first: start, n);
1042	node->sched = isl_mat_drop_rows(mat: node->sched, row: start, n);
1043	node->sched = isl_mat_concat(top: node->sched, bot: t);
1044	node->sched_map = isl_map_free(map: node->sched_map);
1045	if (!node->sched)
1046	return isl_stat_error;
1047	for (j = `0`; j < n_new; ++j)
1048	node->coincident[start + j] = t_node->coincident[j];
1049	}
1050	graph->n_total_row -= n;
1051	graph->n_row -= n;
1052	graph->n_total_row += n_new;
1053	graph->n_row += n_new;
1054
1055	return isl_stat_ok;
1056	}
1057
1058	/ Merge the clusters marked for merging in "c" into a single*
1059	* cluster using the cluster schedule in the current band of "merge_graph".
1060	* The representative SCC for the new cluster is the SCC with
1061	* the smallest index.
1062	*
1063	* The current band schedule of each SCC in the new cluster is obtained
1064	* by applying the schedule of the corresponding original cluster
1065	* to the original band schedule.
1066	* All SCCs in the new cluster have the same number of schedule rows.
1067	*/
1068	static isl_stat merge(isl_ctx ctx, struct* isl_clustering *c,
1069	struct isl_sched_graph *merge_graph)
1070	{
1071	int i;
1072	int cluster = -`1`;
1073	isl_space *space;
1074
1075	for (i = `0`; i < c->n; ++i) {
1076	struct isl_sched_node *node;
1077
1078	if (!c->scc_in_merge[i])
1079	continue;
1080	if (cluster < `0`)
1081	cluster = i;
1082	space = cluster_space(scc: &c->scc[i], i: c->scc_cluster[i]);
1083	node = isl_sched_graph_find_node(ctx, graph: merge_graph, space);
1084	isl_space_free(space);
1085	if (!node)
1086	return isl_stat_error;
1087	if (!isl_sched_graph_is_node(graph: merge_graph, node))
1088	isl_die(ctx, isl_error_internal,
1089	"unable to find cluster",
1090	return isl_stat_error);
1091	if (transform(ctx, graph: &c->scc[i], t_node: node) < `0`)
1092	return isl_stat_error;
1093	c->scc_cluster[i] = cluster;
1094	}
1095
1096	return isl_stat_ok;
1097	}
1098
1099	/ Try and merge the clusters of SCCs marked in c->scc_in_merge*
1100	* by scheduling the current cluster bands with respect to each other.
1101	*
1102	* Construct a dependence graph with a space for each cluster and
1103	* with the coordinates of each space corresponding to the schedule
1104	* dimensions of the current band of that cluster.
1105	* Construct a cluster schedule in this cluster dependence graph and
1106	* apply it to the current cluster bands if it is applicable
1107	* according to ok_to_merge.
1108	*
1109	* If the number of remaining schedule dimensions in a cluster
1110	* with a non-maximal current schedule dimension is greater than
1111	* the number of remaining schedule dimensions in clusters
1112	* with a maximal current schedule dimension, then restrict
1113	* the number of rows to be computed in the cluster schedule
1114	* to the minimal such non-maximal current schedule dimension.
1115	* Do this by adjusting merge_graph.maxvar.
1116	*
1117	* Return isl_bool_true if the clusters have effectively been merged
1118	* into a single cluster.
1119	*
1120	* Note that since the standard scheduling algorithm minimizes the maximal
1121	* distance over proximity constraints, the proximity constraints between
1122	* the merged clusters may not be optimized any further than what is
1123	* sufficient to bring the distances within the limits of the internal
1124	* proximity constraints inside the individual clusters.
1125	* It may therefore make sense to perform an additional translation step
1126	* to bring the clusters closer to each other, while maintaining
1127	* the linear part of the merging schedule found using the standard
1128	* scheduling algorithm.
1129	*/
1130	static isl_bool try_merge(isl_ctx ctx, struct* isl_sched_graph *graph,
1131	struct isl_clustering *c)
1132	{
1133	struct isl_sched_graph merge_graph = { `0` };
1134	isl_bool merged;
1135
1136	if (init_merge_graph(ctx, graph, c, merge_graph: &merge_graph) < `0`)
1137	goto error;
1138
1139	if (isl_sched_graph_compute_maxvar(graph: &merge_graph) < `0`)
1140	goto error;
1141	if (adjust_maxvar_to_slack(ctx, merge_graph: &merge_graph,c) < `0`)
1142	goto error;
1143	if (isl_schedule_node_compute_wcc_band(ctx, graph: &merge_graph) < `0`)
1144	goto error;
1145	merged = ok_to_merge(ctx, graph, c, merge_graph: &merge_graph);
1146	if (merged && merge(ctx, c, merge_graph: &merge_graph) < `0`)
1147	goto error;
1148
1149	isl_sched_graph_free(ctx, graph: &merge_graph);
1150	return merged;
1151	error:
1152	isl_sched_graph_free(ctx, graph: &merge_graph);
1153	return isl_bool_error;
1154	}
1155
1156	/ Is there any edge marked "no_merge" between two SCCs that are*
1157	* about to be merged (i.e., that are set in "scc_in_merge")?
1158	* "merge_edge" is the proximity edge along which the clusters of SCCs
1159	* are going to be merged.
1160	*
1161	* If there is any edge between two SCCs with a negative weight,
1162	* while the weight of "merge_edge" is non-negative, then this
1163	* means that the edge was postponed. "merge_edge" should then
1164	* also be postponed since merging along the edge with negative weight should
1165	* be postponed until all edges with non-negative weight have been tried.
1166	* Replace the weight of "merge_edge" by a negative weight as well and
1167	* tell the caller not to attempt a merge.
1168	*/
1169	static int any_no_merge(struct isl_sched_graph graph, int* *scc_in_merge,
1170	struct isl_sched_edge *merge_edge)
1171	{
1172	int i;
1173
1174	for (i = `0`; i < graph->n_edge; ++i) {
1175	struct isl_sched_edge *edge = &graph->edge[i];
1176
1177	if (!scc_in_merge[edge->src->scc])
1178	continue;
1179	if (!scc_in_merge[edge->dst->scc])
1180	continue;
1181	if (edge->no_merge)
1182	return `1`;
1183	if (merge_edge->weight >= `0` && edge->weight < `0`) {
1184	merge_edge->weight -= graph->max_weight + `1`;
1185	return `1`;
1186	}
1187	}
1188
1189	return `0`;
1190	}
1191
1192	/ Merge the two clusters in "c" connected by the edge in "graph"*
1193	* with index "edge" into a single cluster.
1194	* If it turns out to be impossible to merge these two clusters,
1195	* then mark the edge as "no_merge" such that it will not be
1196	* considered again.
1197	*
1198	* First mark all SCCs that need to be merged. This includes the SCCs
1199	* in the two clusters, but it may also include the SCCs
1200	* of intermediate clusters.
1201	* If there is already a no_merge edge between any pair of such SCCs,
1202	* then simply mark the current edge as no_merge as well.
1203	* Likewise, if any of those edges was postponed by has_bounded_distances,
1204	* then postpone the current edge as well.
1205	* Otherwise, try and merge the clusters and mark "edge" as "no_merge"
1206	* if the clusters did not end up getting merged, unless the non-merge
1207	* is due to the fact that the edge was postponed. This postponement
1208	* can be recognized by a change in weight (from non-negative to negative).
1209	*/
1210	static isl_stat merge_clusters_along_edge(isl_ctx *ctx,
1211	struct isl_sched_graph graph, int* edge, struct isl_clustering *c)
1212	{
1213	isl_bool merged;
1214	int edge_weight = graph->edge[edge].weight;
1215
1216	if (mark_merge_sccs(ctx, graph, edge, c) < `0`)
1217	return isl_stat_error;
1218
1219	if (any_no_merge(graph, scc_in_merge: c->scc_in_merge, merge_edge: &graph->edge[edge]))
1220	merged = isl_bool_false;
1221	else
1222	merged = try_merge(ctx, graph, c);
1223	if (merged < `0`)
1224	return isl_stat_error;
1225	if (!merged && edge_weight == graph->edge[edge].weight)
1226	graph->edge[edge].no_merge = `1`;
1227
1228	return isl_stat_ok;
1229	}
1230
1231	/ Does "node" belong to the cluster identified by "cluster"?*
1232	*/
1233	static int node_cluster_exactly(struct isl_sched_node node, int* cluster)
1234	{
1235	return node->cluster == cluster;
1236	}
1237
1238	/ Does "edge" connect two nodes belonging to the cluster*
1239	* identified by "cluster"?
1240	*/
1241	static int edge_cluster_exactly(struct isl_sched_edge edge, int* cluster)
1242	{
1243	return edge->src->cluster == cluster && edge->dst->cluster == cluster;
1244	}
1245
1246	/ Swap the schedule of "node1" and "node2".*
1247	* Both nodes have been derived from the same node in a common parent graph.
1248	* Since the "coincident" field is shared with that node
1249	* in the parent graph, there is no need to also swap this field.
1250	*/
1251	static void swap_sched(struct isl_sched_node *node1,
1252	struct isl_sched_node *node2)
1253	{
1254	isl_mat *sched;
1255	isl_map *sched_map;
1256
1257	sched = node1->sched;
1258	node1->sched = node2->sched;
1259	node2->sched = sched;
1260
1261	sched_map = node1->sched_map;
1262	node1->sched_map = node2->sched_map;
1263	node2->sched_map = sched_map;
1264	}
1265
1266	/ Copy the current band schedule from the SCCs that form the cluster*
1267	* with index "pos" to the actual cluster at position "pos".
1268	* By construction, the index of the first SCC that belongs to the cluster
1269	* is also "pos".
1270	*
1271	* The order of the nodes inside both the SCCs and the cluster
1272	* is assumed to be same as the order in the original "graph".
1273	*
1274	* Since the SCC graphs will no longer be used after this function,
1275	* the schedules are actually swapped rather than copied.
1276	*/
1277	static isl_stat copy_partial(struct isl_sched_graph *graph,
1278	struct isl_clustering c, int* pos)
1279	{
1280	int i, j;
1281
1282	c->cluster[pos].n_total_row = c->scc[pos].n_total_row;
1283	c->cluster[pos].n_row = c->scc[pos].n_row;
1284	c->cluster[pos].maxvar = c->scc[pos].maxvar;
1285	j = `0`;
1286	for (i = `0`; i < graph->n; ++i) {
1287	int k;
1288	int s;
1289
1290	if (graph->node[i].cluster != pos)
1291	continue;
1292	s = graph->node[i].scc;
1293	k = c->scc_node[s]++;
1294	swap_sched(node1: &c->cluster[pos].node[j], node2: &c->scc[s].node[k]);
1295	if (c->scc[s].maxvar > c->cluster[pos].maxvar)
1296	c->cluster[pos].maxvar = c->scc[s].maxvar;
1297	++j;
1298	}
1299
1300	return isl_stat_ok;
1301	}
1302
1303	/ Is there a (conditional) validity dependence from node[j] to node[i],*
1304	* forcing node[i] to follow node[j] or do the nodes belong to the same
1305	* cluster?
1306	*/
1307	static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user)
1308	{
1309	struct isl_sched_graph *graph = user;
1310
1311	if (graph->node[i].cluster == graph->node[j].cluster)
1312	return isl_bool_true;
1313	return isl_sched_graph_has_validity_edge(graph, src: &graph->node[j],
1314	dst: &graph->node[i]);
1315	}
1316
1317	/ Extract the merged clusters of SCCs in "graph", sort them, and*
1318	* store them in c->clusters. Update c->scc_cluster accordingly.
1319	*
1320	* First keep track of the cluster containing the SCC to which a node
1321	* belongs in the node itself.
1322	* Then extract the clusters into c->clusters, copying the current
1323	* band schedule from the SCCs that belong to the cluster.
1324	* Do this only once per cluster.
1325	*
1326	* Finally, topologically sort the clusters and update c->scc_cluster
1327	* to match the new scc numbering. While the SCCs were originally
1328	* sorted already, some SCCs that depend on some other SCCs may
1329	* have been merged with SCCs that appear before these other SCCs.
1330	* A reordering may therefore be required.
1331	*/
1332	static isl_stat extract_clusters(isl_ctx ctx, struct* isl_sched_graph *graph,
1333	struct isl_clustering *c)
1334	{
1335	int i;
1336
1337	for (i = `0`; i < graph->n; ++i)
1338	graph->node[i].cluster = c->scc_cluster[graph->node[i].scc];
1339
1340	for (i = `0`; i < graph->scc; ++i) {
1341	if (c->scc_cluster[i] != i)
1342	continue;
1343	if (isl_sched_graph_extract_sub_graph(ctx, graph,
1344	node_pred: &node_cluster_exactly,
1345	edge_pred: &edge_cluster_exactly, data: i, sub: &c->cluster[i]) < `0`)
1346	return isl_stat_error;
1347	c->cluster[i].src_scc = -`1`;
1348	c->cluster[i].dst_scc = -`1`;
1349	if (copy_partial(graph, c, pos: i) < `0`)
1350	return isl_stat_error;
1351	}
1352
1353	if (isl_sched_graph_detect_ccs(ctx, graph,
1354	follows: &node_follows_strong_or_same_cluster) < `0`)
1355	return isl_stat_error;
1356	for (i = `0`; i < graph->n; ++i)
1357	c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster;
1358
1359	return isl_stat_ok;
1360	}
1361
1362	/ Compute weights on the proximity edges of "graph" that can*
1363	* be used by find_proximity to find the most appropriate
1364	* proximity edge to use to merge two clusters in "c".
1365	* The weights are also used by has_bounded_distances to determine
1366	* whether the merge should be allowed.
1367	* Store the maximum of the computed weights in graph->max_weight.
1368	*
1369	* The computed weight is a measure for the number of remaining schedule
1370	* dimensions that can still be completely aligned.
1371	* In particular, compute the number of equalities between
1372	* input dimensions and output dimensions in the proximity constraints.
1373	* The directions that are already handled by outer schedule bands
1374	* are projected out prior to determining this number.
1375	*
1376	* Edges that will never be considered by find_proximity are ignored.
1377	*/
1378	static isl_stat compute_weights(struct isl_sched_graph *graph,
1379	struct isl_clustering *c)
1380	{
1381	int i;
1382
1383	graph->max_weight = `0`;
1384
1385	for (i = `0`; i < graph->n_edge; ++i) {
1386	struct isl_sched_edge *edge = &graph->edge[i];
1387	struct isl_sched_node *src = edge->src;
1388	struct isl_sched_node *dst = edge->dst;
1389	isl_basic_map *hull;
1390	isl_bool prox;
1391	isl_size n_in, n_out, n;
1392
1393	prox = is_non_empty_proximity(edge);
1394	if (prox < `0`)
1395	return isl_stat_error;
1396	if (!prox)
1397	continue;
1398	if (bad_cluster(graph: &c->scc[edge->src->scc]) \|\|
1399	bad_cluster(graph: &c->scc[edge->dst->scc]))
1400	continue;
1401	if (c->scc_cluster[edge->dst->scc] ==
1402	c->scc_cluster[edge->src->scc])
1403	continue;
1404
1405	hull = isl_map_affine_hull(map: isl_map_copy(map: edge->map));
1406	hull = isl_basic_map_transform_dims(bmap: hull, type: isl_dim_in, first: `0`,
1407	trans: isl_mat_copy(mat: src->vmap));
1408	hull = isl_basic_map_transform_dims(bmap: hull, type: isl_dim_out, first: `0`,
1409	trans: isl_mat_copy(mat: dst->vmap));
1410	hull = isl_basic_map_project_out(bmap: hull,
1411	type: isl_dim_in, first: `0`, n: src->rank);
1412	hull = isl_basic_map_project_out(bmap: hull,
1413	type: isl_dim_out, first: `0`, n: dst->rank);
1414	hull = isl_basic_map_remove_divs(bmap: hull);
1415	n_in = isl_basic_map_dim(bmap: hull, type: isl_dim_in);
1416	n_out = isl_basic_map_dim(bmap: hull, type: isl_dim_out);
1417	if (n_in < `0` \|\| n_out < `0`)
1418	hull = isl_basic_map_free(bmap: hull);
1419	hull = isl_basic_map_drop_constraints_not_involving_dims(bmap: hull,
1420	type: isl_dim_in, first: `0`, n: n_in);
1421	hull = isl_basic_map_drop_constraints_not_involving_dims(bmap: hull,
1422	type: isl_dim_out, first: `0`, n: n_out);
1423	n = isl_basic_map_n_equality(bmap: hull);
1424	isl_basic_map_free(bmap: hull);
1425	if (n < `0`)
1426	return isl_stat_error;
1427	edge->weight = n;
1428
1429	if (edge->weight > graph->max_weight)
1430	graph->max_weight = edge->weight;
1431	}
1432
1433	return isl_stat_ok;
1434	}
1435
1436	/ Call isl_schedule_node_compute_finish_band on each of the clusters in "c" and*
1437	* update "node" to arrange for them to be executed in an order
1438	* possibly involving set nodes that generalizes the topological order
1439	* determined by the scc fields of the nodes in "graph".
1440	*
1441	* Note that at this stage, there are graph->scc clusters and
1442	* their positions in c->cluster are determined by the values
1443	* of c->scc_cluster.
1444	*
1445	* Construct an isl_scc_graph and perform the decomposition
1446	* using this graph.
1447	*/
1448	static __isl_give isl_schedule_node *finish_bands_decompose(
1449	__isl_take isl_schedule_node node, struct* isl_sched_graph *graph,
1450	struct isl_clustering *c)
1451	{
1452	isl_ctx *ctx;
1453	struct isl_scc_graph *scc_graph;
1454
1455	ctx = isl_schedule_node_get_ctx(node);
1456
1457	scc_graph = isl_scc_graph_from_sched_graph(ctx, graph, c);
1458	node = isl_scc_graph_decompose(scc_graph, node);
1459	isl_scc_graph_free(scc_graph);
1460
1461	return node;
1462	}
1463
1464	/ Call isl_schedule_node_compute_finish_band on each of the clusters in "c"*
1465	* in their topological order. This order is determined by the scc
1466	* fields of the nodes in "graph".
1467	* Combine the results in a sequence expressing the topological order.
1468	*
1469	* If there is only one cluster left, then there is no need to introduce
1470	* a sequence node. Also, in this case, the cluster necessarily contains
1471	* the SCC at position 0 in the original graph and is therefore also
1472	* stored in the first cluster of "c".
1473	*
1474	* If there are more than two clusters left, then some subsets of the clusters
1475	* may still be independent of each other. These could then still
1476	* be reordered with respect to each other. Call finish_bands_decompose
1477	* to try and construct an ordering involving set and sequence nodes
1478	* that generalizes the topological order.
1479	* Note that at the outermost level there can be no independent components
1480	* because isl_schedule_node_compute_wcc_clustering is called
1481	* on a (weakly) connected component.
1482	*/
1483	static __isl_give isl_schedule_node *finish_bands_clustering(
1484	__isl_take isl_schedule_node node, struct* isl_sched_graph *graph,
1485	struct isl_clustering *c)
1486	{
1487	int i;
1488	isl_ctx *ctx;
1489	isl_union_set_list *filters;
1490
1491	if (graph->scc == `1`)
1492	return isl_schedule_node_compute_finish_band(node,
1493	graph: &c->cluster[`0`], initialized: `0`);
1494	if (graph->scc > `2`)
1495	return finish_bands_decompose(node, graph, c);
1496
1497	ctx = isl_schedule_node_get_ctx(node);
1498
1499	filters = isl_sched_graph_extract_sccs(ctx, graph);
1500	node = isl_schedule_node_insert_sequence(node, filters);
1501
1502	for (i = `0`; i < graph->scc; ++i) {
1503	int j = c->scc_cluster[i];
1504	node = isl_schedule_node_grandchild(node, pos1: i, pos2: `0`);
1505	node = isl_schedule_node_compute_finish_band(node,
1506	graph: &c->cluster[j], initialized: `0`);
1507	node = isl_schedule_node_grandparent(node);
1508	}
1509
1510	return node;
1511	}
1512
1513	/ Compute a schedule for a connected dependence graph by first considering*
1514	* each strongly connected component (SCC) in the graph separately and then
1515	* incrementally combining them into clusters.
1516	* Return the updated schedule node.
1517	*
1518	* Initially, each cluster consists of a single SCC, each with its
1519	* own band schedule. The algorithm then tries to merge pairs
1520	* of clusters along a proximity edge until no more suitable
1521	* proximity edges can be found. During this merging, the schedule
1522	* is maintained in the individual SCCs.
1523	* After the merging is completed, the full resulting clusters
1524	* are extracted and in finish_bands_clustering,
1525	* isl_schedule_node_compute_finish_band is called on each of them to integrate
1526	* the band into "node" and to continue the computation.
1527	*
1528	* compute_weights initializes the weights that are used by find_proximity.
1529	*/
1530	__isl_give isl_schedule_node *isl_schedule_node_compute_wcc_clustering(
1531	__isl_take isl_schedule_node node, struct* isl_sched_graph *graph)
1532	{
1533	isl_ctx *ctx;
1534	struct isl_clustering c;
1535	int i;
1536
1537	ctx = isl_schedule_node_get_ctx(node);
1538
1539	if (clustering_init(ctx, c: &c, graph) < `0`)
1540	goto error;
1541
1542	if (compute_weights(graph, c: &c) < `0`)
1543	goto error;
1544
1545	for (;;) {
1546	i = find_proximity(graph, c: &c);
1547	if (i < `0`)
1548	goto error;
1549	if (i >= graph->n_edge)
1550	break;
1551	if (merge_clusters_along_edge(ctx, graph, edge: i, c: &c) < `0`)
1552	goto error;
1553	}
1554
1555	if (extract_clusters(ctx, graph, c: &c) < `0`)
1556	goto error;
1557
1558	node = finish_bands_clustering(node, graph, c: &c);
1559
1560	clustering_free(ctx, c: &c);
1561	return node;
1562	error:
1563	clustering_free(ctx, c: &c);
1564	return isl_schedule_node_free(node);
1565	}
1566

source code of polly/lib/External/isl/isl_scheduler_clustering.c