| 1 | /* |
| 2 | * Copyright 2015 Sven Verdoolaege |
| 3 | * |
| 4 | * Use of this software is governed by the MIT license |
| 5 | * |
| 6 | * Written by Sven Verdoolaege |
| 7 | */ |
| 8 | |
| 9 | #include "isl_map_private.h" |
| 10 | |
| 11 | #include <isl/id.h> |
| 12 | #include <isl/schedule_node.h> |
| 13 | #include <isl/union_set.h> |
| 14 | |
| 15 | #include "isl_mat_private.h" |
| 16 | #include "isl_scheduler_clustering.h" |
| 17 | #include "isl_scheduler_scc.h" |
| 18 | #include "isl_seq.h" |
| 19 | #include "isl_tarjan.h" |
| 20 | |
| 21 | /* Initialize the clustering data structure "c" from "graph". |
| 22 | * |
| 23 | * In particular, allocate memory, extract the SCCs from "graph" |
| 24 | * into c->scc, initialize scc_cluster and construct |
| 25 | * a band of schedule rows for each SCC. |
| 26 | * Within each SCC, there is only one SCC by definition. |
| 27 | * Each SCC initially belongs to a cluster containing only that SCC. |
| 28 | */ |
| 29 | static isl_stat clustering_init(isl_ctx *ctx, struct isl_clustering *c, |
| 30 | struct isl_sched_graph *graph) |
| 31 | { |
| 32 | int i; |
| 33 | |
| 34 | c->n = graph->scc; |
| 35 | c->scc = isl_calloc_array(ctx, struct isl_sched_graph, c->n); |
| 36 | c->cluster = isl_calloc_array(ctx, struct isl_sched_graph, c->n); |
| 37 | c->scc_cluster = isl_calloc_array(ctx, int, c->n); |
| 38 | c->scc_node = isl_calloc_array(ctx, int, c->n); |
| 39 | c->scc_in_merge = isl_calloc_array(ctx, int, c->n); |
| 40 | if (!c->scc || !c->cluster || |
| 41 | !c->scc_cluster || !c->scc_node || !c->scc_in_merge) |
| 42 | return isl_stat_error; |
| 43 | |
| 44 | for (i = 0; i < c->n; ++i) { |
| 45 | if (isl_sched_graph_extract_sub_graph(ctx, graph, |
| 46 | node_pred: &isl_sched_node_scc_exactly, |
| 47 | edge_pred: &isl_sched_edge_scc_exactly, |
| 48 | data: i, sub: &c->scc[i]) < 0) |
| 49 | return isl_stat_error; |
| 50 | c->scc[i].scc = 1; |
| 51 | if (isl_sched_graph_compute_maxvar(graph: &c->scc[i]) < 0) |
| 52 | return isl_stat_error; |
| 53 | if (isl_schedule_node_compute_wcc_band(ctx, graph: &c->scc[i]) < 0) |
| 54 | return isl_stat_error; |
| 55 | c->scc_cluster[i] = i; |
| 56 | } |
| 57 | |
| 58 | return isl_stat_ok; |
| 59 | } |
| 60 | |
| 61 | /* Free all memory allocated for "c". |
| 62 | */ |
| 63 | static void clustering_free(isl_ctx *ctx, struct isl_clustering *c) |
| 64 | { |
| 65 | int i; |
| 66 | |
| 67 | if (c->scc) |
| 68 | for (i = 0; i < c->n; ++i) |
| 69 | isl_sched_graph_free(ctx, graph: &c->scc[i]); |
| 70 | free(ptr: c->scc); |
| 71 | if (c->cluster) |
| 72 | for (i = 0; i < c->n; ++i) |
| 73 | isl_sched_graph_free(ctx, graph: &c->cluster[i]); |
| 74 | free(ptr: c->cluster); |
| 75 | free(ptr: c->scc_cluster); |
| 76 | free(ptr: c->scc_node); |
| 77 | free(ptr: c->scc_in_merge); |
| 78 | } |
| 79 | |
| 80 | /* Should we refrain from merging the cluster in "graph" with |
| 81 | * any other cluster? |
| 82 | * In particular, is its current schedule band empty and incomplete. |
| 83 | */ |
| 84 | static int bad_cluster(struct isl_sched_graph *graph) |
| 85 | { |
| 86 | return graph->n_row < graph->maxvar && |
| 87 | graph->n_total_row == graph->band_start; |
| 88 | } |
| 89 | |
| 90 | /* Is "edge" a proximity edge with a non-empty dependence relation? |
| 91 | */ |
| 92 | static isl_bool is_non_empty_proximity(struct isl_sched_edge *edge) |
| 93 | { |
| 94 | if (!isl_sched_edge_is_proximity(edge)) |
| 95 | return isl_bool_false; |
| 96 | return isl_bool_not(b: isl_map_plain_is_empty(map: edge->map)); |
| 97 | } |
| 98 | |
| 99 | /* Return the index of an edge in "graph" that can be used to merge |
| 100 | * two clusters in "c". |
| 101 | * Return graph->n_edge if no such edge can be found. |
| 102 | * Return -1 on error. |
| 103 | * |
| 104 | * In particular, return a proximity edge between two clusters |
| 105 | * that is not marked "no_merge" and such that neither of the |
| 106 | * two clusters has an incomplete, empty band. |
| 107 | * |
| 108 | * If there are multiple such edges, then try and find the most |
| 109 | * appropriate edge to use for merging. In particular, pick the edge |
| 110 | * with the greatest weight. If there are multiple of those, |
| 111 | * then pick one with the shortest distance between |
| 112 | * the two cluster representatives. |
| 113 | */ |
| 114 | static int find_proximity(struct isl_sched_graph *graph, |
| 115 | struct isl_clustering *c) |
| 116 | { |
| 117 | int i, best = graph->n_edge, best_dist, best_weight; |
| 118 | |
| 119 | for (i = 0; i < graph->n_edge; ++i) { |
| 120 | struct isl_sched_edge *edge = &graph->edge[i]; |
| 121 | int dist, weight; |
| 122 | isl_bool prox; |
| 123 | |
| 124 | prox = is_non_empty_proximity(edge); |
| 125 | if (prox < 0) |
| 126 | return -1; |
| 127 | if (!prox) |
| 128 | continue; |
| 129 | if (edge->no_merge) |
| 130 | continue; |
| 131 | if (bad_cluster(graph: &c->scc[edge->src->scc]) || |
| 132 | bad_cluster(graph: &c->scc[edge->dst->scc])) |
| 133 | continue; |
| 134 | dist = c->scc_cluster[edge->dst->scc] - |
| 135 | c->scc_cluster[edge->src->scc]; |
| 136 | if (dist == 0) |
| 137 | continue; |
| 138 | weight = edge->weight; |
| 139 | if (best < graph->n_edge) { |
| 140 | if (best_weight > weight) |
| 141 | continue; |
| 142 | if (best_weight == weight && best_dist <= dist) |
| 143 | continue; |
| 144 | } |
| 145 | best = i; |
| 146 | best_dist = dist; |
| 147 | best_weight = weight; |
| 148 | } |
| 149 | |
| 150 | return best; |
| 151 | } |
| 152 | |
| 153 | /* Internal data structure used in mark_merge_sccs. |
| 154 | * |
| 155 | * "graph" is the dependence graph in which a strongly connected |
| 156 | * component is constructed. |
| 157 | * "scc_cluster" maps each SCC index to the cluster to which it belongs. |
| 158 | * "src" and "dst" are the indices of the nodes that are being merged. |
| 159 | */ |
| 160 | struct isl_mark_merge_sccs_data { |
| 161 | struct isl_sched_graph *graph; |
| 162 | int *scc_cluster; |
| 163 | int src; |
| 164 | int dst; |
| 165 | }; |
| 166 | |
| 167 | /* Check whether the cluster containing node "i" depends on the cluster |
| 168 | * containing node "j". If "i" and "j" belong to the same cluster, |
| 169 | * then they are taken to depend on each other to ensure that |
| 170 | * the resulting strongly connected component consists of complete |
| 171 | * clusters. Furthermore, if "i" and "j" are the two nodes that |
| 172 | * are being merged, then they are taken to depend on each other as well. |
| 173 | * Otherwise, check if there is a (conditional) validity dependence |
| 174 | * from node[j] to node[i], forcing node[i] to follow node[j]. |
| 175 | */ |
| 176 | static isl_bool cluster_follows(int i, int j, void *user) |
| 177 | { |
| 178 | struct isl_mark_merge_sccs_data *data = user; |
| 179 | struct isl_sched_graph *graph = data->graph; |
| 180 | int *scc_cluster = data->scc_cluster; |
| 181 | |
| 182 | if (data->src == i && data->dst == j) |
| 183 | return isl_bool_true; |
| 184 | if (data->src == j && data->dst == i) |
| 185 | return isl_bool_true; |
| 186 | if (scc_cluster[graph->node[i].scc] == scc_cluster[graph->node[j].scc]) |
| 187 | return isl_bool_true; |
| 188 | |
| 189 | return isl_sched_graph_has_validity_edge(graph, src: &graph->node[j], |
| 190 | dst: &graph->node[i]); |
| 191 | } |
| 192 | |
| 193 | /* Mark all SCCs that belong to either of the two clusters in "c" |
| 194 | * connected by the edge in "graph" with index "edge", or to any |
| 195 | * of the intermediate clusters. |
| 196 | * The marking is recorded in c->scc_in_merge. |
| 197 | * |
| 198 | * The given edge has been selected for merging two clusters, |
| 199 | * meaning that there is at least a proximity edge between the two nodes. |
| 200 | * However, there may also be (indirect) validity dependences |
| 201 | * between the two nodes. When merging the two clusters, all clusters |
| 202 | * containing one or more of the intermediate nodes along the |
| 203 | * indirect validity dependences need to be merged in as well. |
| 204 | * |
| 205 | * First collect all such nodes by computing the strongly connected |
| 206 | * component (SCC) containing the two nodes connected by the edge, where |
| 207 | * the two nodes are considered to depend on each other to make |
| 208 | * sure they end up in the same SCC. Similarly, each node is considered |
| 209 | * to depend on every other node in the same cluster to ensure |
| 210 | * that the SCC consists of complete clusters. |
| 211 | * |
| 212 | * Then the original SCCs that contain any of these nodes are marked |
| 213 | * in c->scc_in_merge. |
| 214 | */ |
| 215 | static isl_stat mark_merge_sccs(isl_ctx *ctx, struct isl_sched_graph *graph, |
| 216 | int edge, struct isl_clustering *c) |
| 217 | { |
| 218 | struct isl_mark_merge_sccs_data data; |
| 219 | struct isl_tarjan_graph *g; |
| 220 | int i; |
| 221 | |
| 222 | for (i = 0; i < c->n; ++i) |
| 223 | c->scc_in_merge[i] = 0; |
| 224 | |
| 225 | data.graph = graph; |
| 226 | data.scc_cluster = c->scc_cluster; |
| 227 | data.src = graph->edge[edge].src - graph->node; |
| 228 | data.dst = graph->edge[edge].dst - graph->node; |
| 229 | |
| 230 | g = isl_tarjan_graph_component(ctx, len: graph->n, node: data.dst, |
| 231 | follows: &cluster_follows, user: &data); |
| 232 | if (!g) |
| 233 | goto error; |
| 234 | |
| 235 | i = g->op; |
| 236 | if (i < 3) |
| 237 | isl_die(ctx, isl_error_internal, |
| 238 | "expecting at least two nodes in component" , |
| 239 | goto error); |
| 240 | if (g->order[--i] != -1) |
| 241 | isl_die(ctx, isl_error_internal, |
| 242 | "expecting end of component marker" , goto error); |
| 243 | |
| 244 | for (--i; i >= 0 && g->order[i] != -1; --i) { |
| 245 | int scc = graph->node[g->order[i]].scc; |
| 246 | c->scc_in_merge[scc] = 1; |
| 247 | } |
| 248 | |
| 249 | isl_tarjan_graph_free(g); |
| 250 | return isl_stat_ok; |
| 251 | error: |
| 252 | isl_tarjan_graph_free(g); |
| 253 | return isl_stat_error; |
| 254 | } |
| 255 | |
| 256 | /* Construct the identifier "cluster_i". |
| 257 | */ |
| 258 | static __isl_give isl_id *cluster_id(isl_ctx *ctx, int i) |
| 259 | { |
| 260 | char name[40]; |
| 261 | |
| 262 | snprintf(s: name, maxlen: sizeof(name), format: "cluster_%d" , i); |
| 263 | return isl_id_alloc(ctx, name, NULL); |
| 264 | } |
| 265 | |
| 266 | /* Construct the space of the cluster with index "i" containing |
| 267 | * the strongly connected component "scc". |
| 268 | * |
| 269 | * In particular, construct a space called cluster_i with dimension equal |
| 270 | * to the number of schedule rows in the current band of "scc". |
| 271 | */ |
| 272 | static __isl_give isl_space *cluster_space(struct isl_sched_graph *scc, int i) |
| 273 | { |
| 274 | int nvar; |
| 275 | isl_space *space; |
| 276 | isl_id *id; |
| 277 | |
| 278 | nvar = scc->n_total_row - scc->band_start; |
| 279 | space = isl_space_copy(space: scc->node[0].space); |
| 280 | space = isl_space_params(space); |
| 281 | space = isl_space_set_from_params(space); |
| 282 | space = isl_space_add_dims(space, type: isl_dim_set, n: nvar); |
| 283 | id = cluster_id(ctx: isl_space_get_ctx(space), i); |
| 284 | space = isl_space_set_tuple_id(space, type: isl_dim_set, id); |
| 285 | |
| 286 | return space; |
| 287 | } |
| 288 | |
| 289 | /* Collect the domain of the graph for merging clusters. |
| 290 | * |
| 291 | * In particular, for each cluster with first SCC "i", construct |
| 292 | * a set in the space called cluster_i with dimension equal |
| 293 | * to the number of schedule rows in the current band of the cluster. |
| 294 | */ |
| 295 | static __isl_give isl_union_set *collect_domain(isl_ctx *ctx, |
| 296 | struct isl_sched_graph *graph, struct isl_clustering *c) |
| 297 | { |
| 298 | int i; |
| 299 | isl_space *space; |
| 300 | isl_union_set *domain; |
| 301 | |
| 302 | space = isl_space_params_alloc(ctx, nparam: 0); |
| 303 | domain = isl_union_set_empty(space); |
| 304 | |
| 305 | for (i = 0; i < graph->scc; ++i) { |
| 306 | isl_space *space; |
| 307 | |
| 308 | if (!c->scc_in_merge[i]) |
| 309 | continue; |
| 310 | if (c->scc_cluster[i] != i) |
| 311 | continue; |
| 312 | space = cluster_space(scc: &c->scc[i], i); |
| 313 | domain = isl_union_set_add_set(uset: domain, set: isl_set_universe(space)); |
| 314 | } |
| 315 | |
| 316 | return domain; |
| 317 | } |
| 318 | |
| 319 | /* Construct a map from the original instances to the corresponding |
| 320 | * cluster instance in the current bands of the clusters in "c". |
| 321 | */ |
| 322 | static __isl_give isl_union_map *collect_cluster_map(isl_ctx *ctx, |
| 323 | struct isl_sched_graph *graph, struct isl_clustering *c) |
| 324 | { |
| 325 | int i, j; |
| 326 | isl_space *space; |
| 327 | isl_union_map *cluster_map; |
| 328 | |
| 329 | space = isl_space_params_alloc(ctx, nparam: 0); |
| 330 | cluster_map = isl_union_map_empty(space); |
| 331 | for (i = 0; i < graph->scc; ++i) { |
| 332 | int start, n; |
| 333 | isl_id *id; |
| 334 | |
| 335 | if (!c->scc_in_merge[i]) |
| 336 | continue; |
| 337 | |
| 338 | id = cluster_id(ctx, i: c->scc_cluster[i]); |
| 339 | start = c->scc[i].band_start; |
| 340 | n = c->scc[i].n_total_row - start; |
| 341 | for (j = 0; j < c->scc[i].n; ++j) { |
| 342 | isl_multi_aff *ma; |
| 343 | isl_map *map; |
| 344 | struct isl_sched_node *node = &c->scc[i].node[j]; |
| 345 | |
| 346 | ma = isl_sched_node_extract_partial_schedule_multi_aff( |
| 347 | node, first: start, n); |
| 348 | ma = isl_multi_aff_set_tuple_id(multi: ma, type: isl_dim_out, |
| 349 | id: isl_id_copy(id)); |
| 350 | map = isl_map_from_multi_aff(maff: ma); |
| 351 | cluster_map = isl_union_map_add_map(umap: cluster_map, map); |
| 352 | } |
| 353 | isl_id_free(id); |
| 354 | } |
| 355 | |
| 356 | return cluster_map; |
| 357 | } |
| 358 | |
| 359 | /* Add "umap" to the schedule constraints "sc" of all types of "edge" |
| 360 | * that are not isl_edge_condition or isl_edge_conditional_validity. |
| 361 | */ |
| 362 | static __isl_give isl_schedule_constraints *add_non_conditional_constraints( |
| 363 | struct isl_sched_edge *edge, __isl_keep isl_union_map *umap, |
| 364 | __isl_take isl_schedule_constraints *sc) |
| 365 | { |
| 366 | enum isl_edge_type t; |
| 367 | |
| 368 | if (!sc) |
| 369 | return NULL; |
| 370 | |
| 371 | for (t = isl_edge_first; t <= isl_edge_last; ++t) { |
| 372 | if (t == isl_edge_condition || |
| 373 | t == isl_edge_conditional_validity) |
| 374 | continue; |
| 375 | if (!isl_sched_edge_has_type(edge, type: t)) |
| 376 | continue; |
| 377 | sc = isl_schedule_constraints_add(sc, type: t, |
| 378 | c: isl_union_map_copy(umap)); |
| 379 | } |
| 380 | |
| 381 | return sc; |
| 382 | } |
| 383 | |
| 384 | /* Add schedule constraints of types isl_edge_condition and |
| 385 | * isl_edge_conditional_validity to "sc" by applying "umap" to |
| 386 | * the domains of the wrapped relations in domain and range |
| 387 | * of the corresponding tagged constraints of "edge". |
| 388 | */ |
| 389 | static __isl_give isl_schedule_constraints *add_conditional_constraints( |
| 390 | struct isl_sched_edge *edge, __isl_keep isl_union_map *umap, |
| 391 | __isl_take isl_schedule_constraints *sc) |
| 392 | { |
| 393 | enum isl_edge_type t; |
| 394 | isl_union_map *tagged; |
| 395 | |
| 396 | for (t = isl_edge_condition; t <= isl_edge_conditional_validity; ++t) { |
| 397 | if (!isl_sched_edge_has_type(edge, type: t)) |
| 398 | continue; |
| 399 | if (t == isl_edge_condition) |
| 400 | tagged = isl_union_map_copy(umap: edge->tagged_condition); |
| 401 | else |
| 402 | tagged = isl_union_map_copy(umap: edge->tagged_validity); |
| 403 | tagged = isl_union_map_zip(umap: tagged); |
| 404 | tagged = isl_union_map_apply_domain(umap1: tagged, |
| 405 | umap2: isl_union_map_copy(umap)); |
| 406 | tagged = isl_union_map_zip(umap: tagged); |
| 407 | sc = isl_schedule_constraints_add(sc, type: t, c: tagged); |
| 408 | if (!sc) |
| 409 | return NULL; |
| 410 | } |
| 411 | |
| 412 | return sc; |
| 413 | } |
| 414 | |
| 415 | /* Given a mapping "cluster_map" from the original instances to |
| 416 | * the cluster instances, add schedule constraints on the clusters |
| 417 | * to "sc" corresponding to the original constraints represented by "edge". |
| 418 | * |
| 419 | * For non-tagged dependence constraints, the cluster constraints |
| 420 | * are obtained by applying "cluster_map" to the edge->map. |
| 421 | * |
| 422 | * For tagged dependence constraints, "cluster_map" needs to be applied |
| 423 | * to the domains of the wrapped relations in domain and range |
| 424 | * of the tagged dependence constraints. Pick out the mappings |
| 425 | * from these domains from "cluster_map" and construct their product. |
| 426 | * This mapping can then be applied to the pair of domains. |
| 427 | */ |
| 428 | static __isl_give isl_schedule_constraints *collect_edge_constraints( |
| 429 | struct isl_sched_edge *edge, __isl_keep isl_union_map *cluster_map, |
| 430 | __isl_take isl_schedule_constraints *sc) |
| 431 | { |
| 432 | isl_union_map *umap; |
| 433 | isl_space *space; |
| 434 | isl_union_set *uset; |
| 435 | isl_union_map *umap1, *umap2; |
| 436 | |
| 437 | if (!sc) |
| 438 | return NULL; |
| 439 | |
| 440 | umap = isl_union_map_from_map(map: isl_map_copy(map: edge->map)); |
| 441 | umap = isl_union_map_apply_domain(umap1: umap, |
| 442 | umap2: isl_union_map_copy(umap: cluster_map)); |
| 443 | umap = isl_union_map_apply_range(umap1: umap, |
| 444 | umap2: isl_union_map_copy(umap: cluster_map)); |
| 445 | sc = add_non_conditional_constraints(edge, umap, sc); |
| 446 | isl_union_map_free(umap); |
| 447 | |
| 448 | if (!sc || |
| 449 | (!isl_sched_edge_is_condition(edge) && |
| 450 | !isl_sched_edge_is_conditional_validity(edge))) |
| 451 | return sc; |
| 452 | |
| 453 | space = isl_space_domain(space: isl_map_get_space(map: edge->map)); |
| 454 | uset = isl_union_set_from_set(set: isl_set_universe(space)); |
| 455 | umap1 = isl_union_map_copy(umap: cluster_map); |
| 456 | umap1 = isl_union_map_intersect_domain(umap: umap1, uset); |
| 457 | space = isl_space_range(space: isl_map_get_space(map: edge->map)); |
| 458 | uset = isl_union_set_from_set(set: isl_set_universe(space)); |
| 459 | umap2 = isl_union_map_copy(umap: cluster_map); |
| 460 | umap2 = isl_union_map_intersect_domain(umap: umap2, uset); |
| 461 | umap = isl_union_map_product(umap1, umap2); |
| 462 | |
| 463 | sc = add_conditional_constraints(edge, umap, sc); |
| 464 | |
| 465 | isl_union_map_free(umap); |
| 466 | return sc; |
| 467 | } |
| 468 | |
| 469 | /* Given a mapping "cluster_map" from the original instances to |
| 470 | * the cluster instances, add schedule constraints on the clusters |
| 471 | * to "sc" corresponding to all edges in "graph" between nodes that |
| 472 | * belong to SCCs that are marked for merging in "scc_in_merge". |
| 473 | */ |
| 474 | static __isl_give isl_schedule_constraints *collect_constraints( |
| 475 | struct isl_sched_graph *graph, int *scc_in_merge, |
| 476 | __isl_keep isl_union_map *cluster_map, |
| 477 | __isl_take isl_schedule_constraints *sc) |
| 478 | { |
| 479 | int i; |
| 480 | |
| 481 | for (i = 0; i < graph->n_edge; ++i) { |
| 482 | struct isl_sched_edge *edge = &graph->edge[i]; |
| 483 | |
| 484 | if (!scc_in_merge[edge->src->scc]) |
| 485 | continue; |
| 486 | if (!scc_in_merge[edge->dst->scc]) |
| 487 | continue; |
| 488 | sc = collect_edge_constraints(edge, cluster_map, sc); |
| 489 | } |
| 490 | |
| 491 | return sc; |
| 492 | } |
| 493 | |
| 494 | /* Construct a dependence graph for scheduling clusters with respect |
| 495 | * to each other and store the result in "merge_graph". |
| 496 | * In particular, the nodes of the graph correspond to the schedule |
| 497 | * dimensions of the current bands of those clusters that have been |
| 498 | * marked for merging in "c". |
| 499 | * |
| 500 | * First construct an isl_schedule_constraints object for this domain |
| 501 | * by transforming the edges in "graph" to the domain. |
| 502 | * Then initialize a dependence graph for scheduling from these |
| 503 | * constraints. |
| 504 | */ |
| 505 | static isl_stat init_merge_graph(isl_ctx *ctx, struct isl_sched_graph *graph, |
| 506 | struct isl_clustering *c, struct isl_sched_graph *merge_graph) |
| 507 | { |
| 508 | isl_union_set *domain; |
| 509 | isl_union_map *cluster_map; |
| 510 | isl_schedule_constraints *sc; |
| 511 | isl_stat r; |
| 512 | |
| 513 | domain = collect_domain(ctx, graph, c); |
| 514 | sc = isl_schedule_constraints_on_domain(domain); |
| 515 | if (!sc) |
| 516 | return isl_stat_error; |
| 517 | cluster_map = collect_cluster_map(ctx, graph, c); |
| 518 | sc = collect_constraints(graph, scc_in_merge: c->scc_in_merge, cluster_map, sc); |
| 519 | isl_union_map_free(umap: cluster_map); |
| 520 | |
| 521 | r = isl_sched_graph_init(graph: merge_graph, sc); |
| 522 | |
| 523 | isl_schedule_constraints_free(sc); |
| 524 | |
| 525 | return r; |
| 526 | } |
| 527 | |
| 528 | /* Compute the maximal number of remaining schedule rows that still need |
| 529 | * to be computed for the nodes that belong to clusters with the maximal |
| 530 | * dimension for the current band (i.e., the band that is to be merged). |
| 531 | * Only clusters that are about to be merged are considered. |
| 532 | * "maxvar" is the maximal dimension for the current band. |
| 533 | * "c" contains information about the clusters. |
| 534 | * |
| 535 | * Return the maximal number of remaining schedule rows or |
| 536 | * isl_size_error on error. |
| 537 | */ |
| 538 | static isl_size compute_maxvar_max_slack(int maxvar, struct isl_clustering *c) |
| 539 | { |
| 540 | int i, j; |
| 541 | int max_slack; |
| 542 | |
| 543 | max_slack = 0; |
| 544 | for (i = 0; i < c->n; ++i) { |
| 545 | int nvar; |
| 546 | struct isl_sched_graph *scc; |
| 547 | |
| 548 | if (!c->scc_in_merge[i]) |
| 549 | continue; |
| 550 | scc = &c->scc[i]; |
| 551 | nvar = scc->n_total_row - scc->band_start; |
| 552 | if (nvar != maxvar) |
| 553 | continue; |
| 554 | for (j = 0; j < scc->n; ++j) { |
| 555 | struct isl_sched_node *node = &scc->node[j]; |
| 556 | int slack; |
| 557 | |
| 558 | if (isl_sched_node_update_vmap(node) < 0) |
| 559 | return isl_size_error; |
| 560 | slack = node->nvar - node->rank; |
| 561 | if (slack > max_slack) |
| 562 | max_slack = slack; |
| 563 | } |
| 564 | } |
| 565 | |
| 566 | return max_slack; |
| 567 | } |
| 568 | |
| 569 | /* If there are any clusters where the dimension of the current band |
| 570 | * (i.e., the band that is to be merged) is smaller than "maxvar" and |
| 571 | * if there are any nodes in such a cluster where the number |
| 572 | * of remaining schedule rows that still need to be computed |
| 573 | * is greater than "max_slack", then return the smallest current band |
| 574 | * dimension of all these clusters. Otherwise return the original value |
| 575 | * of "maxvar". Return isl_size_error in case of any error. |
| 576 | * Only clusters that are about to be merged are considered. |
| 577 | * "c" contains information about the clusters. |
| 578 | */ |
| 579 | static isl_size limit_maxvar_to_slack(int maxvar, int max_slack, |
| 580 | struct isl_clustering *c) |
| 581 | { |
| 582 | int i, j; |
| 583 | |
| 584 | for (i = 0; i < c->n; ++i) { |
| 585 | int nvar; |
| 586 | struct isl_sched_graph *scc; |
| 587 | |
| 588 | if (!c->scc_in_merge[i]) |
| 589 | continue; |
| 590 | scc = &c->scc[i]; |
| 591 | nvar = scc->n_total_row - scc->band_start; |
| 592 | if (nvar >= maxvar) |
| 593 | continue; |
| 594 | for (j = 0; j < scc->n; ++j) { |
| 595 | struct isl_sched_node *node = &scc->node[j]; |
| 596 | int slack; |
| 597 | |
| 598 | if (isl_sched_node_update_vmap(node) < 0) |
| 599 | return isl_size_error; |
| 600 | slack = node->nvar - node->rank; |
| 601 | if (slack > max_slack) { |
| 602 | maxvar = nvar; |
| 603 | break; |
| 604 | } |
| 605 | } |
| 606 | } |
| 607 | |
| 608 | return maxvar; |
| 609 | } |
| 610 | |
| 611 | /* Adjust merge_graph->maxvar based on the number of remaining schedule rows |
| 612 | * that still need to be computed. In particular, if there is a node |
| 613 | * in a cluster where the dimension of the current band is smaller |
| 614 | * than merge_graph->maxvar, but the number of remaining schedule rows |
| 615 | * is greater than that of any node in a cluster with the maximal |
| 616 | * dimension for the current band (i.e., merge_graph->maxvar), |
| 617 | * then adjust merge_graph->maxvar to the (smallest) current band dimension |
| 618 | * of those clusters. Without this adjustment, the total number of |
| 619 | * schedule dimensions would be increased, resulting in a skewed view |
| 620 | * of the number of coincident dimensions. |
| 621 | * "c" contains information about the clusters. |
| 622 | * |
| 623 | * If the maximize_band_depth option is set and merge_graph->maxvar is reduced, |
| 624 | * then there is no point in attempting any merge since it will be rejected |
| 625 | * anyway. Set merge_graph->maxvar to zero in such cases. |
| 626 | */ |
| 627 | static isl_stat adjust_maxvar_to_slack(isl_ctx *ctx, |
| 628 | struct isl_sched_graph *merge_graph, struct isl_clustering *c) |
| 629 | { |
| 630 | isl_size max_slack, maxvar; |
| 631 | |
| 632 | max_slack = compute_maxvar_max_slack(maxvar: merge_graph->maxvar, c); |
| 633 | if (max_slack < 0) |
| 634 | return isl_stat_error; |
| 635 | maxvar = limit_maxvar_to_slack(maxvar: merge_graph->maxvar, max_slack, c); |
| 636 | if (maxvar < 0) |
| 637 | return isl_stat_error; |
| 638 | |
| 639 | if (maxvar < merge_graph->maxvar) { |
| 640 | if (isl_options_get_schedule_maximize_band_depth(ctx)) |
| 641 | merge_graph->maxvar = 0; |
| 642 | else |
| 643 | merge_graph->maxvar = maxvar; |
| 644 | } |
| 645 | |
| 646 | return isl_stat_ok; |
| 647 | } |
| 648 | |
| 649 | /* Return the number of coincident dimensions in the current band of "graph", |
| 650 | * where the nodes of "graph" are assumed to be scheduled by a single band. |
| 651 | */ |
| 652 | static int get_n_coincident(struct isl_sched_graph *graph) |
| 653 | { |
| 654 | int i; |
| 655 | |
| 656 | for (i = graph->band_start; i < graph->n_total_row; ++i) |
| 657 | if (!graph->node[0].coincident[i]) |
| 658 | break; |
| 659 | |
| 660 | return i - graph->band_start; |
| 661 | } |
| 662 | |
| 663 | /* Should the clusters be merged based on the cluster schedule |
| 664 | * in the current (and only) band of "merge_graph", given that |
| 665 | * coincidence should be maximized? |
| 666 | * |
| 667 | * If the number of coincident schedule dimensions in the merged band |
| 668 | * would be less than the maximal number of coincident schedule dimensions |
| 669 | * in any of the merged clusters, then the clusters should not be merged. |
| 670 | */ |
| 671 | static isl_bool ok_to_merge_coincident(struct isl_clustering *c, |
| 672 | struct isl_sched_graph *merge_graph) |
| 673 | { |
| 674 | int i; |
| 675 | int n_coincident; |
| 676 | int max_coincident; |
| 677 | |
| 678 | max_coincident = 0; |
| 679 | for (i = 0; i < c->n; ++i) { |
| 680 | if (!c->scc_in_merge[i]) |
| 681 | continue; |
| 682 | n_coincident = get_n_coincident(graph: &c->scc[i]); |
| 683 | if (n_coincident > max_coincident) |
| 684 | max_coincident = n_coincident; |
| 685 | } |
| 686 | |
| 687 | n_coincident = get_n_coincident(graph: merge_graph); |
| 688 | |
| 689 | return isl_bool_ok(b: n_coincident >= max_coincident); |
| 690 | } |
| 691 | |
| 692 | /* Return the transformation on "node" expressed by the current (and only) |
| 693 | * band of "merge_graph" applied to the clusters in "c". |
| 694 | * |
| 695 | * First find the representation of "node" in its SCC in "c" and |
| 696 | * extract the transformation expressed by the current band. |
| 697 | * Then extract the transformation applied by "merge_graph" |
| 698 | * to the cluster to which this SCC belongs. |
| 699 | * Combine the two to obtain the complete transformation on the node. |
| 700 | * |
| 701 | * Note that the range of the first transformation is an anonymous space, |
| 702 | * while the domain of the second is named "cluster_X". The range |
| 703 | * of the former therefore needs to be adjusted before the two |
| 704 | * can be combined. |
| 705 | */ |
| 706 | static __isl_give isl_map *(isl_ctx *ctx, |
| 707 | struct isl_sched_node *node, struct isl_clustering *c, |
| 708 | struct isl_sched_graph *merge_graph) |
| 709 | { |
| 710 | struct isl_sched_node *scc_node, *cluster_node; |
| 711 | int start, n; |
| 712 | isl_id *id; |
| 713 | isl_space *space; |
| 714 | isl_multi_aff *ma, *ma2; |
| 715 | |
| 716 | scc_node = isl_sched_graph_find_node(ctx, graph: &c->scc[node->scc], |
| 717 | space: node->space); |
| 718 | if (scc_node && !isl_sched_graph_is_node(graph: &c->scc[node->scc], node: scc_node)) |
| 719 | isl_die(ctx, isl_error_internal, "unable to find node" , |
| 720 | return NULL); |
| 721 | start = c->scc[node->scc].band_start; |
| 722 | n = c->scc[node->scc].n_total_row - start; |
| 723 | ma = isl_sched_node_extract_partial_schedule_multi_aff(node: scc_node, |
| 724 | first: start, n); |
| 725 | space = cluster_space(scc: &c->scc[node->scc], i: c->scc_cluster[node->scc]); |
| 726 | cluster_node = isl_sched_graph_find_node(ctx, graph: merge_graph, space); |
| 727 | if (cluster_node && !isl_sched_graph_is_node(graph: merge_graph, node: cluster_node)) |
| 728 | isl_die(ctx, isl_error_internal, "unable to find cluster" , |
| 729 | space = isl_space_free(space)); |
| 730 | id = isl_space_get_tuple_id(space, type: isl_dim_set); |
| 731 | ma = isl_multi_aff_set_tuple_id(multi: ma, type: isl_dim_out, id); |
| 732 | isl_space_free(space); |
| 733 | n = merge_graph->n_total_row; |
| 734 | ma2 = isl_sched_node_extract_partial_schedule_multi_aff(node: cluster_node, |
| 735 | first: 0, n); |
| 736 | ma = isl_multi_aff_pullback_multi_aff(ma1: ma2, ma2: ma); |
| 737 | |
| 738 | return isl_map_from_multi_aff(maff: ma); |
| 739 | } |
| 740 | |
| 741 | /* Give a set of distances "set", are they bounded by a small constant |
| 742 | * in direction "pos"? |
| 743 | * In practice, check if they are bounded by 2 by checking that there |
| 744 | * are no elements with a value greater than or equal to 3 or |
| 745 | * smaller than or equal to -3. |
| 746 | */ |
| 747 | static isl_bool distance_is_bounded(__isl_keep isl_set *set, int pos) |
| 748 | { |
| 749 | isl_bool bounded; |
| 750 | isl_set *test; |
| 751 | |
| 752 | if (!set) |
| 753 | return isl_bool_error; |
| 754 | |
| 755 | test = isl_set_copy(set); |
| 756 | test = isl_set_lower_bound_si(set: test, type: isl_dim_set, pos, value: 3); |
| 757 | bounded = isl_set_is_empty(set: test); |
| 758 | isl_set_free(set: test); |
| 759 | |
| 760 | if (bounded < 0 || !bounded) |
| 761 | return bounded; |
| 762 | |
| 763 | test = isl_set_copy(set); |
| 764 | test = isl_set_upper_bound_si(set: test, type: isl_dim_set, pos, value: -3); |
| 765 | bounded = isl_set_is_empty(set: test); |
| 766 | isl_set_free(set: test); |
| 767 | |
| 768 | return bounded; |
| 769 | } |
| 770 | |
| 771 | /* Does the set "set" have a fixed (but possible parametric) value |
| 772 | * at dimension "pos"? |
| 773 | */ |
| 774 | static isl_bool has_single_value(__isl_keep isl_set *set, int pos) |
| 775 | { |
| 776 | isl_size n; |
| 777 | isl_bool single; |
| 778 | |
| 779 | n = isl_set_dim(set, type: isl_dim_set); |
| 780 | if (n < 0) |
| 781 | return isl_bool_error; |
| 782 | set = isl_set_copy(set); |
| 783 | set = isl_set_project_out(set, type: isl_dim_set, first: pos + 1, n: n - (pos + 1)); |
| 784 | set = isl_set_project_out(set, type: isl_dim_set, first: 0, n: pos); |
| 785 | single = isl_set_is_singleton(set); |
| 786 | isl_set_free(set); |
| 787 | |
| 788 | return single; |
| 789 | } |
| 790 | |
| 791 | /* Does "map" have a fixed (but possible parametric) value |
| 792 | * at dimension "pos" of either its domain or its range? |
| 793 | */ |
| 794 | static isl_bool has_singular_src_or_dst(__isl_keep isl_map *map, int pos) |
| 795 | { |
| 796 | isl_set *set; |
| 797 | isl_bool single; |
| 798 | |
| 799 | set = isl_map_domain(bmap: isl_map_copy(map)); |
| 800 | single = has_single_value(set, pos); |
| 801 | isl_set_free(set); |
| 802 | |
| 803 | if (single < 0 || single) |
| 804 | return single; |
| 805 | |
| 806 | set = isl_map_range(map: isl_map_copy(map)); |
| 807 | single = has_single_value(set, pos); |
| 808 | isl_set_free(set); |
| 809 | |
| 810 | return single; |
| 811 | } |
| 812 | |
| 813 | /* Does the edge "edge" from "graph" have bounded dependence distances |
| 814 | * in the merged graph "merge_graph" of a selection of clusters in "c"? |
| 815 | * |
| 816 | * Extract the complete transformations of the source and destination |
| 817 | * nodes of the edge, apply them to the edge constraints and |
| 818 | * compute the differences. Finally, check if these differences are bounded |
| 819 | * in each direction. |
| 820 | * |
| 821 | * If the dimension of the band is greater than the number of |
| 822 | * dimensions that can be expected to be optimized by the edge |
| 823 | * (based on its weight), then also allow the differences to be unbounded |
| 824 | * in the remaining dimensions, but only if either the source or |
| 825 | * the destination has a fixed value in that direction. |
| 826 | * This allows a statement that produces values that are used by |
| 827 | * several instances of another statement to be merged with that |
| 828 | * other statement. |
| 829 | * However, merging such clusters will introduce an inherently |
| 830 | * large proximity distance inside the merged cluster, meaning |
| 831 | * that proximity distances will no longer be optimized in |
| 832 | * subsequent merges. These merges are therefore only allowed |
| 833 | * after all other possible merges have been tried. |
| 834 | * The first time such a merge is encountered, the weight of the edge |
| 835 | * is replaced by a negative weight. The second time (i.e., after |
| 836 | * all merges over edges with a non-negative weight have been tried), |
| 837 | * the merge is allowed. |
| 838 | */ |
| 839 | static isl_bool has_bounded_distances(isl_ctx *ctx, struct isl_sched_edge *edge, |
| 840 | struct isl_sched_graph *graph, struct isl_clustering *c, |
| 841 | struct isl_sched_graph *merge_graph) |
| 842 | { |
| 843 | int i, n_slack; |
| 844 | isl_size n; |
| 845 | isl_bool bounded; |
| 846 | isl_map *map, *t; |
| 847 | isl_set *dist; |
| 848 | |
| 849 | map = isl_map_copy(map: edge->map); |
| 850 | t = extract_node_transformation(ctx, node: edge->src, c, merge_graph); |
| 851 | map = isl_map_apply_domain(map1: map, map2: t); |
| 852 | t = extract_node_transformation(ctx, node: edge->dst, c, merge_graph); |
| 853 | map = isl_map_apply_range(map1: map, map2: t); |
| 854 | dist = isl_map_deltas(map: isl_map_copy(map)); |
| 855 | |
| 856 | bounded = isl_bool_true; |
| 857 | n = isl_set_dim(set: dist, type: isl_dim_set); |
| 858 | if (n < 0) |
| 859 | goto error; |
| 860 | n_slack = n - edge->weight; |
| 861 | if (edge->weight < 0) |
| 862 | n_slack -= graph->max_weight + 1; |
| 863 | for (i = 0; i < n; ++i) { |
| 864 | isl_bool bounded_i, singular_i; |
| 865 | |
| 866 | bounded_i = distance_is_bounded(set: dist, pos: i); |
| 867 | if (bounded_i < 0) |
| 868 | goto error; |
| 869 | if (bounded_i) |
| 870 | continue; |
| 871 | if (edge->weight >= 0) |
| 872 | bounded = isl_bool_false; |
| 873 | n_slack--; |
| 874 | if (n_slack < 0) |
| 875 | break; |
| 876 | singular_i = has_singular_src_or_dst(map, pos: i); |
| 877 | if (singular_i < 0) |
| 878 | goto error; |
| 879 | if (singular_i) |
| 880 | continue; |
| 881 | bounded = isl_bool_false; |
| 882 | break; |
| 883 | } |
| 884 | if (!bounded && i >= n && edge->weight >= 0) |
| 885 | edge->weight -= graph->max_weight + 1; |
| 886 | isl_map_free(map); |
| 887 | isl_set_free(set: dist); |
| 888 | |
| 889 | return bounded; |
| 890 | error: |
| 891 | isl_map_free(map); |
| 892 | isl_set_free(set: dist); |
| 893 | return isl_bool_error; |
| 894 | } |
| 895 | |
| 896 | /* Should the clusters be merged based on the cluster schedule |
| 897 | * in the current (and only) band of "merge_graph"? |
| 898 | * "graph" is the original dependence graph, while "c" records |
| 899 | * which SCCs are involved in the latest merge. |
| 900 | * |
| 901 | * In particular, is there at least one proximity constraint |
| 902 | * that is optimized by the merge? |
| 903 | * |
| 904 | * A proximity constraint is considered to be optimized |
| 905 | * if the dependence distances are small. |
| 906 | */ |
| 907 | static isl_bool ok_to_merge_proximity(isl_ctx *ctx, |
| 908 | struct isl_sched_graph *graph, struct isl_clustering *c, |
| 909 | struct isl_sched_graph *merge_graph) |
| 910 | { |
| 911 | int i; |
| 912 | |
| 913 | for (i = 0; i < graph->n_edge; ++i) { |
| 914 | struct isl_sched_edge *edge = &graph->edge[i]; |
| 915 | isl_bool bounded; |
| 916 | |
| 917 | if (!isl_sched_edge_is_proximity(edge)) |
| 918 | continue; |
| 919 | if (!c->scc_in_merge[edge->src->scc]) |
| 920 | continue; |
| 921 | if (!c->scc_in_merge[edge->dst->scc]) |
| 922 | continue; |
| 923 | if (c->scc_cluster[edge->dst->scc] == |
| 924 | c->scc_cluster[edge->src->scc]) |
| 925 | continue; |
| 926 | bounded = has_bounded_distances(ctx, edge, graph, c, |
| 927 | merge_graph); |
| 928 | if (bounded < 0 || bounded) |
| 929 | return bounded; |
| 930 | } |
| 931 | |
| 932 | return isl_bool_false; |
| 933 | } |
| 934 | |
| 935 | /* Should the clusters be merged based on the cluster schedule |
| 936 | * in the current (and only) band of "merge_graph"? |
| 937 | * "graph" is the original dependence graph, while "c" records |
| 938 | * which SCCs are involved in the latest merge. |
| 939 | * |
| 940 | * If the current band is empty, then the clusters should not be merged. |
| 941 | * |
| 942 | * If the band depth should be maximized and the merge schedule |
| 943 | * is incomplete (meaning that the dimension of some of the schedule |
| 944 | * bands in the original schedule will be reduced), then the clusters |
| 945 | * should not be merged. |
| 946 | * |
| 947 | * If the schedule_maximize_coincidence option is set, then check that |
| 948 | * the number of coincident schedule dimensions is not reduced. |
| 949 | * |
| 950 | * Finally, only allow the merge if at least one proximity |
| 951 | * constraint is optimized. |
| 952 | */ |
| 953 | static isl_bool ok_to_merge(isl_ctx *ctx, struct isl_sched_graph *graph, |
| 954 | struct isl_clustering *c, struct isl_sched_graph *merge_graph) |
| 955 | { |
| 956 | if (merge_graph->n_total_row == merge_graph->band_start) |
| 957 | return isl_bool_false; |
| 958 | |
| 959 | if (isl_options_get_schedule_maximize_band_depth(ctx) && |
| 960 | merge_graph->n_total_row < merge_graph->maxvar) |
| 961 | return isl_bool_false; |
| 962 | |
| 963 | if (isl_options_get_schedule_maximize_coincidence(ctx)) { |
| 964 | isl_bool ok; |
| 965 | |
| 966 | ok = ok_to_merge_coincident(c, merge_graph); |
| 967 | if (ok < 0 || !ok) |
| 968 | return ok; |
| 969 | } |
| 970 | |
| 971 | return ok_to_merge_proximity(ctx, graph, c, merge_graph); |
| 972 | } |
| 973 | |
| 974 | /* Apply the schedule in "t_node" to the "n" rows starting at "first" |
| 975 | * of the schedule in "node" and return the result. |
| 976 | * |
| 977 | * That is, essentially compute |
| 978 | * |
| 979 | * T * N(first:first+n-1) |
| 980 | * |
| 981 | * taking into account the constant term and the parameter coefficients |
| 982 | * in "t_node". |
| 983 | */ |
| 984 | static __isl_give isl_mat *node_transformation(isl_ctx *ctx, |
| 985 | struct isl_sched_node *t_node, struct isl_sched_node *node, |
| 986 | int first, int n) |
| 987 | { |
| 988 | int i, j; |
| 989 | isl_mat *t; |
| 990 | isl_size n_row, n_col; |
| 991 | int n_param, n_var; |
| 992 | |
| 993 | n_param = node->nparam; |
| 994 | n_var = node->nvar; |
| 995 | n_row = isl_mat_rows(mat: t_node->sched); |
| 996 | n_col = isl_mat_cols(mat: node->sched); |
| 997 | if (n_row < 0 || n_col < 0) |
| 998 | return NULL; |
| 999 | t = isl_mat_alloc(ctx, n_row, n_col); |
| 1000 | if (!t) |
| 1001 | return NULL; |
| 1002 | for (i = 0; i < n_row; ++i) { |
| 1003 | isl_seq_cpy(dst: t->row[i], src: t_node->sched->row[i], len: 1 + n_param); |
| 1004 | isl_seq_clr(p: t->row[i] + 1 + n_param, len: n_var); |
| 1005 | for (j = 0; j < n; ++j) |
| 1006 | isl_seq_addmul(dst: t->row[i], |
| 1007 | f: t_node->sched->row[i][1 + n_param + j], |
| 1008 | src: node->sched->row[first + j], |
| 1009 | len: 1 + n_param + n_var); |
| 1010 | } |
| 1011 | return t; |
| 1012 | } |
| 1013 | |
| 1014 | /* Apply the cluster schedule in "t_node" to the current band |
| 1015 | * schedule of the nodes in "graph". |
| 1016 | * |
| 1017 | * In particular, replace the rows starting at band_start |
| 1018 | * by the result of applying the cluster schedule in "t_node" |
| 1019 | * to the original rows. |
| 1020 | * |
| 1021 | * The coincidence of the schedule is determined by the coincidence |
| 1022 | * of the cluster schedule. |
| 1023 | */ |
| 1024 | static isl_stat transform(isl_ctx *ctx, struct isl_sched_graph *graph, |
| 1025 | struct isl_sched_node *t_node) |
| 1026 | { |
| 1027 | int i, j; |
| 1028 | isl_size n_new; |
| 1029 | int start, n; |
| 1030 | |
| 1031 | start = graph->band_start; |
| 1032 | n = graph->n_total_row - start; |
| 1033 | |
| 1034 | n_new = isl_mat_rows(mat: t_node->sched); |
| 1035 | if (n_new < 0) |
| 1036 | return isl_stat_error; |
| 1037 | for (i = 0; i < graph->n; ++i) { |
| 1038 | struct isl_sched_node *node = &graph->node[i]; |
| 1039 | isl_mat *t; |
| 1040 | |
| 1041 | t = node_transformation(ctx, t_node, node, first: start, n); |
| 1042 | node->sched = isl_mat_drop_rows(mat: node->sched, row: start, n); |
| 1043 | node->sched = isl_mat_concat(top: node->sched, bot: t); |
| 1044 | node->sched_map = isl_map_free(map: node->sched_map); |
| 1045 | if (!node->sched) |
| 1046 | return isl_stat_error; |
| 1047 | for (j = 0; j < n_new; ++j) |
| 1048 | node->coincident[start + j] = t_node->coincident[j]; |
| 1049 | } |
| 1050 | graph->n_total_row -= n; |
| 1051 | graph->n_row -= n; |
| 1052 | graph->n_total_row += n_new; |
| 1053 | graph->n_row += n_new; |
| 1054 | |
| 1055 | return isl_stat_ok; |
| 1056 | } |
| 1057 | |
| 1058 | /* Merge the clusters marked for merging in "c" into a single |
| 1059 | * cluster using the cluster schedule in the current band of "merge_graph". |
| 1060 | * The representative SCC for the new cluster is the SCC with |
| 1061 | * the smallest index. |
| 1062 | * |
| 1063 | * The current band schedule of each SCC in the new cluster is obtained |
| 1064 | * by applying the schedule of the corresponding original cluster |
| 1065 | * to the original band schedule. |
| 1066 | * All SCCs in the new cluster have the same number of schedule rows. |
| 1067 | */ |
| 1068 | static isl_stat merge(isl_ctx *ctx, struct isl_clustering *c, |
| 1069 | struct isl_sched_graph *merge_graph) |
| 1070 | { |
| 1071 | int i; |
| 1072 | int cluster = -1; |
| 1073 | isl_space *space; |
| 1074 | |
| 1075 | for (i = 0; i < c->n; ++i) { |
| 1076 | struct isl_sched_node *node; |
| 1077 | |
| 1078 | if (!c->scc_in_merge[i]) |
| 1079 | continue; |
| 1080 | if (cluster < 0) |
| 1081 | cluster = i; |
| 1082 | space = cluster_space(scc: &c->scc[i], i: c->scc_cluster[i]); |
| 1083 | node = isl_sched_graph_find_node(ctx, graph: merge_graph, space); |
| 1084 | isl_space_free(space); |
| 1085 | if (!node) |
| 1086 | return isl_stat_error; |
| 1087 | if (!isl_sched_graph_is_node(graph: merge_graph, node)) |
| 1088 | isl_die(ctx, isl_error_internal, |
| 1089 | "unable to find cluster" , |
| 1090 | return isl_stat_error); |
| 1091 | if (transform(ctx, graph: &c->scc[i], t_node: node) < 0) |
| 1092 | return isl_stat_error; |
| 1093 | c->scc_cluster[i] = cluster; |
| 1094 | } |
| 1095 | |
| 1096 | return isl_stat_ok; |
| 1097 | } |
| 1098 | |
| 1099 | /* Try and merge the clusters of SCCs marked in c->scc_in_merge |
| 1100 | * by scheduling the current cluster bands with respect to each other. |
| 1101 | * |
| 1102 | * Construct a dependence graph with a space for each cluster and |
| 1103 | * with the coordinates of each space corresponding to the schedule |
| 1104 | * dimensions of the current band of that cluster. |
| 1105 | * Construct a cluster schedule in this cluster dependence graph and |
| 1106 | * apply it to the current cluster bands if it is applicable |
| 1107 | * according to ok_to_merge. |
| 1108 | * |
| 1109 | * If the number of remaining schedule dimensions in a cluster |
| 1110 | * with a non-maximal current schedule dimension is greater than |
| 1111 | * the number of remaining schedule dimensions in clusters |
| 1112 | * with a maximal current schedule dimension, then restrict |
| 1113 | * the number of rows to be computed in the cluster schedule |
| 1114 | * to the minimal such non-maximal current schedule dimension. |
| 1115 | * Do this by adjusting merge_graph.maxvar. |
| 1116 | * |
| 1117 | * Return isl_bool_true if the clusters have effectively been merged |
| 1118 | * into a single cluster. |
| 1119 | * |
| 1120 | * Note that since the standard scheduling algorithm minimizes the maximal |
| 1121 | * distance over proximity constraints, the proximity constraints between |
| 1122 | * the merged clusters may not be optimized any further than what is |
| 1123 | * sufficient to bring the distances within the limits of the internal |
| 1124 | * proximity constraints inside the individual clusters. |
| 1125 | * It may therefore make sense to perform an additional translation step |
| 1126 | * to bring the clusters closer to each other, while maintaining |
| 1127 | * the linear part of the merging schedule found using the standard |
| 1128 | * scheduling algorithm. |
| 1129 | */ |
| 1130 | static isl_bool try_merge(isl_ctx *ctx, struct isl_sched_graph *graph, |
| 1131 | struct isl_clustering *c) |
| 1132 | { |
| 1133 | struct isl_sched_graph merge_graph = { 0 }; |
| 1134 | isl_bool merged; |
| 1135 | |
| 1136 | if (init_merge_graph(ctx, graph, c, merge_graph: &merge_graph) < 0) |
| 1137 | goto error; |
| 1138 | |
| 1139 | if (isl_sched_graph_compute_maxvar(graph: &merge_graph) < 0) |
| 1140 | goto error; |
| 1141 | if (adjust_maxvar_to_slack(ctx, merge_graph: &merge_graph,c) < 0) |
| 1142 | goto error; |
| 1143 | if (isl_schedule_node_compute_wcc_band(ctx, graph: &merge_graph) < 0) |
| 1144 | goto error; |
| 1145 | merged = ok_to_merge(ctx, graph, c, merge_graph: &merge_graph); |
| 1146 | if (merged && merge(ctx, c, merge_graph: &merge_graph) < 0) |
| 1147 | goto error; |
| 1148 | |
| 1149 | isl_sched_graph_free(ctx, graph: &merge_graph); |
| 1150 | return merged; |
| 1151 | error: |
| 1152 | isl_sched_graph_free(ctx, graph: &merge_graph); |
| 1153 | return isl_bool_error; |
| 1154 | } |
| 1155 | |
| 1156 | /* Is there any edge marked "no_merge" between two SCCs that are |
| 1157 | * about to be merged (i.e., that are set in "scc_in_merge")? |
| 1158 | * "merge_edge" is the proximity edge along which the clusters of SCCs |
| 1159 | * are going to be merged. |
| 1160 | * |
| 1161 | * If there is any edge between two SCCs with a negative weight, |
| 1162 | * while the weight of "merge_edge" is non-negative, then this |
| 1163 | * means that the edge was postponed. "merge_edge" should then |
| 1164 | * also be postponed since merging along the edge with negative weight should |
| 1165 | * be postponed until all edges with non-negative weight have been tried. |
| 1166 | * Replace the weight of "merge_edge" by a negative weight as well and |
| 1167 | * tell the caller not to attempt a merge. |
| 1168 | */ |
| 1169 | static int any_no_merge(struct isl_sched_graph *graph, int *scc_in_merge, |
| 1170 | struct isl_sched_edge *merge_edge) |
| 1171 | { |
| 1172 | int i; |
| 1173 | |
| 1174 | for (i = 0; i < graph->n_edge; ++i) { |
| 1175 | struct isl_sched_edge *edge = &graph->edge[i]; |
| 1176 | |
| 1177 | if (!scc_in_merge[edge->src->scc]) |
| 1178 | continue; |
| 1179 | if (!scc_in_merge[edge->dst->scc]) |
| 1180 | continue; |
| 1181 | if (edge->no_merge) |
| 1182 | return 1; |
| 1183 | if (merge_edge->weight >= 0 && edge->weight < 0) { |
| 1184 | merge_edge->weight -= graph->max_weight + 1; |
| 1185 | return 1; |
| 1186 | } |
| 1187 | } |
| 1188 | |
| 1189 | return 0; |
| 1190 | } |
| 1191 | |
| 1192 | /* Merge the two clusters in "c" connected by the edge in "graph" |
| 1193 | * with index "edge" into a single cluster. |
| 1194 | * If it turns out to be impossible to merge these two clusters, |
| 1195 | * then mark the edge as "no_merge" such that it will not be |
| 1196 | * considered again. |
| 1197 | * |
| 1198 | * First mark all SCCs that need to be merged. This includes the SCCs |
| 1199 | * in the two clusters, but it may also include the SCCs |
| 1200 | * of intermediate clusters. |
| 1201 | * If there is already a no_merge edge between any pair of such SCCs, |
| 1202 | * then simply mark the current edge as no_merge as well. |
| 1203 | * Likewise, if any of those edges was postponed by has_bounded_distances, |
| 1204 | * then postpone the current edge as well. |
| 1205 | * Otherwise, try and merge the clusters and mark "edge" as "no_merge" |
| 1206 | * if the clusters did not end up getting merged, unless the non-merge |
| 1207 | * is due to the fact that the edge was postponed. This postponement |
| 1208 | * can be recognized by a change in weight (from non-negative to negative). |
| 1209 | */ |
| 1210 | static isl_stat merge_clusters_along_edge(isl_ctx *ctx, |
| 1211 | struct isl_sched_graph *graph, int edge, struct isl_clustering *c) |
| 1212 | { |
| 1213 | isl_bool merged; |
| 1214 | int edge_weight = graph->edge[edge].weight; |
| 1215 | |
| 1216 | if (mark_merge_sccs(ctx, graph, edge, c) < 0) |
| 1217 | return isl_stat_error; |
| 1218 | |
| 1219 | if (any_no_merge(graph, scc_in_merge: c->scc_in_merge, merge_edge: &graph->edge[edge])) |
| 1220 | merged = isl_bool_false; |
| 1221 | else |
| 1222 | merged = try_merge(ctx, graph, c); |
| 1223 | if (merged < 0) |
| 1224 | return isl_stat_error; |
| 1225 | if (!merged && edge_weight == graph->edge[edge].weight) |
| 1226 | graph->edge[edge].no_merge = 1; |
| 1227 | |
| 1228 | return isl_stat_ok; |
| 1229 | } |
| 1230 | |
| 1231 | /* Does "node" belong to the cluster identified by "cluster"? |
| 1232 | */ |
| 1233 | static int node_cluster_exactly(struct isl_sched_node *node, int cluster) |
| 1234 | { |
| 1235 | return node->cluster == cluster; |
| 1236 | } |
| 1237 | |
| 1238 | /* Does "edge" connect two nodes belonging to the cluster |
| 1239 | * identified by "cluster"? |
| 1240 | */ |
| 1241 | static int edge_cluster_exactly(struct isl_sched_edge *edge, int cluster) |
| 1242 | { |
| 1243 | return edge->src->cluster == cluster && edge->dst->cluster == cluster; |
| 1244 | } |
| 1245 | |
| 1246 | /* Swap the schedule of "node1" and "node2". |
| 1247 | * Both nodes have been derived from the same node in a common parent graph. |
| 1248 | * Since the "coincident" field is shared with that node |
| 1249 | * in the parent graph, there is no need to also swap this field. |
| 1250 | */ |
| 1251 | static void swap_sched(struct isl_sched_node *node1, |
| 1252 | struct isl_sched_node *node2) |
| 1253 | { |
| 1254 | isl_mat *sched; |
| 1255 | isl_map *sched_map; |
| 1256 | |
| 1257 | sched = node1->sched; |
| 1258 | node1->sched = node2->sched; |
| 1259 | node2->sched = sched; |
| 1260 | |
| 1261 | sched_map = node1->sched_map; |
| 1262 | node1->sched_map = node2->sched_map; |
| 1263 | node2->sched_map = sched_map; |
| 1264 | } |
| 1265 | |
| 1266 | /* Copy the current band schedule from the SCCs that form the cluster |
| 1267 | * with index "pos" to the actual cluster at position "pos". |
| 1268 | * By construction, the index of the first SCC that belongs to the cluster |
| 1269 | * is also "pos". |
| 1270 | * |
| 1271 | * The order of the nodes inside both the SCCs and the cluster |
| 1272 | * is assumed to be same as the order in the original "graph". |
| 1273 | * |
| 1274 | * Since the SCC graphs will no longer be used after this function, |
| 1275 | * the schedules are actually swapped rather than copied. |
| 1276 | */ |
| 1277 | static isl_stat copy_partial(struct isl_sched_graph *graph, |
| 1278 | struct isl_clustering *c, int pos) |
| 1279 | { |
| 1280 | int i, j; |
| 1281 | |
| 1282 | c->cluster[pos].n_total_row = c->scc[pos].n_total_row; |
| 1283 | c->cluster[pos].n_row = c->scc[pos].n_row; |
| 1284 | c->cluster[pos].maxvar = c->scc[pos].maxvar; |
| 1285 | j = 0; |
| 1286 | for (i = 0; i < graph->n; ++i) { |
| 1287 | int k; |
| 1288 | int s; |
| 1289 | |
| 1290 | if (graph->node[i].cluster != pos) |
| 1291 | continue; |
| 1292 | s = graph->node[i].scc; |
| 1293 | k = c->scc_node[s]++; |
| 1294 | swap_sched(node1: &c->cluster[pos].node[j], node2: &c->scc[s].node[k]); |
| 1295 | if (c->scc[s].maxvar > c->cluster[pos].maxvar) |
| 1296 | c->cluster[pos].maxvar = c->scc[s].maxvar; |
| 1297 | ++j; |
| 1298 | } |
| 1299 | |
| 1300 | return isl_stat_ok; |
| 1301 | } |
| 1302 | |
| 1303 | /* Is there a (conditional) validity dependence from node[j] to node[i], |
| 1304 | * forcing node[i] to follow node[j] or do the nodes belong to the same |
| 1305 | * cluster? |
| 1306 | */ |
| 1307 | static isl_bool node_follows_strong_or_same_cluster(int i, int j, void *user) |
| 1308 | { |
| 1309 | struct isl_sched_graph *graph = user; |
| 1310 | |
| 1311 | if (graph->node[i].cluster == graph->node[j].cluster) |
| 1312 | return isl_bool_true; |
| 1313 | return isl_sched_graph_has_validity_edge(graph, src: &graph->node[j], |
| 1314 | dst: &graph->node[i]); |
| 1315 | } |
| 1316 | |
| 1317 | /* Extract the merged clusters of SCCs in "graph", sort them, and |
| 1318 | * store them in c->clusters. Update c->scc_cluster accordingly. |
| 1319 | * |
| 1320 | * First keep track of the cluster containing the SCC to which a node |
| 1321 | * belongs in the node itself. |
| 1322 | * Then extract the clusters into c->clusters, copying the current |
| 1323 | * band schedule from the SCCs that belong to the cluster. |
| 1324 | * Do this only once per cluster. |
| 1325 | * |
| 1326 | * Finally, topologically sort the clusters and update c->scc_cluster |
| 1327 | * to match the new scc numbering. While the SCCs were originally |
| 1328 | * sorted already, some SCCs that depend on some other SCCs may |
| 1329 | * have been merged with SCCs that appear before these other SCCs. |
| 1330 | * A reordering may therefore be required. |
| 1331 | */ |
| 1332 | static isl_stat (isl_ctx *ctx, struct isl_sched_graph *graph, |
| 1333 | struct isl_clustering *c) |
| 1334 | { |
| 1335 | int i; |
| 1336 | |
| 1337 | for (i = 0; i < graph->n; ++i) |
| 1338 | graph->node[i].cluster = c->scc_cluster[graph->node[i].scc]; |
| 1339 | |
| 1340 | for (i = 0; i < graph->scc; ++i) { |
| 1341 | if (c->scc_cluster[i] != i) |
| 1342 | continue; |
| 1343 | if (isl_sched_graph_extract_sub_graph(ctx, graph, |
| 1344 | node_pred: &node_cluster_exactly, |
| 1345 | edge_pred: &edge_cluster_exactly, data: i, sub: &c->cluster[i]) < 0) |
| 1346 | return isl_stat_error; |
| 1347 | c->cluster[i].src_scc = -1; |
| 1348 | c->cluster[i].dst_scc = -1; |
| 1349 | if (copy_partial(graph, c, pos: i) < 0) |
| 1350 | return isl_stat_error; |
| 1351 | } |
| 1352 | |
| 1353 | if (isl_sched_graph_detect_ccs(ctx, graph, |
| 1354 | follows: &node_follows_strong_or_same_cluster) < 0) |
| 1355 | return isl_stat_error; |
| 1356 | for (i = 0; i < graph->n; ++i) |
| 1357 | c->scc_cluster[graph->node[i].scc] = graph->node[i].cluster; |
| 1358 | |
| 1359 | return isl_stat_ok; |
| 1360 | } |
| 1361 | |
| 1362 | /* Compute weights on the proximity edges of "graph" that can |
| 1363 | * be used by find_proximity to find the most appropriate |
| 1364 | * proximity edge to use to merge two clusters in "c". |
| 1365 | * The weights are also used by has_bounded_distances to determine |
| 1366 | * whether the merge should be allowed. |
| 1367 | * Store the maximum of the computed weights in graph->max_weight. |
| 1368 | * |
| 1369 | * The computed weight is a measure for the number of remaining schedule |
| 1370 | * dimensions that can still be completely aligned. |
| 1371 | * In particular, compute the number of equalities between |
| 1372 | * input dimensions and output dimensions in the proximity constraints. |
| 1373 | * The directions that are already handled by outer schedule bands |
| 1374 | * are projected out prior to determining this number. |
| 1375 | * |
| 1376 | * Edges that will never be considered by find_proximity are ignored. |
| 1377 | */ |
| 1378 | static isl_stat compute_weights(struct isl_sched_graph *graph, |
| 1379 | struct isl_clustering *c) |
| 1380 | { |
| 1381 | int i; |
| 1382 | |
| 1383 | graph->max_weight = 0; |
| 1384 | |
| 1385 | for (i = 0; i < graph->n_edge; ++i) { |
| 1386 | struct isl_sched_edge *edge = &graph->edge[i]; |
| 1387 | struct isl_sched_node *src = edge->src; |
| 1388 | struct isl_sched_node *dst = edge->dst; |
| 1389 | isl_basic_map *hull; |
| 1390 | isl_bool prox; |
| 1391 | isl_size n_in, n_out, n; |
| 1392 | |
| 1393 | prox = is_non_empty_proximity(edge); |
| 1394 | if (prox < 0) |
| 1395 | return isl_stat_error; |
| 1396 | if (!prox) |
| 1397 | continue; |
| 1398 | if (bad_cluster(graph: &c->scc[edge->src->scc]) || |
| 1399 | bad_cluster(graph: &c->scc[edge->dst->scc])) |
| 1400 | continue; |
| 1401 | if (c->scc_cluster[edge->dst->scc] == |
| 1402 | c->scc_cluster[edge->src->scc]) |
| 1403 | continue; |
| 1404 | |
| 1405 | hull = isl_map_affine_hull(map: isl_map_copy(map: edge->map)); |
| 1406 | hull = isl_basic_map_transform_dims(bmap: hull, type: isl_dim_in, first: 0, |
| 1407 | trans: isl_mat_copy(mat: src->vmap)); |
| 1408 | hull = isl_basic_map_transform_dims(bmap: hull, type: isl_dim_out, first: 0, |
| 1409 | trans: isl_mat_copy(mat: dst->vmap)); |
| 1410 | hull = isl_basic_map_project_out(bmap: hull, |
| 1411 | type: isl_dim_in, first: 0, n: src->rank); |
| 1412 | hull = isl_basic_map_project_out(bmap: hull, |
| 1413 | type: isl_dim_out, first: 0, n: dst->rank); |
| 1414 | hull = isl_basic_map_remove_divs(bmap: hull); |
| 1415 | n_in = isl_basic_map_dim(bmap: hull, type: isl_dim_in); |
| 1416 | n_out = isl_basic_map_dim(bmap: hull, type: isl_dim_out); |
| 1417 | if (n_in < 0 || n_out < 0) |
| 1418 | hull = isl_basic_map_free(bmap: hull); |
| 1419 | hull = isl_basic_map_drop_constraints_not_involving_dims(bmap: hull, |
| 1420 | type: isl_dim_in, first: 0, n: n_in); |
| 1421 | hull = isl_basic_map_drop_constraints_not_involving_dims(bmap: hull, |
| 1422 | type: isl_dim_out, first: 0, n: n_out); |
| 1423 | n = isl_basic_map_n_equality(bmap: hull); |
| 1424 | isl_basic_map_free(bmap: hull); |
| 1425 | if (n < 0) |
| 1426 | return isl_stat_error; |
| 1427 | edge->weight = n; |
| 1428 | |
| 1429 | if (edge->weight > graph->max_weight) |
| 1430 | graph->max_weight = edge->weight; |
| 1431 | } |
| 1432 | |
| 1433 | return isl_stat_ok; |
| 1434 | } |
| 1435 | |
| 1436 | /* Call isl_schedule_node_compute_finish_band on each of the clusters in "c" and |
| 1437 | * update "node" to arrange for them to be executed in an order |
| 1438 | * possibly involving set nodes that generalizes the topological order |
| 1439 | * determined by the scc fields of the nodes in "graph". |
| 1440 | * |
| 1441 | * Note that at this stage, there are graph->scc clusters and |
| 1442 | * their positions in c->cluster are determined by the values |
| 1443 | * of c->scc_cluster. |
| 1444 | * |
| 1445 | * Construct an isl_scc_graph and perform the decomposition |
| 1446 | * using this graph. |
| 1447 | */ |
| 1448 | static __isl_give isl_schedule_node *finish_bands_decompose( |
| 1449 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
| 1450 | struct isl_clustering *c) |
| 1451 | { |
| 1452 | isl_ctx *ctx; |
| 1453 | struct isl_scc_graph *scc_graph; |
| 1454 | |
| 1455 | ctx = isl_schedule_node_get_ctx(node); |
| 1456 | |
| 1457 | scc_graph = isl_scc_graph_from_sched_graph(ctx, graph, c); |
| 1458 | node = isl_scc_graph_decompose(scc_graph, node); |
| 1459 | isl_scc_graph_free(scc_graph); |
| 1460 | |
| 1461 | return node; |
| 1462 | } |
| 1463 | |
| 1464 | /* Call isl_schedule_node_compute_finish_band on each of the clusters in "c" |
| 1465 | * in their topological order. This order is determined by the scc |
| 1466 | * fields of the nodes in "graph". |
| 1467 | * Combine the results in a sequence expressing the topological order. |
| 1468 | * |
| 1469 | * If there is only one cluster left, then there is no need to introduce |
| 1470 | * a sequence node. Also, in this case, the cluster necessarily contains |
| 1471 | * the SCC at position 0 in the original graph and is therefore also |
| 1472 | * stored in the first cluster of "c". |
| 1473 | * |
| 1474 | * If there are more than two clusters left, then some subsets of the clusters |
| 1475 | * may still be independent of each other. These could then still |
| 1476 | * be reordered with respect to each other. Call finish_bands_decompose |
| 1477 | * to try and construct an ordering involving set and sequence nodes |
| 1478 | * that generalizes the topological order. |
| 1479 | * Note that at the outermost level there can be no independent components |
| 1480 | * because isl_schedule_node_compute_wcc_clustering is called |
| 1481 | * on a (weakly) connected component. |
| 1482 | */ |
| 1483 | static __isl_give isl_schedule_node *finish_bands_clustering( |
| 1484 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
| 1485 | struct isl_clustering *c) |
| 1486 | { |
| 1487 | int i; |
| 1488 | isl_ctx *ctx; |
| 1489 | isl_union_set_list *filters; |
| 1490 | |
| 1491 | if (graph->scc == 1) |
| 1492 | return isl_schedule_node_compute_finish_band(node, |
| 1493 | graph: &c->cluster[0], initialized: 0); |
| 1494 | if (graph->scc > 2) |
| 1495 | return finish_bands_decompose(node, graph, c); |
| 1496 | |
| 1497 | ctx = isl_schedule_node_get_ctx(node); |
| 1498 | |
| 1499 | filters = isl_sched_graph_extract_sccs(ctx, graph); |
| 1500 | node = isl_schedule_node_insert_sequence(node, filters); |
| 1501 | |
| 1502 | for (i = 0; i < graph->scc; ++i) { |
| 1503 | int j = c->scc_cluster[i]; |
| 1504 | node = isl_schedule_node_grandchild(node, pos1: i, pos2: 0); |
| 1505 | node = isl_schedule_node_compute_finish_band(node, |
| 1506 | graph: &c->cluster[j], initialized: 0); |
| 1507 | node = isl_schedule_node_grandparent(node); |
| 1508 | } |
| 1509 | |
| 1510 | return node; |
| 1511 | } |
| 1512 | |
| 1513 | /* Compute a schedule for a connected dependence graph by first considering |
| 1514 | * each strongly connected component (SCC) in the graph separately and then |
| 1515 | * incrementally combining them into clusters. |
| 1516 | * Return the updated schedule node. |
| 1517 | * |
| 1518 | * Initially, each cluster consists of a single SCC, each with its |
| 1519 | * own band schedule. The algorithm then tries to merge pairs |
| 1520 | * of clusters along a proximity edge until no more suitable |
| 1521 | * proximity edges can be found. During this merging, the schedule |
| 1522 | * is maintained in the individual SCCs. |
| 1523 | * After the merging is completed, the full resulting clusters |
| 1524 | * are extracted and in finish_bands_clustering, |
| 1525 | * isl_schedule_node_compute_finish_band is called on each of them to integrate |
| 1526 | * the band into "node" and to continue the computation. |
| 1527 | * |
| 1528 | * compute_weights initializes the weights that are used by find_proximity. |
| 1529 | */ |
| 1530 | __isl_give isl_schedule_node *isl_schedule_node_compute_wcc_clustering( |
| 1531 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
| 1532 | { |
| 1533 | isl_ctx *ctx; |
| 1534 | struct isl_clustering c; |
| 1535 | int i; |
| 1536 | |
| 1537 | ctx = isl_schedule_node_get_ctx(node); |
| 1538 | |
| 1539 | if (clustering_init(ctx, c: &c, graph) < 0) |
| 1540 | goto error; |
| 1541 | |
| 1542 | if (compute_weights(graph, c: &c) < 0) |
| 1543 | goto error; |
| 1544 | |
| 1545 | for (;;) { |
| 1546 | i = find_proximity(graph, c: &c); |
| 1547 | if (i < 0) |
| 1548 | goto error; |
| 1549 | if (i >= graph->n_edge) |
| 1550 | break; |
| 1551 | if (merge_clusters_along_edge(ctx, graph, edge: i, c: &c) < 0) |
| 1552 | goto error; |
| 1553 | } |
| 1554 | |
| 1555 | if (extract_clusters(ctx, graph, c: &c) < 0) |
| 1556 | goto error; |
| 1557 | |
| 1558 | node = finish_bands_clustering(node, graph, c: &c); |
| 1559 | |
| 1560 | clustering_free(ctx, c: &c); |
| 1561 | return node; |
| 1562 | error: |
| 1563 | clustering_free(ctx, c: &c); |
| 1564 | return isl_schedule_node_free(node); |
| 1565 | } |
| 1566 | |