| 1 | /* |
| 2 | * Copyright 2011 INRIA Saclay |
| 3 | * Copyright 2012-2014 Ecole Normale Superieure |
| 4 | * Copyright 2015-2016 Sven Verdoolaege |
| 5 | * Copyright 2016 INRIA Paris |
| 6 | * Copyright 2017 Sven Verdoolaege |
| 7 | * |
| 8 | * Use of this software is governed by the MIT license |
| 9 | * |
| 10 | * Written by Sven Verdoolaege, INRIA Saclay - Ile-de-France, |
| 11 | * Parc Club Orsay Universite, ZAC des vignes, 4 rue Jacques Monod, |
| 12 | * 91893 Orsay, France |
| 13 | * and Ecole Normale Superieure, 45 rue d'Ulm, 75230 Paris, France |
| 14 | * and Centre de Recherche Inria de Paris, 2 rue Simone Iff - Voie DQ12, |
| 15 | * CS 42112, 75589 Paris Cedex 12, France |
| 16 | */ |
| 17 | |
| 18 | #include <isl_ctx_private.h> |
| 19 | #include <isl_map_private.h> |
| 20 | #include <isl_space_private.h> |
| 21 | #include <isl_aff_private.h> |
| 22 | #include <isl/hash.h> |
| 23 | #include <isl/id.h> |
| 24 | #include <isl/constraint.h> |
| 25 | #include <isl/schedule.h> |
| 26 | #include <isl_schedule_constraints.h> |
| 27 | #include <isl/schedule_node.h> |
| 28 | #include <isl_mat_private.h> |
| 29 | #include <isl_vec_private.h> |
| 30 | #include <isl/set.h> |
| 31 | #include <isl_union_set_private.h> |
| 32 | #include <isl_seq.h> |
| 33 | #include <isl_tab.h> |
| 34 | #include <isl_dim_map.h> |
| 35 | #include <isl/map_to_basic_set.h> |
| 36 | #include <isl_sort.h> |
| 37 | #include <isl_options_private.h> |
| 38 | #include <isl_tarjan.h> |
| 39 | #include <isl_morph.h> |
| 40 | #include <isl/ilp.h> |
| 41 | #include <isl_val_private.h> |
| 42 | |
| 43 | #include "isl_scheduler.h" |
| 44 | #include "isl_scheduler_clustering.h" |
| 45 | |
| 46 | /* |
| 47 | * The scheduling algorithm implemented in this file was inspired by |
| 48 | * Bondhugula et al., "Automatic Transformations for Communication-Minimized |
| 49 | * Parallelization and Locality Optimization in the Polyhedral Model". |
| 50 | * |
| 51 | * For a detailed description of the variant implemented in isl, |
| 52 | * see Verdoolaege and Janssens, "Scheduling for PPCG" (2017). |
| 53 | */ |
| 54 | |
| 55 | |
| 56 | static isl_bool node_has_tuples(const void *entry, const void *val) |
| 57 | { |
| 58 | struct isl_sched_node *node = (struct isl_sched_node *)entry; |
| 59 | isl_space *space = (isl_space *) val; |
| 60 | |
| 61 | return isl_space_has_equal_tuples(space1: node->space, space2: space); |
| 62 | } |
| 63 | |
| 64 | int isl_sched_node_scc_exactly(struct isl_sched_node *node, int scc) |
| 65 | { |
| 66 | return node->scc == scc; |
| 67 | } |
| 68 | |
| 69 | static int node_scc_at_most(struct isl_sched_node *node, int scc) |
| 70 | { |
| 71 | return node->scc <= scc; |
| 72 | } |
| 73 | |
| 74 | static int node_scc_at_least(struct isl_sched_node *node, int scc) |
| 75 | { |
| 76 | return node->scc >= scc; |
| 77 | } |
| 78 | |
| 79 | /* Is "edge" marked as being of type "type"? |
| 80 | */ |
| 81 | int isl_sched_edge_has_type(struct isl_sched_edge *edge, |
| 82 | enum isl_edge_type type) |
| 83 | { |
| 84 | return ISL_FL_ISSET(edge->types, 1 << type); |
| 85 | } |
| 86 | |
| 87 | /* Mark "edge" as being of type "type". |
| 88 | */ |
| 89 | static void set_type(struct isl_sched_edge *edge, enum isl_edge_type type) |
| 90 | { |
| 91 | ISL_FL_SET(edge->types, 1 << type); |
| 92 | } |
| 93 | |
| 94 | /* No longer mark "edge" as being of type "type"? |
| 95 | */ |
| 96 | static void clear_type(struct isl_sched_edge *edge, enum isl_edge_type type) |
| 97 | { |
| 98 | ISL_FL_CLR(edge->types, 1 << type); |
| 99 | } |
| 100 | |
| 101 | /* Is "edge" marked as a validity edge? |
| 102 | */ |
| 103 | static int is_validity(struct isl_sched_edge *edge) |
| 104 | { |
| 105 | return isl_sched_edge_has_type(edge, type: isl_edge_validity); |
| 106 | } |
| 107 | |
| 108 | /* Mark "edge" as a validity edge. |
| 109 | */ |
| 110 | static void set_validity(struct isl_sched_edge *edge) |
| 111 | { |
| 112 | set_type(edge, type: isl_edge_validity); |
| 113 | } |
| 114 | |
| 115 | /* Is "edge" marked as a proximity edge? |
| 116 | */ |
| 117 | int isl_sched_edge_is_proximity(struct isl_sched_edge *edge) |
| 118 | { |
| 119 | return isl_sched_edge_has_type(edge, type: isl_edge_proximity); |
| 120 | } |
| 121 | |
| 122 | /* Is "edge" marked as a local edge? |
| 123 | */ |
| 124 | static int is_local(struct isl_sched_edge *edge) |
| 125 | { |
| 126 | return isl_sched_edge_has_type(edge, type: isl_edge_local); |
| 127 | } |
| 128 | |
| 129 | /* Mark "edge" as a local edge. |
| 130 | */ |
| 131 | static void set_local(struct isl_sched_edge *edge) |
| 132 | { |
| 133 | set_type(edge, type: isl_edge_local); |
| 134 | } |
| 135 | |
| 136 | /* No longer mark "edge" as a local edge. |
| 137 | */ |
| 138 | static void clear_local(struct isl_sched_edge *edge) |
| 139 | { |
| 140 | clear_type(edge, type: isl_edge_local); |
| 141 | } |
| 142 | |
| 143 | /* Is "edge" marked as a coincidence edge? |
| 144 | */ |
| 145 | static int is_coincidence(struct isl_sched_edge *edge) |
| 146 | { |
| 147 | return isl_sched_edge_has_type(edge, type: isl_edge_coincidence); |
| 148 | } |
| 149 | |
| 150 | /* Is "edge" marked as a condition edge? |
| 151 | */ |
| 152 | int isl_sched_edge_is_condition(struct isl_sched_edge *edge) |
| 153 | { |
| 154 | return isl_sched_edge_has_type(edge, type: isl_edge_condition); |
| 155 | } |
| 156 | |
| 157 | /* Is "edge" marked as a conditional validity edge? |
| 158 | */ |
| 159 | int isl_sched_edge_is_conditional_validity(struct isl_sched_edge *edge) |
| 160 | { |
| 161 | return isl_sched_edge_has_type(edge, type: isl_edge_conditional_validity); |
| 162 | } |
| 163 | |
/* May several edges of the type(s) of "edge" connect
 * the same pair of nodes?
 *
 * This is the case for condition edges and conditional validity edges:
 * their dependence relations may be tagged, in which case
 * a separate edge is added for each pair of tags.
 */
static int is_multi_edge_type(struct isl_sched_edge *edge)
{
	if (isl_sched_edge_is_condition(edge))
		return 1;

	return isl_sched_edge_is_conditional_validity(edge) != 0;
}
| 176 | |
| 177 | /* Initialize node_table based on the list of nodes. |
| 178 | */ |
| 179 | static int graph_init_table(isl_ctx *ctx, struct isl_sched_graph *graph) |
| 180 | { |
| 181 | int i; |
| 182 | |
| 183 | graph->node_table = isl_hash_table_alloc(ctx, min_size: graph->n); |
| 184 | if (!graph->node_table) |
| 185 | return -1; |
| 186 | |
| 187 | for (i = 0; i < graph->n; ++i) { |
| 188 | struct isl_hash_table_entry *entry; |
| 189 | uint32_t hash; |
| 190 | |
| 191 | hash = isl_space_get_tuple_hash(space: graph->node[i].space); |
| 192 | entry = isl_hash_table_find(ctx, table: graph->node_table, key_hash: hash, |
| 193 | eq: &node_has_tuples, |
| 194 | val: graph->node[i].space, reserve: 1); |
| 195 | if (!entry) |
| 196 | return -1; |
| 197 | entry->data = &graph->node[i]; |
| 198 | } |
| 199 | |
| 200 | return 0; |
| 201 | } |
| 202 | |
| 203 | /* Return a pointer to the node that lives within the given space, |
| 204 | * an invalid node if there is no such node, or NULL in case of error. |
| 205 | */ |
| 206 | struct isl_sched_node *isl_sched_graph_find_node(isl_ctx *ctx, |
| 207 | struct isl_sched_graph *graph, __isl_keep isl_space *space) |
| 208 | { |
| 209 | struct isl_hash_table_entry *entry; |
| 210 | uint32_t hash; |
| 211 | |
| 212 | if (!space) |
| 213 | return NULL; |
| 214 | |
| 215 | hash = isl_space_get_tuple_hash(space); |
| 216 | entry = isl_hash_table_find(ctx, table: graph->node_table, key_hash: hash, |
| 217 | eq: &node_has_tuples, val: space, reserve: 0); |
| 218 | if (!entry) |
| 219 | return NULL; |
| 220 | if (entry == isl_hash_table_entry_none) |
| 221 | return graph->node + graph->n; |
| 222 | |
| 223 | return entry->data; |
| 224 | } |
| 225 | |
| 226 | /* Is "node" a node in "graph"? |
| 227 | */ |
| 228 | int isl_sched_graph_is_node(struct isl_sched_graph *graph, |
| 229 | struct isl_sched_node *node) |
| 230 | { |
| 231 | return node && node >= &graph->node[0] && node < &graph->node[graph->n]; |
| 232 | } |
| 233 | |
| 234 | static isl_bool edge_has_src_and_dst(const void *entry, const void *val) |
| 235 | { |
| 236 | const struct isl_sched_edge *edge = entry; |
| 237 | const struct isl_sched_edge *temp = val; |
| 238 | |
| 239 | return isl_bool_ok(b: edge->src == temp->src && edge->dst == temp->dst); |
| 240 | } |
| 241 | |
| 242 | /* Add the given edge to graph->edge_table[type]. |
| 243 | */ |
| 244 | static isl_stat graph_edge_table_add(isl_ctx *ctx, |
| 245 | struct isl_sched_graph *graph, enum isl_edge_type type, |
| 246 | struct isl_sched_edge *edge) |
| 247 | { |
| 248 | struct isl_hash_table_entry *entry; |
| 249 | uint32_t hash; |
| 250 | |
| 251 | hash = isl_hash_init(); |
| 252 | hash = isl_hash_builtin(hash, edge->src); |
| 253 | hash = isl_hash_builtin(hash, edge->dst); |
| 254 | entry = isl_hash_table_find(ctx, table: graph->edge_table[type], key_hash: hash, |
| 255 | eq: &edge_has_src_and_dst, val: edge, reserve: 1); |
| 256 | if (!entry) |
| 257 | return isl_stat_error; |
| 258 | entry->data = edge; |
| 259 | |
| 260 | return isl_stat_ok; |
| 261 | } |
| 262 | |
| 263 | /* Add "edge" to all relevant edge tables. |
| 264 | * That is, for every type of the edge, add it to the corresponding table. |
| 265 | */ |
| 266 | static isl_stat graph_edge_tables_add(isl_ctx *ctx, |
| 267 | struct isl_sched_graph *graph, struct isl_sched_edge *edge) |
| 268 | { |
| 269 | enum isl_edge_type t; |
| 270 | |
| 271 | for (t = isl_edge_first; t <= isl_edge_last; ++t) { |
| 272 | if (!isl_sched_edge_has_type(edge, type: t)) |
| 273 | continue; |
| 274 | if (graph_edge_table_add(ctx, graph, type: t, edge) < 0) |
| 275 | return isl_stat_error; |
| 276 | } |
| 277 | |
| 278 | return isl_stat_ok; |
| 279 | } |
| 280 | |
| 281 | /* Allocate the edge_tables based on the maximal number of edges of |
| 282 | * each type. |
| 283 | */ |
| 284 | static int graph_init_edge_tables(isl_ctx *ctx, struct isl_sched_graph *graph) |
| 285 | { |
| 286 | int i; |
| 287 | |
| 288 | for (i = 0; i <= isl_edge_last; ++i) { |
| 289 | graph->edge_table[i] = isl_hash_table_alloc(ctx, |
| 290 | min_size: graph->max_edge[i]); |
| 291 | if (!graph->edge_table[i]) |
| 292 | return -1; |
| 293 | } |
| 294 | |
| 295 | return 0; |
| 296 | } |
| 297 | |
| 298 | /* If graph->edge_table[type] contains an edge from the given source |
| 299 | * to the given destination, then return the hash table entry of this edge. |
| 300 | * Otherwise, return NULL. |
| 301 | */ |
| 302 | static struct isl_hash_table_entry *graph_find_edge_entry( |
| 303 | struct isl_sched_graph *graph, |
| 304 | enum isl_edge_type type, |
| 305 | struct isl_sched_node *src, struct isl_sched_node *dst) |
| 306 | { |
| 307 | isl_ctx *ctx = isl_space_get_ctx(space: src->space); |
| 308 | uint32_t hash; |
| 309 | struct isl_sched_edge temp = { .src = src, .dst = dst }; |
| 310 | |
| 311 | hash = isl_hash_init(); |
| 312 | hash = isl_hash_builtin(hash, temp.src); |
| 313 | hash = isl_hash_builtin(hash, temp.dst); |
| 314 | return isl_hash_table_find(ctx, table: graph->edge_table[type], key_hash: hash, |
| 315 | eq: &edge_has_src_and_dst, val: &temp, reserve: 0); |
| 316 | } |
| 317 | |
| 318 | |
| 319 | /* If graph->edge_table[type] contains an edge from the given source |
| 320 | * to the given destination, then return this edge. |
| 321 | * Return "none" if no such edge can be found. |
| 322 | * Return NULL on error. |
| 323 | */ |
| 324 | static struct isl_sched_edge *graph_find_edge(struct isl_sched_graph *graph, |
| 325 | enum isl_edge_type type, |
| 326 | struct isl_sched_node *src, struct isl_sched_node *dst, |
| 327 | struct isl_sched_edge *none) |
| 328 | { |
| 329 | struct isl_hash_table_entry *entry; |
| 330 | |
| 331 | entry = graph_find_edge_entry(graph, type, src, dst); |
| 332 | if (!entry) |
| 333 | return NULL; |
| 334 | if (entry == isl_hash_table_entry_none) |
| 335 | return none; |
| 336 | |
| 337 | return entry->data; |
| 338 | } |
| 339 | |
| 340 | /* Check whether the dependence graph has an edge of the given type |
| 341 | * between the given two nodes. |
| 342 | */ |
| 343 | static isl_bool graph_has_edge(struct isl_sched_graph *graph, |
| 344 | enum isl_edge_type type, |
| 345 | struct isl_sched_node *src, struct isl_sched_node *dst) |
| 346 | { |
| 347 | struct isl_sched_edge dummy; |
| 348 | struct isl_sched_edge *edge; |
| 349 | isl_bool empty; |
| 350 | |
| 351 | edge = graph_find_edge(graph, type, src, dst, none: &dummy); |
| 352 | if (!edge) |
| 353 | return isl_bool_error; |
| 354 | if (edge == &dummy) |
| 355 | return isl_bool_false; |
| 356 | |
| 357 | empty = isl_map_plain_is_empty(map: edge->map); |
| 358 | |
| 359 | return isl_bool_not(b: empty); |
| 360 | } |
| 361 | |
| 362 | /* Look for any edge with the same src, dst and map fields as "model". |
| 363 | * |
| 364 | * Return the matching edge if one can be found. |
| 365 | * Return "model" if no matching edge is found. |
| 366 | * Return NULL on error. |
| 367 | */ |
| 368 | static struct isl_sched_edge *graph_find_matching_edge( |
| 369 | struct isl_sched_graph *graph, struct isl_sched_edge *model) |
| 370 | { |
| 371 | enum isl_edge_type i; |
| 372 | struct isl_sched_edge *edge; |
| 373 | |
| 374 | for (i = isl_edge_first; i <= isl_edge_last; ++i) { |
| 375 | int is_equal; |
| 376 | |
| 377 | edge = graph_find_edge(graph, type: i, src: model->src, dst: model->dst, none: model); |
| 378 | if (!edge) |
| 379 | return NULL; |
| 380 | if (edge == model) |
| 381 | continue; |
| 382 | is_equal = isl_map_plain_is_equal(map1: model->map, map2: edge->map); |
| 383 | if (is_equal < 0) |
| 384 | return NULL; |
| 385 | if (is_equal) |
| 386 | return edge; |
| 387 | } |
| 388 | |
| 389 | return model; |
| 390 | } |
| 391 | |
| 392 | /* Remove the given edge from all the edge_tables that refer to it. |
| 393 | */ |
| 394 | static isl_stat graph_remove_edge(struct isl_sched_graph *graph, |
| 395 | struct isl_sched_edge *edge) |
| 396 | { |
| 397 | isl_ctx *ctx = isl_map_get_ctx(map: edge->map); |
| 398 | enum isl_edge_type i; |
| 399 | |
| 400 | for (i = isl_edge_first; i <= isl_edge_last; ++i) { |
| 401 | struct isl_hash_table_entry *entry; |
| 402 | |
| 403 | entry = graph_find_edge_entry(graph, type: i, src: edge->src, dst: edge->dst); |
| 404 | if (!entry) |
| 405 | return isl_stat_error; |
| 406 | if (entry == isl_hash_table_entry_none) |
| 407 | continue; |
| 408 | if (entry->data != edge) |
| 409 | continue; |
| 410 | isl_hash_table_remove(ctx, table: graph->edge_table[i], entry); |
| 411 | } |
| 412 | |
| 413 | return isl_stat_ok; |
| 414 | } |
| 415 | |
| 416 | /* Check whether the dependence graph has any edge |
| 417 | * between the given two nodes. |
| 418 | */ |
| 419 | static isl_bool graph_has_any_edge(struct isl_sched_graph *graph, |
| 420 | struct isl_sched_node *src, struct isl_sched_node *dst) |
| 421 | { |
| 422 | enum isl_edge_type i; |
| 423 | isl_bool r; |
| 424 | |
| 425 | for (i = isl_edge_first; i <= isl_edge_last; ++i) { |
| 426 | r = graph_has_edge(graph, type: i, src, dst); |
| 427 | if (r < 0 || r) |
| 428 | return r; |
| 429 | } |
| 430 | |
| 431 | return r; |
| 432 | } |
| 433 | |
| 434 | /* Check whether the dependence graph has a validity edge |
| 435 | * between the given two nodes. |
| 436 | * |
| 437 | * Conditional validity edges are essentially validity edges that |
| 438 | * can be ignored if the corresponding condition edges are iteration private. |
| 439 | * Here, we are only checking for the presence of validity |
| 440 | * edges, so we need to consider the conditional validity edges too. |
| 441 | * In particular, this function is used during the detection |
| 442 | * of strongly connected components and we cannot ignore |
| 443 | * conditional validity edges during this detection. |
| 444 | */ |
| 445 | isl_bool isl_sched_graph_has_validity_edge(struct isl_sched_graph *graph, |
| 446 | struct isl_sched_node *src, struct isl_sched_node *dst) |
| 447 | { |
| 448 | isl_bool r; |
| 449 | |
| 450 | r = graph_has_edge(graph, type: isl_edge_validity, src, dst); |
| 451 | if (r < 0 || r) |
| 452 | return r; |
| 453 | |
| 454 | return graph_has_edge(graph, type: isl_edge_conditional_validity, src, dst); |
| 455 | } |
| 456 | |
| 457 | /* Perform all the required memory allocations for a schedule graph "graph" |
| 458 | * with "n_node" nodes and "n_edge" edge and initialize the corresponding |
| 459 | * fields. |
| 460 | */ |
| 461 | static isl_stat graph_alloc(isl_ctx *ctx, struct isl_sched_graph *graph, |
| 462 | int n_node, int n_edge) |
| 463 | { |
| 464 | int i; |
| 465 | |
| 466 | graph->n = n_node; |
| 467 | graph->n_edge = n_edge; |
| 468 | graph->node = isl_calloc_array(ctx, struct isl_sched_node, graph->n); |
| 469 | graph->sorted = isl_calloc_array(ctx, int, graph->n); |
| 470 | graph->region = isl_alloc_array(ctx, |
| 471 | struct isl_trivial_region, graph->n); |
| 472 | graph->edge = isl_calloc_array(ctx, |
| 473 | struct isl_sched_edge, graph->n_edge); |
| 474 | |
| 475 | graph->intra_hmap = isl_map_to_basic_set_alloc(ctx, min_size: 2 * n_edge); |
| 476 | graph->intra_hmap_param = isl_map_to_basic_set_alloc(ctx, min_size: 2 * n_edge); |
| 477 | graph->inter_hmap = isl_map_to_basic_set_alloc(ctx, min_size: 2 * n_edge); |
| 478 | |
| 479 | if (!graph->node || !graph->region || (graph->n_edge && !graph->edge) || |
| 480 | !graph->sorted) |
| 481 | return isl_stat_error; |
| 482 | |
| 483 | for(i = 0; i < graph->n; ++i) |
| 484 | graph->sorted[i] = i; |
| 485 | |
| 486 | return isl_stat_ok; |
| 487 | } |
| 488 | |
| 489 | /* Free the memory associated to node "node" in "graph". |
| 490 | * The "coincident" field is shared by nodes in a graph and its subgraph. |
| 491 | * It therefore only needs to be freed for the original dependence graph, |
| 492 | * i.e., one that is not the result of splitting. |
| 493 | */ |
| 494 | static void clear_node(struct isl_sched_graph *graph, |
| 495 | struct isl_sched_node *node) |
| 496 | { |
| 497 | isl_space_free(space: node->space); |
| 498 | isl_set_free(set: node->hull); |
| 499 | isl_multi_aff_free(multi: node->compress); |
| 500 | isl_pw_multi_aff_free(pma: node->decompress); |
| 501 | isl_mat_free(mat: node->sched); |
| 502 | isl_map_free(map: node->sched_map); |
| 503 | isl_mat_free(mat: node->indep); |
| 504 | isl_mat_free(mat: node->vmap); |
| 505 | if (graph->root == graph) |
| 506 | free(ptr: node->coincident); |
| 507 | isl_multi_val_free(multi: node->sizes); |
| 508 | isl_basic_set_free(bset: node->bounds); |
| 509 | isl_vec_free(vec: node->max); |
| 510 | } |
| 511 | |
| 512 | void isl_sched_graph_free(isl_ctx *ctx, struct isl_sched_graph *graph) |
| 513 | { |
| 514 | int i; |
| 515 | |
| 516 | isl_map_to_basic_set_free(hmap: graph->intra_hmap); |
| 517 | isl_map_to_basic_set_free(hmap: graph->intra_hmap_param); |
| 518 | isl_map_to_basic_set_free(hmap: graph->inter_hmap); |
| 519 | |
| 520 | if (graph->node) |
| 521 | for (i = 0; i < graph->n; ++i) |
| 522 | clear_node(graph, node: &graph->node[i]); |
| 523 | free(ptr: graph->node); |
| 524 | free(ptr: graph->sorted); |
| 525 | if (graph->edge) |
| 526 | for (i = 0; i < graph->n_edge; ++i) { |
| 527 | isl_map_free(map: graph->edge[i].map); |
| 528 | isl_union_map_free(umap: graph->edge[i].tagged_condition); |
| 529 | isl_union_map_free(umap: graph->edge[i].tagged_validity); |
| 530 | } |
| 531 | free(ptr: graph->edge); |
| 532 | free(ptr: graph->region); |
| 533 | for (i = 0; i <= isl_edge_last; ++i) |
| 534 | isl_hash_table_free(ctx, table: graph->edge_table[i]); |
| 535 | isl_hash_table_free(ctx, table: graph->node_table); |
| 536 | isl_basic_set_free(bset: graph->lp); |
| 537 | } |
| 538 | |
| 539 | /* For each "set" on which this function is called, increment |
| 540 | * graph->n by one and update graph->maxvar. |
| 541 | */ |
| 542 | static isl_stat init_n_maxvar(__isl_take isl_set *set, void *user) |
| 543 | { |
| 544 | struct isl_sched_graph *graph = user; |
| 545 | isl_size nvar = isl_set_dim(set, type: isl_dim_set); |
| 546 | |
| 547 | graph->n++; |
| 548 | if (nvar > graph->maxvar) |
| 549 | graph->maxvar = nvar; |
| 550 | |
| 551 | isl_set_free(set); |
| 552 | |
| 553 | if (nvar < 0) |
| 554 | return isl_stat_error; |
| 555 | return isl_stat_ok; |
| 556 | } |
| 557 | |
| 558 | /* Compute the number of rows that should be allocated for the schedule. |
| 559 | * In particular, we need one row for each variable or one row |
| 560 | * for each basic map in the dependences. |
| 561 | * Note that it is practically impossible to exhaust both |
| 562 | * the number of dependences and the number of variables. |
| 563 | */ |
| 564 | static isl_stat compute_max_row(struct isl_sched_graph *graph, |
| 565 | __isl_keep isl_schedule_constraints *sc) |
| 566 | { |
| 567 | int n_edge; |
| 568 | isl_stat r; |
| 569 | isl_union_set *domain; |
| 570 | |
| 571 | graph->n = 0; |
| 572 | graph->maxvar = 0; |
| 573 | domain = isl_schedule_constraints_get_domain(sc); |
| 574 | r = isl_union_set_foreach_set(uset: domain, fn: &init_n_maxvar, user: graph); |
| 575 | isl_union_set_free(uset: domain); |
| 576 | if (r < 0) |
| 577 | return isl_stat_error; |
| 578 | n_edge = isl_schedule_constraints_n_basic_map(sc); |
| 579 | if (n_edge < 0) |
| 580 | return isl_stat_error; |
| 581 | graph->max_row = n_edge + graph->maxvar; |
| 582 | |
| 583 | return isl_stat_ok; |
| 584 | } |
| 585 | |
| 586 | /* Does "bset" have any defining equalities for its set variables? |
| 587 | */ |
| 588 | static isl_bool has_any_defining_equality(__isl_keep isl_basic_set *bset) |
| 589 | { |
| 590 | int i; |
| 591 | isl_size n; |
| 592 | |
| 593 | n = isl_basic_set_dim(bset, type: isl_dim_set); |
| 594 | if (n < 0) |
| 595 | return isl_bool_error; |
| 596 | |
| 597 | for (i = 0; i < n; ++i) { |
| 598 | isl_bool has; |
| 599 | |
| 600 | has = isl_basic_set_has_defining_equality(bset, type: isl_dim_set, pos: i, |
| 601 | NULL); |
| 602 | if (has < 0 || has) |
| 603 | return has; |
| 604 | } |
| 605 | |
| 606 | return isl_bool_false; |
| 607 | } |
| 608 | |
| 609 | /* Set the entries of node->max to the value of the schedule_max_coefficient |
| 610 | * option, if set. |
| 611 | */ |
| 612 | static isl_stat set_max_coefficient(isl_ctx *ctx, struct isl_sched_node *node) |
| 613 | { |
| 614 | int max; |
| 615 | |
| 616 | max = isl_options_get_schedule_max_coefficient(ctx); |
| 617 | if (max == -1) |
| 618 | return isl_stat_ok; |
| 619 | |
| 620 | node->max = isl_vec_alloc(ctx, size: node->nvar); |
| 621 | node->max = isl_vec_set_si(vec: node->max, v: max); |
| 622 | if (!node->max) |
| 623 | return isl_stat_error; |
| 624 | |
| 625 | return isl_stat_ok; |
| 626 | } |
| 627 | |
| 628 | /* Set the entries of node->max to the minimum of the schedule_max_coefficient |
| 629 | * option (if set) and half of the minimum of the sizes in the other |
| 630 | * dimensions. Round up when computing the half such that |
| 631 | * if the minimum of the sizes is one, half of the size is taken to be one |
| 632 | * rather than zero. |
| 633 | * If the global minimum is unbounded (i.e., if both |
| 634 | * the schedule_max_coefficient is not set and the sizes in the other |
| 635 | * dimensions are unbounded), then store a negative value. |
| 636 | * If the schedule coefficient is close to the size of the instance set |
| 637 | * in another dimension, then the schedule may represent a loop |
| 638 | * coalescing transformation (especially if the coefficient |
| 639 | * in that other dimension is one). Forcing the coefficient to be |
| 640 | * smaller than or equal to half the minimal size should avoid this |
| 641 | * situation. |
| 642 | */ |
| 643 | static isl_stat compute_max_coefficient(isl_ctx *ctx, |
| 644 | struct isl_sched_node *node) |
| 645 | { |
| 646 | int max; |
| 647 | int i, j; |
| 648 | isl_vec *v; |
| 649 | |
| 650 | max = isl_options_get_schedule_max_coefficient(ctx); |
| 651 | v = isl_vec_alloc(ctx, size: node->nvar); |
| 652 | if (!v) |
| 653 | return isl_stat_error; |
| 654 | |
| 655 | for (i = 0; i < node->nvar; ++i) { |
| 656 | isl_int_set_si(v->el[i], max); |
| 657 | isl_int_mul_si(v->el[i], v->el[i], 2); |
| 658 | } |
| 659 | |
| 660 | for (i = 0; i < node->nvar; ++i) { |
| 661 | isl_val *size; |
| 662 | |
| 663 | size = isl_multi_val_get_val(multi: node->sizes, pos: i); |
| 664 | if (!size) |
| 665 | goto error; |
| 666 | if (!isl_val_is_int(v: size)) { |
| 667 | isl_val_free(v: size); |
| 668 | continue; |
| 669 | } |
| 670 | for (j = 0; j < node->nvar; ++j) { |
| 671 | if (j == i) |
| 672 | continue; |
| 673 | if (isl_int_is_neg(v->el[j]) || |
| 674 | isl_int_gt(v->el[j], size->n)) |
| 675 | isl_int_set(v->el[j], size->n); |
| 676 | } |
| 677 | isl_val_free(v: size); |
| 678 | } |
| 679 | |
| 680 | for (i = 0; i < node->nvar; ++i) |
| 681 | isl_int_cdiv_q_ui(v->el[i], v->el[i], 2); |
| 682 | |
| 683 | node->max = v; |
| 684 | return isl_stat_ok; |
| 685 | error: |
| 686 | isl_vec_free(vec: v); |
| 687 | return isl_stat_error; |
| 688 | } |
| 689 | |
| 690 | /* Construct an identifier for node "node", which will represent "set". |
| 691 | * The name of the identifier is either "compressed" or |
| 692 | * "compressed_<name>", with <name> the name of the space of "set". |
| 693 | * The user pointer of the identifier points to "node". |
| 694 | */ |
| 695 | static __isl_give isl_id *construct_compressed_id(__isl_keep isl_set *set, |
| 696 | struct isl_sched_node *node) |
| 697 | { |
| 698 | isl_bool has_name; |
| 699 | isl_ctx *ctx; |
| 700 | isl_id *id; |
| 701 | isl_printer *p; |
| 702 | const char *name; |
| 703 | char *id_name; |
| 704 | |
| 705 | has_name = isl_set_has_tuple_name(set); |
| 706 | if (has_name < 0) |
| 707 | return NULL; |
| 708 | |
| 709 | ctx = isl_set_get_ctx(set); |
| 710 | if (!has_name) |
| 711 | return isl_id_alloc(ctx, name: "compressed" , user: node); |
| 712 | |
| 713 | p = isl_printer_to_str(ctx); |
| 714 | name = isl_set_get_tuple_name(set); |
| 715 | p = isl_printer_print_str(p, s: "compressed_" ); |
| 716 | p = isl_printer_print_str(p, s: name); |
| 717 | id_name = isl_printer_get_str(printer: p); |
| 718 | isl_printer_free(printer: p); |
| 719 | |
| 720 | id = isl_id_alloc(ctx, name: id_name, user: node); |
| 721 | free(ptr: id_name); |
| 722 | |
| 723 | return id; |
| 724 | } |
| 725 | |
| 726 | /* Construct a map that isolates the variable in position "pos" in "set". |
| 727 | * |
| 728 | * That is, construct |
| 729 | * |
| 730 | * [i_0, ..., i_pos-1, i_pos+1, ...] -> [i_pos] |
| 731 | */ |
| 732 | static __isl_give isl_map *isolate(__isl_take isl_set *set, int pos) |
| 733 | { |
| 734 | isl_map *map; |
| 735 | |
| 736 | map = isl_set_project_onto_map(set, type: isl_dim_set, first: pos, n: 1); |
| 737 | map = isl_map_project_out(map, type: isl_dim_in, first: pos, n: 1); |
| 738 | return map; |
| 739 | } |
| 740 | |
| 741 | /* Compute and return the size of "set" in dimension "dim". |
| 742 | * The size is taken to be the difference in values for that variable |
| 743 | * for fixed values of the other variables. |
| 744 | * This assumes that "set" is convex. |
| 745 | * In particular, the variable is first isolated from the other variables |
| 746 | * in the range of a map |
| 747 | * |
| 748 | * [i_0, ..., i_dim-1, i_dim+1, ...] -> [i_dim] |
| 749 | * |
| 750 | * and then duplicated |
| 751 | * |
| 752 | * [i_0, ..., i_dim-1, i_dim+1, ...] -> [[i_dim] -> [i_dim']] |
| 753 | * |
| 754 | * The shared variables are then projected out and the maximal value |
| 755 | * of i_dim' - i_dim is computed. |
| 756 | */ |
| 757 | static __isl_give isl_val *compute_size(__isl_take isl_set *set, int dim) |
| 758 | { |
| 759 | isl_map *map; |
| 760 | isl_local_space *ls; |
| 761 | isl_aff *obj; |
| 762 | isl_val *v; |
| 763 | |
| 764 | map = isolate(set, pos: dim); |
| 765 | map = isl_map_range_product(map1: map, map2: isl_map_copy(map)); |
| 766 | map = isl_set_unwrap(set: isl_map_range(map)); |
| 767 | set = isl_map_deltas(map); |
| 768 | ls = isl_local_space_from_space(space: isl_set_get_space(set)); |
| 769 | obj = isl_aff_var_on_domain(ls, type: isl_dim_set, pos: 0); |
| 770 | v = isl_set_max_val(set, obj); |
| 771 | isl_aff_free(aff: obj); |
| 772 | isl_set_free(set); |
| 773 | |
| 774 | return v; |
| 775 | } |
| 776 | |
| 777 | /* Perform a compression on "node" where "hull" represents the constraints |
| 778 | * that were used to derive the compression, while "compress" and |
| 779 | * "decompress" map the original space to the compressed space and |
| 780 | * vice versa. |
| 781 | * |
| 782 | * If "node" was not compressed already, then simply store |
| 783 | * the compression information. |
| 784 | * Otherwise the "original" space is actually the result |
| 785 | * of a previous compression, which is then combined |
| 786 | * with the present compression. |
| 787 | * |
| 788 | * The dimensionality of the compressed domain is also adjusted. |
| 789 | * Other information, such as the sizes and the maximal coefficient values, |
| 790 | * has not been computed yet and therefore does not need to be adjusted. |
| 791 | */ |
| 792 | static isl_stat compress_node(struct isl_sched_node *node, |
| 793 | __isl_take isl_set *hull, __isl_take isl_multi_aff *compress, |
| 794 | __isl_take isl_pw_multi_aff *decompress) |
| 795 | { |
| 796 | node->nvar = isl_multi_aff_dim(multi: compress, type: isl_dim_out); |
| 797 | if (!node->compressed) { |
| 798 | node->compressed = 1; |
| 799 | node->hull = hull; |
| 800 | node->compress = compress; |
| 801 | node->decompress = decompress; |
| 802 | } else { |
| 803 | hull = isl_set_preimage_multi_aff(set: hull, |
| 804 | ma: isl_multi_aff_copy(multi: node->compress)); |
| 805 | node->hull = isl_set_intersect(set1: node->hull, set2: hull); |
| 806 | node->compress = isl_multi_aff_pullback_multi_aff( |
| 807 | ma1: compress, ma2: node->compress); |
| 808 | node->decompress = isl_pw_multi_aff_pullback_pw_multi_aff( |
| 809 | pma1: node->decompress, pma2: decompress); |
| 810 | } |
| 811 | |
| 812 | if (!node->hull || !node->compress || !node->decompress) |
| 813 | return isl_stat_error; |
| 814 | |
| 815 | return isl_stat_ok; |
| 816 | } |
| 817 | |
| 818 | /* Given that dimension "pos" in "set" has a fixed value |
| 819 | * in terms of the other dimensions, (further) compress "node" |
| 820 | * by projecting out this dimension. |
| 821 | * "set" may be the result of a previous compression. |
| 822 | * "uncompressed" is the original domain (without compression). |
| 823 | * |
| 824 | * The compression function simply projects out the dimension. |
| 825 | * The decompression function adds back the dimension |
| 826 | * in the right position as an expression of the other dimensions |
| 827 | * derived from "set". |
| 828 | * As in extract_node, the compressed space has an identifier |
| 829 | * that references "node" such that each compressed space is unique and |
| 830 | * such that the node can be recovered from the compressed space. |
| 831 | * |
| 832 | * The constraint removed through the compression is added to the "hull" |
| 833 | * such that only edges that relate to the original domains |
| 834 | * are taken into account. |
| 835 | * In particular, it is obtained by composing compression and decompression and |
| 836 | * taking the relation among the variables in the range. |
| 837 | */ |
| 838 | static isl_stat project_out_fixed(struct isl_sched_node *node, |
| 839 | __isl_keep isl_set *uncompressed, __isl_take isl_set *set, int pos) |
| 840 | { |
| 841 | isl_id *id; |
| 842 | isl_space *space; |
| 843 | isl_set *domain; |
| 844 | isl_map *map; |
| 845 | isl_multi_aff *compress; |
| 846 | isl_pw_multi_aff *decompress, *pma; |
| 847 | isl_multi_pw_aff *mpa; |
| 848 | isl_set *hull; |
| 849 | |
| 850 | map = isolate(set: isl_set_copy(set), pos); |
| 851 | pma = isl_pw_multi_aff_from_map(map); |
| 852 | domain = isl_pw_multi_aff_domain(pma: isl_pw_multi_aff_copy(pma)); |
| 853 | pma = isl_pw_multi_aff_gist(pma, set: domain); |
| 854 | space = isl_pw_multi_aff_get_domain_space(pma); |
| 855 | mpa = isl_multi_pw_aff_identity(space: isl_space_map_from_set(space)); |
| 856 | mpa = isl_multi_pw_aff_range_splice(multi1: mpa, pos, |
| 857 | multi2: isl_multi_pw_aff_from_pw_multi_aff(pma)); |
| 858 | decompress = isl_pw_multi_aff_from_multi_pw_aff(mpa); |
| 859 | space = isl_set_get_space(set); |
| 860 | compress = isl_multi_aff_project_out_map(space, type: isl_dim_set, first: pos, n: 1); |
| 861 | id = construct_compressed_id(set: uncompressed, node); |
| 862 | compress = isl_multi_aff_set_tuple_id(multi: compress, type: isl_dim_out, id); |
| 863 | space = isl_space_reverse(space: isl_multi_aff_get_space(multi: compress)); |
| 864 | decompress = isl_pw_multi_aff_reset_space(pwmaff: decompress, space); |
| 865 | pma = isl_pw_multi_aff_pullback_multi_aff( |
| 866 | pma: isl_pw_multi_aff_copy(pma: decompress), ma: isl_multi_aff_copy(multi: compress)); |
| 867 | hull = isl_map_range(map: isl_map_from_pw_multi_aff(pma)); |
| 868 | |
| 869 | isl_set_free(set); |
| 870 | |
| 871 | return compress_node(node, hull, compress, decompress); |
| 872 | } |
| 873 | |
| 874 | /* Compute the size of the compressed domain in each dimension and |
| 875 | * store the results in node->sizes. |
| 876 | * "uncompressed" is the original domain (without compression). |
| 877 | * |
| 878 | * First compress the domain if needed and then compute the size |
| 879 | * in each direction. |
| 880 | * If the domain is not convex, then the sizes are computed |
| 881 | * on a convex superset in order to avoid picking up sizes |
| 882 | * that are valid for the individual disjuncts, but not for |
| 883 | * the domain as a whole. |
| 884 | * |
| 885 | * If any of the sizes turns out to be zero, then this means |
| 886 | * that this dimension has a fixed value in terms of |
| 887 | * the other dimensions. Perform an (extra) compression |
| 888 | * to remove this dimension. |
| 889 | */ |
| 890 | static isl_stat compute_sizes(struct isl_sched_node *node, |
| 891 | __isl_keep isl_set *uncompressed) |
| 892 | { |
| 893 | int j; |
| 894 | isl_size n; |
| 895 | isl_multi_val *mv; |
| 896 | isl_set *set = isl_set_copy(set: uncompressed); |
| 897 | |
| 898 | if (node->compressed) |
| 899 | set = isl_set_preimage_pw_multi_aff(set, |
| 900 | pma: isl_pw_multi_aff_copy(pma: node->decompress)); |
| 901 | set = isl_set_from_basic_set(bset: isl_set_simple_hull(set)); |
| 902 | mv = isl_multi_val_zero(space: isl_set_get_space(set)); |
| 903 | n = isl_set_dim(set, type: isl_dim_set); |
| 904 | if (n < 0) |
| 905 | mv = isl_multi_val_free(multi: mv); |
| 906 | for (j = 0; j < n; ++j) { |
| 907 | isl_bool is_zero; |
| 908 | isl_val *v; |
| 909 | |
| 910 | v = compute_size(set: isl_set_copy(set), dim: j); |
| 911 | is_zero = isl_val_is_zero(v); |
| 912 | mv = isl_multi_val_set_val(multi: mv, pos: j, el: v); |
| 913 | if (is_zero >= 0 && is_zero) { |
| 914 | isl_multi_val_free(multi: mv); |
| 915 | if (project_out_fixed(node, uncompressed, set, pos: j) < 0) |
| 916 | return isl_stat_error; |
| 917 | return compute_sizes(node, uncompressed); |
| 918 | } |
| 919 | } |
| 920 | node->sizes = mv; |
| 921 | isl_set_free(set); |
| 922 | if (!node->sizes) |
| 923 | return isl_stat_error; |
| 924 | return isl_stat_ok; |
| 925 | } |
| 926 | |
| 927 | /* Compute the size of the instance set "set" of "node", after compression, |
| 928 | * as well as bounds on the corresponding coefficients, if needed. |
| 929 | * |
| 930 | * The sizes are needed when the schedule_treat_coalescing option is set. |
| 931 | * The bounds are needed when the schedule_treat_coalescing option or |
| 932 | * the schedule_max_coefficient option is set. |
| 933 | * |
| 934 | * If the schedule_treat_coalescing option is not set, then at most |
| 935 | * the bounds need to be set and this is done in set_max_coefficient. |
| 936 | * Otherwise, compute the size of the compressed domain |
| 937 | * in each direction and store the results in node->size. |
| 938 | * Finally, set the bounds on the coefficients based on the sizes |
| 939 | * and the schedule_max_coefficient option in compute_max_coefficient. |
| 940 | */ |
| 941 | static isl_stat compute_sizes_and_max(isl_ctx *ctx, struct isl_sched_node *node, |
| 942 | __isl_take isl_set *set) |
| 943 | { |
| 944 | isl_stat r; |
| 945 | |
| 946 | if (!isl_options_get_schedule_treat_coalescing(ctx)) { |
| 947 | isl_set_free(set); |
| 948 | return set_max_coefficient(ctx, node); |
| 949 | } |
| 950 | |
| 951 | r = compute_sizes(node, uncompressed: set); |
| 952 | isl_set_free(set); |
| 953 | if (r < 0) |
| 954 | return isl_stat_error; |
| 955 | return compute_max_coefficient(ctx, node); |
| 956 | } |
| 957 | |
| 958 | /* Add a new node to the graph representing the given instance set. |
| 959 | * "nvar" is the (possibly compressed) number of variables and |
| 960 | * may be smaller than then number of set variables in "set" |
| 961 | * if "compressed" is set. |
| 962 | * If "compressed" is set, then "hull" represents the constraints |
| 963 | * that were used to derive the compression, while "compress" and |
| 964 | * "decompress" map the original space to the compressed space and |
| 965 | * vice versa. |
| 966 | * If "compressed" is not set, then "hull", "compress" and "decompress" |
| 967 | * should be NULL. |
| 968 | * |
| 969 | * Compute the size of the instance set and bounds on the coefficients, |
| 970 | * if needed. |
| 971 | */ |
| 972 | static isl_stat add_node(struct isl_sched_graph *graph, |
| 973 | __isl_take isl_set *set, int nvar, int compressed, |
| 974 | __isl_take isl_set *hull, __isl_take isl_multi_aff *compress, |
| 975 | __isl_take isl_pw_multi_aff *decompress) |
| 976 | { |
| 977 | isl_size nparam; |
| 978 | isl_ctx *ctx; |
| 979 | isl_mat *sched; |
| 980 | isl_space *space; |
| 981 | int *coincident; |
| 982 | struct isl_sched_node *node; |
| 983 | |
| 984 | nparam = isl_set_dim(set, type: isl_dim_param); |
| 985 | if (nparam < 0) |
| 986 | goto error; |
| 987 | |
| 988 | ctx = isl_set_get_ctx(set); |
| 989 | if (!ctx->opt->schedule_parametric) |
| 990 | nparam = 0; |
| 991 | sched = isl_mat_alloc(ctx, n_row: 0, n_col: 1 + nparam + nvar); |
| 992 | node = &graph->node[graph->n]; |
| 993 | graph->n++; |
| 994 | space = isl_set_get_space(set); |
| 995 | node->space = space; |
| 996 | node->nvar = nvar; |
| 997 | node->nparam = nparam; |
| 998 | node->sched = sched; |
| 999 | node->sched_map = NULL; |
| 1000 | coincident = isl_calloc_array(ctx, int, graph->max_row); |
| 1001 | node->coincident = coincident; |
| 1002 | node->compressed = compressed; |
| 1003 | node->hull = hull; |
| 1004 | node->compress = compress; |
| 1005 | node->decompress = decompress; |
| 1006 | if (compute_sizes_and_max(ctx, node, set) < 0) |
| 1007 | return isl_stat_error; |
| 1008 | |
| 1009 | if (!space || !sched || (graph->max_row && !coincident)) |
| 1010 | return isl_stat_error; |
| 1011 | if (compressed && (!hull || !compress || !decompress)) |
| 1012 | return isl_stat_error; |
| 1013 | |
| 1014 | return isl_stat_ok; |
| 1015 | error: |
| 1016 | isl_set_free(set); |
| 1017 | isl_set_free(set: hull); |
| 1018 | isl_multi_aff_free(multi: compress); |
| 1019 | isl_pw_multi_aff_free(pma: decompress); |
| 1020 | return isl_stat_error; |
| 1021 | } |
| 1022 | |
| 1023 | /* Add a new node to the graph representing the given set. |
| 1024 | * |
| 1025 | * If any of the set variables is defined by an equality, then |
| 1026 | * we perform variable compression such that we can perform |
| 1027 | * the scheduling on the compressed domain. |
| 1028 | * In this case, an identifier is used that references the new node |
| 1029 | * such that each compressed space is unique and |
| 1030 | * such that the node can be recovered from the compressed space. |
| 1031 | */ |
| 1032 | static isl_stat (__isl_take isl_set *set, void *user) |
| 1033 | { |
| 1034 | isl_size nvar; |
| 1035 | isl_bool has_equality; |
| 1036 | isl_id *id; |
| 1037 | isl_basic_set *hull; |
| 1038 | isl_set *hull_set; |
| 1039 | isl_morph *morph; |
| 1040 | isl_multi_aff *compress, *decompress_ma; |
| 1041 | isl_pw_multi_aff *decompress; |
| 1042 | struct isl_sched_graph *graph = user; |
| 1043 | |
| 1044 | hull = isl_set_affine_hull(set: isl_set_copy(set)); |
| 1045 | hull = isl_basic_set_remove_divs(bset: hull); |
| 1046 | nvar = isl_set_dim(set, type: isl_dim_set); |
| 1047 | has_equality = has_any_defining_equality(bset: hull); |
| 1048 | |
| 1049 | if (nvar < 0 || has_equality < 0) |
| 1050 | goto error; |
| 1051 | if (!has_equality) { |
| 1052 | isl_basic_set_free(bset: hull); |
| 1053 | return add_node(graph, set, nvar, compressed: 0, NULL, NULL, NULL); |
| 1054 | } |
| 1055 | |
| 1056 | id = construct_compressed_id(set, node: &graph->node[graph->n]); |
| 1057 | morph = isl_basic_set_variable_compression_with_id(bset: hull, id); |
| 1058 | isl_id_free(id); |
| 1059 | nvar = isl_morph_ran_dim(morph, type: isl_dim_set); |
| 1060 | if (nvar < 0) |
| 1061 | set = isl_set_free(set); |
| 1062 | compress = isl_morph_get_var_multi_aff(morph); |
| 1063 | morph = isl_morph_inverse(morph); |
| 1064 | decompress_ma = isl_morph_get_var_multi_aff(morph); |
| 1065 | decompress = isl_pw_multi_aff_from_multi_aff(ma: decompress_ma); |
| 1066 | isl_morph_free(morph); |
| 1067 | |
| 1068 | hull_set = isl_set_from_basic_set(bset: hull); |
| 1069 | return add_node(graph, set, nvar, compressed: 1, hull: hull_set, compress, decompress); |
| 1070 | error: |
| 1071 | isl_basic_set_free(bset: hull); |
| 1072 | isl_set_free(set); |
| 1073 | return isl_stat_error; |
| 1074 | } |
| 1075 | |
| 1076 | struct { |
| 1077 | enum isl_edge_type ; |
| 1078 | struct isl_sched_graph *; |
| 1079 | }; |
| 1080 | |
| 1081 | /* Merge edge2 into edge1, freeing the contents of edge2. |
| 1082 | * Return 0 on success and -1 on failure. |
| 1083 | * |
| 1084 | * edge1 and edge2 are assumed to have the same value for the map field. |
| 1085 | */ |
| 1086 | static int merge_edge(struct isl_sched_edge *edge1, |
| 1087 | struct isl_sched_edge *edge2) |
| 1088 | { |
| 1089 | edge1->types |= edge2->types; |
| 1090 | isl_map_free(map: edge2->map); |
| 1091 | |
| 1092 | if (isl_sched_edge_is_condition(edge: edge2)) { |
| 1093 | if (!edge1->tagged_condition) |
| 1094 | edge1->tagged_condition = edge2->tagged_condition; |
| 1095 | else |
| 1096 | edge1->tagged_condition = |
| 1097 | isl_union_map_union(umap1: edge1->tagged_condition, |
| 1098 | umap2: edge2->tagged_condition); |
| 1099 | } |
| 1100 | |
| 1101 | if (isl_sched_edge_is_conditional_validity(edge: edge2)) { |
| 1102 | if (!edge1->tagged_validity) |
| 1103 | edge1->tagged_validity = edge2->tagged_validity; |
| 1104 | else |
| 1105 | edge1->tagged_validity = |
| 1106 | isl_union_map_union(umap1: edge1->tagged_validity, |
| 1107 | umap2: edge2->tagged_validity); |
| 1108 | } |
| 1109 | |
| 1110 | if (isl_sched_edge_is_condition(edge: edge2) && !edge1->tagged_condition) |
| 1111 | return -1; |
| 1112 | if (isl_sched_edge_is_conditional_validity(edge: edge2) && |
| 1113 | !edge1->tagged_validity) |
| 1114 | return -1; |
| 1115 | |
| 1116 | return 0; |
| 1117 | } |
| 1118 | |
| 1119 | /* Insert dummy tags in domain and range of "map". |
| 1120 | * |
| 1121 | * In particular, if "map" is of the form |
| 1122 | * |
| 1123 | * A -> B |
| 1124 | * |
| 1125 | * then return |
| 1126 | * |
| 1127 | * [A -> dummy_tag] -> [B -> dummy_tag] |
| 1128 | * |
| 1129 | * where the dummy_tags are identical and equal to any dummy tags |
| 1130 | * introduced by any other call to this function. |
| 1131 | */ |
| 1132 | static __isl_give isl_map *insert_dummy_tags(__isl_take isl_map *map) |
| 1133 | { |
| 1134 | static char dummy; |
| 1135 | isl_ctx *ctx; |
| 1136 | isl_id *id; |
| 1137 | isl_space *space; |
| 1138 | isl_set *domain, *range; |
| 1139 | |
| 1140 | ctx = isl_map_get_ctx(map); |
| 1141 | |
| 1142 | id = isl_id_alloc(ctx, NULL, user: &dummy); |
| 1143 | space = isl_space_params(space: isl_map_get_space(map)); |
| 1144 | space = isl_space_set_from_params(space); |
| 1145 | space = isl_space_set_tuple_id(space, type: isl_dim_set, id); |
| 1146 | space = isl_space_map_from_set(space); |
| 1147 | |
| 1148 | domain = isl_map_wrap(map); |
| 1149 | range = isl_map_wrap(map: isl_map_universe(space)); |
| 1150 | map = isl_map_from_domain_and_range(domain, range); |
| 1151 | map = isl_map_zip(map); |
| 1152 | |
| 1153 | return map; |
| 1154 | } |
| 1155 | |
| 1156 | /* Given that at least one of "src" or "dst" is compressed, return |
| 1157 | * a map between the spaces of these nodes restricted to the affine |
| 1158 | * hull that was used in the compression. |
| 1159 | */ |
| 1160 | static __isl_give isl_map *(struct isl_sched_node *src, |
| 1161 | struct isl_sched_node *dst) |
| 1162 | { |
| 1163 | isl_set *dom, *ran; |
| 1164 | |
| 1165 | if (src->compressed) |
| 1166 | dom = isl_set_copy(set: src->hull); |
| 1167 | else |
| 1168 | dom = isl_set_universe(space: isl_space_copy(space: src->space)); |
| 1169 | if (dst->compressed) |
| 1170 | ran = isl_set_copy(set: dst->hull); |
| 1171 | else |
| 1172 | ran = isl_set_universe(space: isl_space_copy(space: dst->space)); |
| 1173 | |
| 1174 | return isl_map_from_domain_and_range(domain: dom, range: ran); |
| 1175 | } |
| 1176 | |
| 1177 | /* Intersect the domains of the nested relations in domain and range |
| 1178 | * of "tagged" with "map". |
| 1179 | */ |
| 1180 | static __isl_give isl_map *map_intersect_domains(__isl_take isl_map *tagged, |
| 1181 | __isl_keep isl_map *map) |
| 1182 | { |
| 1183 | isl_set *set; |
| 1184 | |
| 1185 | tagged = isl_map_zip(map: tagged); |
| 1186 | set = isl_map_wrap(map: isl_map_copy(map)); |
| 1187 | tagged = isl_map_intersect_domain(map: tagged, set); |
| 1188 | tagged = isl_map_zip(map: tagged); |
| 1189 | return tagged; |
| 1190 | } |
| 1191 | |
| 1192 | /* Return a pointer to the node that lives in the domain space of "map", |
| 1193 | * an invalid node if there is no such node, or NULL in case of error. |
| 1194 | */ |
| 1195 | static struct isl_sched_node *find_domain_node(isl_ctx *ctx, |
| 1196 | struct isl_sched_graph *graph, __isl_keep isl_map *map) |
| 1197 | { |
| 1198 | struct isl_sched_node *node; |
| 1199 | isl_space *space; |
| 1200 | |
| 1201 | space = isl_space_domain(space: isl_map_get_space(map)); |
| 1202 | node = isl_sched_graph_find_node(ctx, graph, space); |
| 1203 | isl_space_free(space); |
| 1204 | |
| 1205 | return node; |
| 1206 | } |
| 1207 | |
| 1208 | /* Return a pointer to the node that lives in the range space of "map", |
| 1209 | * an invalid node if there is no such node, or NULL in case of error. |
| 1210 | */ |
| 1211 | static struct isl_sched_node *find_range_node(isl_ctx *ctx, |
| 1212 | struct isl_sched_graph *graph, __isl_keep isl_map *map) |
| 1213 | { |
| 1214 | struct isl_sched_node *node; |
| 1215 | isl_space *space; |
| 1216 | |
| 1217 | space = isl_space_range(space: isl_map_get_space(map)); |
| 1218 | node = isl_sched_graph_find_node(ctx, graph, space); |
| 1219 | isl_space_free(space); |
| 1220 | |
| 1221 | return node; |
| 1222 | } |
| 1223 | |
| 1224 | /* Refrain from adding a new edge based on "map". |
| 1225 | * Instead, just free the map. |
| 1226 | * "tagged" is either a copy of "map" with additional tags or NULL. |
| 1227 | */ |
| 1228 | static isl_stat skip_edge(__isl_take isl_map *map, __isl_take isl_map *tagged) |
| 1229 | { |
| 1230 | isl_map_free(map); |
| 1231 | isl_map_free(map: tagged); |
| 1232 | |
| 1233 | return isl_stat_ok; |
| 1234 | } |
| 1235 | |
| 1236 | /* Add a new edge to the graph based on the given map |
| 1237 | * and add it to data->graph->edge_table[data->type]. |
| 1238 | * If a dependence relation of a given type happens to be identical |
| 1239 | * to one of the dependence relations of a type that was added before, |
| 1240 | * then we don't create a new edge, but instead mark the original edge |
| 1241 | * as also representing a dependence of the current type. |
| 1242 | * |
| 1243 | * Edges of type isl_edge_condition or isl_edge_conditional_validity |
| 1244 | * may be specified as "tagged" dependence relations. That is, "map" |
| 1245 | * may contain elements (i -> a) -> (j -> b), where i -> j denotes |
| 1246 | * the dependence on iterations and a and b are tags. |
| 1247 | * edge->map is set to the relation containing the elements i -> j, |
| 1248 | * while edge->tagged_condition and edge->tagged_validity contain |
| 1249 | * the union of all the "map" relations |
| 1250 | * for which extract_edge is called that result in the same edge->map. |
| 1251 | * |
| 1252 | * If the source or the destination node is compressed, then |
| 1253 | * intersect both "map" and "tagged" with the constraints that |
| 1254 | * were used to construct the compression. |
| 1255 | * This ensures that there are no schedule constraints defined |
| 1256 | * outside of these domains, while the scheduler no longer has |
| 1257 | * any control over those outside parts. |
| 1258 | */ |
| 1259 | static isl_stat (__isl_take isl_map *map, void *user) |
| 1260 | { |
| 1261 | isl_bool empty; |
| 1262 | isl_ctx *ctx = isl_map_get_ctx(map); |
| 1263 | struct isl_extract_edge_data *data = user; |
| 1264 | struct isl_sched_graph *graph = data->graph; |
| 1265 | struct isl_sched_node *src, *dst; |
| 1266 | struct isl_sched_edge *edge; |
| 1267 | isl_map *tagged = NULL; |
| 1268 | |
| 1269 | if (data->type == isl_edge_condition || |
| 1270 | data->type == isl_edge_conditional_validity) { |
| 1271 | if (isl_map_can_zip(map)) { |
| 1272 | tagged = isl_map_copy(map); |
| 1273 | map = isl_set_unwrap(set: isl_map_domain(bmap: isl_map_zip(map))); |
| 1274 | } else { |
| 1275 | tagged = insert_dummy_tags(map: isl_map_copy(map)); |
| 1276 | } |
| 1277 | } |
| 1278 | |
| 1279 | src = find_domain_node(ctx, graph, map); |
| 1280 | dst = find_range_node(ctx, graph, map); |
| 1281 | |
| 1282 | if (!src || !dst) |
| 1283 | goto error; |
| 1284 | if (!isl_sched_graph_is_node(graph, node: src) || |
| 1285 | !isl_sched_graph_is_node(graph, node: dst)) |
| 1286 | return skip_edge(map, tagged); |
| 1287 | |
| 1288 | if (src->compressed || dst->compressed) { |
| 1289 | isl_map *hull; |
| 1290 | hull = extract_hull(src, dst); |
| 1291 | if (tagged) |
| 1292 | tagged = map_intersect_domains(tagged, map: hull); |
| 1293 | map = isl_map_intersect(map1: map, map2: hull); |
| 1294 | } |
| 1295 | |
| 1296 | empty = isl_map_plain_is_empty(map); |
| 1297 | if (empty < 0) |
| 1298 | goto error; |
| 1299 | if (empty) |
| 1300 | return skip_edge(map, tagged); |
| 1301 | |
| 1302 | graph->edge[graph->n_edge].src = src; |
| 1303 | graph->edge[graph->n_edge].dst = dst; |
| 1304 | graph->edge[graph->n_edge].map = map; |
| 1305 | graph->edge[graph->n_edge].types = 0; |
| 1306 | graph->edge[graph->n_edge].tagged_condition = NULL; |
| 1307 | graph->edge[graph->n_edge].tagged_validity = NULL; |
| 1308 | set_type(edge: &graph->edge[graph->n_edge], type: data->type); |
| 1309 | if (data->type == isl_edge_condition) |
| 1310 | graph->edge[graph->n_edge].tagged_condition = |
| 1311 | isl_union_map_from_map(map: tagged); |
| 1312 | if (data->type == isl_edge_conditional_validity) |
| 1313 | graph->edge[graph->n_edge].tagged_validity = |
| 1314 | isl_union_map_from_map(map: tagged); |
| 1315 | |
| 1316 | edge = graph_find_matching_edge(graph, model: &graph->edge[graph->n_edge]); |
| 1317 | if (!edge) { |
| 1318 | graph->n_edge++; |
| 1319 | return isl_stat_error; |
| 1320 | } |
| 1321 | if (edge == &graph->edge[graph->n_edge]) |
| 1322 | return graph_edge_table_add(ctx, graph, type: data->type, |
| 1323 | edge: &graph->edge[graph->n_edge++]); |
| 1324 | |
| 1325 | if (merge_edge(edge1: edge, edge2: &graph->edge[graph->n_edge]) < 0) |
| 1326 | return isl_stat_error; |
| 1327 | |
| 1328 | return graph_edge_table_add(ctx, graph, type: data->type, edge); |
| 1329 | error: |
| 1330 | isl_map_free(map); |
| 1331 | isl_map_free(map: tagged); |
| 1332 | return isl_stat_error; |
| 1333 | } |
| 1334 | |
| 1335 | /* Initialize the schedule graph "graph" from the schedule constraints "sc". |
| 1336 | * |
| 1337 | * The context is included in the domain before the nodes of |
| 1338 | * the graphs are extracted in order to be able to exploit |
| 1339 | * any possible additional equalities. |
| 1340 | * Note that this intersection is only performed locally here. |
| 1341 | */ |
| 1342 | isl_stat isl_sched_graph_init(struct isl_sched_graph *graph, |
| 1343 | __isl_keep isl_schedule_constraints *sc) |
| 1344 | { |
| 1345 | isl_ctx *ctx; |
| 1346 | isl_union_set *domain; |
| 1347 | isl_union_map *c; |
| 1348 | struct isl_extract_edge_data data; |
| 1349 | enum isl_edge_type i; |
| 1350 | isl_stat r; |
| 1351 | isl_size n; |
| 1352 | |
| 1353 | if (!sc) |
| 1354 | return isl_stat_error; |
| 1355 | |
| 1356 | ctx = isl_schedule_constraints_get_ctx(sc); |
| 1357 | |
| 1358 | domain = isl_schedule_constraints_get_domain(sc); |
| 1359 | n = isl_union_set_n_set(uset: domain); |
| 1360 | graph->n = n; |
| 1361 | isl_union_set_free(uset: domain); |
| 1362 | if (n < 0) |
| 1363 | return isl_stat_error; |
| 1364 | |
| 1365 | n = isl_schedule_constraints_n_map(sc); |
| 1366 | if (n < 0 || graph_alloc(ctx, graph, n_node: graph->n, n_edge: n) < 0) |
| 1367 | return isl_stat_error; |
| 1368 | |
| 1369 | if (compute_max_row(graph, sc) < 0) |
| 1370 | return isl_stat_error; |
| 1371 | graph->root = graph; |
| 1372 | graph->n = 0; |
| 1373 | domain = isl_schedule_constraints_get_domain(sc); |
| 1374 | domain = isl_union_set_intersect_params(uset: domain, |
| 1375 | set: isl_schedule_constraints_get_context(sc)); |
| 1376 | r = isl_union_set_foreach_set(uset: domain, fn: &extract_node, user: graph); |
| 1377 | isl_union_set_free(uset: domain); |
| 1378 | if (r < 0) |
| 1379 | return isl_stat_error; |
| 1380 | if (graph_init_table(ctx, graph) < 0) |
| 1381 | return isl_stat_error; |
| 1382 | for (i = isl_edge_first; i <= isl_edge_last; ++i) { |
| 1383 | isl_size n; |
| 1384 | |
| 1385 | c = isl_schedule_constraints_get(sc, type: i); |
| 1386 | n = isl_union_map_n_map(umap: c); |
| 1387 | graph->max_edge[i] = n; |
| 1388 | isl_union_map_free(umap: c); |
| 1389 | if (n < 0) |
| 1390 | return isl_stat_error; |
| 1391 | } |
| 1392 | if (graph_init_edge_tables(ctx, graph) < 0) |
| 1393 | return isl_stat_error; |
| 1394 | graph->n_edge = 0; |
| 1395 | data.graph = graph; |
| 1396 | for (i = isl_edge_first; i <= isl_edge_last; ++i) { |
| 1397 | isl_stat r; |
| 1398 | |
| 1399 | data.type = i; |
| 1400 | c = isl_schedule_constraints_get(sc, type: i); |
| 1401 | r = isl_union_map_foreach_map(umap: c, fn: &extract_edge, user: &data); |
| 1402 | isl_union_map_free(umap: c); |
| 1403 | if (r < 0) |
| 1404 | return isl_stat_error; |
| 1405 | } |
| 1406 | |
| 1407 | return isl_stat_ok; |
| 1408 | } |
| 1409 | |
| 1410 | /* Check whether there is any dependence from node[j] to node[i] |
| 1411 | * or from node[i] to node[j]. |
| 1412 | */ |
| 1413 | static isl_bool node_follows_weak(int i, int j, void *user) |
| 1414 | { |
| 1415 | isl_bool f; |
| 1416 | struct isl_sched_graph *graph = user; |
| 1417 | |
| 1418 | f = graph_has_any_edge(graph, src: &graph->node[j], dst: &graph->node[i]); |
| 1419 | if (f < 0 || f) |
| 1420 | return f; |
| 1421 | return graph_has_any_edge(graph, src: &graph->node[i], dst: &graph->node[j]); |
| 1422 | } |
| 1423 | |
| 1424 | /* Check whether there is a (conditional) validity dependence from node[j] |
| 1425 | * to node[i], forcing node[i] to follow node[j]. |
| 1426 | */ |
| 1427 | static isl_bool node_follows_strong(int i, int j, void *user) |
| 1428 | { |
| 1429 | struct isl_sched_graph *graph = user; |
| 1430 | |
| 1431 | return isl_sched_graph_has_validity_edge(graph, src: &graph->node[j], |
| 1432 | dst: &graph->node[i]); |
| 1433 | } |
| 1434 | |
| 1435 | /* Use Tarjan's algorithm for computing the strongly connected components |
| 1436 | * in the dependence graph only considering those edges defined by "follows". |
| 1437 | */ |
| 1438 | isl_stat isl_sched_graph_detect_ccs(isl_ctx *ctx, |
| 1439 | struct isl_sched_graph *graph, |
| 1440 | isl_bool (*follows)(int i, int j, void *user)) |
| 1441 | { |
| 1442 | int i, n; |
| 1443 | struct isl_tarjan_graph *g = NULL; |
| 1444 | |
| 1445 | g = isl_tarjan_graph_init(ctx, len: graph->n, follows, user: graph); |
| 1446 | if (!g) |
| 1447 | return isl_stat_error; |
| 1448 | |
| 1449 | graph->scc = 0; |
| 1450 | i = 0; |
| 1451 | n = graph->n; |
| 1452 | while (n) { |
| 1453 | while (g->order[i] != -1) { |
| 1454 | graph->node[g->order[i]].scc = graph->scc; |
| 1455 | --n; |
| 1456 | ++i; |
| 1457 | } |
| 1458 | ++i; |
| 1459 | graph->scc++; |
| 1460 | } |
| 1461 | |
| 1462 | isl_tarjan_graph_free(g); |
| 1463 | |
| 1464 | return isl_stat_ok; |
| 1465 | } |
| 1466 | |
| 1467 | /* Apply Tarjan's algorithm to detect the strongly connected components |
| 1468 | * in the dependence graph. |
| 1469 | * Only consider the (conditional) validity dependences and clear "weak". |
| 1470 | */ |
| 1471 | static isl_stat detect_sccs(isl_ctx *ctx, struct isl_sched_graph *graph) |
| 1472 | { |
| 1473 | graph->weak = 0; |
| 1474 | return isl_sched_graph_detect_ccs(ctx, graph, follows: &node_follows_strong); |
| 1475 | } |
| 1476 | |
| 1477 | /* Apply Tarjan's algorithm to detect the (weakly) connected components |
| 1478 | * in the dependence graph. |
| 1479 | * Consider all dependences and set "weak". |
| 1480 | */ |
| 1481 | static isl_stat detect_wccs(isl_ctx *ctx, struct isl_sched_graph *graph) |
| 1482 | { |
| 1483 | graph->weak = 1; |
| 1484 | return isl_sched_graph_detect_ccs(ctx, graph, follows: &node_follows_weak); |
| 1485 | } |
| 1486 | |
| 1487 | static int cmp_scc(const void *a, const void *b, void *data) |
| 1488 | { |
| 1489 | struct isl_sched_graph *graph = data; |
| 1490 | const int *i1 = a; |
| 1491 | const int *i2 = b; |
| 1492 | |
| 1493 | return graph->node[*i1].scc - graph->node[*i2].scc; |
| 1494 | } |
| 1495 | |
| 1496 | /* Sort the elements of graph->sorted according to the corresponding SCCs. |
| 1497 | */ |
| 1498 | static int sort_sccs(struct isl_sched_graph *graph) |
| 1499 | { |
| 1500 | return isl_sort(pbase: graph->sorted, total_elems: graph->n, size: sizeof(int), cmp: &cmp_scc, arg: graph); |
| 1501 | } |
| 1502 | |
| 1503 | /* Return a non-parametric set in the compressed space of "node" that is |
| 1504 | * bounded by the size in each direction |
| 1505 | * |
| 1506 | * { [x] : -S_i <= x_i <= S_i } |
| 1507 | * |
| 1508 | * If S_i is infinity in direction i, then there are no constraints |
| 1509 | * in that direction. |
| 1510 | * |
| 1511 | * Cache the result in node->bounds. |
| 1512 | */ |
| 1513 | static __isl_give isl_basic_set *get_size_bounds(struct isl_sched_node *node) |
| 1514 | { |
| 1515 | isl_space *space; |
| 1516 | isl_basic_set *bounds; |
| 1517 | int i; |
| 1518 | |
| 1519 | if (node->bounds) |
| 1520 | return isl_basic_set_copy(bset: node->bounds); |
| 1521 | |
| 1522 | if (node->compressed) |
| 1523 | space = isl_pw_multi_aff_get_domain_space(pma: node->decompress); |
| 1524 | else |
| 1525 | space = isl_space_copy(space: node->space); |
| 1526 | space = isl_space_drop_all_params(space); |
| 1527 | bounds = isl_basic_set_universe(space); |
| 1528 | |
| 1529 | for (i = 0; i < node->nvar; ++i) { |
| 1530 | isl_val *size; |
| 1531 | |
| 1532 | size = isl_multi_val_get_val(multi: node->sizes, pos: i); |
| 1533 | if (!size) |
| 1534 | return isl_basic_set_free(bset: bounds); |
| 1535 | if (!isl_val_is_int(v: size)) { |
| 1536 | isl_val_free(v: size); |
| 1537 | continue; |
| 1538 | } |
| 1539 | bounds = isl_basic_set_upper_bound_val(bset: bounds, type: isl_dim_set, pos: i, |
| 1540 | value: isl_val_copy(v: size)); |
| 1541 | bounds = isl_basic_set_lower_bound_val(bset: bounds, type: isl_dim_set, pos: i, |
| 1542 | value: isl_val_neg(v: size)); |
| 1543 | } |
| 1544 | |
| 1545 | node->bounds = isl_basic_set_copy(bset: bounds); |
| 1546 | return bounds; |
| 1547 | } |
| 1548 | |
| 1549 | /* Compress the dependence relation "map", if needed, i.e., |
| 1550 | * when the source node "src" and/or the destination node "dst" |
| 1551 | * has been compressed. |
| 1552 | */ |
| 1553 | static __isl_give isl_map *compress(__isl_take isl_map *map, |
| 1554 | struct isl_sched_node *src, struct isl_sched_node *dst) |
| 1555 | { |
| 1556 | if (src->compressed) |
| 1557 | map = isl_map_preimage_domain_pw_multi_aff(map, |
| 1558 | pma: isl_pw_multi_aff_copy(pma: src->decompress)); |
| 1559 | if (dst->compressed) |
| 1560 | map = isl_map_preimage_range_pw_multi_aff(map, |
| 1561 | pma: isl_pw_multi_aff_copy(pma: dst->decompress)); |
| 1562 | return map; |
| 1563 | } |
| 1564 | |
| 1565 | /* Drop some constraints from "delta" that could be exploited |
| 1566 | * to construct loop coalescing schedules. |
| 1567 | * In particular, drop those constraint that bound the difference |
| 1568 | * to the size of the domain. |
| 1569 | * First project out the parameters to improve the effectiveness. |
| 1570 | */ |
| 1571 | static __isl_give isl_set *drop_coalescing_constraints( |
| 1572 | __isl_take isl_set *delta, struct isl_sched_node *node) |
| 1573 | { |
| 1574 | isl_size nparam; |
| 1575 | isl_basic_set *bounds; |
| 1576 | |
| 1577 | nparam = isl_set_dim(set: delta, type: isl_dim_param); |
| 1578 | if (nparam < 0) |
| 1579 | return isl_set_free(set: delta); |
| 1580 | |
| 1581 | bounds = get_size_bounds(node); |
| 1582 | |
| 1583 | delta = isl_set_project_out(set: delta, type: isl_dim_param, first: 0, n: nparam); |
| 1584 | delta = isl_set_remove_divs(set: delta); |
| 1585 | delta = isl_set_plain_gist_basic_set(set: delta, context: bounds); |
| 1586 | return delta; |
| 1587 | } |
| 1588 | |
| 1589 | /* Given a dependence relation R from "node" to itself, |
| 1590 | * construct the set of coefficients of valid constraints for elements |
| 1591 | * in that dependence relation. |
| 1592 | * In particular, the result contains tuples of coefficients |
| 1593 | * c_0, c_n, c_x such that |
| 1594 | * |
| 1595 | * c_0 + c_n n + c_x y - c_x x >= 0 for each (x,y) in R |
| 1596 | * |
| 1597 | * or, equivalently, |
| 1598 | * |
| 1599 | * c_0 + c_n n + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R } |
| 1600 | * |
| 1601 | * We choose here to compute the dual of delta R. |
| 1602 | * Alternatively, we could have computed the dual of R, resulting |
| 1603 | * in a set of tuples c_0, c_n, c_x, c_y, and then |
| 1604 | * plugged in (c_0, c_n, c_x, -c_x). |
| 1605 | * |
| 1606 | * If "need_param" is set, then the resulting coefficients effectively |
| 1607 | * include coefficients for the parameters c_n. Otherwise, they may |
| 1608 | * have been projected out already. |
| 1609 | * Since the constraints may be different for these two cases, |
| 1610 | * they are stored in separate caches. |
| 1611 | * In particular, if no parameter coefficients are required and |
| 1612 | * the schedule_treat_coalescing option is set, then the parameters |
| 1613 | * are projected out and some constraints that could be exploited |
| 1614 | * to construct coalescing schedules are removed before the dual |
| 1615 | * is computed. |
| 1616 | * |
| 1617 | * If "node" has been compressed, then the dependence relation |
| 1618 | * is also compressed before the set of coefficients is computed. |
| 1619 | */ |
| 1620 | static __isl_give isl_basic_set *intra_coefficients( |
| 1621 | struct isl_sched_graph *graph, struct isl_sched_node *node, |
| 1622 | __isl_take isl_map *map, int need_param) |
| 1623 | { |
| 1624 | isl_ctx *ctx; |
| 1625 | isl_set *delta; |
| 1626 | isl_map *key; |
| 1627 | isl_basic_set *coef; |
| 1628 | isl_maybe_isl_basic_set m; |
| 1629 | isl_map_to_basic_set **hmap = &graph->intra_hmap; |
| 1630 | int treat; |
| 1631 | |
| 1632 | if (!map) |
| 1633 | return NULL; |
| 1634 | |
| 1635 | ctx = isl_map_get_ctx(map); |
| 1636 | treat = !need_param && isl_options_get_schedule_treat_coalescing(ctx); |
| 1637 | if (!treat) |
| 1638 | hmap = &graph->intra_hmap_param; |
| 1639 | m = isl_map_to_basic_set_try_get(hmap: *hmap, key: map); |
| 1640 | if (m.valid < 0 || m.valid) { |
| 1641 | isl_map_free(map); |
| 1642 | return m.value; |
| 1643 | } |
| 1644 | |
| 1645 | key = isl_map_copy(map); |
| 1646 | map = compress(map, src: node, dst: node); |
| 1647 | delta = isl_map_deltas(map); |
| 1648 | if (treat) |
| 1649 | delta = drop_coalescing_constraints(delta, node); |
| 1650 | delta = isl_set_remove_divs(set: delta); |
| 1651 | coef = isl_set_coefficients(set: delta); |
| 1652 | *hmap = isl_map_to_basic_set_set(hmap: *hmap, key, val: isl_basic_set_copy(bset: coef)); |
| 1653 | |
| 1654 | return coef; |
| 1655 | } |
| 1656 | |
| 1657 | /* Given a dependence relation R, construct the set of coefficients |
| 1658 | * of valid constraints for elements in that dependence relation. |
| 1659 | * In particular, the result contains tuples of coefficients |
| 1660 | * c_0, c_n, c_x, c_y such that |
| 1661 | * |
| 1662 | * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R |
| 1663 | * |
| 1664 | * If the source or destination nodes of "edge" have been compressed, |
| 1665 | * then the dependence relation is also compressed before |
| 1666 | * the set of coefficients is computed. |
| 1667 | */ |
| 1668 | static __isl_give isl_basic_set *inter_coefficients( |
| 1669 | struct isl_sched_graph *graph, struct isl_sched_edge *edge, |
| 1670 | __isl_take isl_map *map) |
| 1671 | { |
| 1672 | isl_set *set; |
| 1673 | isl_map *key; |
| 1674 | isl_basic_set *coef; |
| 1675 | isl_maybe_isl_basic_set m; |
| 1676 | |
| 1677 | m = isl_map_to_basic_set_try_get(hmap: graph->inter_hmap, key: map); |
| 1678 | if (m.valid < 0 || m.valid) { |
| 1679 | isl_map_free(map); |
| 1680 | return m.value; |
| 1681 | } |
| 1682 | |
| 1683 | key = isl_map_copy(map); |
| 1684 | map = compress(map, src: edge->src, dst: edge->dst); |
| 1685 | set = isl_map_wrap(map: isl_map_remove_divs(map)); |
| 1686 | coef = isl_set_coefficients(set); |
| 1687 | graph->inter_hmap = isl_map_to_basic_set_set(hmap: graph->inter_hmap, key, |
| 1688 | val: isl_basic_set_copy(bset: coef)); |
| 1689 | |
| 1690 | return coef; |
| 1691 | } |
| 1692 | |
| 1693 | /* Return the position of the coefficients of the variables in |
| 1694 | * the coefficients constraints "coef". |
| 1695 | * |
| 1696 | * The space of "coef" is of the form |
| 1697 | * |
| 1698 | * { coefficients[[cst, params] -> S] } |
| 1699 | * |
| 1700 | * Return the position of S. |
| 1701 | */ |
| 1702 | static isl_size coef_var_offset(__isl_keep isl_basic_set *coef) |
| 1703 | { |
| 1704 | isl_size offset; |
| 1705 | isl_space *space; |
| 1706 | |
| 1707 | space = isl_space_unwrap(space: isl_basic_set_get_space(bset: coef)); |
| 1708 | offset = isl_space_dim(space, type: isl_dim_in); |
| 1709 | isl_space_free(space); |
| 1710 | |
| 1711 | return offset; |
| 1712 | } |
| 1713 | |
| 1714 | /* Return the offset of the coefficient of the constant term of "node" |
| 1715 | * within the (I)LP. |
| 1716 | * |
| 1717 | * Within each node, the coefficients have the following order: |
| 1718 | * - positive and negative parts of c_i_x |
| 1719 | * - c_i_n (if parametric) |
| 1720 | * - c_i_0 |
| 1721 | */ |
| 1722 | static int node_cst_coef_offset(struct isl_sched_node *node) |
| 1723 | { |
| 1724 | return node->start + 2 * node->nvar + node->nparam; |
| 1725 | } |
| 1726 | |
| 1727 | /* Return the offset of the coefficients of the parameters of "node" |
| 1728 | * within the (I)LP. |
| 1729 | * |
| 1730 | * Within each node, the coefficients have the following order: |
| 1731 | * - positive and negative parts of c_i_x |
| 1732 | * - c_i_n (if parametric) |
| 1733 | * - c_i_0 |
| 1734 | */ |
| 1735 | static int node_par_coef_offset(struct isl_sched_node *node) |
| 1736 | { |
| 1737 | return node->start + 2 * node->nvar; |
| 1738 | } |
| 1739 | |
| 1740 | /* Return the offset of the coefficients of the variables of "node" |
| 1741 | * within the (I)LP. |
| 1742 | * |
| 1743 | * Within each node, the coefficients have the following order: |
| 1744 | * - positive and negative parts of c_i_x |
| 1745 | * - c_i_n (if parametric) |
| 1746 | * - c_i_0 |
| 1747 | */ |
| 1748 | static int node_var_coef_offset(struct isl_sched_node *node) |
| 1749 | { |
| 1750 | return node->start; |
| 1751 | } |
| 1752 | |
| 1753 | /* Return the position of the pair of variables encoding |
| 1754 | * coefficient "i" of "node". |
| 1755 | * |
| 1756 | * The order of these variable pairs is the opposite of |
| 1757 | * that of the coefficients, with 2 variables per coefficient. |
| 1758 | */ |
| 1759 | static int node_var_coef_pos(struct isl_sched_node *node, int i) |
| 1760 | { |
| 1761 | return node_var_coef_offset(node) + 2 * (node->nvar - 1 - i); |
| 1762 | } |
| 1763 | |
| 1764 | /* Construct an isl_dim_map for mapping constraints on coefficients |
| 1765 | * for "node" to the corresponding positions in graph->lp. |
| 1766 | * "offset" is the offset of the coefficients for the variables |
| 1767 | * in the input constraints. |
| 1768 | * "s" is the sign of the mapping. |
| 1769 | * |
| 1770 | * The input constraints are given in terms of the coefficients |
| 1771 | * (c_0, c_x) or (c_0, c_n, c_x). |
| 1772 | * The mapping produced by this function essentially plugs in |
| 1773 | * (0, c_i_x^+ - c_i_x^-) if s = 1 and |
| 1774 | * (0, -c_i_x^+ + c_i_x^-) if s = -1 or |
| 1775 | * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and |
| 1776 | * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1. |
| 1777 | * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart. |
| 1778 | * Furthermore, the order of these pairs is the opposite of that |
| 1779 | * of the corresponding coefficients. |
| 1780 | * |
| 1781 | * The caller can extend the mapping to also map the other coefficients |
| 1782 | * (and therefore not plug in 0). |
| 1783 | */ |
| 1784 | static __isl_give isl_dim_map *intra_dim_map(isl_ctx *ctx, |
| 1785 | struct isl_sched_graph *graph, struct isl_sched_node *node, |
| 1786 | int offset, int s) |
| 1787 | { |
| 1788 | int pos; |
| 1789 | isl_size total; |
| 1790 | isl_dim_map *dim_map; |
| 1791 | |
| 1792 | total = isl_basic_set_dim(bset: graph->lp, type: isl_dim_all); |
| 1793 | if (!node || total < 0) |
| 1794 | return NULL; |
| 1795 | |
| 1796 | pos = node_var_coef_pos(node, i: 0); |
| 1797 | dim_map = isl_dim_map_alloc(ctx, len: total); |
| 1798 | isl_dim_map_range(dim_map, dst_pos: pos, dst_stride: -2, src_pos: offset, src_stride: 1, n: node->nvar, sign: -s); |
| 1799 | isl_dim_map_range(dim_map, dst_pos: pos + 1, dst_stride: -2, src_pos: offset, src_stride: 1, n: node->nvar, sign: s); |
| 1800 | |
| 1801 | return dim_map; |
| 1802 | } |
| 1803 | |
| 1804 | /* Construct an isl_dim_map for mapping constraints on coefficients |
| 1805 | * for "src" (node i) and "dst" (node j) to the corresponding positions |
| 1806 | * in graph->lp. |
| 1807 | * "offset" is the offset of the coefficients for the variables of "src" |
| 1808 | * in the input constraints. |
| 1809 | * "s" is the sign of the mapping. |
| 1810 | * |
| 1811 | * The input constraints are given in terms of the coefficients |
| 1812 | * (c_0, c_n, c_x, c_y). |
| 1813 | * The mapping produced by this function essentially plugs in |
| 1814 | * (c_j_0 - c_i_0, c_j_n - c_i_n, |
| 1815 | * -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and |
| 1816 | * (-c_j_0 + c_i_0, -c_j_n + c_i_n, |
| 1817 | * c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1. |
| 1818 | * In graph->lp, the c_*^- appear before their c_*^+ counterpart. |
| 1819 | * Furthermore, the order of these pairs is the opposite of that |
| 1820 | * of the corresponding coefficients. |
| 1821 | * |
| 1822 | * The caller can further extend the mapping. |
| 1823 | */ |
| 1824 | static __isl_give isl_dim_map *inter_dim_map(isl_ctx *ctx, |
| 1825 | struct isl_sched_graph *graph, struct isl_sched_node *src, |
| 1826 | struct isl_sched_node *dst, int offset, int s) |
| 1827 | { |
| 1828 | int pos; |
| 1829 | isl_size total; |
| 1830 | isl_dim_map *dim_map; |
| 1831 | |
| 1832 | total = isl_basic_set_dim(bset: graph->lp, type: isl_dim_all); |
| 1833 | if (!src || !dst || total < 0) |
| 1834 | return NULL; |
| 1835 | |
| 1836 | dim_map = isl_dim_map_alloc(ctx, len: total); |
| 1837 | |
| 1838 | pos = node_cst_coef_offset(node: dst); |
| 1839 | isl_dim_map_range(dim_map, dst_pos: pos, dst_stride: 0, src_pos: 0, src_stride: 0, n: 1, sign: s); |
| 1840 | pos = node_par_coef_offset(node: dst); |
| 1841 | isl_dim_map_range(dim_map, dst_pos: pos, dst_stride: 1, src_pos: 1, src_stride: 1, n: dst->nparam, sign: s); |
| 1842 | pos = node_var_coef_pos(node: dst, i: 0); |
| 1843 | isl_dim_map_range(dim_map, dst_pos: pos, dst_stride: -2, src_pos: offset + src->nvar, src_stride: 1, |
| 1844 | n: dst->nvar, sign: -s); |
| 1845 | isl_dim_map_range(dim_map, dst_pos: pos + 1, dst_stride: -2, src_pos: offset + src->nvar, src_stride: 1, |
| 1846 | n: dst->nvar, sign: s); |
| 1847 | |
| 1848 | pos = node_cst_coef_offset(node: src); |
| 1849 | isl_dim_map_range(dim_map, dst_pos: pos, dst_stride: 0, src_pos: 0, src_stride: 0, n: 1, sign: -s); |
| 1850 | pos = node_par_coef_offset(node: src); |
| 1851 | isl_dim_map_range(dim_map, dst_pos: pos, dst_stride: 1, src_pos: 1, src_stride: 1, n: src->nparam, sign: -s); |
| 1852 | pos = node_var_coef_pos(node: src, i: 0); |
| 1853 | isl_dim_map_range(dim_map, dst_pos: pos, dst_stride: -2, src_pos: offset, src_stride: 1, n: src->nvar, sign: s); |
| 1854 | isl_dim_map_range(dim_map, dst_pos: pos + 1, dst_stride: -2, src_pos: offset, src_stride: 1, n: src->nvar, sign: -s); |
| 1855 | |
| 1856 | return dim_map; |
| 1857 | } |
| 1858 | |
| 1859 | /* Add the constraints from "src" to "dst" using "dim_map", |
| 1860 | * after making sure there is enough room in "dst" for the extra constraints. |
| 1861 | */ |
| 1862 | static __isl_give isl_basic_set *add_constraints_dim_map( |
| 1863 | __isl_take isl_basic_set *dst, __isl_take isl_basic_set *src, |
| 1864 | __isl_take isl_dim_map *dim_map) |
| 1865 | { |
| 1866 | isl_size n_eq, n_ineq; |
| 1867 | |
| 1868 | n_eq = isl_basic_set_n_equality(bset: src); |
| 1869 | n_ineq = isl_basic_set_n_inequality(bset: src); |
| 1870 | if (n_eq < 0 || n_ineq < 0) |
| 1871 | dst = isl_basic_set_free(bset: dst); |
| 1872 | dst = isl_basic_set_extend_constraints(base: dst, n_eq, n_ineq); |
| 1873 | dst = isl_basic_set_add_constraints_dim_map(dst, src, dim_map); |
| 1874 | return dst; |
| 1875 | } |
| 1876 | |
| 1877 | /* Add constraints to graph->lp that force validity for the given |
| 1878 | * dependence from a node i to itself. |
| 1879 | * That is, add constraints that enforce |
| 1880 | * |
| 1881 | * (c_i_0 + c_i_n n + c_i_x y) - (c_i_0 + c_i_n n + c_i_x x) |
| 1882 | * = c_i_x (y - x) >= 0 |
| 1883 | * |
| 1884 | * for each (x,y) in R. |
| 1885 | * We obtain general constraints on coefficients (c_0, c_x) |
| 1886 | * of valid constraints for (y - x) and then plug in (0, c_i_x^+ - c_i_x^-), |
| 1887 | * where c_i_x = c_i_x^+ - c_i_x^-, with c_i_x^+ and c_i_x^- non-negative. |
| 1888 | * In graph->lp, the c_i_x^- appear before their c_i_x^+ counterpart. |
| 1889 | * Note that the result of intra_coefficients may also contain |
| 1890 | * parameter coefficients c_n, in which case 0 is plugged in for them as well. |
| 1891 | */ |
| 1892 | static isl_stat add_intra_validity_constraints(struct isl_sched_graph *graph, |
| 1893 | struct isl_sched_edge *edge) |
| 1894 | { |
| 1895 | isl_size offset; |
| 1896 | isl_map *map = isl_map_copy(map: edge->map); |
| 1897 | isl_ctx *ctx = isl_map_get_ctx(map); |
| 1898 | isl_dim_map *dim_map; |
| 1899 | isl_basic_set *coef; |
| 1900 | struct isl_sched_node *node = edge->src; |
| 1901 | |
| 1902 | coef = intra_coefficients(graph, node, map, need_param: 0); |
| 1903 | |
| 1904 | offset = coef_var_offset(coef); |
| 1905 | if (offset < 0) |
| 1906 | coef = isl_basic_set_free(bset: coef); |
| 1907 | if (!coef) |
| 1908 | return isl_stat_error; |
| 1909 | |
| 1910 | dim_map = intra_dim_map(ctx, graph, node, offset, s: 1); |
| 1911 | graph->lp = add_constraints_dim_map(dst: graph->lp, src: coef, dim_map); |
| 1912 | |
| 1913 | return isl_stat_ok; |
| 1914 | } |
| 1915 | |
| 1916 | /* Add constraints to graph->lp that force validity for the given |
| 1917 | * dependence from node i to node j. |
| 1918 | * That is, add constraints that enforce |
| 1919 | * |
| 1920 | * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) >= 0 |
| 1921 | * |
| 1922 | * for each (x,y) in R. |
| 1923 | * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y) |
| 1924 | * of valid constraints for R and then plug in |
| 1925 | * (c_j_0 - c_i_0, c_j_n - c_i_n, -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-), |
| 1926 | * where c_* = c_*^+ - c_*^-, with c_*^+ and c_*^- non-negative. |
| 1927 | * In graph->lp, the c_*^- appear before their c_*^+ counterpart. |
| 1928 | */ |
| 1929 | static isl_stat add_inter_validity_constraints(struct isl_sched_graph *graph, |
| 1930 | struct isl_sched_edge *edge) |
| 1931 | { |
| 1932 | isl_size offset; |
| 1933 | isl_map *map; |
| 1934 | isl_ctx *ctx; |
| 1935 | isl_dim_map *dim_map; |
| 1936 | isl_basic_set *coef; |
| 1937 | struct isl_sched_node *src = edge->src; |
| 1938 | struct isl_sched_node *dst = edge->dst; |
| 1939 | |
| 1940 | if (!graph->lp) |
| 1941 | return isl_stat_error; |
| 1942 | |
| 1943 | map = isl_map_copy(map: edge->map); |
| 1944 | ctx = isl_map_get_ctx(map); |
| 1945 | coef = inter_coefficients(graph, edge, map); |
| 1946 | |
| 1947 | offset = coef_var_offset(coef); |
| 1948 | if (offset < 0) |
| 1949 | coef = isl_basic_set_free(bset: coef); |
| 1950 | if (!coef) |
| 1951 | return isl_stat_error; |
| 1952 | |
| 1953 | dim_map = inter_dim_map(ctx, graph, src, dst, offset, s: 1); |
| 1954 | |
| 1955 | edge->start = graph->lp->n_ineq; |
| 1956 | graph->lp = add_constraints_dim_map(dst: graph->lp, src: coef, dim_map); |
| 1957 | if (!graph->lp) |
| 1958 | return isl_stat_error; |
| 1959 | edge->end = graph->lp->n_ineq; |
| 1960 | |
| 1961 | return isl_stat_ok; |
| 1962 | } |
| 1963 | |
| 1964 | /* Add constraints to graph->lp that bound the dependence distance for the given |
| 1965 | * dependence from a node i to itself. |
| 1966 | * If s = 1, we add the constraint |
| 1967 | * |
| 1968 | * c_i_x (y - x) <= m_0 + m_n n |
| 1969 | * |
| 1970 | * or |
| 1971 | * |
| 1972 | * -c_i_x (y - x) + m_0 + m_n n >= 0 |
| 1973 | * |
| 1974 | * for each (x,y) in R. |
| 1975 | * If s = -1, we add the constraint |
| 1976 | * |
| 1977 | * -c_i_x (y - x) <= m_0 + m_n n |
| 1978 | * |
| 1979 | * or |
| 1980 | * |
| 1981 | * c_i_x (y - x) + m_0 + m_n n >= 0 |
| 1982 | * |
| 1983 | * for each (x,y) in R. |
| 1984 | * We obtain general constraints on coefficients (c_0, c_n, c_x) |
| 1985 | * of valid constraints for (y - x) and then plug in (m_0, m_n, -s * c_i_x), |
| 1986 | * with each coefficient (except m_0) represented as a pair of non-negative |
| 1987 | * coefficients. |
| 1988 | * |
| 1989 | * |
| 1990 | * If "local" is set, then we add constraints |
| 1991 | * |
| 1992 | * c_i_x (y - x) <= 0 |
| 1993 | * |
| 1994 | * or |
| 1995 | * |
| 1996 | * -c_i_x (y - x) <= 0 |
| 1997 | * |
| 1998 | * instead, forcing the dependence distance to be (less than or) equal to 0. |
 * That is, we plug in (0, 0, -s * c_i_x).
| 2000 | * intra_coefficients is not required to have c_n in its result when |
| 2001 | * "local" is set. If they are missing, then (0, -s * c_i_x) is plugged in. |
| 2002 | * Note that dependences marked local are treated as validity constraints |
| 2003 | * by add_all_validity_constraints and therefore also have |
| 2004 | * their distances bounded by 0 from below. |
| 2005 | */ |
| 2006 | static isl_stat add_intra_proximity_constraints(struct isl_sched_graph *graph, |
| 2007 | struct isl_sched_edge *edge, int s, int local) |
| 2008 | { |
| 2009 | isl_size offset; |
| 2010 | isl_size nparam; |
| 2011 | isl_map *map = isl_map_copy(map: edge->map); |
| 2012 | isl_ctx *ctx = isl_map_get_ctx(map); |
| 2013 | isl_dim_map *dim_map; |
| 2014 | isl_basic_set *coef; |
| 2015 | struct isl_sched_node *node = edge->src; |
| 2016 | |
| 2017 | coef = intra_coefficients(graph, node, map, need_param: !local); |
| 2018 | nparam = isl_space_dim(space: node->space, type: isl_dim_param); |
| 2019 | |
| 2020 | offset = coef_var_offset(coef); |
| 2021 | if (nparam < 0 || offset < 0) |
| 2022 | coef = isl_basic_set_free(bset: coef); |
| 2023 | if (!coef) |
| 2024 | return isl_stat_error; |
| 2025 | |
| 2026 | dim_map = intra_dim_map(ctx, graph, node, offset, s: -s); |
| 2027 | |
| 2028 | if (!local) { |
| 2029 | isl_dim_map_range(dim_map, dst_pos: 1, dst_stride: 0, src_pos: 0, src_stride: 0, n: 1, sign: 1); |
| 2030 | isl_dim_map_range(dim_map, dst_pos: 4, dst_stride: 2, src_pos: 1, src_stride: 1, n: nparam, sign: -1); |
| 2031 | isl_dim_map_range(dim_map, dst_pos: 5, dst_stride: 2, src_pos: 1, src_stride: 1, n: nparam, sign: 1); |
| 2032 | } |
| 2033 | graph->lp = add_constraints_dim_map(dst: graph->lp, src: coef, dim_map); |
| 2034 | |
| 2035 | return isl_stat_ok; |
| 2036 | } |
| 2037 | |
| 2038 | /* Add constraints to graph->lp that bound the dependence distance for the given |
| 2039 | * dependence from node i to node j. |
| 2040 | * If s = 1, we add the constraint |
| 2041 | * |
| 2042 | * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) |
| 2043 | * <= m_0 + m_n n |
| 2044 | * |
| 2045 | * or |
| 2046 | * |
| 2047 | * -(c_j_0 + c_j_n n + c_j_x y) + (c_i_0 + c_i_n n + c_i_x x) + |
| 2048 | * m_0 + m_n n >= 0 |
| 2049 | * |
| 2050 | * for each (x,y) in R. |
| 2051 | * If s = -1, we add the constraint |
| 2052 | * |
| 2053 | * -((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)) |
| 2054 | * <= m_0 + m_n n |
| 2055 | * |
| 2056 | * or |
| 2057 | * |
| 2058 | * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) + |
| 2059 | * m_0 + m_n n >= 0 |
| 2060 | * |
| 2061 | * for each (x,y) in R. |
| 2062 | * We obtain general constraints on coefficients (c_0, c_n, c_x, c_y) |
| 2063 | * of valid constraints for R and then plug in |
| 2064 | * (m_0 - s*c_j_0 + s*c_i_0, m_n - s*c_j_n + s*c_i_n, |
| 2065 | * s*c_i_x, -s*c_j_x) |
| 2066 | * with each coefficient (except m_0, c_*_0 and c_*_n) |
| 2067 | * represented as a pair of non-negative coefficients. |
| 2068 | * |
| 2069 | * |
| 2070 | * If "local" is set (and s = 1), then we add constraints |
| 2071 | * |
| 2072 | * (c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x) <= 0 |
| 2073 | * |
| 2074 | * or |
| 2075 | * |
 *	-((c_j_0 + c_j_n n + c_j_x y) - (c_i_0 + c_i_n n + c_i_x x)) >= 0
| 2077 | * |
| 2078 | * instead, forcing the dependence distance to be (less than or) equal to 0. |
| 2079 | * That is, we plug in |
| 2080 | * (-s*c_j_0 + s*c_i_0, -s*c_j_n + s*c_i_n, s*c_i_x, -s*c_j_x). |
| 2081 | * Note that dependences marked local are treated as validity constraints |
| 2082 | * by add_all_validity_constraints and therefore also have |
| 2083 | * their distances bounded by 0 from below. |
| 2084 | */ |
| 2085 | static isl_stat add_inter_proximity_constraints(struct isl_sched_graph *graph, |
| 2086 | struct isl_sched_edge *edge, int s, int local) |
| 2087 | { |
| 2088 | isl_size offset; |
| 2089 | isl_size nparam; |
| 2090 | isl_map *map = isl_map_copy(map: edge->map); |
| 2091 | isl_ctx *ctx = isl_map_get_ctx(map); |
| 2092 | isl_dim_map *dim_map; |
| 2093 | isl_basic_set *coef; |
| 2094 | struct isl_sched_node *src = edge->src; |
| 2095 | struct isl_sched_node *dst = edge->dst; |
| 2096 | |
| 2097 | coef = inter_coefficients(graph, edge, map); |
| 2098 | nparam = isl_space_dim(space: src->space, type: isl_dim_param); |
| 2099 | |
| 2100 | offset = coef_var_offset(coef); |
| 2101 | if (nparam < 0 || offset < 0) |
| 2102 | coef = isl_basic_set_free(bset: coef); |
| 2103 | if (!coef) |
| 2104 | return isl_stat_error; |
| 2105 | |
| 2106 | dim_map = inter_dim_map(ctx, graph, src, dst, offset, s: -s); |
| 2107 | |
| 2108 | if (!local) { |
| 2109 | isl_dim_map_range(dim_map, dst_pos: 1, dst_stride: 0, src_pos: 0, src_stride: 0, n: 1, sign: 1); |
| 2110 | isl_dim_map_range(dim_map, dst_pos: 4, dst_stride: 2, src_pos: 1, src_stride: 1, n: nparam, sign: -1); |
| 2111 | isl_dim_map_range(dim_map, dst_pos: 5, dst_stride: 2, src_pos: 1, src_stride: 1, n: nparam, sign: 1); |
| 2112 | } |
| 2113 | |
| 2114 | graph->lp = add_constraints_dim_map(dst: graph->lp, src: coef, dim_map); |
| 2115 | |
| 2116 | return isl_stat_ok; |
| 2117 | } |
| 2118 | |
| 2119 | /* Should the distance over "edge" be forced to zero? |
| 2120 | * That is, is it marked as a local edge? |
| 2121 | * If "use_coincidence" is set, then coincidence edges are treated |
| 2122 | * as local edges. |
| 2123 | */ |
static int force_zero(struct isl_sched_edge *edge, int use_coincidence)
{
	/* A local edge always has its distance forced to zero. */
	if (is_local(edge))
		return 1;
	/* A coincidence edge only counts when requested by the caller. */
	return use_coincidence && is_coincidence(edge);
}
| 2128 | |
| 2129 | /* Add all validity constraints to graph->lp. |
| 2130 | * |
| 2131 | * An edge that is forced to be local needs to have its dependence |
| 2132 | * distances equal to zero. We take care of bounding them by 0 from below |
| 2133 | * here. add_all_proximity_constraints takes care of bounding them by 0 |
| 2134 | * from above. |
| 2135 | * |
| 2136 | * If "use_coincidence" is set, then we treat coincidence edges as local edges. |
| 2137 | * Otherwise, we ignore them. |
| 2138 | */ |
| 2139 | static int add_all_validity_constraints(struct isl_sched_graph *graph, |
| 2140 | int use_coincidence) |
| 2141 | { |
| 2142 | int i; |
| 2143 | |
| 2144 | for (i = 0; i < graph->n_edge; ++i) { |
| 2145 | struct isl_sched_edge *edge = &graph->edge[i]; |
| 2146 | int zero; |
| 2147 | |
| 2148 | zero = force_zero(edge, use_coincidence); |
| 2149 | if (!is_validity(edge) && !zero) |
| 2150 | continue; |
| 2151 | if (edge->src != edge->dst) |
| 2152 | continue; |
| 2153 | if (add_intra_validity_constraints(graph, edge) < 0) |
| 2154 | return -1; |
| 2155 | } |
| 2156 | |
| 2157 | for (i = 0; i < graph->n_edge; ++i) { |
| 2158 | struct isl_sched_edge *edge = &graph->edge[i]; |
| 2159 | int zero; |
| 2160 | |
| 2161 | zero = force_zero(edge, use_coincidence); |
| 2162 | if (!is_validity(edge) && !zero) |
| 2163 | continue; |
| 2164 | if (edge->src == edge->dst) |
| 2165 | continue; |
| 2166 | if (add_inter_validity_constraints(graph, edge) < 0) |
| 2167 | return -1; |
| 2168 | } |
| 2169 | |
| 2170 | return 0; |
| 2171 | } |
| 2172 | |
| 2173 | /* Add constraints to graph->lp that bound the dependence distance |
| 2174 | * for all dependence relations. |
| 2175 | * If a given proximity dependence is identical to a validity |
| 2176 | * dependence, then the dependence distance is already bounded |
| 2177 | * from below (by zero), so we only need to bound the distance |
| 2178 | * from above. (This includes the case of "local" dependences |
| 2179 | * which are treated as validity dependence by add_all_validity_constraints.) |
| 2180 | * Otherwise, we need to bound the distance both from above and from below. |
| 2181 | * |
| 2182 | * If "use_coincidence" is set, then we treat coincidence edges as local edges. |
| 2183 | * Otherwise, we ignore them. |
| 2184 | */ |
| 2185 | static int add_all_proximity_constraints(struct isl_sched_graph *graph, |
| 2186 | int use_coincidence) |
| 2187 | { |
| 2188 | int i; |
| 2189 | |
| 2190 | for (i = 0; i < graph->n_edge; ++i) { |
| 2191 | struct isl_sched_edge *edge = &graph->edge[i]; |
| 2192 | int zero; |
| 2193 | |
| 2194 | zero = force_zero(edge, use_coincidence); |
| 2195 | if (!isl_sched_edge_is_proximity(edge) && !zero) |
| 2196 | continue; |
| 2197 | if (edge->src == edge->dst && |
| 2198 | add_intra_proximity_constraints(graph, edge, s: 1, local: zero) < 0) |
| 2199 | return -1; |
| 2200 | if (edge->src != edge->dst && |
| 2201 | add_inter_proximity_constraints(graph, edge, s: 1, local: zero) < 0) |
| 2202 | return -1; |
| 2203 | if (is_validity(edge) || zero) |
| 2204 | continue; |
| 2205 | if (edge->src == edge->dst && |
| 2206 | add_intra_proximity_constraints(graph, edge, s: -1, local: 0) < 0) |
| 2207 | return -1; |
| 2208 | if (edge->src != edge->dst && |
| 2209 | add_inter_proximity_constraints(graph, edge, s: -1, local: 0) < 0) |
| 2210 | return -1; |
| 2211 | } |
| 2212 | |
| 2213 | return 0; |
| 2214 | } |
| 2215 | |
| 2216 | /* Normalize the rows of "indep" such that all rows are lexicographically |
| 2217 | * positive and such that each row contains as many final zeros as possible, |
| 2218 | * given the choice for the previous rows. |
| 2219 | * Do this by performing elementary row operations. |
| 2220 | */ |
| 2221 | static __isl_give isl_mat *normalize_independent(__isl_take isl_mat *indep) |
| 2222 | { |
| 2223 | indep = isl_mat_reverse_gauss(mat: indep); |
| 2224 | indep = isl_mat_lexnonneg_rows(mat: indep); |
| 2225 | return indep; |
| 2226 | } |
| 2227 | |
| 2228 | /* Extract the linear part of the current schedule for node "node". |
| 2229 | */ |
| 2230 | static __isl_give isl_mat *(struct isl_sched_node *node) |
| 2231 | { |
| 2232 | isl_size n_row = isl_mat_rows(mat: node->sched); |
| 2233 | |
| 2234 | if (n_row < 0) |
| 2235 | return NULL; |
| 2236 | return isl_mat_sub_alloc(mat: node->sched, first_row: 0, n_row, |
| 2237 | first_col: 1 + node->nparam, n_col: node->nvar); |
| 2238 | } |
| 2239 | |
| 2240 | /* Compute a basis for the rows in the linear part of the schedule |
| 2241 | * and extend this basis to a full basis. The remaining rows |
| 2242 | * can then be used to force linear independence from the rows |
| 2243 | * in the schedule. |
| 2244 | * |
| 2245 | * In particular, given the schedule rows S, we compute |
| 2246 | * |
| 2247 | * S = H Q |
| 2248 | * S U = H |
| 2249 | * |
| 2250 | * with H the Hermite normal form of S. That is, all but the |
| 2251 | * first rank columns of H are zero and so each row in S is |
| 2252 | * a linear combination of the first rank rows of Q. |
| 2253 | * The matrix Q can be used as a variable transformation |
| 2254 | * that isolates the directions of S in the first rank rows. |
| 2255 | * Transposing S U = H yields |
| 2256 | * |
| 2257 | * U^T S^T = H^T |
| 2258 | * |
| 2259 | * with all but the first rank rows of H^T zero. |
| 2260 | * The last rows of U^T are therefore linear combinations |
| 2261 | * of schedule coefficients that are all zero on schedule |
| 2262 | * coefficients that are linearly dependent on the rows of S. |
| 2263 | * At least one of these combinations is non-zero on |
| 2264 | * linearly independent schedule coefficients. |
| 2265 | * The rows are normalized to involve as few of the last |
| 2266 | * coefficients as possible and to have a positive initial value. |
| 2267 | */ |
| 2268 | isl_stat isl_sched_node_update_vmap(struct isl_sched_node *node) |
| 2269 | { |
| 2270 | isl_mat *H, *U, *Q; |
| 2271 | |
| 2272 | H = extract_linear_schedule(node); |
| 2273 | |
| 2274 | H = isl_mat_left_hermite(M: H, neg: 0, U: &U, Q: &Q); |
| 2275 | isl_mat_free(mat: node->indep); |
| 2276 | isl_mat_free(mat: node->vmap); |
| 2277 | node->vmap = Q; |
| 2278 | node->indep = isl_mat_transpose(mat: U); |
| 2279 | node->rank = isl_mat_initial_non_zero_cols(mat: H); |
| 2280 | node->indep = isl_mat_drop_rows(mat: node->indep, row: 0, n: node->rank); |
| 2281 | node->indep = normalize_independent(indep: node->indep); |
| 2282 | isl_mat_free(mat: H); |
| 2283 | |
| 2284 | if (!node->indep || !node->vmap || node->rank < 0) |
| 2285 | return isl_stat_error; |
| 2286 | return isl_stat_ok; |
| 2287 | } |
| 2288 | |
| 2289 | /* Is "edge" marked as a validity or a conditional validity edge? |
| 2290 | */ |
static int is_any_validity(struct isl_sched_edge *edge)
{
	if (is_validity(edge))
		return 1;
	/* Normalize to 0/1, as a logical "or" would. */
	return isl_sched_edge_is_conditional_validity(edge) != 0;
}
| 2296 | |
| 2297 | /* How many times should we count the constraints in "edge"? |
| 2298 | * |
| 2299 | * We count as follows |
| 2300 | * validity -> 1 (>= 0) |
| 2301 | * validity+proximity -> 2 (>= 0 and upper bound) |
| 2302 | * proximity -> 2 (lower and upper bound) |
| 2303 | * local(+any) -> 2 (>= 0 and <= 0) |
| 2304 | * |
| 2305 | * If an edge is only marked conditional_validity then it counts |
| 2306 | * as zero since it is only checked afterwards. |
| 2307 | * |
| 2308 | * If "use_coincidence" is set, then we treat coincidence edges as local edges. |
| 2309 | * Otherwise, we ignore them. |
| 2310 | */ |
static int edge_multiplicity(struct isl_sched_edge *edge, int use_coincidence)
{
	/* Proximity and forced-zero edges are counted twice. */
	int twice = isl_sched_edge_is_proximity(edge) ||
			force_zero(edge, use_coincidence);

	if (twice)
		return 2;
	/* Pure validity edges are counted once; anything else not at all. */
	return is_validity(edge) ? 1 : 0;
}
| 2320 | |
| 2321 | /* How many times should the constraints in "edge" be counted |
| 2322 | * as a parametric intra-node constraint? |
| 2323 | * |
| 2324 | * Only proximity edges that are not forced zero need |
| 2325 | * coefficient constraints that include coefficients for parameters. |
| 2326 | * If the edge is also a validity edge, then only |
| 2327 | * an upper bound is introduced. Otherwise, both lower and upper bounds |
| 2328 | * are introduced. |
| 2329 | */ |
| 2330 | static int parametric_intra_edge_multiplicity(struct isl_sched_edge *edge, |
| 2331 | int use_coincidence) |
| 2332 | { |
| 2333 | if (edge->src != edge->dst) |
| 2334 | return 0; |
| 2335 | if (!isl_sched_edge_is_proximity(edge)) |
| 2336 | return 0; |
| 2337 | if (force_zero(edge, use_coincidence)) |
| 2338 | return 0; |
| 2339 | if (is_validity(edge)) |
| 2340 | return 1; |
| 2341 | else |
| 2342 | return 2; |
| 2343 | } |
| 2344 | |
| 2345 | /* Add "f" times the number of equality and inequality constraints of "bset" |
| 2346 | * to "n_eq" and "n_ineq" and free "bset". |
| 2347 | */ |
| 2348 | static isl_stat update_count(__isl_take isl_basic_set *bset, |
| 2349 | int f, int *n_eq, int *n_ineq) |
| 2350 | { |
| 2351 | isl_size eq, ineq; |
| 2352 | |
| 2353 | eq = isl_basic_set_n_equality(bset); |
| 2354 | ineq = isl_basic_set_n_inequality(bset); |
| 2355 | isl_basic_set_free(bset); |
| 2356 | |
| 2357 | if (eq < 0 || ineq < 0) |
| 2358 | return isl_stat_error; |
| 2359 | |
| 2360 | *n_eq += eq; |
| 2361 | *n_ineq += ineq; |
| 2362 | |
| 2363 | return isl_stat_ok; |
| 2364 | } |
| 2365 | |
| 2366 | /* Count the number of equality and inequality constraints |
| 2367 | * that will be added for the given map. |
| 2368 | * |
| 2369 | * The edges that require parameter coefficients are counted separately. |
| 2370 | * |
| 2371 | * "use_coincidence" is set if we should take into account coincidence edges. |
| 2372 | */ |
| 2373 | static isl_stat count_map_constraints(struct isl_sched_graph *graph, |
| 2374 | struct isl_sched_edge *edge, __isl_take isl_map *map, |
| 2375 | int *n_eq, int *n_ineq, int use_coincidence) |
| 2376 | { |
| 2377 | isl_map *copy; |
| 2378 | isl_basic_set *coef; |
| 2379 | int f = edge_multiplicity(edge, use_coincidence); |
| 2380 | int fp = parametric_intra_edge_multiplicity(edge, use_coincidence); |
| 2381 | |
| 2382 | if (f == 0) { |
| 2383 | isl_map_free(map); |
| 2384 | return isl_stat_ok; |
| 2385 | } |
| 2386 | |
| 2387 | if (edge->src != edge->dst) { |
| 2388 | coef = inter_coefficients(graph, edge, map); |
| 2389 | return update_count(bset: coef, f, n_eq, n_ineq); |
| 2390 | } |
| 2391 | |
| 2392 | if (fp > 0) { |
| 2393 | copy = isl_map_copy(map); |
| 2394 | coef = intra_coefficients(graph, node: edge->src, map: copy, need_param: 1); |
| 2395 | if (update_count(bset: coef, f: fp, n_eq, n_ineq) < 0) |
| 2396 | goto error; |
| 2397 | } |
| 2398 | |
| 2399 | if (f > fp) { |
| 2400 | copy = isl_map_copy(map); |
| 2401 | coef = intra_coefficients(graph, node: edge->src, map: copy, need_param: 0); |
| 2402 | if (update_count(bset: coef, f: f - fp, n_eq, n_ineq) < 0) |
| 2403 | goto error; |
| 2404 | } |
| 2405 | |
| 2406 | isl_map_free(map); |
| 2407 | return isl_stat_ok; |
| 2408 | error: |
| 2409 | isl_map_free(map); |
| 2410 | return isl_stat_error; |
| 2411 | } |
| 2412 | |
| 2413 | /* Count the number of equality and inequality constraints |
| 2414 | * that will be added to the main lp problem. |
| 2415 | * We count as follows |
| 2416 | * validity -> 1 (>= 0) |
| 2417 | * validity+proximity -> 2 (>= 0 and upper bound) |
| 2418 | * proximity -> 2 (lower and upper bound) |
| 2419 | * local(+any) -> 2 (>= 0 and <= 0) |
| 2420 | * |
| 2421 | * If "use_coincidence" is set, then we treat coincidence edges as local edges. |
| 2422 | * Otherwise, we ignore them. |
| 2423 | */ |
| 2424 | static int count_constraints(struct isl_sched_graph *graph, |
| 2425 | int *n_eq, int *n_ineq, int use_coincidence) |
| 2426 | { |
| 2427 | int i; |
| 2428 | |
| 2429 | *n_eq = *n_ineq = 0; |
| 2430 | for (i = 0; i < graph->n_edge; ++i) { |
| 2431 | struct isl_sched_edge *edge = &graph->edge[i]; |
| 2432 | isl_map *map = isl_map_copy(map: edge->map); |
| 2433 | |
| 2434 | if (count_map_constraints(graph, edge, map, n_eq, n_ineq, |
| 2435 | use_coincidence) < 0) |
| 2436 | return -1; |
| 2437 | } |
| 2438 | |
| 2439 | return 0; |
| 2440 | } |
| 2441 | |
| 2442 | /* Count the number of constraints that will be added by |
| 2443 | * add_bound_constant_constraints to bound the values of the constant terms |
| 2444 | * and increment *n_eq and *n_ineq accordingly. |
| 2445 | * |
| 2446 | * In practice, add_bound_constant_constraints only adds inequalities. |
| 2447 | */ |
| 2448 | static isl_stat count_bound_constant_constraints(isl_ctx *ctx, |
| 2449 | struct isl_sched_graph *graph, int *n_eq, int *n_ineq) |
| 2450 | { |
| 2451 | if (isl_options_get_schedule_max_constant_term(ctx) == -1) |
| 2452 | return isl_stat_ok; |
| 2453 | |
| 2454 | *n_ineq += graph->n; |
| 2455 | |
| 2456 | return isl_stat_ok; |
| 2457 | } |
| 2458 | |
| 2459 | /* Add constraints to bound the values of the constant terms in the schedule, |
| 2460 | * if requested by the user. |
| 2461 | * |
| 2462 | * The maximal value of the constant terms is defined by the option |
| 2463 | * "schedule_max_constant_term". |
| 2464 | */ |
| 2465 | static isl_stat add_bound_constant_constraints(isl_ctx *ctx, |
| 2466 | struct isl_sched_graph *graph) |
| 2467 | { |
| 2468 | int i, k; |
| 2469 | int max; |
| 2470 | isl_size total; |
| 2471 | |
| 2472 | max = isl_options_get_schedule_max_constant_term(ctx); |
| 2473 | if (max == -1) |
| 2474 | return isl_stat_ok; |
| 2475 | |
| 2476 | total = isl_basic_set_dim(bset: graph->lp, type: isl_dim_set); |
| 2477 | if (total < 0) |
| 2478 | return isl_stat_error; |
| 2479 | |
| 2480 | for (i = 0; i < graph->n; ++i) { |
| 2481 | struct isl_sched_node *node = &graph->node[i]; |
| 2482 | int pos; |
| 2483 | |
| 2484 | k = isl_basic_set_alloc_inequality(bset: graph->lp); |
| 2485 | if (k < 0) |
| 2486 | return isl_stat_error; |
| 2487 | isl_seq_clr(p: graph->lp->ineq[k], len: 1 + total); |
| 2488 | pos = node_cst_coef_offset(node); |
| 2489 | isl_int_set_si(graph->lp->ineq[k][1 + pos], -1); |
| 2490 | isl_int_set_si(graph->lp->ineq[k][0], max); |
| 2491 | } |
| 2492 | |
| 2493 | return isl_stat_ok; |
| 2494 | } |
| 2495 | |
| 2496 | /* Count the number of constraints that will be added by |
| 2497 | * add_bound_coefficient_constraints and increment *n_eq and *n_ineq |
| 2498 | * accordingly. |
| 2499 | * |
| 2500 | * In practice, add_bound_coefficient_constraints only adds inequalities. |
| 2501 | */ |
| 2502 | static int count_bound_coefficient_constraints(isl_ctx *ctx, |
| 2503 | struct isl_sched_graph *graph, int *n_eq, int *n_ineq) |
| 2504 | { |
| 2505 | int i; |
| 2506 | |
| 2507 | if (isl_options_get_schedule_max_coefficient(ctx) == -1 && |
| 2508 | !isl_options_get_schedule_treat_coalescing(ctx)) |
| 2509 | return 0; |
| 2510 | |
| 2511 | for (i = 0; i < graph->n; ++i) |
| 2512 | *n_ineq += graph->node[i].nparam + 2 * graph->node[i].nvar; |
| 2513 | |
| 2514 | return 0; |
| 2515 | } |
| 2516 | |
| 2517 | /* Add constraints to graph->lp that bound the values of |
| 2518 | * the parameter schedule coefficients of "node" to "max" and |
| 2519 | * the variable schedule coefficients to the corresponding entry |
| 2520 | * in node->max. |
| 2521 | * In either case, a negative value means that no bound needs to be imposed. |
| 2522 | * |
| 2523 | * For parameter coefficients, this amounts to adding a constraint |
| 2524 | * |
| 2525 | * c_n <= max |
| 2526 | * |
| 2527 | * i.e., |
| 2528 | * |
| 2529 | * -c_n + max >= 0 |
| 2530 | * |
| 2531 | * The variables coefficients are, however, not represented directly. |
| 2532 | * Instead, the variable coefficients c_x are written as differences |
| 2533 | * c_x = c_x^+ - c_x^-. |
| 2534 | * That is, |
| 2535 | * |
| 2536 | * -max_i <= c_x_i <= max_i |
| 2537 | * |
| 2538 | * is encoded as |
| 2539 | * |
| 2540 | * -max_i <= c_x_i^+ - c_x_i^- <= max_i |
| 2541 | * |
| 2542 | * or |
| 2543 | * |
| 2544 | * -(c_x_i^+ - c_x_i^-) + max_i >= 0 |
| 2545 | * c_x_i^+ - c_x_i^- + max_i >= 0 |
| 2546 | */ |
| 2547 | static isl_stat node_add_coefficient_constraints(isl_ctx *ctx, |
| 2548 | struct isl_sched_graph *graph, struct isl_sched_node *node, int max) |
| 2549 | { |
| 2550 | int i, j, k; |
| 2551 | isl_size total; |
| 2552 | isl_vec *ineq; |
| 2553 | |
| 2554 | total = isl_basic_set_dim(bset: graph->lp, type: isl_dim_set); |
| 2555 | if (total < 0) |
| 2556 | return isl_stat_error; |
| 2557 | |
| 2558 | for (j = 0; j < node->nparam; ++j) { |
| 2559 | int dim; |
| 2560 | |
| 2561 | if (max < 0) |
| 2562 | continue; |
| 2563 | |
| 2564 | k = isl_basic_set_alloc_inequality(bset: graph->lp); |
| 2565 | if (k < 0) |
| 2566 | return isl_stat_error; |
| 2567 | dim = 1 + node_par_coef_offset(node) + j; |
| 2568 | isl_seq_clr(p: graph->lp->ineq[k], len: 1 + total); |
| 2569 | isl_int_set_si(graph->lp->ineq[k][dim], -1); |
| 2570 | isl_int_set_si(graph->lp->ineq[k][0], max); |
| 2571 | } |
| 2572 | |
| 2573 | ineq = isl_vec_alloc(ctx, size: 1 + total); |
| 2574 | ineq = isl_vec_clr(vec: ineq); |
| 2575 | if (!ineq) |
| 2576 | return isl_stat_error; |
| 2577 | for (i = 0; i < node->nvar; ++i) { |
| 2578 | int pos = 1 + node_var_coef_pos(node, i); |
| 2579 | |
| 2580 | if (isl_int_is_neg(node->max->el[i])) |
| 2581 | continue; |
| 2582 | |
| 2583 | isl_int_set_si(ineq->el[pos], 1); |
| 2584 | isl_int_set_si(ineq->el[pos + 1], -1); |
| 2585 | isl_int_set(ineq->el[0], node->max->el[i]); |
| 2586 | |
| 2587 | k = isl_basic_set_alloc_inequality(bset: graph->lp); |
| 2588 | if (k < 0) |
| 2589 | goto error; |
| 2590 | isl_seq_cpy(dst: graph->lp->ineq[k], src: ineq->el, len: 1 + total); |
| 2591 | |
| 2592 | isl_seq_neg(dst: ineq->el + pos, src: ineq->el + pos, len: 2); |
| 2593 | k = isl_basic_set_alloc_inequality(bset: graph->lp); |
| 2594 | if (k < 0) |
| 2595 | goto error; |
| 2596 | isl_seq_cpy(dst: graph->lp->ineq[k], src: ineq->el, len: 1 + total); |
| 2597 | |
| 2598 | isl_seq_clr(p: ineq->el + pos, len: 2); |
| 2599 | } |
| 2600 | isl_vec_free(vec: ineq); |
| 2601 | |
| 2602 | return isl_stat_ok; |
| 2603 | error: |
| 2604 | isl_vec_free(vec: ineq); |
| 2605 | return isl_stat_error; |
| 2606 | } |
| 2607 | |
| 2608 | /* Add constraints that bound the values of the variable and parameter |
| 2609 | * coefficients of the schedule. |
| 2610 | * |
| 2611 | * The maximal value of the coefficients is defined by the option |
| 2612 | * 'schedule_max_coefficient' and the entries in node->max. |
| 2613 | * These latter entries are only set if either the schedule_max_coefficient |
| 2614 | * option or the schedule_treat_coalescing option is set. |
| 2615 | */ |
| 2616 | static isl_stat add_bound_coefficient_constraints(isl_ctx *ctx, |
| 2617 | struct isl_sched_graph *graph) |
| 2618 | { |
| 2619 | int i; |
| 2620 | int max; |
| 2621 | |
| 2622 | max = isl_options_get_schedule_max_coefficient(ctx); |
| 2623 | |
| 2624 | if (max == -1 && !isl_options_get_schedule_treat_coalescing(ctx)) |
| 2625 | return isl_stat_ok; |
| 2626 | |
| 2627 | for (i = 0; i < graph->n; ++i) { |
| 2628 | struct isl_sched_node *node = &graph->node[i]; |
| 2629 | |
| 2630 | if (node_add_coefficient_constraints(ctx, graph, node, max) < 0) |
| 2631 | return isl_stat_error; |
| 2632 | } |
| 2633 | |
| 2634 | return isl_stat_ok; |
| 2635 | } |
| 2636 | |
| 2637 | /* Add a constraint to graph->lp that equates the value at position |
| 2638 | * "sum_pos" to the sum of the "n" values starting at "first". |
| 2639 | */ |
| 2640 | static isl_stat add_sum_constraint(struct isl_sched_graph *graph, |
| 2641 | int sum_pos, int first, int n) |
| 2642 | { |
| 2643 | int i, k; |
| 2644 | isl_size total; |
| 2645 | |
| 2646 | total = isl_basic_set_dim(bset: graph->lp, type: isl_dim_set); |
| 2647 | if (total < 0) |
| 2648 | return isl_stat_error; |
| 2649 | |
| 2650 | k = isl_basic_set_alloc_equality(bset: graph->lp); |
| 2651 | if (k < 0) |
| 2652 | return isl_stat_error; |
| 2653 | isl_seq_clr(p: graph->lp->eq[k], len: 1 + total); |
| 2654 | isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1); |
| 2655 | for (i = 0; i < n; ++i) |
| 2656 | isl_int_set_si(graph->lp->eq[k][1 + first + i], 1); |
| 2657 | |
| 2658 | return isl_stat_ok; |
| 2659 | } |
| 2660 | |
| 2661 | /* Add a constraint to graph->lp that equates the value at position |
| 2662 | * "sum_pos" to the sum of the parameter coefficients of all nodes. |
| 2663 | */ |
| 2664 | static isl_stat add_param_sum_constraint(struct isl_sched_graph *graph, |
| 2665 | int sum_pos) |
| 2666 | { |
| 2667 | int i, j, k; |
| 2668 | isl_size total; |
| 2669 | |
| 2670 | total = isl_basic_set_dim(bset: graph->lp, type: isl_dim_set); |
| 2671 | if (total < 0) |
| 2672 | return isl_stat_error; |
| 2673 | |
| 2674 | k = isl_basic_set_alloc_equality(bset: graph->lp); |
| 2675 | if (k < 0) |
| 2676 | return isl_stat_error; |
| 2677 | isl_seq_clr(p: graph->lp->eq[k], len: 1 + total); |
| 2678 | isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1); |
| 2679 | for (i = 0; i < graph->n; ++i) { |
| 2680 | int pos = 1 + node_par_coef_offset(node: &graph->node[i]); |
| 2681 | |
| 2682 | for (j = 0; j < graph->node[i].nparam; ++j) |
| 2683 | isl_int_set_si(graph->lp->eq[k][pos + j], 1); |
| 2684 | } |
| 2685 | |
| 2686 | return isl_stat_ok; |
| 2687 | } |
| 2688 | |
| 2689 | /* Add a constraint to graph->lp that equates the value at position |
| 2690 | * "sum_pos" to the sum of the variable coefficients of all nodes. |
| 2691 | */ |
| 2692 | static isl_stat add_var_sum_constraint(struct isl_sched_graph *graph, |
| 2693 | int sum_pos) |
| 2694 | { |
| 2695 | int i, j, k; |
| 2696 | isl_size total; |
| 2697 | |
| 2698 | total = isl_basic_set_dim(bset: graph->lp, type: isl_dim_set); |
| 2699 | if (total < 0) |
| 2700 | return isl_stat_error; |
| 2701 | |
| 2702 | k = isl_basic_set_alloc_equality(bset: graph->lp); |
| 2703 | if (k < 0) |
| 2704 | return isl_stat_error; |
| 2705 | isl_seq_clr(p: graph->lp->eq[k], len: 1 + total); |
| 2706 | isl_int_set_si(graph->lp->eq[k][1 + sum_pos], -1); |
| 2707 | for (i = 0; i < graph->n; ++i) { |
| 2708 | struct isl_sched_node *node = &graph->node[i]; |
| 2709 | int pos = 1 + node_var_coef_offset(node); |
| 2710 | |
| 2711 | for (j = 0; j < 2 * node->nvar; ++j) |
| 2712 | isl_int_set_si(graph->lp->eq[k][pos + j], 1); |
| 2713 | } |
| 2714 | |
| 2715 | return isl_stat_ok; |
| 2716 | } |
| 2717 | |
| 2718 | /* Construct an ILP problem for finding schedule coefficients |
| 2719 | * that result in non-negative, but small dependence distances |
| 2720 | * over all dependences. |
| 2721 | * In particular, the dependence distances over proximity edges |
| 2722 | * are bounded by m_0 + m_n n and we compute schedule coefficients |
| 2723 | * with small values (preferably zero) of m_n and m_0. |
| 2724 | * |
| 2725 | * All variables of the ILP are non-negative. The actual coefficients |
| 2726 | * may be negative, so each coefficient is represented as the difference |
| 2727 | * of two non-negative variables. The negative part always appears |
| 2728 | * immediately before the positive part. |
| 2729 | * Other than that, the variables have the following order |
| 2730 | * |
| 2731 | * - sum of positive and negative parts of m_n coefficients |
| 2732 | * - m_0 |
| 2733 | * - sum of all c_n coefficients |
| 2734 | * (unconstrained when computing non-parametric schedules) |
| 2735 | * - sum of positive and negative parts of all c_x coefficients |
| 2736 | * - positive and negative parts of m_n coefficients |
| 2737 | * - for each node |
| 2738 | * - positive and negative parts of c_i_x, in opposite order |
| 2739 | * - c_i_n (if parametric) |
| 2740 | * - c_i_0 |
| 2741 | * |
| 2742 | * The constraints are those from the edges plus two or three equalities |
| 2743 | * to express the sums. |
| 2744 | * |
| 2745 | * If "use_coincidence" is set, then we treat coincidence edges as local edges. |
| 2746 | * Otherwise, we ignore them. |
| 2747 | */ |
| 2748 | static isl_stat setup_lp(isl_ctx *ctx, struct isl_sched_graph *graph, |
| 2749 | int use_coincidence) |
| 2750 | { |
| 2751 | int i; |
| 2752 | isl_size nparam; |
| 2753 | unsigned total; |
| 2754 | isl_space *space; |
| 2755 | int parametric; |
| 2756 | int param_pos; |
| 2757 | int n_eq, n_ineq; |
| 2758 | |
| 2759 | parametric = ctx->opt->schedule_parametric; |
| 2760 | nparam = isl_space_dim(space: graph->node[0].space, type: isl_dim_param); |
| 2761 | if (nparam < 0) |
| 2762 | return isl_stat_error; |
| 2763 | param_pos = 4; |
| 2764 | total = param_pos + 2 * nparam; |
| 2765 | for (i = 0; i < graph->n; ++i) { |
| 2766 | struct isl_sched_node *node = &graph->node[graph->sorted[i]]; |
| 2767 | if (isl_sched_node_update_vmap(node) < 0) |
| 2768 | return isl_stat_error; |
| 2769 | node->start = total; |
| 2770 | total += 1 + node->nparam + 2 * node->nvar; |
| 2771 | } |
| 2772 | |
| 2773 | if (count_constraints(graph, n_eq: &n_eq, n_ineq: &n_ineq, use_coincidence) < 0) |
| 2774 | return isl_stat_error; |
| 2775 | if (count_bound_constant_constraints(ctx, graph, n_eq: &n_eq, n_ineq: &n_ineq) < 0) |
| 2776 | return isl_stat_error; |
| 2777 | if (count_bound_coefficient_constraints(ctx, graph, n_eq: &n_eq, n_ineq: &n_ineq) < 0) |
| 2778 | return isl_stat_error; |
| 2779 | |
| 2780 | space = isl_space_set_alloc(ctx, nparam: 0, dim: total); |
| 2781 | isl_basic_set_free(bset: graph->lp); |
| 2782 | n_eq += 2 + parametric; |
| 2783 | |
| 2784 | graph->lp = isl_basic_set_alloc_space(space, extra: 0, n_eq, n_ineq); |
| 2785 | |
| 2786 | if (add_sum_constraint(graph, sum_pos: 0, first: param_pos, n: 2 * nparam) < 0) |
| 2787 | return isl_stat_error; |
| 2788 | if (parametric && add_param_sum_constraint(graph, sum_pos: 2) < 0) |
| 2789 | return isl_stat_error; |
| 2790 | if (add_var_sum_constraint(graph, sum_pos: 3) < 0) |
| 2791 | return isl_stat_error; |
| 2792 | if (add_bound_constant_constraints(ctx, graph) < 0) |
| 2793 | return isl_stat_error; |
| 2794 | if (add_bound_coefficient_constraints(ctx, graph) < 0) |
| 2795 | return isl_stat_error; |
| 2796 | if (add_all_validity_constraints(graph, use_coincidence) < 0) |
| 2797 | return isl_stat_error; |
| 2798 | if (add_all_proximity_constraints(graph, use_coincidence) < 0) |
| 2799 | return isl_stat_error; |
| 2800 | |
| 2801 | return isl_stat_ok; |
| 2802 | } |
| 2803 | |
| 2804 | /* Analyze the conflicting constraint found by |
| 2805 | * isl_tab_basic_set_non_trivial_lexmin. If it corresponds to the validity |
| 2806 | * constraint of one of the edges between distinct nodes, living, moreover |
| 2807 | * in distinct SCCs, then record the source and sink SCC as this may |
| 2808 | * be a good place to cut between SCCs. |
| 2809 | */ |
| 2810 | static int check_conflict(int con, void *user) |
| 2811 | { |
| 2812 | int i; |
| 2813 | struct isl_sched_graph *graph = user; |
| 2814 | |
| 2815 | if (graph->src_scc >= 0) |
| 2816 | return 0; |
| 2817 | |
| 2818 | con -= graph->lp->n_eq; |
| 2819 | |
| 2820 | if (con >= graph->lp->n_ineq) |
| 2821 | return 0; |
| 2822 | |
| 2823 | for (i = 0; i < graph->n_edge; ++i) { |
| 2824 | if (!is_validity(edge: &graph->edge[i])) |
| 2825 | continue; |
| 2826 | if (graph->edge[i].src == graph->edge[i].dst) |
| 2827 | continue; |
| 2828 | if (graph->edge[i].src->scc == graph->edge[i].dst->scc) |
| 2829 | continue; |
| 2830 | if (graph->edge[i].start > con) |
| 2831 | continue; |
| 2832 | if (graph->edge[i].end <= con) |
| 2833 | continue; |
| 2834 | graph->src_scc = graph->edge[i].src->scc; |
| 2835 | graph->dst_scc = graph->edge[i].dst->scc; |
| 2836 | } |
| 2837 | |
| 2838 | return 0; |
| 2839 | } |
| 2840 | |
| 2841 | /* Check whether the next schedule row of the given node needs to be |
| 2842 | * non-trivial. Lower-dimensional domains may have some trivial rows, |
| 2843 | * but as soon as the number of remaining required non-trivial rows |
| 2844 | * is as large as the number or remaining rows to be computed, |
| 2845 | * all remaining rows need to be non-trivial. |
| 2846 | */ |
| 2847 | static int needs_row(struct isl_sched_graph *graph, struct isl_sched_node *node) |
| 2848 | { |
| 2849 | return node->nvar - node->rank >= graph->maxvar - graph->n_row; |
| 2850 | } |
| 2851 | |
| 2852 | /* Construct a non-triviality region with triviality directions |
| 2853 | * corresponding to the rows of "indep". |
| 2854 | * The rows of "indep" are expressed in terms of the schedule coefficients c_i, |
| 2855 | * while the triviality directions are expressed in terms of |
| 2856 | * pairs of non-negative variables c^+_i - c^-_i, with c^-_i appearing |
| 2857 | * before c^+_i. Furthermore, |
| 2858 | * the pairs of non-negative variables representing the coefficients |
| 2859 | * are stored in the opposite order. |
| 2860 | */ |
| 2861 | static __isl_give isl_mat *construct_trivial(__isl_keep isl_mat *indep) |
| 2862 | { |
| 2863 | isl_ctx *ctx; |
| 2864 | isl_mat *mat; |
| 2865 | int i, j; |
| 2866 | isl_size n, n_var; |
| 2867 | |
| 2868 | n = isl_mat_rows(mat: indep); |
| 2869 | n_var = isl_mat_cols(mat: indep); |
| 2870 | if (n < 0 || n_var < 0) |
| 2871 | return NULL; |
| 2872 | |
| 2873 | ctx = isl_mat_get_ctx(mat: indep); |
| 2874 | mat = isl_mat_alloc(ctx, n_row: n, n_col: 2 * n_var); |
| 2875 | if (!mat) |
| 2876 | return NULL; |
| 2877 | for (i = 0; i < n; ++i) { |
| 2878 | for (j = 0; j < n_var; ++j) { |
| 2879 | int nj = n_var - 1 - j; |
| 2880 | isl_int_neg(mat->row[i][2 * nj], indep->row[i][j]); |
| 2881 | isl_int_set(mat->row[i][2 * nj + 1], indep->row[i][j]); |
| 2882 | } |
| 2883 | } |
| 2884 | |
| 2885 | return mat; |
| 2886 | } |
| 2887 | |
| 2888 | /* Solve the ILP problem constructed in setup_lp. |
| 2889 | * For each node such that all the remaining rows of its schedule |
| 2890 | * need to be non-trivial, we construct a non-triviality region. |
| 2891 | * This region imposes that the next row is independent of previous rows. |
| 2892 | * In particular, the non-triviality region enforces that at least |
| 2893 | * one of the linear combinations in the rows of node->indep is non-zero. |
| 2894 | */ |
| 2895 | static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph) |
| 2896 | { |
| 2897 | int i; |
| 2898 | isl_vec *sol; |
| 2899 | isl_basic_set *lp; |
| 2900 | |
| 2901 | for (i = 0; i < graph->n; ++i) { |
| 2902 | struct isl_sched_node *node = &graph->node[i]; |
| 2903 | isl_mat *trivial; |
| 2904 | |
| 2905 | graph->region[i].pos = node_var_coef_offset(node); |
| 2906 | if (needs_row(graph, node)) |
| 2907 | trivial = construct_trivial(indep: node->indep); |
| 2908 | else |
| 2909 | trivial = isl_mat_zero(ctx, n_row: 0, n_col: 0); |
| 2910 | graph->region[i].trivial = trivial; |
| 2911 | } |
| 2912 | lp = isl_basic_set_copy(bset: graph->lp); |
| 2913 | sol = isl_tab_basic_set_non_trivial_lexmin(bset: lp, n_op: 2, n_region: graph->n, |
| 2914 | region: graph->region, conflict: &check_conflict, user: graph); |
| 2915 | for (i = 0; i < graph->n; ++i) |
| 2916 | isl_mat_free(mat: graph->region[i].trivial); |
| 2917 | return sol; |
| 2918 | } |
| 2919 | |
| 2920 | /* Extract the coefficients for the variables of "node" from "sol". |
| 2921 | * |
| 2922 | * Each schedule coefficient c_i_x is represented as the difference |
| 2923 | * between two non-negative variables c_i_x^+ - c_i_x^-. |
| 2924 | * The c_i_x^- appear before their c_i_x^+ counterpart. |
| 2925 | * Furthermore, the order of these pairs is the opposite of that |
| 2926 | * of the corresponding coefficients. |
| 2927 | * |
| 2928 | * Return c_i_x = c_i_x^+ - c_i_x^- |
| 2929 | */ |
| 2930 | static __isl_give isl_vec *(struct isl_sched_node *node, |
| 2931 | __isl_keep isl_vec *sol) |
| 2932 | { |
| 2933 | int i; |
| 2934 | int pos; |
| 2935 | isl_vec *csol; |
| 2936 | |
| 2937 | if (!sol) |
| 2938 | return NULL; |
| 2939 | csol = isl_vec_alloc(ctx: isl_vec_get_ctx(vec: sol), size: node->nvar); |
| 2940 | if (!csol) |
| 2941 | return NULL; |
| 2942 | |
| 2943 | pos = 1 + node_var_coef_offset(node); |
| 2944 | for (i = 0; i < node->nvar; ++i) |
| 2945 | isl_int_sub(csol->el[node->nvar - 1 - i], |
| 2946 | sol->el[pos + 2 * i + 1], sol->el[pos + 2 * i]); |
| 2947 | |
| 2948 | return csol; |
| 2949 | } |
| 2950 | |
| 2951 | /* Update the schedules of all nodes based on the given solution |
| 2952 | * of the LP problem. |
| 2953 | * The new row is added to the current band. |
| 2954 | * All possibly negative coefficients are encoded as a difference |
| 2955 | * of two non-negative variables, so we need to perform the subtraction |
| 2956 | * here. |
| 2957 | * |
| 2958 | * If coincident is set, then the caller guarantees that the new |
| 2959 | * row satisfies the coincidence constraints. |
| 2960 | */ |
| 2961 | static int update_schedule(struct isl_sched_graph *graph, |
| 2962 | __isl_take isl_vec *sol, int coincident) |
| 2963 | { |
| 2964 | int i, j; |
| 2965 | isl_vec *csol = NULL; |
| 2966 | |
| 2967 | if (!sol) |
| 2968 | goto error; |
| 2969 | if (sol->size == 0) |
| 2970 | isl_die(sol->ctx, isl_error_internal, |
| 2971 | "no solution found" , goto error); |
| 2972 | if (graph->n_total_row >= graph->max_row) |
| 2973 | isl_die(sol->ctx, isl_error_internal, |
| 2974 | "too many schedule rows" , goto error); |
| 2975 | |
| 2976 | for (i = 0; i < graph->n; ++i) { |
| 2977 | struct isl_sched_node *node = &graph->node[i]; |
| 2978 | int pos; |
| 2979 | isl_size row = isl_mat_rows(mat: node->sched); |
| 2980 | |
| 2981 | isl_vec_free(vec: csol); |
| 2982 | csol = extract_var_coef(node, sol); |
| 2983 | if (row < 0 || !csol) |
| 2984 | goto error; |
| 2985 | |
| 2986 | isl_map_free(map: node->sched_map); |
| 2987 | node->sched_map = NULL; |
| 2988 | node->sched = isl_mat_add_rows(mat: node->sched, n: 1); |
| 2989 | if (!node->sched) |
| 2990 | goto error; |
| 2991 | pos = node_cst_coef_offset(node); |
| 2992 | node->sched = isl_mat_set_element(mat: node->sched, |
| 2993 | row, col: 0, v: sol->el[1 + pos]); |
| 2994 | pos = node_par_coef_offset(node); |
| 2995 | for (j = 0; j < node->nparam; ++j) |
| 2996 | node->sched = isl_mat_set_element(mat: node->sched, |
| 2997 | row, col: 1 + j, v: sol->el[1 + pos + j]); |
| 2998 | for (j = 0; j < node->nvar; ++j) |
| 2999 | node->sched = isl_mat_set_element(mat: node->sched, |
| 3000 | row, col: 1 + node->nparam + j, v: csol->el[j]); |
| 3001 | node->coincident[graph->n_total_row] = coincident; |
| 3002 | } |
| 3003 | isl_vec_free(vec: sol); |
| 3004 | isl_vec_free(vec: csol); |
| 3005 | |
| 3006 | graph->n_row++; |
| 3007 | graph->n_total_row++; |
| 3008 | |
| 3009 | return 0; |
| 3010 | error: |
| 3011 | isl_vec_free(vec: sol); |
| 3012 | isl_vec_free(vec: csol); |
| 3013 | return -1; |
| 3014 | } |
| 3015 | |
| 3016 | /* Convert row "row" of node->sched into an isl_aff living in "ls" |
| 3017 | * and return this isl_aff. |
| 3018 | */ |
| 3019 | static __isl_give isl_aff *(__isl_take isl_local_space *ls, |
| 3020 | struct isl_sched_node *node, int row) |
| 3021 | { |
| 3022 | int j; |
| 3023 | isl_int v; |
| 3024 | isl_aff *aff; |
| 3025 | |
| 3026 | isl_int_init(v); |
| 3027 | |
| 3028 | aff = isl_aff_zero_on_domain(ls); |
| 3029 | if (isl_mat_get_element(mat: node->sched, row, col: 0, v: &v) < 0) |
| 3030 | goto error; |
| 3031 | aff = isl_aff_set_constant(aff, v); |
| 3032 | for (j = 0; j < node->nparam; ++j) { |
| 3033 | if (isl_mat_get_element(mat: node->sched, row, col: 1 + j, v: &v) < 0) |
| 3034 | goto error; |
| 3035 | aff = isl_aff_set_coefficient(aff, type: isl_dim_param, pos: j, v); |
| 3036 | } |
| 3037 | for (j = 0; j < node->nvar; ++j) { |
| 3038 | if (isl_mat_get_element(mat: node->sched, row, |
| 3039 | col: 1 + node->nparam + j, v: &v) < 0) |
| 3040 | goto error; |
| 3041 | aff = isl_aff_set_coefficient(aff, type: isl_dim_in, pos: j, v); |
| 3042 | } |
| 3043 | |
| 3044 | isl_int_clear(v); |
| 3045 | |
| 3046 | return aff; |
| 3047 | error: |
| 3048 | isl_int_clear(v); |
| 3049 | isl_aff_free(aff); |
| 3050 | return NULL; |
| 3051 | } |
| 3052 | |
| 3053 | /* Convert the "n" rows starting at "first" of node->sched into a multi_aff |
| 3054 | * and return this multi_aff. |
| 3055 | * |
| 3056 | * The result is defined over the uncompressed node domain. |
| 3057 | */ |
| 3058 | __isl_give isl_multi_aff *( |
| 3059 | struct isl_sched_node *node, int first, int n) |
| 3060 | { |
| 3061 | int i; |
| 3062 | isl_space *space; |
| 3063 | isl_local_space *ls; |
| 3064 | isl_aff *aff; |
| 3065 | isl_multi_aff *ma; |
| 3066 | isl_size nrow; |
| 3067 | |
| 3068 | if (!node) |
| 3069 | return NULL; |
| 3070 | nrow = isl_mat_rows(mat: node->sched); |
| 3071 | if (nrow < 0) |
| 3072 | return NULL; |
| 3073 | if (node->compressed) |
| 3074 | space = isl_pw_multi_aff_get_domain_space(pma: node->decompress); |
| 3075 | else |
| 3076 | space = isl_space_copy(space: node->space); |
| 3077 | ls = isl_local_space_from_space(space: isl_space_copy(space)); |
| 3078 | space = isl_space_from_domain(space); |
| 3079 | space = isl_space_add_dims(space, type: isl_dim_out, n); |
| 3080 | ma = isl_multi_aff_zero(space); |
| 3081 | |
| 3082 | for (i = first; i < first + n; ++i) { |
| 3083 | aff = extract_schedule_row(ls: isl_local_space_copy(ls), node, row: i); |
| 3084 | ma = isl_multi_aff_set_aff(multi: ma, pos: i - first, el: aff); |
| 3085 | } |
| 3086 | |
| 3087 | isl_local_space_free(ls); |
| 3088 | |
| 3089 | if (node->compressed) |
| 3090 | ma = isl_multi_aff_pullback_multi_aff(ma1: ma, |
| 3091 | ma2: isl_multi_aff_copy(multi: node->compress)); |
| 3092 | |
| 3093 | return ma; |
| 3094 | } |
| 3095 | |
| 3096 | /* Convert node->sched into a multi_aff and return this multi_aff. |
| 3097 | * |
| 3098 | * The result is defined over the uncompressed node domain. |
| 3099 | */ |
| 3100 | static __isl_give isl_multi_aff *( |
| 3101 | struct isl_sched_node *node) |
| 3102 | { |
| 3103 | isl_size nrow; |
| 3104 | |
| 3105 | nrow = isl_mat_rows(mat: node->sched); |
| 3106 | if (nrow < 0) |
| 3107 | return NULL; |
| 3108 | return isl_sched_node_extract_partial_schedule_multi_aff(node, first: 0, n: nrow); |
| 3109 | } |
| 3110 | |
| 3111 | /* Convert node->sched into a map and return this map. |
| 3112 | * |
| 3113 | * The result is cached in node->sched_map, which needs to be released |
| 3114 | * whenever node->sched is updated. |
| 3115 | * It is defined over the uncompressed node domain. |
| 3116 | */ |
| 3117 | static __isl_give isl_map *(struct isl_sched_node *node) |
| 3118 | { |
| 3119 | if (!node->sched_map) { |
| 3120 | isl_multi_aff *ma; |
| 3121 | |
| 3122 | ma = node_extract_schedule_multi_aff(node); |
| 3123 | node->sched_map = isl_map_from_multi_aff(maff: ma); |
| 3124 | } |
| 3125 | |
| 3126 | return isl_map_copy(map: node->sched_map); |
| 3127 | } |
| 3128 | |
| 3129 | /* Construct a map that can be used to update a dependence relation |
| 3130 | * based on the current schedule. |
| 3131 | * That is, construct a map expressing that source and sink |
| 3132 | * are executed within the same iteration of the current schedule. |
| 3133 | * This map can then be intersected with the dependence relation. |
| 3134 | * This is not the most efficient way, but this shouldn't be a critical |
| 3135 | * operation. |
| 3136 | */ |
| 3137 | static __isl_give isl_map *specializer(struct isl_sched_node *src, |
| 3138 | struct isl_sched_node *dst) |
| 3139 | { |
| 3140 | isl_map *src_sched, *dst_sched; |
| 3141 | |
| 3142 | src_sched = node_extract_schedule(node: src); |
| 3143 | dst_sched = node_extract_schedule(node: dst); |
| 3144 | return isl_map_apply_range(map1: src_sched, map2: isl_map_reverse(map: dst_sched)); |
| 3145 | } |
| 3146 | |
| 3147 | /* Intersect the domains of the nested relations in domain and range |
| 3148 | * of "umap" with "map". |
| 3149 | */ |
| 3150 | static __isl_give isl_union_map *intersect_domains( |
| 3151 | __isl_take isl_union_map *umap, __isl_keep isl_map *map) |
| 3152 | { |
| 3153 | isl_union_set *uset; |
| 3154 | |
| 3155 | umap = isl_union_map_zip(umap); |
| 3156 | uset = isl_union_set_from_set(set: isl_map_wrap(map: isl_map_copy(map))); |
| 3157 | umap = isl_union_map_intersect_domain(umap, uset); |
| 3158 | umap = isl_union_map_zip(umap); |
| 3159 | return umap; |
| 3160 | } |
| 3161 | |
| 3162 | /* Update the dependence relation of the given edge based |
| 3163 | * on the current schedule. |
| 3164 | * If the dependence is carried completely by the current schedule, then |
| 3165 | * it is removed from the edge_tables. It is kept in the list of edges |
| 3166 | * as otherwise all edge_tables would have to be recomputed. |
| 3167 | * |
| 3168 | * If the edge is of a type that can appear multiple times |
| 3169 | * between the same pair of nodes, then it is added to |
| 3170 | * the edge table (again). This prevents the situation |
| 3171 | * where none of these edges is referenced from the edge table |
| 3172 | * because the one that was referenced turned out to be empty and |
| 3173 | * was therefore removed from the table. |
| 3174 | */ |
| 3175 | static isl_stat update_edge(isl_ctx *ctx, struct isl_sched_graph *graph, |
| 3176 | struct isl_sched_edge *edge) |
| 3177 | { |
| 3178 | int empty; |
| 3179 | isl_map *id; |
| 3180 | |
| 3181 | id = specializer(src: edge->src, dst: edge->dst); |
| 3182 | edge->map = isl_map_intersect(map1: edge->map, map2: isl_map_copy(map: id)); |
| 3183 | if (!edge->map) |
| 3184 | goto error; |
| 3185 | |
| 3186 | if (edge->tagged_condition) { |
| 3187 | edge->tagged_condition = |
| 3188 | intersect_domains(umap: edge->tagged_condition, map: id); |
| 3189 | if (!edge->tagged_condition) |
| 3190 | goto error; |
| 3191 | } |
| 3192 | if (edge->tagged_validity) { |
| 3193 | edge->tagged_validity = |
| 3194 | intersect_domains(umap: edge->tagged_validity, map: id); |
| 3195 | if (!edge->tagged_validity) |
| 3196 | goto error; |
| 3197 | } |
| 3198 | |
| 3199 | empty = isl_map_plain_is_empty(map: edge->map); |
| 3200 | if (empty < 0) |
| 3201 | goto error; |
| 3202 | if (empty) { |
| 3203 | if (graph_remove_edge(graph, edge) < 0) |
| 3204 | goto error; |
| 3205 | } else if (is_multi_edge_type(edge)) { |
| 3206 | if (graph_edge_tables_add(ctx, graph, edge) < 0) |
| 3207 | goto error; |
| 3208 | } |
| 3209 | |
| 3210 | isl_map_free(map: id); |
| 3211 | return isl_stat_ok; |
| 3212 | error: |
| 3213 | isl_map_free(map: id); |
| 3214 | return isl_stat_error; |
| 3215 | } |
| 3216 | |
| 3217 | /* Does the domain of "umap" intersect "uset"? |
| 3218 | */ |
| 3219 | static int domain_intersects(__isl_keep isl_union_map *umap, |
| 3220 | __isl_keep isl_union_set *uset) |
| 3221 | { |
| 3222 | int empty; |
| 3223 | |
| 3224 | umap = isl_union_map_copy(umap); |
| 3225 | umap = isl_union_map_intersect_domain(umap, uset: isl_union_set_copy(uset)); |
| 3226 | empty = isl_union_map_is_empty(umap); |
| 3227 | isl_union_map_free(umap); |
| 3228 | |
| 3229 | return empty < 0 ? -1 : !empty; |
| 3230 | } |
| 3231 | |
| 3232 | /* Does the range of "umap" intersect "uset"? |
| 3233 | */ |
| 3234 | static int range_intersects(__isl_keep isl_union_map *umap, |
| 3235 | __isl_keep isl_union_set *uset) |
| 3236 | { |
| 3237 | int empty; |
| 3238 | |
| 3239 | umap = isl_union_map_copy(umap); |
| 3240 | umap = isl_union_map_intersect_range(umap, uset: isl_union_set_copy(uset)); |
| 3241 | empty = isl_union_map_is_empty(umap); |
| 3242 | isl_union_map_free(umap); |
| 3243 | |
| 3244 | return empty < 0 ? -1 : !empty; |
| 3245 | } |
| 3246 | |
| 3247 | /* Are the condition dependences of "edge" local with respect to |
| 3248 | * the current schedule? |
| 3249 | * |
| 3250 | * That is, are domain and range of the condition dependences mapped |
| 3251 | * to the same point? |
| 3252 | * |
| 3253 | * In other words, is the condition false? |
| 3254 | */ |
| 3255 | static int is_condition_false(struct isl_sched_edge *edge) |
| 3256 | { |
| 3257 | isl_union_map *umap; |
| 3258 | isl_map *map, *sched, *test; |
| 3259 | int empty, local; |
| 3260 | |
| 3261 | empty = isl_union_map_is_empty(umap: edge->tagged_condition); |
| 3262 | if (empty < 0 || empty) |
| 3263 | return empty; |
| 3264 | |
| 3265 | umap = isl_union_map_copy(umap: edge->tagged_condition); |
| 3266 | umap = isl_union_map_zip(umap); |
| 3267 | umap = isl_union_set_unwrap(uset: isl_union_map_domain(umap)); |
| 3268 | map = isl_map_from_union_map(umap); |
| 3269 | |
| 3270 | sched = node_extract_schedule(node: edge->src); |
| 3271 | map = isl_map_apply_domain(map1: map, map2: sched); |
| 3272 | sched = node_extract_schedule(node: edge->dst); |
| 3273 | map = isl_map_apply_range(map1: map, map2: sched); |
| 3274 | |
| 3275 | test = isl_map_identity(space: isl_map_get_space(map)); |
| 3276 | local = isl_map_is_subset(map1: map, map2: test); |
| 3277 | isl_map_free(map); |
| 3278 | isl_map_free(map: test); |
| 3279 | |
| 3280 | return local; |
| 3281 | } |
| 3282 | |
| 3283 | /* For each conditional validity constraint that is adjacent |
| 3284 | * to a condition with domain in condition_source or range in condition_sink, |
| 3285 | * turn it into an unconditional validity constraint. |
| 3286 | */ |
| 3287 | static int unconditionalize_adjacent_validity(struct isl_sched_graph *graph, |
| 3288 | __isl_take isl_union_set *condition_source, |
| 3289 | __isl_take isl_union_set *condition_sink) |
| 3290 | { |
| 3291 | int i; |
| 3292 | |
| 3293 | condition_source = isl_union_set_coalesce(uset: condition_source); |
| 3294 | condition_sink = isl_union_set_coalesce(uset: condition_sink); |
| 3295 | |
| 3296 | for (i = 0; i < graph->n_edge; ++i) { |
| 3297 | int adjacent; |
| 3298 | isl_union_map *validity; |
| 3299 | |
| 3300 | if (!isl_sched_edge_is_conditional_validity(edge: &graph->edge[i])) |
| 3301 | continue; |
| 3302 | if (is_validity(edge: &graph->edge[i])) |
| 3303 | continue; |
| 3304 | |
| 3305 | validity = graph->edge[i].tagged_validity; |
| 3306 | adjacent = domain_intersects(umap: validity, uset: condition_sink); |
| 3307 | if (adjacent >= 0 && !adjacent) |
| 3308 | adjacent = range_intersects(umap: validity, uset: condition_source); |
| 3309 | if (adjacent < 0) |
| 3310 | goto error; |
| 3311 | if (!adjacent) |
| 3312 | continue; |
| 3313 | |
| 3314 | set_validity(&graph->edge[i]); |
| 3315 | } |
| 3316 | |
| 3317 | isl_union_set_free(uset: condition_source); |
| 3318 | isl_union_set_free(uset: condition_sink); |
| 3319 | return 0; |
| 3320 | error: |
| 3321 | isl_union_set_free(uset: condition_source); |
| 3322 | isl_union_set_free(uset: condition_sink); |
| 3323 | return -1; |
| 3324 | } |
| 3325 | |
| 3326 | /* Update the dependence relations of all edges based on the current schedule |
| 3327 | * and enforce conditional validity constraints that are adjacent |
| 3328 | * to satisfied condition constraints. |
| 3329 | * |
| 3330 | * First check if any of the condition constraints are satisfied |
| 3331 | * (i.e., not local to the outer schedule) and keep track of |
| 3332 | * their domain and range. |
| 3333 | * Then update all dependence relations (which removes the non-local |
| 3334 | * constraints). |
| 3335 | * Finally, if any condition constraints turned out to be satisfied, |
| 3336 | * then turn all adjacent conditional validity constraints into |
| 3337 | * unconditional validity constraints. |
| 3338 | */ |
| 3339 | static int update_edges(isl_ctx *ctx, struct isl_sched_graph *graph) |
| 3340 | { |
| 3341 | int i; |
| 3342 | int any = 0; |
| 3343 | isl_union_set *source, *sink; |
| 3344 | |
| 3345 | source = isl_union_set_empty(space: isl_space_params_alloc(ctx, nparam: 0)); |
| 3346 | sink = isl_union_set_empty(space: isl_space_params_alloc(ctx, nparam: 0)); |
| 3347 | for (i = 0; i < graph->n_edge; ++i) { |
| 3348 | int local; |
| 3349 | isl_union_set *uset; |
| 3350 | isl_union_map *umap; |
| 3351 | |
| 3352 | if (!isl_sched_edge_is_condition(edge: &graph->edge[i])) |
| 3353 | continue; |
| 3354 | if (is_local(edge: &graph->edge[i])) |
| 3355 | continue; |
| 3356 | local = is_condition_false(edge: &graph->edge[i]); |
| 3357 | if (local < 0) |
| 3358 | goto error; |
| 3359 | if (local) |
| 3360 | continue; |
| 3361 | |
| 3362 | any = 1; |
| 3363 | |
| 3364 | umap = isl_union_map_copy(umap: graph->edge[i].tagged_condition); |
| 3365 | uset = isl_union_map_domain(umap); |
| 3366 | source = isl_union_set_union(uset1: source, uset2: uset); |
| 3367 | |
| 3368 | umap = isl_union_map_copy(umap: graph->edge[i].tagged_condition); |
| 3369 | uset = isl_union_map_range(umap); |
| 3370 | sink = isl_union_set_union(uset1: sink, uset2: uset); |
| 3371 | } |
| 3372 | |
| 3373 | for (i = 0; i < graph->n_edge; ++i) { |
| 3374 | if (update_edge(ctx, graph, edge: &graph->edge[i]) < 0) |
| 3375 | goto error; |
| 3376 | } |
| 3377 | |
| 3378 | if (any) |
| 3379 | return unconditionalize_adjacent_validity(graph, condition_source: source, condition_sink: sink); |
| 3380 | |
| 3381 | isl_union_set_free(uset: source); |
| 3382 | isl_union_set_free(uset: sink); |
| 3383 | return 0; |
| 3384 | error: |
| 3385 | isl_union_set_free(uset: source); |
| 3386 | isl_union_set_free(uset: sink); |
| 3387 | return -1; |
| 3388 | } |
| 3389 | |
| 3390 | static void next_band(struct isl_sched_graph *graph) |
| 3391 | { |
| 3392 | graph->band_start = graph->n_total_row; |
| 3393 | } |
| 3394 | |
| 3395 | /* Return the union of the universe domains of the nodes in "graph" |
| 3396 | * that satisfy "pred". |
| 3397 | */ |
| 3398 | static __isl_give isl_union_set *isl_sched_graph_domain(isl_ctx *ctx, |
| 3399 | struct isl_sched_graph *graph, |
| 3400 | int (*pred)(struct isl_sched_node *node, int data), int data) |
| 3401 | { |
| 3402 | int i; |
| 3403 | isl_set *set; |
| 3404 | isl_union_set *dom; |
| 3405 | |
| 3406 | for (i = 0; i < graph->n; ++i) |
| 3407 | if (pred(&graph->node[i], data)) |
| 3408 | break; |
| 3409 | |
| 3410 | if (i >= graph->n) |
| 3411 | isl_die(ctx, isl_error_internal, |
| 3412 | "empty component" , return NULL); |
| 3413 | |
| 3414 | set = isl_set_universe(space: isl_space_copy(space: graph->node[i].space)); |
| 3415 | dom = isl_union_set_from_set(set); |
| 3416 | |
| 3417 | for (i = i + 1; i < graph->n; ++i) { |
| 3418 | if (!pred(&graph->node[i], data)) |
| 3419 | continue; |
| 3420 | set = isl_set_universe(space: isl_space_copy(space: graph->node[i].space)); |
| 3421 | dom = isl_union_set_union(uset1: dom, uset2: isl_union_set_from_set(set)); |
| 3422 | } |
| 3423 | |
| 3424 | return dom; |
| 3425 | } |
| 3426 | |
| 3427 | /* Return a union of universe domains corresponding to the nodes |
| 3428 | * in the SCC with index "scc". |
| 3429 | */ |
| 3430 | __isl_give isl_union_set *(isl_ctx *ctx, |
| 3431 | struct isl_sched_graph *graph, int scc) |
| 3432 | { |
| 3433 | return isl_sched_graph_domain(ctx, graph, |
| 3434 | pred: &isl_sched_node_scc_exactly, data: scc); |
| 3435 | } |
| 3436 | |
| 3437 | /* Return a list of unions of universe domains, where each element |
| 3438 | * in the list corresponds to an SCC (or WCC) indexed by node->scc. |
| 3439 | */ |
| 3440 | __isl_give isl_union_set_list *(isl_ctx *ctx, |
| 3441 | struct isl_sched_graph *graph) |
| 3442 | { |
| 3443 | int i; |
| 3444 | isl_union_set_list *filters; |
| 3445 | |
| 3446 | filters = isl_union_set_list_alloc(ctx, n: graph->scc); |
| 3447 | for (i = 0; i < graph->scc; ++i) { |
| 3448 | isl_union_set *dom; |
| 3449 | |
| 3450 | dom = isl_sched_graph_extract_scc(ctx, graph, scc: i); |
| 3451 | filters = isl_union_set_list_add(list: filters, el: dom); |
| 3452 | } |
| 3453 | |
| 3454 | return filters; |
| 3455 | } |
| 3456 | |
| 3457 | /* Return a list of two unions of universe domains, one for the SCCs up |
| 3458 | * to and including graph->src_scc and another for the other SCCs. |
| 3459 | */ |
| 3460 | static __isl_give isl_union_set_list *(isl_ctx *ctx, |
| 3461 | struct isl_sched_graph *graph) |
| 3462 | { |
| 3463 | isl_union_set *dom; |
| 3464 | isl_union_set_list *filters; |
| 3465 | |
| 3466 | filters = isl_union_set_list_alloc(ctx, n: 2); |
| 3467 | dom = isl_sched_graph_domain(ctx, graph, |
| 3468 | pred: &node_scc_at_most, data: graph->src_scc); |
| 3469 | filters = isl_union_set_list_add(list: filters, el: dom); |
| 3470 | dom = isl_sched_graph_domain(ctx, graph, |
| 3471 | pred: &node_scc_at_least, data: graph->src_scc + 1); |
| 3472 | filters = isl_union_set_list_add(list: filters, el: dom); |
| 3473 | |
| 3474 | return filters; |
| 3475 | } |
| 3476 | |
| 3477 | /* Copy nodes that satisfy node_pred from the src dependence graph |
| 3478 | * to the dst dependence graph. |
| 3479 | */ |
| 3480 | static isl_stat copy_nodes(struct isl_sched_graph *dst, |
| 3481 | struct isl_sched_graph *src, |
| 3482 | int (*node_pred)(struct isl_sched_node *node, int data), int data) |
| 3483 | { |
| 3484 | int i; |
| 3485 | |
| 3486 | dst->n = 0; |
| 3487 | for (i = 0; i < src->n; ++i) { |
| 3488 | int j; |
| 3489 | |
| 3490 | if (!node_pred(&src->node[i], data)) |
| 3491 | continue; |
| 3492 | |
| 3493 | j = dst->n; |
| 3494 | dst->node[j].space = isl_space_copy(space: src->node[i].space); |
| 3495 | dst->node[j].compressed = src->node[i].compressed; |
| 3496 | dst->node[j].hull = isl_set_copy(set: src->node[i].hull); |
| 3497 | dst->node[j].compress = |
| 3498 | isl_multi_aff_copy(multi: src->node[i].compress); |
| 3499 | dst->node[j].decompress = |
| 3500 | isl_pw_multi_aff_copy(pma: src->node[i].decompress); |
| 3501 | dst->node[j].nvar = src->node[i].nvar; |
| 3502 | dst->node[j].nparam = src->node[i].nparam; |
| 3503 | dst->node[j].sched = isl_mat_copy(mat: src->node[i].sched); |
| 3504 | dst->node[j].sched_map = isl_map_copy(map: src->node[i].sched_map); |
| 3505 | dst->node[j].coincident = src->node[i].coincident; |
| 3506 | dst->node[j].sizes = isl_multi_val_copy(multi: src->node[i].sizes); |
| 3507 | dst->node[j].bounds = isl_basic_set_copy(bset: src->node[i].bounds); |
| 3508 | dst->node[j].max = isl_vec_copy(vec: src->node[i].max); |
| 3509 | dst->n++; |
| 3510 | |
| 3511 | if (!dst->node[j].space || !dst->node[j].sched) |
| 3512 | return isl_stat_error; |
| 3513 | if (dst->node[j].compressed && |
| 3514 | (!dst->node[j].hull || !dst->node[j].compress || |
| 3515 | !dst->node[j].decompress)) |
| 3516 | return isl_stat_error; |
| 3517 | } |
| 3518 | |
| 3519 | return isl_stat_ok; |
| 3520 | } |
| 3521 | |
| 3522 | /* Copy non-empty edges that satisfy edge_pred from the src dependence graph |
| 3523 | * to the dst dependence graph. |
| 3524 | * If the source or destination node of the edge is not in the destination |
| 3525 | * graph, then it must be a backward proximity edge and it should simply |
| 3526 | * be ignored. |
| 3527 | */ |
| 3528 | static isl_stat copy_edges(isl_ctx *ctx, struct isl_sched_graph *dst, |
| 3529 | struct isl_sched_graph *src, |
| 3530 | int (*edge_pred)(struct isl_sched_edge *edge, int data), int data) |
| 3531 | { |
| 3532 | int i; |
| 3533 | |
| 3534 | dst->n_edge = 0; |
| 3535 | for (i = 0; i < src->n_edge; ++i) { |
| 3536 | struct isl_sched_edge *edge = &src->edge[i]; |
| 3537 | isl_map *map; |
| 3538 | isl_union_map *tagged_condition; |
| 3539 | isl_union_map *tagged_validity; |
| 3540 | struct isl_sched_node *dst_src, *dst_dst; |
| 3541 | |
| 3542 | if (!edge_pred(edge, data)) |
| 3543 | continue; |
| 3544 | |
| 3545 | if (isl_map_plain_is_empty(map: edge->map)) |
| 3546 | continue; |
| 3547 | |
| 3548 | dst_src = isl_sched_graph_find_node(ctx, graph: dst, space: edge->src->space); |
| 3549 | dst_dst = isl_sched_graph_find_node(ctx, graph: dst, space: edge->dst->space); |
| 3550 | if (!dst_src || !dst_dst) |
| 3551 | return isl_stat_error; |
| 3552 | if (!isl_sched_graph_is_node(graph: dst, node: dst_src) || |
| 3553 | !isl_sched_graph_is_node(graph: dst, node: dst_dst)) { |
| 3554 | if (is_validity(edge) || |
| 3555 | isl_sched_edge_is_conditional_validity(edge)) |
| 3556 | isl_die(ctx, isl_error_internal, |
| 3557 | "backward (conditional) validity edge" , |
| 3558 | return isl_stat_error); |
| 3559 | continue; |
| 3560 | } |
| 3561 | |
| 3562 | map = isl_map_copy(map: edge->map); |
| 3563 | tagged_condition = isl_union_map_copy(umap: edge->tagged_condition); |
| 3564 | tagged_validity = isl_union_map_copy(umap: edge->tagged_validity); |
| 3565 | |
| 3566 | dst->edge[dst->n_edge].src = dst_src; |
| 3567 | dst->edge[dst->n_edge].dst = dst_dst; |
| 3568 | dst->edge[dst->n_edge].map = map; |
| 3569 | dst->edge[dst->n_edge].tagged_condition = tagged_condition; |
| 3570 | dst->edge[dst->n_edge].tagged_validity = tagged_validity; |
| 3571 | dst->edge[dst->n_edge].types = edge->types; |
| 3572 | dst->n_edge++; |
| 3573 | |
| 3574 | if (edge->tagged_condition && !tagged_condition) |
| 3575 | return isl_stat_error; |
| 3576 | if (edge->tagged_validity && !tagged_validity) |
| 3577 | return isl_stat_error; |
| 3578 | |
| 3579 | if (graph_edge_tables_add(ctx, graph: dst, |
| 3580 | edge: &dst->edge[dst->n_edge - 1]) < 0) |
| 3581 | return isl_stat_error; |
| 3582 | } |
| 3583 | |
| 3584 | return isl_stat_ok; |
| 3585 | } |
| 3586 | |
| 3587 | /* Compute the maximal number of variables over all nodes. |
| 3588 | * This is the maximal number of linearly independent schedule |
| 3589 | * rows that we need to compute. |
| 3590 | * Just in case we end up in a part of the dependence graph |
| 3591 | * with only lower-dimensional domains, we make sure we will |
| 3592 | * compute the required amount of extra linearly independent rows. |
| 3593 | */ |
| 3594 | isl_stat isl_sched_graph_compute_maxvar(struct isl_sched_graph *graph) |
| 3595 | { |
| 3596 | int i; |
| 3597 | |
| 3598 | graph->maxvar = 0; |
| 3599 | for (i = 0; i < graph->n; ++i) { |
| 3600 | struct isl_sched_node *node = &graph->node[i]; |
| 3601 | int nvar; |
| 3602 | |
| 3603 | if (isl_sched_node_update_vmap(node) < 0) |
| 3604 | return isl_stat_error; |
| 3605 | nvar = node->nvar + graph->n_row - node->rank; |
| 3606 | if (nvar > graph->maxvar) |
| 3607 | graph->maxvar = nvar; |
| 3608 | } |
| 3609 | |
| 3610 | return isl_stat_ok; |
| 3611 | } |
| 3612 | |
| 3613 | /* Extract the subgraph of "graph" that consists of the nodes satisfying |
| 3614 | * "node_pred" and the edges satisfying "edge_pred" and store |
| 3615 | * the result in "sub". |
| 3616 | */ |
| 3617 | isl_stat (isl_ctx *ctx, |
| 3618 | struct isl_sched_graph *graph, |
| 3619 | int (*node_pred)(struct isl_sched_node *node, int data), |
| 3620 | int (*edge_pred)(struct isl_sched_edge *edge, int data), |
| 3621 | int data, struct isl_sched_graph *sub) |
| 3622 | { |
| 3623 | int i, n = 0, n_edge = 0; |
| 3624 | int t; |
| 3625 | |
| 3626 | for (i = 0; i < graph->n; ++i) |
| 3627 | if (node_pred(&graph->node[i], data)) |
| 3628 | ++n; |
| 3629 | for (i = 0; i < graph->n_edge; ++i) |
| 3630 | if (edge_pred(&graph->edge[i], data)) |
| 3631 | ++n_edge; |
| 3632 | if (graph_alloc(ctx, graph: sub, n_node: n, n_edge) < 0) |
| 3633 | return isl_stat_error; |
| 3634 | sub->root = graph->root; |
| 3635 | if (copy_nodes(dst: sub, src: graph, node_pred, data) < 0) |
| 3636 | return isl_stat_error; |
| 3637 | if (graph_init_table(ctx, graph: sub) < 0) |
| 3638 | return isl_stat_error; |
| 3639 | for (t = 0; t <= isl_edge_last; ++t) |
| 3640 | sub->max_edge[t] = graph->max_edge[t]; |
| 3641 | if (graph_init_edge_tables(ctx, graph: sub) < 0) |
| 3642 | return isl_stat_error; |
| 3643 | if (copy_edges(ctx, dst: sub, src: graph, edge_pred, data) < 0) |
| 3644 | return isl_stat_error; |
| 3645 | sub->n_row = graph->n_row; |
| 3646 | sub->max_row = graph->max_row; |
| 3647 | sub->n_total_row = graph->n_total_row; |
| 3648 | sub->band_start = graph->band_start; |
| 3649 | |
| 3650 | return isl_stat_ok; |
| 3651 | } |
| 3652 | |
| 3653 | static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node, |
| 3654 | struct isl_sched_graph *graph); |
| 3655 | static __isl_give isl_schedule_node *compute_schedule_wcc( |
| 3656 | isl_schedule_node *node, struct isl_sched_graph *graph); |
| 3657 | |
| 3658 | /* Compute a schedule for a subgraph of "graph". In particular, for |
| 3659 | * the graph composed of nodes that satisfy node_pred and edges that |
| 3660 | * that satisfy edge_pred. |
| 3661 | * If the subgraph is known to consist of a single component, then wcc should |
| 3662 | * be set and then we call compute_schedule_wcc on the constructed subgraph. |
| 3663 | * Otherwise, we call compute_schedule, which will check whether the subgraph |
| 3664 | * is connected. |
| 3665 | * |
| 3666 | * The schedule is inserted at "node" and the updated schedule node |
| 3667 | * is returned. |
| 3668 | */ |
| 3669 | static __isl_give isl_schedule_node *compute_sub_schedule( |
| 3670 | __isl_take isl_schedule_node *node, isl_ctx *ctx, |
| 3671 | struct isl_sched_graph *graph, |
| 3672 | int (*node_pred)(struct isl_sched_node *node, int data), |
| 3673 | int (*edge_pred)(struct isl_sched_edge *edge, int data), |
| 3674 | int data, int wcc) |
| 3675 | { |
| 3676 | struct isl_sched_graph split = { 0 }; |
| 3677 | |
| 3678 | if (isl_sched_graph_extract_sub_graph(ctx, graph, node_pred, edge_pred, |
| 3679 | data, sub: &split) < 0) |
| 3680 | goto error; |
| 3681 | |
| 3682 | if (wcc) |
| 3683 | node = compute_schedule_wcc(node, graph: &split); |
| 3684 | else |
| 3685 | node = compute_schedule(node, graph: &split); |
| 3686 | |
| 3687 | isl_sched_graph_free(ctx, graph: &split); |
| 3688 | return node; |
| 3689 | error: |
| 3690 | isl_sched_graph_free(ctx, graph: &split); |
| 3691 | return isl_schedule_node_free(node); |
| 3692 | } |
| 3693 | |
| 3694 | int isl_sched_edge_scc_exactly(struct isl_sched_edge *edge, int scc) |
| 3695 | { |
| 3696 | return edge->src->scc == scc && edge->dst->scc == scc; |
| 3697 | } |
| 3698 | |
| 3699 | static int edge_dst_scc_at_most(struct isl_sched_edge *edge, int scc) |
| 3700 | { |
| 3701 | return edge->dst->scc <= scc; |
| 3702 | } |
| 3703 | |
| 3704 | static int edge_src_scc_at_least(struct isl_sched_edge *edge, int scc) |
| 3705 | { |
| 3706 | return edge->src->scc >= scc; |
| 3707 | } |
| 3708 | |
| 3709 | /* Reset the current band by dropping all its schedule rows. |
| 3710 | */ |
| 3711 | static isl_stat reset_band(struct isl_sched_graph *graph) |
| 3712 | { |
| 3713 | int i; |
| 3714 | int drop; |
| 3715 | |
| 3716 | drop = graph->n_total_row - graph->band_start; |
| 3717 | graph->n_total_row -= drop; |
| 3718 | graph->n_row -= drop; |
| 3719 | |
| 3720 | for (i = 0; i < graph->n; ++i) { |
| 3721 | struct isl_sched_node *node = &graph->node[i]; |
| 3722 | |
| 3723 | isl_map_free(map: node->sched_map); |
| 3724 | node->sched_map = NULL; |
| 3725 | |
| 3726 | node->sched = isl_mat_drop_rows(mat: node->sched, |
| 3727 | row: graph->band_start, n: drop); |
| 3728 | |
| 3729 | if (!node->sched) |
| 3730 | return isl_stat_error; |
| 3731 | } |
| 3732 | |
| 3733 | return isl_stat_ok; |
| 3734 | } |
| 3735 | |
| 3736 | /* Split the current graph into two parts and compute a schedule for each |
| 3737 | * part individually. In particular, one part consists of all SCCs up |
| 3738 | * to and including graph->src_scc, while the other part contains the other |
| 3739 | * SCCs. The split is enforced by a sequence node inserted at position "node" |
| 3740 | * in the schedule tree. Return the updated schedule node. |
| 3741 | * If either of these two parts consists of a sequence, then it is spliced |
| 3742 | * into the sequence containing the two parts. |
| 3743 | * |
| 3744 | * The current band is reset. It would be possible to reuse |
| 3745 | * the previously computed rows as the first rows in the next |
| 3746 | * band, but recomputing them may result in better rows as we are looking |
| 3747 | * at a smaller part of the dependence graph. |
| 3748 | */ |
| 3749 | static __isl_give isl_schedule_node *compute_split_schedule( |
| 3750 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
| 3751 | { |
| 3752 | isl_ctx *ctx; |
| 3753 | isl_union_set_list *filters; |
| 3754 | |
| 3755 | if (!node) |
| 3756 | return NULL; |
| 3757 | |
| 3758 | if (reset_band(graph) < 0) |
| 3759 | return isl_schedule_node_free(node); |
| 3760 | |
| 3761 | next_band(graph); |
| 3762 | |
| 3763 | ctx = isl_schedule_node_get_ctx(node); |
| 3764 | filters = extract_split(ctx, graph); |
| 3765 | node = isl_schedule_node_insert_sequence(node, filters); |
| 3766 | node = isl_schedule_node_grandchild(node, pos1: 1, pos2: 0); |
| 3767 | |
| 3768 | node = compute_sub_schedule(node, ctx, graph, |
| 3769 | node_pred: &node_scc_at_least, edge_pred: &edge_src_scc_at_least, |
| 3770 | data: graph->src_scc + 1, wcc: 0); |
| 3771 | node = isl_schedule_node_grandparent(node); |
| 3772 | node = isl_schedule_node_grandchild(node, pos1: 0, pos2: 0); |
| 3773 | node = compute_sub_schedule(node, ctx, graph, |
| 3774 | node_pred: &node_scc_at_most, edge_pred: &edge_dst_scc_at_most, |
| 3775 | data: graph->src_scc, wcc: 0); |
| 3776 | node = isl_schedule_node_grandparent(node); |
| 3777 | |
| 3778 | node = isl_schedule_node_sequence_splice_children(node); |
| 3779 | |
| 3780 | return node; |
| 3781 | } |
| 3782 | |
| 3783 | /* Insert a band node at position "node" in the schedule tree corresponding |
| 3784 | * to the current band in "graph". Mark the band node permutable |
| 3785 | * if "permutable" is set. |
| 3786 | * The partial schedules and the coincidence property are extracted |
| 3787 | * from the graph nodes. |
| 3788 | * Return the updated schedule node. |
| 3789 | */ |
| 3790 | static __isl_give isl_schedule_node *insert_current_band( |
| 3791 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
| 3792 | int permutable) |
| 3793 | { |
| 3794 | int i; |
| 3795 | int start, end, n; |
| 3796 | isl_multi_aff *ma; |
| 3797 | isl_multi_pw_aff *mpa; |
| 3798 | isl_multi_union_pw_aff *mupa; |
| 3799 | |
| 3800 | if (!node) |
| 3801 | return NULL; |
| 3802 | |
| 3803 | if (graph->n < 1) |
| 3804 | isl_die(isl_schedule_node_get_ctx(node), isl_error_internal, |
| 3805 | "graph should have at least one node" , |
| 3806 | return isl_schedule_node_free(node)); |
| 3807 | |
| 3808 | start = graph->band_start; |
| 3809 | end = graph->n_total_row; |
| 3810 | n = end - start; |
| 3811 | |
| 3812 | ma = isl_sched_node_extract_partial_schedule_multi_aff(node: &graph->node[0], |
| 3813 | first: start, n); |
| 3814 | mpa = isl_multi_pw_aff_from_multi_aff(ma); |
| 3815 | mupa = isl_multi_union_pw_aff_from_multi_pw_aff(mpa); |
| 3816 | |
| 3817 | for (i = 1; i < graph->n; ++i) { |
| 3818 | isl_multi_union_pw_aff *mupa_i; |
| 3819 | |
| 3820 | ma = isl_sched_node_extract_partial_schedule_multi_aff( |
| 3821 | node: &graph->node[i], first: start, n); |
| 3822 | mpa = isl_multi_pw_aff_from_multi_aff(ma); |
| 3823 | mupa_i = isl_multi_union_pw_aff_from_multi_pw_aff(mpa); |
| 3824 | mupa = isl_multi_union_pw_aff_union_add(mupa1: mupa, mupa2: mupa_i); |
| 3825 | } |
| 3826 | node = isl_schedule_node_insert_partial_schedule(node, schedule: mupa); |
| 3827 | |
| 3828 | for (i = 0; i < n; ++i) |
| 3829 | node = isl_schedule_node_band_member_set_coincident(node, pos: i, |
| 3830 | coincident: graph->node[0].coincident[start + i]); |
| 3831 | node = isl_schedule_node_band_set_permutable(node, permutable); |
| 3832 | |
| 3833 | return node; |
| 3834 | } |
| 3835 | |
| 3836 | /* Update the dependence relations based on the current schedule, |
| 3837 | * add the current band to "node" and then continue with the computation |
| 3838 | * of the next band. |
| 3839 | * Return the updated schedule node. |
| 3840 | */ |
| 3841 | static __isl_give isl_schedule_node *compute_next_band( |
| 3842 | __isl_take isl_schedule_node *node, |
| 3843 | struct isl_sched_graph *graph, int permutable) |
| 3844 | { |
| 3845 | isl_ctx *ctx; |
| 3846 | |
| 3847 | if (!node) |
| 3848 | return NULL; |
| 3849 | |
| 3850 | ctx = isl_schedule_node_get_ctx(node); |
| 3851 | if (update_edges(ctx, graph) < 0) |
| 3852 | return isl_schedule_node_free(node); |
| 3853 | node = insert_current_band(node, graph, permutable); |
| 3854 | next_band(graph); |
| 3855 | |
| 3856 | node = isl_schedule_node_child(node, pos: 0); |
| 3857 | node = compute_schedule(node, graph); |
| 3858 | node = isl_schedule_node_parent(node); |
| 3859 | |
| 3860 | return node; |
| 3861 | } |
| 3862 | |
| 3863 | /* Add the constraints "coef" derived from an edge from "node" to itself |
| 3864 | * to graph->lp in order to respect the dependences and to try and carry them. |
| 3865 | * "pos" is the sequence number of the edge that needs to be carried. |
| 3866 | * "coef" represents general constraints on coefficients (c_0, c_x) |
| 3867 | * of valid constraints for (y - x) with x and y instances of the node. |
| 3868 | * |
| 3869 | * The constraints added to graph->lp need to enforce |
| 3870 | * |
| 3871 | * (c_j_0 + c_j_x y) - (c_j_0 + c_j_x x) |
| 3872 | * = c_j_x (y - x) >= e_i |
| 3873 | * |
| 3874 | * for each (x,y) in the dependence relation of the edge. |
| 3875 | * That is, (-e_i, c_j_x) needs to be plugged in for (c_0, c_x), |
| 3876 | * taking into account that each coefficient in c_j_x is represented |
| 3877 | * as a pair of non-negative coefficients. |
| 3878 | */ |
| 3879 | static isl_stat add_intra_constraints(struct isl_sched_graph *graph, |
| 3880 | struct isl_sched_node *node, __isl_take isl_basic_set *coef, int pos) |
| 3881 | { |
| 3882 | isl_size offset; |
| 3883 | isl_ctx *ctx; |
| 3884 | isl_dim_map *dim_map; |
| 3885 | |
| 3886 | offset = coef_var_offset(coef); |
| 3887 | if (offset < 0) |
| 3888 | coef = isl_basic_set_free(bset: coef); |
| 3889 | if (!coef) |
| 3890 | return isl_stat_error; |
| 3891 | |
| 3892 | ctx = isl_basic_set_get_ctx(bset: coef); |
| 3893 | dim_map = intra_dim_map(ctx, graph, node, offset, s: 1); |
| 3894 | isl_dim_map_range(dim_map, dst_pos: 3 + pos, dst_stride: 0, src_pos: 0, src_stride: 0, n: 1, sign: -1); |
| 3895 | graph->lp = add_constraints_dim_map(dst: graph->lp, src: coef, dim_map); |
| 3896 | |
| 3897 | return isl_stat_ok; |
| 3898 | } |
| 3899 | |
| 3900 | /* Add the constraints "coef" derived from an edge from "src" to "dst" |
| 3901 | * to graph->lp in order to respect the dependences and to try and carry them. |
| 3902 | * "pos" is the sequence number of the edge that needs to be carried or |
| 3903 | * -1 if no attempt should be made to carry the dependences. |
| 3904 | * "coef" represents general constraints on coefficients (c_0, c_n, c_x, c_y) |
| 3905 | * of valid constraints for (x, y) with x and y instances of "src" and "dst". |
| 3906 | * |
| 3907 | * The constraints added to graph->lp need to enforce |
| 3908 | * |
| 3909 | * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= e_i |
| 3910 | * |
| 3911 | * for each (x,y) in the dependence relation of the edge or |
| 3912 | * |
| 3913 | * (c_k_0 + c_k_n n + c_k_x y) - (c_j_0 + c_j_n n + c_j_x x) >= 0 |
| 3914 | * |
| 3915 | * if pos is -1. |
| 3916 | * That is, |
| 3917 | * (-e_i + c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x) |
| 3918 | * or |
| 3919 | * (c_k_0 - c_j_0, c_k_n - c_j_n, -c_j_x, c_k_x) |
| 3920 | * needs to be plugged in for (c_0, c_n, c_x, c_y), |
| 3921 | * taking into account that each coefficient in c_j_x and c_k_x is represented |
| 3922 | * as a pair of non-negative coefficients. |
| 3923 | */ |
| 3924 | static isl_stat add_inter_constraints(struct isl_sched_graph *graph, |
| 3925 | struct isl_sched_node *src, struct isl_sched_node *dst, |
| 3926 | __isl_take isl_basic_set *coef, int pos) |
| 3927 | { |
| 3928 | isl_size offset; |
| 3929 | isl_ctx *ctx; |
| 3930 | isl_dim_map *dim_map; |
| 3931 | |
| 3932 | offset = coef_var_offset(coef); |
| 3933 | if (offset < 0) |
| 3934 | coef = isl_basic_set_free(bset: coef); |
| 3935 | if (!coef) |
| 3936 | return isl_stat_error; |
| 3937 | |
| 3938 | ctx = isl_basic_set_get_ctx(bset: coef); |
| 3939 | dim_map = inter_dim_map(ctx, graph, src, dst, offset, s: 1); |
| 3940 | if (pos >= 0) |
| 3941 | isl_dim_map_range(dim_map, dst_pos: 3 + pos, dst_stride: 0, src_pos: 0, src_stride: 0, n: 1, sign: -1); |
| 3942 | graph->lp = add_constraints_dim_map(dst: graph->lp, src: coef, dim_map); |
| 3943 | |
| 3944 | return isl_stat_ok; |
| 3945 | } |
| 3946 | |
| 3947 | /* Data structure for keeping track of the data needed |
| 3948 | * to exploit non-trivial lineality spaces. |
| 3949 | * |
| 3950 | * "any_non_trivial" is true if there are any non-trivial lineality spaces. |
| 3951 | * If "any_non_trivial" is not true, then "equivalent" and "mask" may be NULL. |
| 3952 | * "equivalent" connects instances to other instances on the same line(s). |
| 3953 | * "mask" contains the domain spaces of "equivalent". |
| 3954 | * Any instance set not in "mask" does not have a non-trivial lineality space. |
| 3955 | */ |
| 3956 | struct isl_exploit_lineality_data { |
| 3957 | isl_bool any_non_trivial; |
| 3958 | isl_union_map *equivalent; |
| 3959 | isl_union_set *mask; |
| 3960 | }; |
| 3961 | |
| 3962 | /* Data structure collecting information used during the construction |
| 3963 | * of an LP for carrying dependences. |
| 3964 | * |
| 3965 | * "intra" is a sequence of coefficient constraints for intra-node edges. |
| 3966 | * "inter" is a sequence of coefficient constraints for inter-node edges. |
| 3967 | * "lineality" contains data used to exploit non-trivial lineality spaces. |
| 3968 | */ |
| 3969 | struct isl_carry { |
| 3970 | isl_basic_set_list *intra; |
| 3971 | isl_basic_set_list *inter; |
| 3972 | struct isl_exploit_lineality_data lineality; |
| 3973 | }; |
| 3974 | |
| 3975 | /* Free all the data stored in "carry". |
| 3976 | */ |
| 3977 | static void isl_carry_clear(struct isl_carry *carry) |
| 3978 | { |
| 3979 | isl_basic_set_list_free(list: carry->intra); |
| 3980 | isl_basic_set_list_free(list: carry->inter); |
| 3981 | isl_union_map_free(umap: carry->lineality.equivalent); |
| 3982 | isl_union_set_free(uset: carry->lineality.mask); |
| 3983 | } |
| 3984 | |
| 3985 | /* Return a pointer to the node in "graph" that lives in "space". |
| 3986 | * If the requested node has been compressed, then "space" |
| 3987 | * corresponds to the compressed space. |
| 3988 | * The graph is assumed to have such a node. |
| 3989 | * Return NULL in case of error. |
| 3990 | * |
| 3991 | * First try and see if "space" is the space of an uncompressed node. |
| 3992 | * If so, return that node. |
| 3993 | * Otherwise, "space" was constructed by construct_compressed_id and |
| 3994 | * contains a user pointer pointing to the node in the tuple id. |
| 3995 | * However, this node belongs to the original dependence graph. |
| 3996 | * If "graph" is a subgraph of this original dependence graph, |
| 3997 | * then the node with the same space still needs to be looked up |
| 3998 | * in the current graph. |
| 3999 | */ |
| 4000 | static struct isl_sched_node *graph_find_compressed_node(isl_ctx *ctx, |
| 4001 | struct isl_sched_graph *graph, __isl_keep isl_space *space) |
| 4002 | { |
| 4003 | isl_id *id; |
| 4004 | struct isl_sched_node *node; |
| 4005 | |
| 4006 | if (!space) |
| 4007 | return NULL; |
| 4008 | |
| 4009 | node = isl_sched_graph_find_node(ctx, graph, space); |
| 4010 | if (!node) |
| 4011 | return NULL; |
| 4012 | if (isl_sched_graph_is_node(graph, node)) |
| 4013 | return node; |
| 4014 | |
| 4015 | id = isl_space_get_tuple_id(space, type: isl_dim_set); |
| 4016 | node = isl_id_get_user(id); |
| 4017 | isl_id_free(id); |
| 4018 | |
| 4019 | if (!node) |
| 4020 | return NULL; |
| 4021 | |
| 4022 | if (!isl_sched_graph_is_node(graph: graph->root, node)) |
| 4023 | isl_die(ctx, isl_error_internal, |
| 4024 | "space points to invalid node" , return NULL); |
| 4025 | if (graph != graph->root) |
| 4026 | node = isl_sched_graph_find_node(ctx, graph, space: node->space); |
| 4027 | if (!isl_sched_graph_is_node(graph, node)) |
| 4028 | isl_die(ctx, isl_error_internal, |
| 4029 | "unable to find node" , return NULL); |
| 4030 | |
| 4031 | return node; |
| 4032 | } |
| 4033 | |
| 4034 | /* Internal data structure for add_all_constraints. |
| 4035 | * |
| 4036 | * "graph" is the schedule constraint graph for which an LP problem |
| 4037 | * is being constructed. |
| 4038 | * "carry_inter" indicates whether inter-node edges should be carried. |
| 4039 | * "pos" is the position of the next edge that needs to be carried. |
| 4040 | */ |
| 4041 | struct isl_add_all_constraints_data { |
| 4042 | isl_ctx *ctx; |
| 4043 | struct isl_sched_graph *graph; |
| 4044 | int carry_inter; |
| 4045 | int pos; |
| 4046 | }; |
| 4047 | |
| 4048 | /* Add the constraints "coef" derived from an edge from a node to itself |
| 4049 | * to data->graph->lp in order to respect the dependences and |
| 4050 | * to try and carry them. |
| 4051 | * |
| 4052 | * The space of "coef" is of the form |
| 4053 | * |
| 4054 | * coefficients[[c_cst] -> S[c_x]] |
| 4055 | * |
| 4056 | * with S[c_x] the (compressed) space of the node. |
| 4057 | * Extract the node from the space and call add_intra_constraints. |
| 4058 | */ |
| 4059 | static isl_stat lp_add_intra(__isl_take isl_basic_set *coef, void *user) |
| 4060 | { |
| 4061 | struct isl_add_all_constraints_data *data = user; |
| 4062 | isl_space *space; |
| 4063 | struct isl_sched_node *node; |
| 4064 | |
| 4065 | space = isl_basic_set_get_space(bset: coef); |
| 4066 | space = isl_space_range(space: isl_space_unwrap(space)); |
| 4067 | node = graph_find_compressed_node(ctx: data->ctx, graph: data->graph, space); |
| 4068 | isl_space_free(space); |
| 4069 | return add_intra_constraints(graph: data->graph, node, coef, pos: data->pos++); |
| 4070 | } |
| 4071 | |
| 4072 | /* Add the constraints "coef" derived from an edge from a node j |
| 4073 | * to a node k to data->graph->lp in order to respect the dependences and |
| 4074 | * to try and carry them (provided data->carry_inter is set). |
| 4075 | * |
| 4076 | * The space of "coef" is of the form |
| 4077 | * |
| 4078 | * coefficients[[c_cst, c_n] -> [S_j[c_x] -> S_k[c_y]]] |
| 4079 | * |
| 4080 | * with S_j[c_x] and S_k[c_y] the (compressed) spaces of the nodes. |
| 4081 | * Extract the nodes from the space and call add_inter_constraints. |
| 4082 | */ |
| 4083 | static isl_stat lp_add_inter(__isl_take isl_basic_set *coef, void *user) |
| 4084 | { |
| 4085 | struct isl_add_all_constraints_data *data = user; |
| 4086 | isl_space *space, *dom; |
| 4087 | struct isl_sched_node *src, *dst; |
| 4088 | int pos; |
| 4089 | |
| 4090 | space = isl_basic_set_get_space(bset: coef); |
| 4091 | space = isl_space_unwrap(space: isl_space_range(space: isl_space_unwrap(space))); |
| 4092 | dom = isl_space_domain(space: isl_space_copy(space)); |
| 4093 | src = graph_find_compressed_node(ctx: data->ctx, graph: data->graph, space: dom); |
| 4094 | isl_space_free(space: dom); |
| 4095 | space = isl_space_range(space); |
| 4096 | dst = graph_find_compressed_node(ctx: data->ctx, graph: data->graph, space); |
| 4097 | isl_space_free(space); |
| 4098 | |
| 4099 | pos = data->carry_inter ? data->pos++ : -1; |
| 4100 | return add_inter_constraints(graph: data->graph, src, dst, coef, pos); |
| 4101 | } |
| 4102 | |
| 4103 | /* Add constraints to graph->lp that force all (conditional) validity |
| 4104 | * dependences to be respected and attempt to carry them. |
| 4105 | * "intra" is the sequence of coefficient constraints for intra-node edges. |
| 4106 | * "inter" is the sequence of coefficient constraints for inter-node edges. |
| 4107 | * "carry_inter" indicates whether inter-node edges should be carried or |
| 4108 | * only respected. |
| 4109 | */ |
| 4110 | static isl_stat add_all_constraints(isl_ctx *ctx, struct isl_sched_graph *graph, |
| 4111 | __isl_keep isl_basic_set_list *intra, |
| 4112 | __isl_keep isl_basic_set_list *inter, int carry_inter) |
| 4113 | { |
| 4114 | struct isl_add_all_constraints_data data = { ctx, graph, carry_inter }; |
| 4115 | |
| 4116 | data.pos = 0; |
| 4117 | if (isl_basic_set_list_foreach(list: intra, fn: &lp_add_intra, user: &data) < 0) |
| 4118 | return isl_stat_error; |
| 4119 | if (isl_basic_set_list_foreach(list: inter, fn: &lp_add_inter, user: &data) < 0) |
| 4120 | return isl_stat_error; |
| 4121 | return isl_stat_ok; |
| 4122 | } |
| 4123 | |
/* Internal data structure for count_all_constraints
 * for keeping track of the number of equality and inequality constraints.
 */
struct isl_sched_count {
	int n_eq;	/* number of equality constraints seen so far */
	int n_ineq;	/* number of inequality constraints seen so far */
};
| 4131 | |
| 4132 | /* Add the number of equality and inequality constraints of "bset" |
| 4133 | * to data->n_eq and data->n_ineq. |
| 4134 | */ |
| 4135 | static isl_stat bset_update_count(__isl_take isl_basic_set *bset, void *user) |
| 4136 | { |
| 4137 | struct isl_sched_count *data = user; |
| 4138 | |
| 4139 | return update_count(bset, f: 1, n_eq: &data->n_eq, n_ineq: &data->n_ineq); |
| 4140 | } |
| 4141 | |
| 4142 | /* Count the number of equality and inequality constraints |
| 4143 | * that will be added to the carry_lp problem. |
| 4144 | * We count each edge exactly once. |
| 4145 | * "intra" is the sequence of coefficient constraints for intra-node edges. |
| 4146 | * "inter" is the sequence of coefficient constraints for inter-node edges. |
| 4147 | */ |
| 4148 | static isl_stat count_all_constraints(__isl_keep isl_basic_set_list *intra, |
| 4149 | __isl_keep isl_basic_set_list *inter, int *n_eq, int *n_ineq) |
| 4150 | { |
| 4151 | struct isl_sched_count data; |
| 4152 | |
| 4153 | data.n_eq = data.n_ineq = 0; |
| 4154 | if (isl_basic_set_list_foreach(list: inter, fn: &bset_update_count, user: &data) < 0) |
| 4155 | return isl_stat_error; |
| 4156 | if (isl_basic_set_list_foreach(list: intra, fn: &bset_update_count, user: &data) < 0) |
| 4157 | return isl_stat_error; |
| 4158 | |
| 4159 | *n_eq = data.n_eq; |
| 4160 | *n_ineq = data.n_ineq; |
| 4161 | |
| 4162 | return isl_stat_ok; |
| 4163 | } |
| 4164 | |
| 4165 | /* Construct an LP problem for finding schedule coefficients |
| 4166 | * such that the schedule carries as many validity dependences as possible. |
| 4167 | * In particular, for each dependence i, we bound the dependence distance |
| 4168 | * from below by e_i, with 0 <= e_i <= 1 and then maximize the sum |
| 4169 | * of all e_i's. Dependences with e_i = 0 in the solution are simply |
| 4170 | * respected, while those with e_i > 0 (in practice e_i = 1) are carried. |
| 4171 | * "intra" is the sequence of coefficient constraints for intra-node edges. |
| 4172 | * "inter" is the sequence of coefficient constraints for inter-node edges. |
| 4173 | * "n_edge" is the total number of edges. |
| 4174 | * "carry_inter" indicates whether inter-node edges should be carried or |
| 4175 | * only respected. That is, if "carry_inter" is not set, then |
| 4176 | * no e_i variables are introduced for the inter-node edges. |
| 4177 | * |
| 4178 | * All variables of the LP are non-negative. The actual coefficients |
| 4179 | * may be negative, so each coefficient is represented as the difference |
| 4180 | * of two non-negative variables. The negative part always appears |
| 4181 | * immediately before the positive part. |
| 4182 | * Other than that, the variables have the following order |
| 4183 | * |
| 4184 | * - sum of (1 - e_i) over all edges |
| 4185 | * - sum of all c_n coefficients |
| 4186 | * (unconstrained when computing non-parametric schedules) |
| 4187 | * - sum of positive and negative parts of all c_x coefficients |
| 4188 | * - for each edge |
| 4189 | * - e_i |
| 4190 | * - for each node |
| 4191 | * - positive and negative parts of c_i_x, in opposite order |
| 4192 | * - c_i_n (if parametric) |
| 4193 | * - c_i_0 |
| 4194 | * |
| 4195 | * The constraints are those from the (validity) edges plus three equalities |
| 4196 | * to express the sums and n_edge inequalities to express e_i <= 1. |
| 4197 | */ |
| 4198 | static isl_stat setup_carry_lp(isl_ctx *ctx, struct isl_sched_graph *graph, |
| 4199 | int n_edge, __isl_keep isl_basic_set_list *intra, |
| 4200 | __isl_keep isl_basic_set_list *inter, int carry_inter) |
| 4201 | { |
| 4202 | int i; |
| 4203 | int k; |
| 4204 | isl_space *space; |
| 4205 | unsigned total; |
| 4206 | int n_eq, n_ineq; |
| 4207 | |
| 4208 | total = 3 + n_edge; |
| 4209 | for (i = 0; i < graph->n; ++i) { |
| 4210 | struct isl_sched_node *node = &graph->node[graph->sorted[i]]; |
| 4211 | node->start = total; |
| 4212 | total += 1 + node->nparam + 2 * node->nvar; |
| 4213 | } |
| 4214 | |
| 4215 | if (count_all_constraints(intra, inter, n_eq: &n_eq, n_ineq: &n_ineq) < 0) |
| 4216 | return isl_stat_error; |
| 4217 | |
| 4218 | space = isl_space_set_alloc(ctx, nparam: 0, dim: total); |
| 4219 | isl_basic_set_free(bset: graph->lp); |
| 4220 | n_eq += 3; |
| 4221 | n_ineq += n_edge; |
| 4222 | graph->lp = isl_basic_set_alloc_space(space, extra: 0, n_eq, n_ineq); |
| 4223 | graph->lp = isl_basic_set_set_rational(bset: graph->lp); |
| 4224 | |
| 4225 | k = isl_basic_set_alloc_equality(bset: graph->lp); |
| 4226 | if (k < 0) |
| 4227 | return isl_stat_error; |
| 4228 | isl_seq_clr(p: graph->lp->eq[k], len: 1 + total); |
| 4229 | isl_int_set_si(graph->lp->eq[k][0], -n_edge); |
| 4230 | isl_int_set_si(graph->lp->eq[k][1], 1); |
| 4231 | for (i = 0; i < n_edge; ++i) |
| 4232 | isl_int_set_si(graph->lp->eq[k][4 + i], 1); |
| 4233 | |
| 4234 | if (add_param_sum_constraint(graph, sum_pos: 1) < 0) |
| 4235 | return isl_stat_error; |
| 4236 | if (add_var_sum_constraint(graph, sum_pos: 2) < 0) |
| 4237 | return isl_stat_error; |
| 4238 | |
| 4239 | for (i = 0; i < n_edge; ++i) { |
| 4240 | k = isl_basic_set_alloc_inequality(bset: graph->lp); |
| 4241 | if (k < 0) |
| 4242 | return isl_stat_error; |
| 4243 | isl_seq_clr(p: graph->lp->ineq[k], len: 1 + total); |
| 4244 | isl_int_set_si(graph->lp->ineq[k][4 + i], -1); |
| 4245 | isl_int_set_si(graph->lp->ineq[k][0], 1); |
| 4246 | } |
| 4247 | |
| 4248 | if (add_all_constraints(ctx, graph, intra, inter, carry_inter) < 0) |
| 4249 | return isl_stat_error; |
| 4250 | |
| 4251 | return isl_stat_ok; |
| 4252 | } |
| 4253 | |
| 4254 | static __isl_give isl_schedule_node *compute_component_schedule( |
| 4255 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
| 4256 | int wcc); |
| 4257 | |
| 4258 | /* If the schedule_split_scaled option is set and if the linear |
| 4259 | * parts of the scheduling rows for all nodes in the graphs have |
| 4260 | * a non-trivial common divisor, then remove this |
| 4261 | * common divisor from the linear part. |
| 4262 | * Otherwise, insert a band node directly and continue with |
| 4263 | * the construction of the schedule. |
| 4264 | * |
| 4265 | * If a non-trivial common divisor is found, then |
| 4266 | * the linear part is reduced and the remainder is ignored. |
| 4267 | * The pieces of the graph that are assigned different remainders |
| 4268 | * form (groups of) strongly connected components within |
| 4269 | * the scaled down band. If needed, they can therefore |
| 4270 | * be ordered along this remainder in a sequence node. |
| 4271 | * However, this ordering is not enforced here in order to allow |
| 4272 | * the scheduler to combine some of the strongly connected components. |
| 4273 | */ |
| 4274 | static __isl_give isl_schedule_node *split_scaled( |
| 4275 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
| 4276 | { |
| 4277 | int i; |
| 4278 | int row; |
| 4279 | isl_ctx *ctx; |
| 4280 | isl_int gcd, gcd_i; |
| 4281 | isl_size n_row; |
| 4282 | |
| 4283 | if (!node) |
| 4284 | return NULL; |
| 4285 | |
| 4286 | ctx = isl_schedule_node_get_ctx(node); |
| 4287 | if (!ctx->opt->schedule_split_scaled) |
| 4288 | return compute_next_band(node, graph, permutable: 0); |
| 4289 | if (graph->n <= 1) |
| 4290 | return compute_next_band(node, graph, permutable: 0); |
| 4291 | n_row = isl_mat_rows(mat: graph->node[0].sched); |
| 4292 | if (n_row < 0) |
| 4293 | return isl_schedule_node_free(node); |
| 4294 | |
| 4295 | isl_int_init(gcd); |
| 4296 | isl_int_init(gcd_i); |
| 4297 | |
| 4298 | isl_int_set_si(gcd, 0); |
| 4299 | |
| 4300 | row = n_row - 1; |
| 4301 | |
| 4302 | for (i = 0; i < graph->n; ++i) { |
| 4303 | struct isl_sched_node *node = &graph->node[i]; |
| 4304 | isl_size cols = isl_mat_cols(mat: node->sched); |
| 4305 | |
| 4306 | if (cols < 0) |
| 4307 | break; |
| 4308 | isl_seq_gcd(p: node->sched->row[row] + 1, len: cols - 1, gcd: &gcd_i); |
| 4309 | isl_int_gcd(gcd, gcd, gcd_i); |
| 4310 | } |
| 4311 | |
| 4312 | isl_int_clear(gcd_i); |
| 4313 | if (i < graph->n) |
| 4314 | goto error; |
| 4315 | |
| 4316 | if (isl_int_cmp_si(gcd, 1) <= 0) { |
| 4317 | isl_int_clear(gcd); |
| 4318 | return compute_next_band(node, graph, permutable: 0); |
| 4319 | } |
| 4320 | |
| 4321 | for (i = 0; i < graph->n; ++i) { |
| 4322 | struct isl_sched_node *node = &graph->node[i]; |
| 4323 | |
| 4324 | isl_int_fdiv_q(node->sched->row[row][0], |
| 4325 | node->sched->row[row][0], gcd); |
| 4326 | isl_int_mul(node->sched->row[row][0], |
| 4327 | node->sched->row[row][0], gcd); |
| 4328 | node->sched = isl_mat_scale_down_row(mat: node->sched, row, m: gcd); |
| 4329 | if (!node->sched) |
| 4330 | goto error; |
| 4331 | } |
| 4332 | |
| 4333 | isl_int_clear(gcd); |
| 4334 | |
| 4335 | return compute_next_band(node, graph, permutable: 0); |
| 4336 | error: |
| 4337 | isl_int_clear(gcd); |
| 4338 | return isl_schedule_node_free(node); |
| 4339 | } |
| 4340 | |
| 4341 | /* Is the schedule row "sol" trivial on node "node"? |
| 4342 | * That is, is the solution zero on the dimensions linearly independent of |
| 4343 | * the previously found solutions? |
| 4344 | * Return 1 if the solution is trivial, 0 if it is not and -1 on error. |
| 4345 | * |
| 4346 | * Each coefficient is represented as the difference between |
| 4347 | * two non-negative values in "sol". |
| 4348 | * We construct the schedule row s and check if it is linearly |
| 4349 | * independent of previously computed schedule rows |
| 4350 | * by computing T s, with T the linear combinations that are zero |
| 4351 | * on linearly dependent schedule rows. |
| 4352 | * If the result consists of all zeros, then the solution is trivial. |
| 4353 | */ |
| 4354 | static int is_trivial(struct isl_sched_node *node, __isl_keep isl_vec *sol) |
| 4355 | { |
| 4356 | int trivial; |
| 4357 | isl_vec *node_sol; |
| 4358 | |
| 4359 | if (!sol) |
| 4360 | return -1; |
| 4361 | if (node->nvar == node->rank) |
| 4362 | return 0; |
| 4363 | |
| 4364 | node_sol = extract_var_coef(node, sol); |
| 4365 | node_sol = isl_mat_vec_product(mat: isl_mat_copy(mat: node->indep), vec: node_sol); |
| 4366 | if (!node_sol) |
| 4367 | return -1; |
| 4368 | |
| 4369 | trivial = isl_seq_first_non_zero(p: node_sol->el, |
| 4370 | len: node->nvar - node->rank) == -1; |
| 4371 | |
| 4372 | isl_vec_free(vec: node_sol); |
| 4373 | |
| 4374 | return trivial; |
| 4375 | } |
| 4376 | |
| 4377 | /* Is the schedule row "sol" trivial on any node where it should |
| 4378 | * not be trivial? |
| 4379 | * Return 1 if any solution is trivial, 0 if they are not and -1 on error. |
| 4380 | */ |
| 4381 | static int is_any_trivial(struct isl_sched_graph *graph, |
| 4382 | __isl_keep isl_vec *sol) |
| 4383 | { |
| 4384 | int i; |
| 4385 | |
| 4386 | for (i = 0; i < graph->n; ++i) { |
| 4387 | struct isl_sched_node *node = &graph->node[i]; |
| 4388 | int trivial; |
| 4389 | |
| 4390 | if (!needs_row(graph, node)) |
| 4391 | continue; |
| 4392 | trivial = is_trivial(node, sol); |
| 4393 | if (trivial < 0 || trivial) |
| 4394 | return trivial; |
| 4395 | } |
| 4396 | |
| 4397 | return 0; |
| 4398 | } |
| 4399 | |
| 4400 | /* Does the schedule represented by "sol" perform loop coalescing on "node"? |
| 4401 | * If so, return the position of the coalesced dimension. |
| 4402 | * Otherwise, return node->nvar or -1 on error. |
| 4403 | * |
| 4404 | * In particular, look for pairs of coefficients c_i and c_j such that |
| 4405 | * |c_j/c_i| > ceil(size_i/2), i.e., |c_j| > |c_i * ceil(size_i/2)|. |
| 4406 | * If any such pair is found, then return i. |
| 4407 | * If size_i is infinity, then no check on c_i needs to be performed. |
| 4408 | */ |
| 4409 | static int find_node_coalescing(struct isl_sched_node *node, |
| 4410 | __isl_keep isl_vec *sol) |
| 4411 | { |
| 4412 | int i, j; |
| 4413 | isl_int max; |
| 4414 | isl_vec *csol; |
| 4415 | |
| 4416 | if (node->nvar <= 1) |
| 4417 | return node->nvar; |
| 4418 | |
| 4419 | csol = extract_var_coef(node, sol); |
| 4420 | if (!csol) |
| 4421 | return -1; |
| 4422 | isl_int_init(max); |
| 4423 | for (i = 0; i < node->nvar; ++i) { |
| 4424 | isl_val *v; |
| 4425 | |
| 4426 | if (isl_int_is_zero(csol->el[i])) |
| 4427 | continue; |
| 4428 | v = isl_multi_val_get_val(multi: node->sizes, pos: i); |
| 4429 | if (!v) |
| 4430 | goto error; |
| 4431 | if (!isl_val_is_int(v)) { |
| 4432 | isl_val_free(v); |
| 4433 | continue; |
| 4434 | } |
| 4435 | v = isl_val_div_ui(v1: v, v2: 2); |
| 4436 | v = isl_val_ceil(v); |
| 4437 | if (!v) |
| 4438 | goto error; |
| 4439 | isl_int_mul(max, v->n, csol->el[i]); |
| 4440 | isl_val_free(v); |
| 4441 | |
| 4442 | for (j = 0; j < node->nvar; ++j) { |
| 4443 | if (j == i) |
| 4444 | continue; |
| 4445 | if (isl_int_abs_gt(csol->el[j], max)) |
| 4446 | break; |
| 4447 | } |
| 4448 | if (j < node->nvar) |
| 4449 | break; |
| 4450 | } |
| 4451 | |
| 4452 | isl_int_clear(max); |
| 4453 | isl_vec_free(vec: csol); |
| 4454 | return i; |
| 4455 | error: |
| 4456 | isl_int_clear(max); |
| 4457 | isl_vec_free(vec: csol); |
| 4458 | return -1; |
| 4459 | } |
| 4460 | |
| 4461 | /* Force the schedule coefficient at position "pos" of "node" to be zero |
| 4462 | * in "tl". |
| 4463 | * The coefficient is encoded as the difference between two non-negative |
| 4464 | * variables. Force these two variables to have the same value. |
| 4465 | */ |
| 4466 | static __isl_give isl_tab_lexmin *zero_out_node_coef( |
| 4467 | __isl_take isl_tab_lexmin *tl, struct isl_sched_node *node, int pos) |
| 4468 | { |
| 4469 | int dim; |
| 4470 | isl_ctx *ctx; |
| 4471 | isl_vec *eq; |
| 4472 | |
| 4473 | ctx = isl_space_get_ctx(space: node->space); |
| 4474 | dim = isl_tab_lexmin_dim(tl); |
| 4475 | if (dim < 0) |
| 4476 | return isl_tab_lexmin_free(tl); |
| 4477 | eq = isl_vec_alloc(ctx, size: 1 + dim); |
| 4478 | eq = isl_vec_clr(vec: eq); |
| 4479 | if (!eq) |
| 4480 | return isl_tab_lexmin_free(tl); |
| 4481 | |
| 4482 | pos = 1 + node_var_coef_pos(node, i: pos); |
| 4483 | isl_int_set_si(eq->el[pos], 1); |
| 4484 | isl_int_set_si(eq->el[pos + 1], -1); |
| 4485 | tl = isl_tab_lexmin_add_eq(tl, eq: eq->el); |
| 4486 | isl_vec_free(vec: eq); |
| 4487 | |
| 4488 | return tl; |
| 4489 | } |
| 4490 | |
| 4491 | /* Return the lexicographically smallest rational point in the basic set |
| 4492 | * from which "tl" was constructed, double checking that this input set |
| 4493 | * was not empty. |
| 4494 | */ |
| 4495 | static __isl_give isl_vec *non_empty_solution(__isl_keep isl_tab_lexmin *tl) |
| 4496 | { |
| 4497 | isl_vec *sol; |
| 4498 | |
| 4499 | sol = isl_tab_lexmin_get_solution(tl); |
| 4500 | if (!sol) |
| 4501 | return NULL; |
| 4502 | if (sol->size == 0) |
| 4503 | isl_die(isl_vec_get_ctx(sol), isl_error_internal, |
| 4504 | "error in schedule construction" , |
| 4505 | return isl_vec_free(sol)); |
| 4506 | return sol; |
| 4507 | } |
| 4508 | |
| 4509 | /* Does the solution "sol" of the LP problem constructed by setup_carry_lp |
| 4510 | * carry any of the "n_edge" groups of dependences? |
| 4511 | * The value in the first position is the sum of (1 - e_i) over all "n_edge" |
| 4512 | * edges, with 0 <= e_i <= 1 equal to 1 when the dependences represented |
| 4513 | * by the edge are carried by the solution. |
| 4514 | * If the sum of the (1 - e_i) is smaller than "n_edge" then at least |
| 4515 | * one of those is carried. |
| 4516 | * |
| 4517 | * Note that despite the fact that the problem is solved using a rational |
| 4518 | * solver, the solution is guaranteed to be integral. |
| 4519 | * Specifically, the dependence distance lower bounds e_i (and therefore |
| 4520 | * also their sum) are integers. See Lemma 5 of [1]. |
| 4521 | * |
| 4522 | * Any potential denominator of the sum is cleared by this function. |
| 4523 | * The denominator is not relevant for any of the other elements |
| 4524 | * in the solution. |
| 4525 | * |
| 4526 | * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling |
| 4527 | * Problem, Part II: Multi-Dimensional Time. |
| 4528 | * In Intl. Journal of Parallel Programming, 1992. |
| 4529 | */ |
| 4530 | static int carries_dependences(__isl_keep isl_vec *sol, int n_edge) |
| 4531 | { |
| 4532 | isl_int_divexact(sol->el[1], sol->el[1], sol->el[0]); |
| 4533 | isl_int_set_si(sol->el[0], 1); |
| 4534 | return isl_int_cmp_si(sol->el[1], n_edge) < 0; |
| 4535 | } |
| 4536 | |
| 4537 | /* Return the lexicographically smallest rational point in "lp", |
| 4538 | * assuming that all variables are non-negative and performing some |
| 4539 | * additional sanity checks. |
| 4540 | * If "want_integral" is set, then compute the lexicographically smallest |
| 4541 | * integer point instead. |
| 4542 | * In particular, "lp" should not be empty by construction. |
| 4543 | * Double check that this is the case. |
| 4544 | * If dependences are not carried for any of the "n_edge" edges, |
| 4545 | * then return an empty vector. |
| 4546 | * |
| 4547 | * If the schedule_treat_coalescing option is set and |
| 4548 | * if the computed schedule performs loop coalescing on a given node, |
| 4549 | * i.e., if it is of the form |
| 4550 | * |
| 4551 | * c_i i + c_j j + ... |
| 4552 | * |
| 4553 | * with |c_j/c_i| >= size_i, then force the coefficient c_i to be zero |
| 4554 | * to cut out this solution. Repeat this process until no more loop |
| 4555 | * coalescing occurs or until no more dependences can be carried. |
| 4556 | * In the latter case, revert to the previously computed solution. |
| 4557 | * |
| 4558 | * If the caller requests an integral solution and if coalescing should |
| 4559 | * be treated, then perform the coalescing treatment first as |
| 4560 | * an integral solution computed before coalescing treatment |
| 4561 | * would carry the same number of edges and would therefore probably |
| 4562 | * also be coalescing. |
| 4563 | * |
| 4564 | * To allow the coalescing treatment to be performed first, |
| 4565 | * the initial solution is allowed to be rational and it is only |
| 4566 | * cut out (if needed) in the next iteration, if no coalescing measures |
| 4567 | * were taken. |
| 4568 | */ |
| 4569 | static __isl_give isl_vec *non_neg_lexmin(struct isl_sched_graph *graph, |
| 4570 | __isl_take isl_basic_set *lp, int n_edge, int want_integral) |
| 4571 | { |
| 4572 | int i, pos, cut; |
| 4573 | isl_ctx *ctx; |
| 4574 | isl_tab_lexmin *tl; |
| 4575 | isl_vec *sol = NULL, *prev; |
| 4576 | int treat_coalescing; |
| 4577 | int try_again; |
| 4578 | |
| 4579 | if (!lp) |
| 4580 | return NULL; |
| 4581 | ctx = isl_basic_set_get_ctx(bset: lp); |
| 4582 | treat_coalescing = isl_options_get_schedule_treat_coalescing(ctx); |
| 4583 | tl = isl_tab_lexmin_from_basic_set(bset: lp); |
| 4584 | |
| 4585 | cut = 0; |
| 4586 | do { |
| 4587 | int integral; |
| 4588 | |
| 4589 | try_again = 0; |
| 4590 | if (cut) |
| 4591 | tl = isl_tab_lexmin_cut_to_integer(tl); |
| 4592 | prev = sol; |
| 4593 | sol = non_empty_solution(tl); |
| 4594 | if (!sol) |
| 4595 | goto error; |
| 4596 | |
| 4597 | integral = isl_int_is_one(sol->el[0]); |
| 4598 | if (!carries_dependences(sol, n_edge)) { |
| 4599 | if (!prev) |
| 4600 | prev = isl_vec_alloc(ctx, size: 0); |
| 4601 | isl_vec_free(vec: sol); |
| 4602 | sol = prev; |
| 4603 | break; |
| 4604 | } |
| 4605 | prev = isl_vec_free(vec: prev); |
| 4606 | cut = want_integral && !integral; |
| 4607 | if (cut) |
| 4608 | try_again = 1; |
| 4609 | if (!treat_coalescing) |
| 4610 | continue; |
| 4611 | for (i = 0; i < graph->n; ++i) { |
| 4612 | struct isl_sched_node *node = &graph->node[i]; |
| 4613 | |
| 4614 | pos = find_node_coalescing(node, sol); |
| 4615 | if (pos < 0) |
| 4616 | goto error; |
| 4617 | if (pos < node->nvar) |
| 4618 | break; |
| 4619 | } |
| 4620 | if (i < graph->n) { |
| 4621 | try_again = 1; |
| 4622 | tl = zero_out_node_coef(tl, node: &graph->node[i], pos); |
| 4623 | cut = 0; |
| 4624 | } |
| 4625 | } while (try_again); |
| 4626 | |
| 4627 | isl_tab_lexmin_free(tl); |
| 4628 | |
| 4629 | return sol; |
| 4630 | error: |
| 4631 | isl_tab_lexmin_free(tl); |
| 4632 | isl_vec_free(vec: prev); |
| 4633 | isl_vec_free(vec: sol); |
| 4634 | return NULL; |
| 4635 | } |
| 4636 | |
| 4637 | /* If "edge" is an edge from a node to itself, then add the corresponding |
| 4638 | * dependence relation to "umap". |
| 4639 | * If "node" has been compressed, then the dependence relation |
| 4640 | * is also compressed first. |
| 4641 | */ |
| 4642 | static __isl_give isl_union_map *add_intra(__isl_take isl_union_map *umap, |
| 4643 | struct isl_sched_edge *edge) |
| 4644 | { |
| 4645 | isl_map *map; |
| 4646 | struct isl_sched_node *node = edge->src; |
| 4647 | |
| 4648 | if (edge->src != edge->dst) |
| 4649 | return umap; |
| 4650 | |
| 4651 | map = isl_map_copy(map: edge->map); |
| 4652 | map = compress(map, src: node, dst: node); |
| 4653 | umap = isl_union_map_add_map(umap, map); |
| 4654 | return umap; |
| 4655 | } |
| 4656 | |
| 4657 | /* If "edge" is an edge from a node to another node, then add the corresponding |
| 4658 | * dependence relation to "umap". |
| 4659 | * If the source or destination nodes of "edge" have been compressed, |
| 4660 | * then the dependence relation is also compressed first. |
| 4661 | */ |
| 4662 | static __isl_give isl_union_map *add_inter(__isl_take isl_union_map *umap, |
| 4663 | struct isl_sched_edge *edge) |
| 4664 | { |
| 4665 | isl_map *map; |
| 4666 | |
| 4667 | if (edge->src == edge->dst) |
| 4668 | return umap; |
| 4669 | |
| 4670 | map = isl_map_copy(map: edge->map); |
| 4671 | map = compress(map, src: edge->src, dst: edge->dst); |
| 4672 | umap = isl_union_map_add_map(umap, map); |
| 4673 | return umap; |
| 4674 | } |
| 4675 | |
| 4676 | /* Internal data structure used by union_drop_coalescing_constraints |
| 4677 | * to collect bounds on all relevant statements. |
| 4678 | * |
| 4679 | * "graph" is the schedule constraint graph for which an LP problem |
| 4680 | * is being constructed. |
| 4681 | * "bounds" collects the bounds. |
| 4682 | */ |
| 4683 | struct isl_collect_bounds_data { |
| 4684 | isl_ctx *ctx; |
| 4685 | struct isl_sched_graph *graph; |
| 4686 | isl_union_set *bounds; |
| 4687 | }; |
| 4688 | |
| 4689 | /* Add the size bounds for the node with instance deltas in "set" |
| 4690 | * to data->bounds. |
| 4691 | */ |
| 4692 | static isl_stat collect_bounds(__isl_take isl_set *set, void *user) |
| 4693 | { |
| 4694 | struct isl_collect_bounds_data *data = user; |
| 4695 | struct isl_sched_node *node; |
| 4696 | isl_space *space; |
| 4697 | isl_set *bounds; |
| 4698 | |
| 4699 | space = isl_set_get_space(set); |
| 4700 | isl_set_free(set); |
| 4701 | |
| 4702 | node = graph_find_compressed_node(ctx: data->ctx, graph: data->graph, space); |
| 4703 | isl_space_free(space); |
| 4704 | |
| 4705 | bounds = isl_set_from_basic_set(bset: get_size_bounds(node)); |
| 4706 | data->bounds = isl_union_set_add_set(uset: data->bounds, set: bounds); |
| 4707 | |
| 4708 | return isl_stat_ok; |
| 4709 | } |
| 4710 | |
| 4711 | /* Drop some constraints from "delta" that could be exploited |
| 4712 | * to construct loop coalescing schedules. |
| 4713 | * In particular, drop those constraint that bound the difference |
| 4714 | * to the size of the domain. |
| 4715 | * Do this for each set/node in "delta" separately. |
| 4716 | * The parameters are assumed to have been projected out by the caller. |
| 4717 | */ |
| 4718 | static __isl_give isl_union_set *union_drop_coalescing_constraints(isl_ctx *ctx, |
| 4719 | struct isl_sched_graph *graph, __isl_take isl_union_set *delta) |
| 4720 | { |
| 4721 | struct isl_collect_bounds_data data = { ctx, graph }; |
| 4722 | |
| 4723 | data.bounds = isl_union_set_empty(space: isl_space_params_alloc(ctx, nparam: 0)); |
| 4724 | if (isl_union_set_foreach_set(uset: delta, fn: &collect_bounds, user: &data) < 0) |
| 4725 | data.bounds = isl_union_set_free(uset: data.bounds); |
| 4726 | delta = isl_union_set_plain_gist(uset: delta, context: data.bounds); |
| 4727 | |
| 4728 | return delta; |
| 4729 | } |
| 4730 | |
| 4731 | /* Given a non-trivial lineality space "lineality", add the corresponding |
| 4732 | * universe set to data->mask and add a map from elements to |
| 4733 | * other elements along the lines in "lineality" to data->equivalent. |
| 4734 | * If this is the first time this function gets called |
| 4735 | * (data->any_non_trivial is still false), then set data->any_non_trivial and |
| 4736 | * initialize data->mask and data->equivalent. |
| 4737 | * |
| 4738 | * In particular, if the lineality space is defined by equality constraints |
| 4739 | * |
| 4740 | * E x = 0 |
| 4741 | * |
| 4742 | * then construct an affine mapping |
| 4743 | * |
| 4744 | * f : x -> E x |
| 4745 | * |
| 4746 | * and compute the equivalence relation of having the same image under f: |
| 4747 | * |
| 4748 | * { x -> x' : E x = E x' } |
| 4749 | */ |
| 4750 | static isl_stat add_non_trivial_lineality(__isl_take isl_basic_set *lineality, |
| 4751 | struct isl_exploit_lineality_data *data) |
| 4752 | { |
| 4753 | isl_mat *eq; |
| 4754 | isl_space *space; |
| 4755 | isl_set *univ; |
| 4756 | isl_multi_aff *ma; |
| 4757 | isl_multi_pw_aff *mpa; |
| 4758 | isl_map *map; |
| 4759 | isl_size n; |
| 4760 | |
| 4761 | if (isl_basic_set_check_no_locals(bset: lineality) < 0) |
| 4762 | goto error; |
| 4763 | |
| 4764 | space = isl_basic_set_get_space(bset: lineality); |
| 4765 | if (!data->any_non_trivial) { |
| 4766 | data->equivalent = isl_union_map_empty(space: isl_space_copy(space)); |
| 4767 | data->mask = isl_union_set_empty(space: isl_space_copy(space)); |
| 4768 | } |
| 4769 | data->any_non_trivial = isl_bool_true; |
| 4770 | |
| 4771 | univ = isl_set_universe(space: isl_space_copy(space)); |
| 4772 | data->mask = isl_union_set_add_set(uset: data->mask, set: univ); |
| 4773 | |
| 4774 | eq = isl_basic_set_extract_equalities(bset: lineality); |
| 4775 | n = isl_mat_rows(mat: eq); |
| 4776 | if (n < 0) |
| 4777 | space = isl_space_free(space); |
| 4778 | eq = isl_mat_insert_zero_rows(mat: eq, row: 0, n: 1); |
| 4779 | eq = isl_mat_set_element_si(mat: eq, row: 0, col: 0, v: 1); |
| 4780 | space = isl_space_from_domain(space); |
| 4781 | space = isl_space_add_dims(space, type: isl_dim_out, n); |
| 4782 | ma = isl_multi_aff_from_aff_mat(space, mat: eq); |
| 4783 | mpa = isl_multi_pw_aff_from_multi_aff(ma); |
| 4784 | map = isl_multi_pw_aff_eq_map(mpa1: mpa, mpa2: isl_multi_pw_aff_copy(multi: mpa)); |
| 4785 | data->equivalent = isl_union_map_add_map(umap: data->equivalent, map); |
| 4786 | |
| 4787 | isl_basic_set_free(bset: lineality); |
| 4788 | return isl_stat_ok; |
| 4789 | error: |
| 4790 | isl_basic_set_free(bset: lineality); |
| 4791 | return isl_stat_error; |
| 4792 | } |
| 4793 | |
| 4794 | /* Check if the lineality space "set" is non-trivial (i.e., is not just |
| 4795 | * the origin or, in other words, satisfies a number of equality constraints |
| 4796 | * that is smaller than the dimension of the set). |
| 4797 | * If so, extend data->mask and data->equivalent accordingly. |
| 4798 | * |
| 4799 | * The input should not have any local variables already, but |
| 4800 | * isl_set_remove_divs is called to make sure it does not. |
| 4801 | */ |
| 4802 | static isl_stat add_lineality(__isl_take isl_set *set, void *user) |
| 4803 | { |
| 4804 | struct isl_exploit_lineality_data *data = user; |
| 4805 | isl_basic_set *hull; |
| 4806 | isl_size dim; |
| 4807 | isl_size n_eq; |
| 4808 | |
| 4809 | set = isl_set_remove_divs(set); |
| 4810 | hull = isl_set_unshifted_simple_hull(set); |
| 4811 | dim = isl_basic_set_dim(bset: hull, type: isl_dim_set); |
| 4812 | n_eq = isl_basic_set_n_equality(bset: hull); |
| 4813 | if (dim < 0 || n_eq < 0) |
| 4814 | goto error; |
| 4815 | if (dim != n_eq) |
| 4816 | return add_non_trivial_lineality(lineality: hull, data); |
| 4817 | isl_basic_set_free(bset: hull); |
| 4818 | return isl_stat_ok; |
| 4819 | error: |
| 4820 | isl_basic_set_free(bset: hull); |
| 4821 | return isl_stat_error; |
| 4822 | } |
| 4823 | |
| 4824 | /* Check if the difference set on intra-node schedule constraints "intra" |
| 4825 | * has any non-trivial lineality space. |
| 4826 | * If so, then extend the difference set to a difference set |
| 4827 | * on equivalent elements. That is, if "intra" is |
| 4828 | * |
| 4829 | * { y - x : (x,y) \in V } |
| 4830 | * |
| 4831 | * and elements are equivalent if they have the same image under f, |
| 4832 | * then return |
| 4833 | * |
| 4834 | * { y' - x' : (x,y) \in V and f(x) = f(x') and f(y) = f(y') } |
| 4835 | * |
| 4836 | * or, since f is linear, |
| 4837 | * |
| 4838 | * { y' - x' : (x,y) \in V and f(y - x) = f(y' - x') } |
| 4839 | * |
| 4840 | * The results of the search for non-trivial lineality spaces is stored |
| 4841 | * in "data". |
| 4842 | */ |
| 4843 | static __isl_give isl_union_set *exploit_intra_lineality( |
| 4844 | __isl_take isl_union_set *intra, |
| 4845 | struct isl_exploit_lineality_data *data) |
| 4846 | { |
| 4847 | isl_union_set *lineality; |
| 4848 | isl_union_set *uset; |
| 4849 | |
| 4850 | data->any_non_trivial = isl_bool_false; |
| 4851 | lineality = isl_union_set_copy(uset: intra); |
| 4852 | lineality = isl_union_set_combined_lineality_space(uset: lineality); |
| 4853 | if (isl_union_set_foreach_set(uset: lineality, fn: &add_lineality, user: data) < 0) |
| 4854 | data->any_non_trivial = isl_bool_error; |
| 4855 | isl_union_set_free(uset: lineality); |
| 4856 | |
| 4857 | if (data->any_non_trivial < 0) |
| 4858 | return isl_union_set_free(uset: intra); |
| 4859 | if (!data->any_non_trivial) |
| 4860 | return intra; |
| 4861 | |
| 4862 | uset = isl_union_set_copy(uset: intra); |
| 4863 | intra = isl_union_set_subtract(uset1: intra, uset2: isl_union_set_copy(uset: data->mask)); |
| 4864 | uset = isl_union_set_apply(uset, umap: isl_union_map_copy(umap: data->equivalent)); |
| 4865 | intra = isl_union_set_union(uset1: intra, uset2: uset); |
| 4866 | |
| 4867 | intra = isl_union_set_remove_divs(bset: intra); |
| 4868 | |
| 4869 | return intra; |
| 4870 | } |
| 4871 | |
| 4872 | /* If the difference set on intra-node schedule constraints was found to have |
| 4873 | * any non-trivial lineality space by exploit_intra_lineality, |
| 4874 | * as recorded in "data", then extend the inter-node |
| 4875 | * schedule constraints "inter" to schedule constraints on equivalent elements. |
| 4876 | * That is, if "inter" is V and |
| 4877 | * elements are equivalent if they have the same image under f, then return |
| 4878 | * |
| 4879 | * { (x', y') : (x,y) \in V and f(x) = f(x') and f(y) = f(y') } |
| 4880 | */ |
| 4881 | static __isl_give isl_union_map *exploit_inter_lineality( |
| 4882 | __isl_take isl_union_map *inter, |
| 4883 | struct isl_exploit_lineality_data *data) |
| 4884 | { |
| 4885 | isl_union_map *umap; |
| 4886 | |
| 4887 | if (data->any_non_trivial < 0) |
| 4888 | return isl_union_map_free(umap: inter); |
| 4889 | if (!data->any_non_trivial) |
| 4890 | return inter; |
| 4891 | |
| 4892 | umap = isl_union_map_copy(umap: inter); |
| 4893 | inter = isl_union_map_subtract_range(umap: inter, |
| 4894 | dom: isl_union_set_copy(uset: data->mask)); |
| 4895 | umap = isl_union_map_apply_range(umap1: umap, |
| 4896 | umap2: isl_union_map_copy(umap: data->equivalent)); |
| 4897 | inter = isl_union_map_union(umap1: inter, umap2: umap); |
| 4898 | umap = isl_union_map_copy(umap: inter); |
| 4899 | inter = isl_union_map_subtract_domain(umap: inter, |
| 4900 | dom: isl_union_set_copy(uset: data->mask)); |
| 4901 | umap = isl_union_map_apply_range(umap1: isl_union_map_copy(umap: data->equivalent), |
| 4902 | umap2: umap); |
| 4903 | inter = isl_union_map_union(umap1: inter, umap2: umap); |
| 4904 | |
| 4905 | inter = isl_union_map_remove_divs(bmap: inter); |
| 4906 | |
| 4907 | return inter; |
| 4908 | } |
| 4909 | |
| 4910 | /* For each (conditional) validity edge in "graph", |
| 4911 | * add the corresponding dependence relation using "add" |
| 4912 | * to a collection of dependence relations and return the result. |
| 4913 | * If "coincidence" is set, then coincidence edges are considered as well. |
| 4914 | */ |
| 4915 | static __isl_give isl_union_map *collect_validity(struct isl_sched_graph *graph, |
| 4916 | __isl_give isl_union_map *(*add)(__isl_take isl_union_map *umap, |
| 4917 | struct isl_sched_edge *edge), int coincidence) |
| 4918 | { |
| 4919 | int i; |
| 4920 | isl_space *space; |
| 4921 | isl_union_map *umap; |
| 4922 | |
| 4923 | space = isl_space_copy(space: graph->node[0].space); |
| 4924 | umap = isl_union_map_empty(space); |
| 4925 | |
| 4926 | for (i = 0; i < graph->n_edge; ++i) { |
| 4927 | struct isl_sched_edge *edge = &graph->edge[i]; |
| 4928 | |
| 4929 | if (!is_any_validity(edge) && |
| 4930 | (!coincidence || !is_coincidence(edge))) |
| 4931 | continue; |
| 4932 | |
| 4933 | umap = add(umap, edge); |
| 4934 | } |
| 4935 | |
| 4936 | return umap; |
| 4937 | } |
| 4938 | |
| 4939 | /* For each dependence relation on a (conditional) validity edge |
| 4940 | * from a node to itself, |
| 4941 | * construct the set of coefficients of valid constraints for elements |
| 4942 | * in that dependence relation and collect the results. |
| 4943 | * If "coincidence" is set, then coincidence edges are considered as well. |
| 4944 | * |
| 4945 | * In particular, for each dependence relation R, constraints |
| 4946 | * on coefficients (c_0, c_x) are constructed such that |
| 4947 | * |
| 4948 | * c_0 + c_x d >= 0 for each d in delta R = { y - x | (x,y) in R } |
| 4949 | * |
| 4950 | * If the schedule_treat_coalescing option is set, then some constraints |
| 4951 | * that could be exploited to construct coalescing schedules |
| 4952 | * are removed before the dual is computed, but after the parameters |
| 4953 | * have been projected out. |
| 4954 | * The entire computation is essentially the same as that performed |
| 4955 | * by intra_coefficients, except that it operates on multiple |
| 4956 | * edges together and that the parameters are always projected out. |
| 4957 | * |
| 4958 | * Additionally, exploit any non-trivial lineality space |
| 4959 | * in the difference set after removing coalescing constraints and |
| 4960 | * store the results of the non-trivial lineality space detection in "data". |
| 4961 | * The procedure is currently run unconditionally, but it is unlikely |
| 4962 | * to find any non-trivial lineality spaces if no coalescing constraints |
| 4963 | * have been removed. |
| 4964 | * |
| 4965 | * Note that if a dependence relation is a union of basic maps, |
| 4966 | * then each basic map needs to be treated individually as it may only |
| 4967 | * be possible to carry the dependences expressed by some of those |
| 4968 | * basic maps and not all of them. |
| 4969 | * The collected validity constraints are therefore not coalesced and |
| 4970 | * it is assumed that they are not coalesced automatically. |
| 4971 | * Duplicate basic maps can be removed, however. |
| 4972 | * In particular, if the same basic map appears as a disjunct |
| 4973 | * in multiple edges, then it only needs to be carried once. |
| 4974 | */ |
| 4975 | static __isl_give isl_basic_set_list *collect_intra_validity(isl_ctx *ctx, |
| 4976 | struct isl_sched_graph *graph, int coincidence, |
| 4977 | struct isl_exploit_lineality_data *data) |
| 4978 | { |
| 4979 | isl_union_map *intra; |
| 4980 | isl_union_set *delta; |
| 4981 | isl_basic_set_list *list; |
| 4982 | |
| 4983 | intra = collect_validity(graph, add: &add_intra, coincidence); |
| 4984 | delta = isl_union_map_deltas(umap: intra); |
| 4985 | delta = isl_union_set_project_out_all_params(uset: delta); |
| 4986 | delta = isl_union_set_remove_divs(bset: delta); |
| 4987 | if (isl_options_get_schedule_treat_coalescing(ctx)) |
| 4988 | delta = union_drop_coalescing_constraints(ctx, graph, delta); |
| 4989 | delta = exploit_intra_lineality(intra: delta, data); |
| 4990 | list = isl_union_set_get_basic_set_list(uset: delta); |
| 4991 | isl_union_set_free(uset: delta); |
| 4992 | |
| 4993 | return isl_basic_set_list_coefficients(list); |
| 4994 | } |
| 4995 | |
| 4996 | /* For each dependence relation on a (conditional) validity edge |
| 4997 | * from a node to some other node, |
| 4998 | * construct the set of coefficients of valid constraints for elements |
| 4999 | * in that dependence relation and collect the results. |
| 5000 | * If "coincidence" is set, then coincidence edges are considered as well. |
| 5001 | * |
| 5002 | * In particular, for each dependence relation R, constraints |
| 5003 | * on coefficients (c_0, c_n, c_x, c_y) are constructed such that |
| 5004 | * |
| 5005 | * c_0 + c_n n + c_x x + c_y y >= 0 for each (x,y) in R |
| 5006 | * |
| 5007 | * This computation is essentially the same as that performed |
| 5008 | * by inter_coefficients, except that it operates on multiple |
| 5009 | * edges together. |
| 5010 | * |
| 5011 | * Additionally, exploit any non-trivial lineality space |
| 5012 | * that may have been discovered by collect_intra_validity |
| 5013 | * (as stored in "data"). |
| 5014 | * |
| 5015 | * Note that if a dependence relation is a union of basic maps, |
| 5016 | * then each basic map needs to be treated individually as it may only |
| 5017 | * be possible to carry the dependences expressed by some of those |
| 5018 | * basic maps and not all of them. |
| 5019 | * The collected validity constraints are therefore not coalesced and |
| 5020 | * it is assumed that they are not coalesced automatically. |
| 5021 | * Duplicate basic maps can be removed, however. |
| 5022 | * In particular, if the same basic map appears as a disjunct |
| 5023 | * in multiple edges, then it only needs to be carried once. |
| 5024 | */ |
| 5025 | static __isl_give isl_basic_set_list *collect_inter_validity( |
| 5026 | struct isl_sched_graph *graph, int coincidence, |
| 5027 | struct isl_exploit_lineality_data *data) |
| 5028 | { |
| 5029 | isl_union_map *inter; |
| 5030 | isl_union_set *wrap; |
| 5031 | isl_basic_set_list *list; |
| 5032 | |
| 5033 | inter = collect_validity(graph, add: &add_inter, coincidence); |
| 5034 | inter = exploit_inter_lineality(inter, data); |
| 5035 | inter = isl_union_map_remove_divs(bmap: inter); |
| 5036 | wrap = isl_union_map_wrap(umap: inter); |
| 5037 | list = isl_union_set_get_basic_set_list(uset: wrap); |
| 5038 | isl_union_set_free(uset: wrap); |
| 5039 | return isl_basic_set_list_coefficients(list); |
| 5040 | } |
| 5041 | |
| 5042 | /* Construct an LP problem for finding schedule coefficients |
| 5043 | * such that the schedule carries as many of the "n_edge" groups of |
| 5044 | * dependences as possible based on the corresponding coefficient |
| 5045 | * constraints and return the lexicographically smallest non-trivial solution. |
| 5046 | * "intra" is the sequence of coefficient constraints for intra-node edges. |
| 5047 | * "inter" is the sequence of coefficient constraints for inter-node edges. |
| 5048 | * If "want_integral" is set, then compute an integral solution |
| 5049 | * for the coefficients rather than using the numerators |
| 5050 | * of a rational solution. |
| 5051 | * "carry_inter" indicates whether inter-node edges should be carried or |
| 5052 | * only respected. |
| 5053 | * |
| 5054 | * If none of the "n_edge" groups can be carried |
| 5055 | * then return an empty vector. |
| 5056 | */ |
| 5057 | static __isl_give isl_vec *compute_carrying_sol_coef(isl_ctx *ctx, |
| 5058 | struct isl_sched_graph *graph, int n_edge, |
| 5059 | __isl_keep isl_basic_set_list *intra, |
| 5060 | __isl_keep isl_basic_set_list *inter, int want_integral, |
| 5061 | int carry_inter) |
| 5062 | { |
| 5063 | isl_basic_set *lp; |
| 5064 | |
| 5065 | if (setup_carry_lp(ctx, graph, n_edge, intra, inter, carry_inter) < 0) |
| 5066 | return NULL; |
| 5067 | |
| 5068 | lp = isl_basic_set_copy(bset: graph->lp); |
| 5069 | return non_neg_lexmin(graph, lp, n_edge, want_integral); |
| 5070 | } |
| 5071 | |
| 5072 | /* Construct an LP problem for finding schedule coefficients |
| 5073 | * such that the schedule carries as many of the validity dependences |
| 5074 | * as possible and |
| 5075 | * return the lexicographically smallest non-trivial solution. |
| 5076 | * If "fallback" is set, then the carrying is performed as a fallback |
| 5077 | * for the Pluto-like scheduler. |
| 5078 | * If "coincidence" is set, then try and carry coincidence edges as well. |
| 5079 | * |
| 5080 | * The variable "n_edge" stores the number of groups that should be carried. |
| 5081 | * If none of the "n_edge" groups can be carried |
| 5082 | * then return an empty vector. |
| 5083 | * If, moreover, "n_edge" is zero, then the LP problem does not even |
| 5084 | * need to be constructed. |
| 5085 | * |
| 5086 | * If a fallback solution is being computed, then compute an integral solution |
| 5087 | * for the coefficients rather than using the numerators |
| 5088 | * of a rational solution. |
| 5089 | * |
| 5090 | * If a fallback solution is being computed, if there are any intra-node |
| 5091 | * dependences, and if requested by the user, then first try |
| 5092 | * to only carry those intra-node dependences. |
| 5093 | * If this fails to carry any dependences, then try again |
| 5094 | * with the inter-node dependences included. |
| 5095 | */ |
| 5096 | static __isl_give isl_vec *compute_carrying_sol(isl_ctx *ctx, |
| 5097 | struct isl_sched_graph *graph, int fallback, int coincidence) |
| 5098 | { |
| 5099 | isl_size n_intra, n_inter; |
| 5100 | int n_edge; |
| 5101 | struct isl_carry carry = { 0 }; |
| 5102 | isl_vec *sol; |
| 5103 | |
| 5104 | carry.intra = collect_intra_validity(ctx, graph, coincidence, |
| 5105 | data: &carry.lineality); |
| 5106 | carry.inter = collect_inter_validity(graph, coincidence, |
| 5107 | data: &carry.lineality); |
| 5108 | n_intra = isl_basic_set_list_n_basic_set(list: carry.intra); |
| 5109 | n_inter = isl_basic_set_list_n_basic_set(list: carry.inter); |
| 5110 | if (n_intra < 0 || n_inter < 0) |
| 5111 | goto error; |
| 5112 | |
| 5113 | if (fallback && n_intra > 0 && |
| 5114 | isl_options_get_schedule_carry_self_first(ctx)) { |
| 5115 | sol = compute_carrying_sol_coef(ctx, graph, n_edge: n_intra, |
| 5116 | intra: carry.intra, inter: carry.inter, want_integral: fallback, carry_inter: 0); |
| 5117 | if (!sol || sol->size != 0 || n_inter == 0) { |
| 5118 | isl_carry_clear(carry: &carry); |
| 5119 | return sol; |
| 5120 | } |
| 5121 | isl_vec_free(vec: sol); |
| 5122 | } |
| 5123 | |
| 5124 | n_edge = n_intra + n_inter; |
| 5125 | if (n_edge == 0) { |
| 5126 | isl_carry_clear(carry: &carry); |
| 5127 | return isl_vec_alloc(ctx, size: 0); |
| 5128 | } |
| 5129 | |
| 5130 | sol = compute_carrying_sol_coef(ctx, graph, n_edge, |
| 5131 | intra: carry.intra, inter: carry.inter, want_integral: fallback, carry_inter: 1); |
| 5132 | isl_carry_clear(carry: &carry); |
| 5133 | return sol; |
| 5134 | error: |
| 5135 | isl_carry_clear(carry: &carry); |
| 5136 | return NULL; |
| 5137 | } |
| 5138 | |
| 5139 | /* Construct a schedule row for each node such that as many validity dependences |
| 5140 | * as possible are carried and then continue with the next band. |
| 5141 | * If "fallback" is set, then the carrying is performed as a fallback |
| 5142 | * for the Pluto-like scheduler. |
| 5143 | * If "coincidence" is set, then try and carry coincidence edges as well. |
| 5144 | * |
| 5145 | * If there are no validity dependences, then no dependence can be carried and |
| 5146 | * the procedure is guaranteed to fail. If there is more than one component, |
| 5147 | * then try computing a schedule on each component separately |
| 5148 | * to prevent or at least postpone this failure. |
| 5149 | * |
| 5150 | * If a schedule row is computed, then check that dependences are carried |
| 5151 | * for at least one of the edges. |
| 5152 | * |
| 5153 | * If the computed schedule row turns out to be trivial on one or |
| 5154 | * more nodes where it should not be trivial, then we throw it away |
| 5155 | * and try again on each component separately. |
| 5156 | * |
| 5157 | * If there is only one component, then we accept the schedule row anyway, |
| 5158 | * but we do not consider it as a complete row and therefore do not |
| 5159 | * increment graph->n_row. Note that the ranks of the nodes that |
| 5160 | * do get a non-trivial schedule part will get updated regardless and |
| 5161 | * graph->maxvar is computed based on these ranks. The test for |
| 5162 | * whether more schedule rows are required in compute_schedule_wcc |
| 5163 | * is therefore not affected. |
| 5164 | * |
| 5165 | * Insert a band corresponding to the schedule row at position "node" |
| 5166 | * of the schedule tree and continue with the construction of the schedule. |
| 5167 | * This insertion and the continued construction is performed by split_scaled |
| 5168 | * after optionally checking for non-trivial common divisors. |
| 5169 | */ |
| 5170 | static __isl_give isl_schedule_node *carry(__isl_take isl_schedule_node *node, |
| 5171 | struct isl_sched_graph *graph, int fallback, int coincidence) |
| 5172 | { |
| 5173 | int trivial; |
| 5174 | isl_ctx *ctx; |
| 5175 | isl_vec *sol; |
| 5176 | |
| 5177 | if (!node) |
| 5178 | return NULL; |
| 5179 | |
| 5180 | ctx = isl_schedule_node_get_ctx(node); |
| 5181 | sol = compute_carrying_sol(ctx, graph, fallback, coincidence); |
| 5182 | if (!sol) |
| 5183 | return isl_schedule_node_free(node); |
| 5184 | if (sol->size == 0) { |
| 5185 | isl_vec_free(vec: sol); |
| 5186 | if (graph->scc > 1) |
| 5187 | return compute_component_schedule(node, graph, wcc: 1); |
| 5188 | isl_die(ctx, isl_error_unknown, "unable to carry dependences" , |
| 5189 | return isl_schedule_node_free(node)); |
| 5190 | } |
| 5191 | |
| 5192 | trivial = is_any_trivial(graph, sol); |
| 5193 | if (trivial < 0) { |
| 5194 | sol = isl_vec_free(vec: sol); |
| 5195 | } else if (trivial && graph->scc > 1) { |
| 5196 | isl_vec_free(vec: sol); |
| 5197 | return compute_component_schedule(node, graph, wcc: 1); |
| 5198 | } |
| 5199 | |
| 5200 | if (update_schedule(graph, sol, coincident: 0) < 0) |
| 5201 | return isl_schedule_node_free(node); |
| 5202 | if (trivial) |
| 5203 | graph->n_row--; |
| 5204 | |
| 5205 | return split_scaled(node, graph); |
| 5206 | } |
| 5207 | |
| 5208 | /* Construct a schedule row for each node such that as many validity dependences |
| 5209 | * as possible are carried and then continue with the next band. |
| 5210 | * Do so as a fallback for the Pluto-like scheduler. |
| 5211 | * If "coincidence" is set, then try and carry coincidence edges as well. |
| 5212 | */ |
| 5213 | static __isl_give isl_schedule_node *carry_fallback( |
| 5214 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
| 5215 | int coincidence) |
| 5216 | { |
| 5217 | return carry(node, graph, fallback: 1, coincidence); |
| 5218 | } |
| 5219 | |
| 5220 | /* Construct a schedule row for each node such that as many validity dependences |
| 5221 | * as possible are carried and then continue with the next band. |
| 5222 | * Do so for the case where the Feautrier scheduler was selected |
| 5223 | * by the user. |
| 5224 | */ |
| 5225 | static __isl_give isl_schedule_node *carry_feautrier( |
| 5226 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
| 5227 | { |
| 5228 | return carry(node, graph, fallback: 0, coincidence: 0); |
| 5229 | } |
| 5230 | |
| 5231 | /* Construct a schedule row for each node such that as many validity dependences |
| 5232 | * as possible are carried and then continue with the next band. |
| 5233 | * Do so as a fallback for the Pluto-like scheduler. |
| 5234 | */ |
| 5235 | static __isl_give isl_schedule_node *carry_dependences( |
| 5236 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
| 5237 | { |
| 5238 | return carry_fallback(node, graph, coincidence: 0); |
| 5239 | } |
| 5240 | |
| 5241 | /* Construct a schedule row for each node such that as many validity or |
| 5242 | * coincidence dependences as possible are carried and |
| 5243 | * then continue with the next band. |
| 5244 | * Do so as a fallback for the Pluto-like scheduler. |
| 5245 | */ |
| 5246 | static __isl_give isl_schedule_node *carry_coincidence( |
| 5247 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
| 5248 | { |
| 5249 | return carry_fallback(node, graph, coincidence: 1); |
| 5250 | } |
| 5251 | |
| 5252 | /* Topologically sort statements mapped to the same schedule iteration |
| 5253 | * and add insert a sequence node in front of "node" |
| 5254 | * corresponding to this order. |
| 5255 | * If "initialized" is set, then it may be assumed that |
| 5256 | * isl_sched_graph_compute_maxvar |
| 5257 | * has been called on the current band. Otherwise, call |
| 5258 | * isl_sched_graph_compute_maxvar if and before carry_dependences gets called. |
| 5259 | * |
| 5260 | * If it turns out to be impossible to sort the statements apart, |
| 5261 | * because different dependences impose different orderings |
| 5262 | * on the statements, then we extend the schedule such that |
| 5263 | * it carries at least one more dependence. |
| 5264 | */ |
| 5265 | static __isl_give isl_schedule_node *sort_statements( |
| 5266 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
| 5267 | int initialized) |
| 5268 | { |
| 5269 | isl_ctx *ctx; |
| 5270 | isl_union_set_list *filters; |
| 5271 | |
| 5272 | if (!node) |
| 5273 | return NULL; |
| 5274 | |
| 5275 | ctx = isl_schedule_node_get_ctx(node); |
| 5276 | if (graph->n < 1) |
| 5277 | isl_die(ctx, isl_error_internal, |
| 5278 | "graph should have at least one node" , |
| 5279 | return isl_schedule_node_free(node)); |
| 5280 | |
| 5281 | if (graph->n == 1) |
| 5282 | return node; |
| 5283 | |
| 5284 | if (update_edges(ctx, graph) < 0) |
| 5285 | return isl_schedule_node_free(node); |
| 5286 | |
| 5287 | if (graph->n_edge == 0) |
| 5288 | return node; |
| 5289 | |
| 5290 | if (detect_sccs(ctx, graph) < 0) |
| 5291 | return isl_schedule_node_free(node); |
| 5292 | |
| 5293 | next_band(graph); |
| 5294 | if (graph->scc < graph->n) { |
| 5295 | if (!initialized && isl_sched_graph_compute_maxvar(graph) < 0) |
| 5296 | return isl_schedule_node_free(node); |
| 5297 | return carry_dependences(node, graph); |
| 5298 | } |
| 5299 | |
| 5300 | filters = isl_sched_graph_extract_sccs(ctx, graph); |
| 5301 | node = isl_schedule_node_insert_sequence(node, filters); |
| 5302 | |
| 5303 | return node; |
| 5304 | } |
| 5305 | |
| 5306 | /* Are there any (non-empty) (conditional) validity edges in the graph? |
| 5307 | */ |
| 5308 | static int has_validity_edges(struct isl_sched_graph *graph) |
| 5309 | { |
| 5310 | int i; |
| 5311 | |
| 5312 | for (i = 0; i < graph->n_edge; ++i) { |
| 5313 | int empty; |
| 5314 | |
| 5315 | empty = isl_map_plain_is_empty(map: graph->edge[i].map); |
| 5316 | if (empty < 0) |
| 5317 | return -1; |
| 5318 | if (empty) |
| 5319 | continue; |
| 5320 | if (is_any_validity(edge: &graph->edge[i])) |
| 5321 | return 1; |
| 5322 | } |
| 5323 | |
| 5324 | return 0; |
| 5325 | } |
| 5326 | |
| 5327 | /* Should we apply a Feautrier step? |
| 5328 | * That is, did the user request the Feautrier algorithm and are |
| 5329 | * there any validity dependences (left)? |
| 5330 | */ |
| 5331 | static int need_feautrier_step(isl_ctx *ctx, struct isl_sched_graph *graph) |
| 5332 | { |
| 5333 | if (ctx->opt->schedule_algorithm != ISL_SCHEDULE_ALGORITHM_FEAUTRIER) |
| 5334 | return 0; |
| 5335 | |
| 5336 | return has_validity_edges(graph); |
| 5337 | } |
| 5338 | |
| 5339 | /* Compute a schedule for a connected dependence graph using Feautrier's |
| 5340 | * multi-dimensional scheduling algorithm and return the updated schedule node. |
| 5341 | * |
| 5342 | * The original algorithm is described in [1]. |
| 5343 | * The main idea is to minimize the number of scheduling dimensions, by |
| 5344 | * trying to satisfy as many dependences as possible per scheduling dimension. |
| 5345 | * |
| 5346 | * [1] P. Feautrier, Some Efficient Solutions to the Affine Scheduling |
| 5347 | * Problem, Part II: Multi-Dimensional Time. |
| 5348 | * In Intl. Journal of Parallel Programming, 1992. |
| 5349 | */ |
| 5350 | static __isl_give isl_schedule_node *compute_schedule_wcc_feautrier( |
| 5351 | isl_schedule_node *node, struct isl_sched_graph *graph) |
| 5352 | { |
| 5353 | return carry_feautrier(node, graph); |
| 5354 | } |
| 5355 | |
| 5356 | /* Turn off the "local" bit on all (condition) edges. |
| 5357 | */ |
| 5358 | static void clear_local_edges(struct isl_sched_graph *graph) |
| 5359 | { |
| 5360 | int i; |
| 5361 | |
| 5362 | for (i = 0; i < graph->n_edge; ++i) |
| 5363 | if (isl_sched_edge_is_condition(edge: &graph->edge[i])) |
| 5364 | clear_local(edge: &graph->edge[i]); |
| 5365 | } |
| 5366 | |
| 5367 | /* Does "graph" have both condition and conditional validity edges? |
| 5368 | */ |
| 5369 | static int need_condition_check(struct isl_sched_graph *graph) |
| 5370 | { |
| 5371 | int i; |
| 5372 | int any_condition = 0; |
| 5373 | int any_conditional_validity = 0; |
| 5374 | |
| 5375 | for (i = 0; i < graph->n_edge; ++i) { |
| 5376 | if (isl_sched_edge_is_condition(edge: &graph->edge[i])) |
| 5377 | any_condition = 1; |
| 5378 | if (isl_sched_edge_is_conditional_validity(edge: &graph->edge[i])) |
| 5379 | any_conditional_validity = 1; |
| 5380 | } |
| 5381 | |
| 5382 | return any_condition && any_conditional_validity; |
| 5383 | } |
| 5384 | |
| 5385 | /* Does "graph" contain any coincidence edge? |
| 5386 | */ |
| 5387 | static int has_any_coincidence(struct isl_sched_graph *graph) |
| 5388 | { |
| 5389 | int i; |
| 5390 | |
| 5391 | for (i = 0; i < graph->n_edge; ++i) |
| 5392 | if (is_coincidence(edge: &graph->edge[i])) |
| 5393 | return 1; |
| 5394 | |
| 5395 | return 0; |
| 5396 | } |
| 5397 | |
| 5398 | /* Extract the final schedule row as a map with the iteration domain |
| 5399 | * of "node" as domain. |
| 5400 | */ |
| 5401 | static __isl_give isl_map *final_row(struct isl_sched_node *node) |
| 5402 | { |
| 5403 | isl_multi_aff *ma; |
| 5404 | isl_size n_row; |
| 5405 | |
| 5406 | n_row = isl_mat_rows(mat: node->sched); |
| 5407 | if (n_row < 0) |
| 5408 | return NULL; |
| 5409 | ma = isl_sched_node_extract_partial_schedule_multi_aff(node, |
| 5410 | first: n_row - 1, n: 1); |
| 5411 | return isl_map_from_multi_aff(maff: ma); |
| 5412 | } |
| 5413 | |
| 5414 | /* Is the conditional validity dependence in the edge with index "edge_index" |
| 5415 | * violated by the latest (i.e., final) row of the schedule? |
| 5416 | * That is, is i scheduled after j |
| 5417 | * for any conditional validity dependence i -> j? |
| 5418 | */ |
| 5419 | static int is_violated(struct isl_sched_graph *graph, int edge_index) |
| 5420 | { |
| 5421 | isl_map *src_sched, *dst_sched, *map; |
| 5422 | struct isl_sched_edge *edge = &graph->edge[edge_index]; |
| 5423 | int empty; |
| 5424 | |
| 5425 | src_sched = final_row(node: edge->src); |
| 5426 | dst_sched = final_row(node: edge->dst); |
| 5427 | map = isl_map_copy(map: edge->map); |
| 5428 | map = isl_map_apply_domain(map1: map, map2: src_sched); |
| 5429 | map = isl_map_apply_range(map1: map, map2: dst_sched); |
| 5430 | map = isl_map_order_gt(map, type1: isl_dim_in, pos1: 0, type2: isl_dim_out, pos2: 0); |
| 5431 | empty = isl_map_is_empty(map); |
| 5432 | isl_map_free(map); |
| 5433 | |
| 5434 | if (empty < 0) |
| 5435 | return -1; |
| 5436 | |
| 5437 | return !empty; |
| 5438 | } |
| 5439 | |
| 5440 | /* Does "graph" have any satisfied condition edges that |
| 5441 | * are adjacent to the conditional validity constraint with |
| 5442 | * domain "conditional_source" and range "conditional_sink"? |
| 5443 | * |
| 5444 | * A satisfied condition is one that is not local. |
| 5445 | * If a condition was forced to be local already (i.e., marked as local) |
| 5446 | * then there is no need to check if it is in fact local. |
| 5447 | * |
| 5448 | * Additionally, mark all adjacent condition edges found as local. |
| 5449 | */ |
| 5450 | static int has_adjacent_true_conditions(struct isl_sched_graph *graph, |
| 5451 | __isl_keep isl_union_set *conditional_source, |
| 5452 | __isl_keep isl_union_set *conditional_sink) |
| 5453 | { |
| 5454 | int i; |
| 5455 | int any = 0; |
| 5456 | |
| 5457 | for (i = 0; i < graph->n_edge; ++i) { |
| 5458 | int adjacent, local; |
| 5459 | isl_union_map *condition; |
| 5460 | |
| 5461 | if (!isl_sched_edge_is_condition(edge: &graph->edge[i])) |
| 5462 | continue; |
| 5463 | if (is_local(edge: &graph->edge[i])) |
| 5464 | continue; |
| 5465 | |
| 5466 | condition = graph->edge[i].tagged_condition; |
| 5467 | adjacent = domain_intersects(umap: condition, uset: conditional_sink); |
| 5468 | if (adjacent >= 0 && !adjacent) |
| 5469 | adjacent = range_intersects(umap: condition, |
| 5470 | uset: conditional_source); |
| 5471 | if (adjacent < 0) |
| 5472 | return -1; |
| 5473 | if (!adjacent) |
| 5474 | continue; |
| 5475 | |
| 5476 | set_local(&graph->edge[i]); |
| 5477 | |
| 5478 | local = is_condition_false(edge: &graph->edge[i]); |
| 5479 | if (local < 0) |
| 5480 | return -1; |
| 5481 | if (!local) |
| 5482 | any = 1; |
| 5483 | } |
| 5484 | |
| 5485 | return any; |
| 5486 | } |
| 5487 | |
| 5488 | /* Are there any violated conditional validity dependences with |
| 5489 | * adjacent condition dependences that are not local with respect |
| 5490 | * to the current schedule? |
| 5491 | * That is, is the conditional validity constraint violated? |
| 5492 | * |
| 5493 | * Additionally, mark all those adjacent condition dependences as local. |
| 5494 | * We also mark those adjacent condition dependences that were not marked |
| 5495 | * as local before, but just happened to be local already. This ensures |
| 5496 | * that they remain local if the schedule is recomputed. |
| 5497 | * |
| 5498 | * We first collect domain and range of all violated conditional validity |
| 5499 | * dependences and then check if there are any adjacent non-local |
| 5500 | * condition dependences. |
| 5501 | */ |
| 5502 | static int has_violated_conditional_constraint(isl_ctx *ctx, |
| 5503 | struct isl_sched_graph *graph) |
| 5504 | { |
| 5505 | int i; |
| 5506 | int any = 0; |
| 5507 | isl_union_set *source, *sink; |
| 5508 | |
| 5509 | source = isl_union_set_empty(space: isl_space_params_alloc(ctx, nparam: 0)); |
| 5510 | sink = isl_union_set_empty(space: isl_space_params_alloc(ctx, nparam: 0)); |
| 5511 | for (i = 0; i < graph->n_edge; ++i) { |
| 5512 | isl_union_set *uset; |
| 5513 | isl_union_map *umap; |
| 5514 | int violated; |
| 5515 | |
| 5516 | if (!isl_sched_edge_is_conditional_validity(edge: &graph->edge[i])) |
| 5517 | continue; |
| 5518 | |
| 5519 | violated = is_violated(graph, edge_index: i); |
| 5520 | if (violated < 0) |
| 5521 | goto error; |
| 5522 | if (!violated) |
| 5523 | continue; |
| 5524 | |
| 5525 | any = 1; |
| 5526 | |
| 5527 | umap = isl_union_map_copy(umap: graph->edge[i].tagged_validity); |
| 5528 | uset = isl_union_map_domain(umap); |
| 5529 | source = isl_union_set_union(uset1: source, uset2: uset); |
| 5530 | source = isl_union_set_coalesce(uset: source); |
| 5531 | |
| 5532 | umap = isl_union_map_copy(umap: graph->edge[i].tagged_validity); |
| 5533 | uset = isl_union_map_range(umap); |
| 5534 | sink = isl_union_set_union(uset1: sink, uset2: uset); |
| 5535 | sink = isl_union_set_coalesce(uset: sink); |
| 5536 | } |
| 5537 | |
| 5538 | if (any) |
| 5539 | any = has_adjacent_true_conditions(graph, conditional_source: source, conditional_sink: sink); |
| 5540 | |
| 5541 | isl_union_set_free(uset: source); |
| 5542 | isl_union_set_free(uset: sink); |
| 5543 | return any; |
| 5544 | error: |
| 5545 | isl_union_set_free(uset: source); |
| 5546 | isl_union_set_free(uset: sink); |
| 5547 | return -1; |
| 5548 | } |
| 5549 | |
| 5550 | /* Examine the current band (the rows between graph->band_start and |
| 5551 | * graph->n_total_row), deciding whether to drop it or add it to "node" |
| 5552 | * and then continue with the computation of the next band, if any. |
| 5553 | * If "initialized" is set, then it may be assumed that |
| 5554 | * isl_sched_graph_compute_maxvar |
| 5555 | * has been called on the current band. Otherwise, call |
| 5556 | * isl_sched_graph_compute_maxvar if and before carry_dependences gets called. |
| 5557 | * |
| 5558 | * The caller keeps looking for a new row as long as |
| 5559 | * graph->n_row < graph->maxvar. If the latest attempt to find |
| 5560 | * such a row failed (i.e., we still have graph->n_row < graph->maxvar), |
| 5561 | * then we either |
| 5562 | * - split between SCCs and start over (assuming we found an interesting |
| 5563 | * pair of SCCs between which to split) |
| 5564 | * - continue with the next band (assuming the current band has at least |
| 5565 | * one row) |
| 5566 | * - if there is more than one SCC left, then split along all SCCs |
| 5567 | * - if outer coincidence needs to be enforced, then try to carry as many |
| 5568 | * validity or coincidence dependences as possible and |
| 5569 | * continue with the next band |
| 5570 | * - try to carry as many validity dependences as possible and |
| 5571 | * continue with the next band |
| 5572 | * In each case, we first insert a band node in the schedule tree |
| 5573 | * if any rows have been computed. |
| 5574 | * |
| 5575 | * If the caller managed to complete the schedule and the current band |
| 5576 | * is empty, then finish off by topologically |
| 5577 | * sorting the statements based on the remaining dependences. |
| 5578 | * If, on the other hand, the current band has at least one row, |
| 5579 | * then continue with the next band. Note that this next band |
| 5580 | * will necessarily be empty, but the graph may still be split up |
| 5581 | * into weakly connected components before arriving back here. |
| 5582 | */ |
| 5583 | __isl_give isl_schedule_node *isl_schedule_node_compute_finish_band( |
| 5584 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
| 5585 | int initialized) |
| 5586 | { |
| 5587 | int empty; |
| 5588 | |
| 5589 | if (!node) |
| 5590 | return NULL; |
| 5591 | |
| 5592 | empty = graph->n_total_row == graph->band_start; |
| 5593 | if (graph->n_row < graph->maxvar) { |
| 5594 | isl_ctx *ctx; |
| 5595 | |
| 5596 | ctx = isl_schedule_node_get_ctx(node); |
| 5597 | if (!ctx->opt->schedule_maximize_band_depth && !empty) |
| 5598 | return compute_next_band(node, graph, permutable: 1); |
| 5599 | if (graph->src_scc >= 0) |
| 5600 | return compute_split_schedule(node, graph); |
| 5601 | if (!empty) |
| 5602 | return compute_next_band(node, graph, permutable: 1); |
| 5603 | if (graph->scc > 1) |
| 5604 | return compute_component_schedule(node, graph, wcc: 1); |
| 5605 | if (!initialized && isl_sched_graph_compute_maxvar(graph) < 0) |
| 5606 | return isl_schedule_node_free(node); |
| 5607 | if (isl_options_get_schedule_outer_coincidence(ctx)) |
| 5608 | return carry_coincidence(node, graph); |
| 5609 | return carry_dependences(node, graph); |
| 5610 | } |
| 5611 | |
| 5612 | if (!empty) |
| 5613 | return compute_next_band(node, graph, permutable: 1); |
| 5614 | return sort_statements(node, graph, initialized); |
| 5615 | } |
| 5616 | |
| 5617 | /* Construct a band of schedule rows for a connected dependence graph. |
| 5618 | * The caller is responsible for determining the strongly connected |
| 5619 | * components and calling isl_sched_graph_compute_maxvar first. |
| 5620 | * |
| 5621 | * We try to find a sequence of as many schedule rows as possible that result |
| 5622 | * in non-negative dependence distances (independent of the previous rows |
| 5623 | * in the sequence, i.e., such that the sequence is tilable), with as |
| 5624 | * many of the initial rows as possible satisfying the coincidence constraints. |
| 5625 | * The computation stops if we can't find any more rows or if we have found |
| 5626 | * all the rows we wanted to find. |
| 5627 | * |
| 5628 | * If ctx->opt->schedule_outer_coincidence is set, then we force the |
| 5629 | * outermost dimension to satisfy the coincidence constraints. If this |
| 5630 | * turns out to be impossible, we fall back on the general scheme above |
| 5631 | * and try to carry as many dependences as possible. |
| 5632 | * |
| 5633 | * If "graph" contains both condition and conditional validity dependences, |
| 5634 | * then we need to check that that the conditional schedule constraint |
| 5635 | * is satisfied, i.e., there are no violated conditional validity dependences |
| 5636 | * that are adjacent to any non-local condition dependences. |
| 5637 | * If there are, then we mark all those adjacent condition dependences |
| 5638 | * as local and recompute the current band. Those dependences that |
| 5639 | * are marked local will then be forced to be local. |
| 5640 | * The initial computation is performed with no dependences marked as local. |
| 5641 | * If we are lucky, then there will be no violated conditional validity |
| 5642 | * dependences adjacent to any non-local condition dependences. |
| 5643 | * Otherwise, we mark some additional condition dependences as local and |
| 5644 | * recompute. We continue this process until there are no violations left or |
| 5645 | * until we are no longer able to compute a schedule. |
| 5646 | * Since there are only a finite number of dependences, |
| 5647 | * there will only be a finite number of iterations. |
| 5648 | */ |
| 5649 | isl_stat isl_schedule_node_compute_wcc_band(isl_ctx *ctx, |
| 5650 | struct isl_sched_graph *graph) |
| 5651 | { |
| 5652 | int has_coincidence; |
| 5653 | int use_coincidence; |
| 5654 | int force_coincidence = 0; |
| 5655 | int check_conditional; |
| 5656 | |
| 5657 | if (sort_sccs(graph) < 0) |
| 5658 | return isl_stat_error; |
| 5659 | |
| 5660 | clear_local_edges(graph); |
| 5661 | check_conditional = need_condition_check(graph); |
| 5662 | has_coincidence = has_any_coincidence(graph); |
| 5663 | |
| 5664 | if (ctx->opt->schedule_outer_coincidence) |
| 5665 | force_coincidence = 1; |
| 5666 | |
| 5667 | use_coincidence = has_coincidence; |
| 5668 | while (graph->n_row < graph->maxvar) { |
| 5669 | isl_vec *sol; |
| 5670 | int violated; |
| 5671 | int coincident; |
| 5672 | |
| 5673 | graph->src_scc = -1; |
| 5674 | graph->dst_scc = -1; |
| 5675 | |
| 5676 | if (setup_lp(ctx, graph, use_coincidence) < 0) |
| 5677 | return isl_stat_error; |
| 5678 | sol = solve_lp(ctx, graph); |
| 5679 | if (!sol) |
| 5680 | return isl_stat_error; |
| 5681 | if (sol->size == 0) { |
| 5682 | int empty = graph->n_total_row == graph->band_start; |
| 5683 | |
| 5684 | isl_vec_free(vec: sol); |
| 5685 | if (use_coincidence && (!force_coincidence || !empty)) { |
| 5686 | use_coincidence = 0; |
| 5687 | continue; |
| 5688 | } |
| 5689 | return isl_stat_ok; |
| 5690 | } |
| 5691 | coincident = !has_coincidence || use_coincidence; |
| 5692 | if (update_schedule(graph, sol, coincident) < 0) |
| 5693 | return isl_stat_error; |
| 5694 | |
| 5695 | if (!check_conditional) |
| 5696 | continue; |
| 5697 | violated = has_violated_conditional_constraint(ctx, graph); |
| 5698 | if (violated < 0) |
| 5699 | return isl_stat_error; |
| 5700 | if (!violated) |
| 5701 | continue; |
| 5702 | if (reset_band(graph) < 0) |
| 5703 | return isl_stat_error; |
| 5704 | use_coincidence = has_coincidence; |
| 5705 | } |
| 5706 | |
| 5707 | return isl_stat_ok; |
| 5708 | } |
| 5709 | |
| 5710 | /* Compute a schedule for a connected dependence graph by considering |
| 5711 | * the graph as a whole and return the updated schedule node. |
| 5712 | * |
| 5713 | * The actual schedule rows of the current band are computed by |
| 5714 | * isl_schedule_node_compute_wcc_band. isl_schedule_node_compute_finish_band |
| 5715 | * takes care of integrating the band into "node" and continuing |
| 5716 | * the computation. |
| 5717 | */ |
| 5718 | static __isl_give isl_schedule_node *compute_schedule_wcc_whole( |
| 5719 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
| 5720 | { |
| 5721 | isl_ctx *ctx; |
| 5722 | |
| 5723 | if (!node) |
| 5724 | return NULL; |
| 5725 | |
| 5726 | ctx = isl_schedule_node_get_ctx(node); |
| 5727 | if (isl_schedule_node_compute_wcc_band(ctx, graph) < 0) |
| 5728 | return isl_schedule_node_free(node); |
| 5729 | |
| 5730 | return isl_schedule_node_compute_finish_band(node, graph, initialized: 1); |
| 5731 | } |
| 5732 | |
| 5733 | /* Compute a schedule for a connected dependence graph and return |
| 5734 | * the updated schedule node. |
| 5735 | * |
| 5736 | * If Feautrier's algorithm is selected, we first recursively try to satisfy |
| 5737 | * as many validity dependences as possible. When all validity dependences |
| 5738 | * are satisfied we extend the schedule to a full-dimensional schedule. |
| 5739 | * |
| 5740 | * Call compute_schedule_wcc_whole or isl_schedule_node_compute_wcc_clustering |
| 5741 | * depending on whether the user has selected the option to try and |
| 5742 | * compute a schedule for the entire (weakly connected) component first. |
| 5743 | * If there is only a single strongly connected component (SCC), then |
| 5744 | * there is no point in trying to combine SCCs |
| 5745 | * in isl_schedule_node_compute_wcc_clustering, so compute_schedule_wcc_whole |
| 5746 | * is called instead. |
| 5747 | */ |
| 5748 | static __isl_give isl_schedule_node *compute_schedule_wcc( |
| 5749 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph) |
| 5750 | { |
| 5751 | isl_ctx *ctx; |
| 5752 | |
| 5753 | if (!node) |
| 5754 | return NULL; |
| 5755 | |
| 5756 | ctx = isl_schedule_node_get_ctx(node); |
| 5757 | if (detect_sccs(ctx, graph) < 0) |
| 5758 | return isl_schedule_node_free(node); |
| 5759 | |
| 5760 | if (isl_sched_graph_compute_maxvar(graph) < 0) |
| 5761 | return isl_schedule_node_free(node); |
| 5762 | |
| 5763 | if (need_feautrier_step(ctx, graph)) |
| 5764 | return compute_schedule_wcc_feautrier(node, graph); |
| 5765 | |
| 5766 | if (graph->scc <= 1 || isl_options_get_schedule_whole_component(ctx)) |
| 5767 | return compute_schedule_wcc_whole(node, graph); |
| 5768 | else |
| 5769 | return isl_schedule_node_compute_wcc_clustering(node, graph); |
| 5770 | } |
| 5771 | |
| 5772 | /* Compute a schedule for each group of nodes identified by node->scc |
| 5773 | * separately and then combine them in a sequence node (or as set node |
| 5774 | * if graph->weak is set) inserted at position "node" of the schedule tree. |
| 5775 | * Return the updated schedule node. |
| 5776 | * |
| 5777 | * If "wcc" is set then each of the groups belongs to a single |
| 5778 | * weakly connected component in the dependence graph so that |
| 5779 | * there is no need for compute_sub_schedule to look for weakly |
| 5780 | * connected components. |
| 5781 | * |
| 5782 | * If a set node would be introduced and if the number of components |
| 5783 | * is equal to the number of nodes, then check if the schedule |
| 5784 | * is already complete. If so, a redundant set node would be introduced |
| 5785 | * (without any further descendants) stating that the statements |
| 5786 | * can be executed in arbitrary order, which is also expressed |
| 5787 | * by the absence of any node. Refrain from inserting any nodes |
| 5788 | * in this case and simply return. |
| 5789 | */ |
| 5790 | static __isl_give isl_schedule_node *compute_component_schedule( |
| 5791 | __isl_take isl_schedule_node *node, struct isl_sched_graph *graph, |
| 5792 | int wcc) |
| 5793 | { |
| 5794 | int component; |
| 5795 | isl_ctx *ctx; |
| 5796 | isl_union_set_list *filters; |
| 5797 | |
| 5798 | if (!node) |
| 5799 | return NULL; |
| 5800 | |
| 5801 | if (graph->weak && graph->scc == graph->n) { |
| 5802 | if (isl_sched_graph_compute_maxvar(graph) < 0) |
| 5803 | return isl_schedule_node_free(node); |
| 5804 | if (graph->n_row >= graph->maxvar) |
| 5805 | return node; |
| 5806 | } |
| 5807 | |
| 5808 | ctx = isl_schedule_node_get_ctx(node); |
| 5809 | filters = isl_sched_graph_extract_sccs(ctx, graph); |
| 5810 | if (graph->weak) |
| 5811 | node = isl_schedule_node_insert_set(node, filters); |
| 5812 | else |
| 5813 | node = isl_schedule_node_insert_sequence(node, filters); |
| 5814 | |
| 5815 | for (component = 0; component < graph->scc; ++component) { |
| 5816 | node = isl_schedule_node_grandchild(node, pos1: component, pos2: 0); |
| 5817 | node = compute_sub_schedule(node, ctx, graph, |
| 5818 | node_pred: &isl_sched_node_scc_exactly, |
| 5819 | edge_pred: &isl_sched_edge_scc_exactly, |
| 5820 | data: component, wcc); |
| 5821 | node = isl_schedule_node_grandparent(node); |
| 5822 | } |
| 5823 | |
| 5824 | return node; |
| 5825 | } |
| 5826 | |
| 5827 | /* Compute a schedule for the given dependence graph and insert it at "node". |
| 5828 | * Return the updated schedule node. |
| 5829 | * |
| 5830 | * We first check if the graph is connected (through validity and conditional |
| 5831 | * validity dependences) and, if not, compute a schedule |
| 5832 | * for each component separately. |
| 5833 | * If the schedule_serialize_sccs option is set, then we check for strongly |
| 5834 | * connected components instead and compute a separate schedule for |
| 5835 | * each such strongly connected component. |
| 5836 | */ |
| 5837 | static __isl_give isl_schedule_node *compute_schedule(isl_schedule_node *node, |
| 5838 | struct isl_sched_graph *graph) |
| 5839 | { |
| 5840 | isl_ctx *ctx; |
| 5841 | |
| 5842 | if (!node) |
| 5843 | return NULL; |
| 5844 | |
| 5845 | ctx = isl_schedule_node_get_ctx(node); |
| 5846 | if (isl_options_get_schedule_serialize_sccs(ctx)) { |
| 5847 | if (detect_sccs(ctx, graph) < 0) |
| 5848 | return isl_schedule_node_free(node); |
| 5849 | } else { |
| 5850 | if (detect_wccs(ctx, graph) < 0) |
| 5851 | return isl_schedule_node_free(node); |
| 5852 | } |
| 5853 | |
| 5854 | if (graph->scc > 1) |
| 5855 | return compute_component_schedule(node, graph, wcc: 1); |
| 5856 | |
| 5857 | return compute_schedule_wcc(node, graph); |
| 5858 | } |
| 5859 | |
| 5860 | /* Compute a schedule on sc->domain that respects the given schedule |
| 5861 | * constraints. |
| 5862 | * |
| 5863 | * In particular, the schedule respects all the validity dependences. |
| 5864 | * If the default isl scheduling algorithm is used, it tries to minimize |
| 5865 | * the dependence distances over the proximity dependences. |
| 5866 | * If Feautrier's scheduling algorithm is used, the proximity dependence |
| 5867 | * distances are only minimized during the extension to a full-dimensional |
| 5868 | * schedule. |
| 5869 | * |
| 5870 | * If there are any condition and conditional validity dependences, |
| 5871 | * then the conditional validity dependences may be violated inside |
| 5872 | * a tilable band, provided they have no adjacent non-local |
| 5873 | * condition dependences. |
| 5874 | */ |
| 5875 | __isl_give isl_schedule *isl_schedule_constraints_compute_schedule( |
| 5876 | __isl_take isl_schedule_constraints *sc) |
| 5877 | { |
| 5878 | isl_ctx *ctx = isl_schedule_constraints_get_ctx(sc); |
| 5879 | struct isl_sched_graph graph = { 0 }; |
| 5880 | isl_schedule *sched; |
| 5881 | isl_schedule_node *node; |
| 5882 | isl_union_set *domain; |
| 5883 | isl_size n; |
| 5884 | |
| 5885 | sc = isl_schedule_constraints_align_params(sc); |
| 5886 | |
| 5887 | domain = isl_schedule_constraints_get_domain(sc); |
| 5888 | n = isl_union_set_n_set(uset: domain); |
| 5889 | if (n == 0) { |
| 5890 | isl_schedule_constraints_free(sc); |
| 5891 | return isl_schedule_from_domain(domain); |
| 5892 | } |
| 5893 | |
| 5894 | if (n < 0 || isl_sched_graph_init(graph: &graph, sc) < 0) |
| 5895 | domain = isl_union_set_free(uset: domain); |
| 5896 | |
| 5897 | node = isl_schedule_node_from_domain(domain); |
| 5898 | node = isl_schedule_node_child(node, pos: 0); |
| 5899 | if (graph.n > 0) |
| 5900 | node = compute_schedule(node, graph: &graph); |
| 5901 | sched = isl_schedule_node_get_schedule(node); |
| 5902 | isl_schedule_node_free(node); |
| 5903 | |
| 5904 | isl_sched_graph_free(ctx, graph: &graph); |
| 5905 | isl_schedule_constraints_free(sc); |
| 5906 | |
| 5907 | return sched; |
| 5908 | } |
| 5909 | |
| 5910 | /* Compute a schedule for the given union of domains that respects |
| 5911 | * all the validity dependences and minimizes |
| 5912 | * the dependence distances over the proximity dependences. |
| 5913 | * |
| 5914 | * This function is kept for backward compatibility. |
| 5915 | */ |
| 5916 | __isl_give isl_schedule *isl_union_set_compute_schedule( |
| 5917 | __isl_take isl_union_set *domain, |
| 5918 | __isl_take isl_union_map *validity, |
| 5919 | __isl_take isl_union_map *proximity) |
| 5920 | { |
| 5921 | isl_schedule_constraints *sc; |
| 5922 | |
| 5923 | sc = isl_schedule_constraints_on_domain(domain); |
| 5924 | sc = isl_schedule_constraints_set_validity(sc, validity); |
| 5925 | sc = isl_schedule_constraints_set_proximity(sc, proximity); |
| 5926 | |
| 5927 | return isl_schedule_constraints_compute_schedule(sc); |
| 5928 | } |
| 5929 | |