1 | /* Swing Modulo Scheduling implementation. |
2 | Copyright (C) 2004-2023 Free Software Foundation, Inc. |
3 | Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com> |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free |
9 | Software Foundation; either version 3, or (at your option) any later |
10 | version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
15 | for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | |
22 | #include "config.h" |
23 | #include "system.h" |
24 | #include "coretypes.h" |
25 | #include "backend.h" |
26 | #include "target.h" |
27 | #include "rtl.h" |
28 | #include "tree.h" |
29 | #include "cfghooks.h" |
30 | #include "df.h" |
31 | #include "memmodel.h" |
32 | #include "optabs.h" |
33 | #include "regs.h" |
34 | #include "emit-rtl.h" |
35 | #include "gcov-io.h" |
36 | #include "profile.h" |
37 | #include "insn-attr.h" |
38 | #include "cfgrtl.h" |
39 | #include "sched-int.h" |
40 | #include "cfgloop.h" |
41 | #include "expr.h" |
42 | #include "ddg.h" |
43 | #include "tree-pass.h" |
44 | #include "dbgcnt.h" |
45 | #include "loop-unroll.h" |
46 | #include "hard-reg-set.h" |
47 | |
48 | #ifdef INSN_SCHEDULING |
49 | |
50 | /* This file contains the implementation of the Swing Modulo Scheduler, |
51 | described in the following references: |
52 | [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt. |
53 | Lifetime--sensitive modulo scheduling in a production environment. |
54 | IEEE Trans. on Comps., 50(3), March 2001 |
55 | [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero. |
56 | Swing Modulo Scheduling: A Lifetime Sensitive Approach. |
57 | PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA). |
58 | |
59 | The basic structure is: |
60 | 1. Build a data-dependence graph (DDG) for each loop. |
61 | 2. Use the DDG to order the insns of a loop (not in topological order |
62 | necessarily, but rather) trying to place each insn after all its |
63 | predecessors _or_ after all its successors. |
64 | 3. Compute MII: a lower bound on the number of cycles to schedule the loop. |
65 | 4. Use the ordering to perform list-scheduling of the loop: |
66 | 1. Set II = MII. We will try to schedule the loop within II cycles. |
67 | 2. Try to schedule the insns one by one according to the ordering. |
68 | For each insn compute an interval of cycles by considering already- |
69 | scheduled preds and succs (and associated latencies); try to place |
70 | the insn in the cycles of this window checking for potential |
71 | resource conflicts (using the DFA interface). |
72 | Note: this is different from the cycle-scheduling of schedule_insns; |
73 | here the insns are not scheduled monotonically top-down (nor bottom- |
74 | up). |
75 | 3. If failed in scheduling all insns - bump II++ and try again, unless |
76 | II reaches an upper bound MaxII, in which case report failure. |
77 | 5. If we succeeded in scheduling the loop within II cycles, we now |
78 | generate prolog and epilog, decrease the counter of the loop, and |
79 | perform modulo variable expansion for live ranges that span more than |
80 | II cycles (i.e. use register copies to prevent a def from overwriting |
81 | itself before reaching the use). |
82 | |
83 | SMS works with countable loops (1) whose control part can be easily |
84 | decoupled from the rest of the loop and (2) whose loop count can |
85 | be easily adjusted. This is because we peel a constant number of |
86 | iterations into a prologue and epilogue for which we want to avoid |
87 | emitting the control part, and a kernel which is to iterate that |
88 | constant number of iterations less than the original loop. So the |
89 | control part should be a set of insns clearly identified and having |
90 | its own iv, not otherwise used in the loop (at-least for now), which |
91 | initializes a register before the loop to the number of iterations. |
92 | Currently SMS relies on the do-loop pattern to recognize such loops, |
93 | where (1) the control part comprises of all insns defining and/or |
94 | using a certain 'count' register and (2) the loop count can be |
95 | adjusted by modifying this register prior to the loop. |
96 | TODO: Rely on cfgloop analysis instead. */ |
97 | |
98 | /* This page defines partial-schedule structures and functions for |
99 | modulo scheduling. */ |
100 | |
101 | typedef struct partial_schedule *partial_schedule_ptr; |
102 | typedef struct ps_insn *ps_insn_ptr; |
103 | |
104 | /* The minimum (absolute) cycle that a node of ps was scheduled in. */ |
105 | #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle) |
106 | |
107 | /* The maximum (absolute) cycle that a node of ps was scheduled in. */ |
108 | #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle) |
109 | |
110 | /* Perform signed modulo, always returning a non-negative value. */ |
111 | #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y)) |
112 | |
113 | /* The number of different iterations the nodes in ps span, assuming |
114 | the stage boundaries are placed efficiently. */ |
115 | #define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \ |
116 | + 1 + ii - 1) / ii) |
117 | /* The stage count of ps. */ |
118 | #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count) |
119 | |
120 | /* A single instruction in the partial schedule. */ |
121 | struct ps_insn |
122 | { |
123 | /* Identifies the instruction to be scheduled. Values smaller than |
124 | the ddg's num_nodes refer directly to ddg nodes. A value of |
125 | X - num_nodes refers to register move X. */ |
126 | int id; |
127 | |
128 | /* The (absolute) cycle in which the PS instruction is scheduled. |
129 | Same as SCHED_TIME (node). */ |
130 | int cycle; |
131 | |
132 | /* The next/prev PS_INSN in the same row. */ |
133 | ps_insn_ptr next_in_row, |
134 | prev_in_row; |
135 | |
136 | }; |
137 | |
138 | /* Information about a register move that has been added to a partial |
139 | schedule. */ |
140 | struct ps_reg_move_info |
141 | { |
142 | /* The source of the move is defined by the ps_insn with id DEF. |
143 | The destination is used by the ps_insns with the ids in USES. */ |
144 | int def; |
145 | sbitmap uses; |
146 | |
147 | /* The original form of USES' instructions used OLD_REG, but they |
148 | should now use NEW_REG. */ |
149 | rtx old_reg; |
150 | rtx new_reg; |
151 | |
152 | /* The number of consecutive stages that the move occupies. */ |
153 | int num_consecutive_stages; |
154 | |
155 | /* An instruction that sets NEW_REG to the correct value. The first |
156 | move associated with DEF will have an rhs of OLD_REG; later moves |
157 | use the result of the previous move. */ |
158 | rtx_insn *insn; |
159 | }; |
160 | |
161 | /* Holds the partial schedule as an array of II rows. Each entry of the |
162 | array points to a linked list of PS_INSNs, which represents the |
163 | instructions that are scheduled for that row. */ |
164 | struct partial_schedule |
165 | { |
166 | int ii; /* Number of rows in the partial schedule. */ |
167 | int history; /* Threshold for conflict checking using DFA. */ |
168 | |
169 | /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */ |
170 | ps_insn_ptr *rows; |
171 | |
172 | /* All the moves added for this partial schedule. Index X has |
173 | a ps_insn id of X + g->num_nodes. */ |
174 | vec<ps_reg_move_info> reg_moves; |
175 | |
176 | /* rows_length[i] holds the number of instructions in the row. |
177 | It is used only (as an optimization) to back off quickly from |
178 | trying to schedule a node in a full row; that is, to avoid running |
179 | through futile DFA state transitions. */ |
180 | int *rows_length; |
181 | |
182 | /* The earliest absolute cycle of an insn in the partial schedule. */ |
183 | int min_cycle; |
184 | |
185 | /* The latest absolute cycle of an insn in the partial schedule. */ |
186 | int max_cycle; |
187 | |
188 | ddg_ptr g; /* The DDG of the insns in the partial schedule. */ |
189 | |
190 | int stage_count; /* The stage count of the partial schedule. */ |
191 | }; |
192 | |
193 | |
194 | static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history); |
195 | static void free_partial_schedule (partial_schedule_ptr); |
196 | static void reset_partial_schedule (partial_schedule_ptr, int new_ii); |
197 | void print_partial_schedule (partial_schedule_ptr, FILE *); |
198 | static void verify_partial_schedule (partial_schedule_ptr, sbitmap); |
199 | static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr, |
200 | int, int, sbitmap, sbitmap); |
201 | static void rotate_partial_schedule (partial_schedule_ptr, int); |
202 | void set_row_column_for_ps (partial_schedule_ptr); |
203 | static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap); |
204 | static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr); |
205 | |
206 | |
207 | /* This page defines constants and structures for the modulo scheduling |
208 | driver. */ |
209 | |
210 | static int sms_order_nodes (ddg_ptr, int, int *, int *); |
211 | static void set_node_sched_params (ddg_ptr); |
212 | static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *); |
213 | static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *); |
214 | static int calculate_stage_count (partial_schedule_ptr, int); |
215 | static void calculate_must_precede_follow (ddg_node_ptr, int, int, |
216 | int, int, sbitmap, sbitmap, sbitmap); |
217 | static int get_sched_window (partial_schedule_ptr, ddg_node_ptr, |
218 | sbitmap, int, int *, int *, int *); |
219 | static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int, |
220 | sbitmap, int *, sbitmap, sbitmap); |
221 | static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr); |
222 | |
223 | #define NODE_ASAP(node) ((node)->aux.count) |
224 | |
225 | #define SCHED_PARAMS(x) (&node_sched_param_vec[x]) |
226 | #define SCHED_TIME(x) (SCHED_PARAMS (x)->time) |
227 | #define SCHED_ROW(x) (SCHED_PARAMS (x)->row) |
228 | #define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage) |
229 | #define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column) |
230 | |
231 | /* The scheduling parameters held for each node. */ |
232 | typedef struct node_sched_params |
233 | { |
234 | int time; /* The absolute scheduling cycle. */ |
235 | |
236 | int row; /* Holds time % ii. */ |
237 | int stage; /* Holds time / ii. */ |
238 | |
239 | /* The column of a node inside the ps. If nodes u, v are on the same row, |
240 | u will precede v if column (u) < column (v). */ |
241 | int column; |
242 | } *node_sched_params_ptr; |
243 | |
244 | /* The following three functions are copied from the current scheduler |
245 | code in order to use sched_analyze() for computing the dependencies. |
246 | They are used when initializing the sched_info structure. */ |
247 | static const char * |
248 | sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED) |
249 | { |
250 | static char tmp[80]; |
251 | |
252 | sprintf (s: tmp, format: "i%4d" , INSN_UID (insn)); |
253 | return tmp; |
254 | } |
255 | |
256 | static void |
257 | compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED, |
258 | regset used ATTRIBUTE_UNUSED) |
259 | { |
260 | } |
261 | |
262 | static struct common_sched_info_def sms_common_sched_info; |
263 | |
264 | static struct sched_deps_info_def sms_sched_deps_info = |
265 | { |
266 | .compute_jump_reg_dependencies: compute_jump_reg_dependencies, |
267 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
268 | NULL, |
269 | .use_cselib: 0, .use_deps_list: 0, .generate_spec_deps: 0 |
270 | }; |
271 | |
272 | static struct haifa_sched_info sms_sched_info = |
273 | { |
274 | NULL, |
275 | NULL, |
276 | NULL, |
277 | NULL, |
278 | NULL, |
279 | .print_insn: sms_print_insn, |
280 | NULL, |
281 | NULL, /* insn_finishes_block_p */ |
282 | NULL, NULL, |
283 | NULL, NULL, |
284 | .queue_must_finish_empty: 0, .sched_max_insns_priority: 0, |
285 | |
286 | NULL, NULL, NULL, NULL, |
287 | NULL, NULL, |
288 | .flags: 0 |
289 | }; |
290 | |
291 | /* Partial schedule instruction ID in PS is a register move. Return |
292 | information about it. */ |
293 | static struct ps_reg_move_info * |
294 | ps_reg_move (partial_schedule_ptr ps, int id) |
295 | { |
296 | gcc_checking_assert (id >= ps->g->num_nodes); |
297 | return &ps->reg_moves[id - ps->g->num_nodes]; |
298 | } |
299 | |
300 | /* Return the rtl instruction that is being scheduled by partial schedule |
301 | instruction ID, which belongs to schedule PS. */ |
302 | static rtx_insn * |
303 | ps_rtl_insn (partial_schedule_ptr ps, int id) |
304 | { |
305 | if (id < ps->g->num_nodes) |
306 | return ps->g->nodes[id].insn; |
307 | else |
308 | return ps_reg_move (ps, id)->insn; |
309 | } |
310 | |
311 | /* Partial schedule instruction ID, which belongs to PS, occurred in |
312 | the original (unscheduled) loop. Return the first instruction |
313 | in the loop that was associated with ps_rtl_insn (PS, ID). |
314 | If the instruction had some notes before it, this is the first |
315 | of those notes. */ |
316 | static rtx_insn * |
317 | ps_first_note (partial_schedule_ptr ps, int id) |
318 | { |
319 | gcc_assert (id < ps->g->num_nodes); |
320 | return ps->g->nodes[id].first_note; |
321 | } |
322 | |
323 | /* Return the number of consecutive stages that are occupied by |
324 | partial schedule instruction ID in PS. */ |
325 | static int |
326 | ps_num_consecutive_stages (partial_schedule_ptr ps, int id) |
327 | { |
328 | if (id < ps->g->num_nodes) |
329 | return 1; |
330 | else |
331 | return ps_reg_move (ps, id)->num_consecutive_stages; |
332 | } |
333 | |
334 | /* Given HEAD and TAIL which are the first and last insns in a loop; |
335 | return the register which controls the loop. Return zero if it has |
336 | more than one occurrence in the loop besides the control part or the |
337 | do-loop pattern is not of the form we expect. */ |
338 | static rtx |
339 | doloop_register_get (rtx_insn *head, rtx_insn *tail) |
340 | { |
341 | rtx reg, condition; |
342 | rtx_insn *insn, *first_insn_not_to_check; |
343 | |
344 | if (!JUMP_P (tail)) |
345 | return NULL_RTX; |
346 | |
347 | if (!targetm.code_for_doloop_end) |
348 | return NULL_RTX; |
349 | |
350 | /* TODO: Free SMS's dependence on doloop_condition_get. */ |
351 | condition = doloop_condition_get (tail); |
352 | if (! condition) |
353 | return NULL_RTX; |
354 | |
355 | if (REG_P (XEXP (condition, 0))) |
356 | reg = XEXP (condition, 0); |
357 | else if (GET_CODE (XEXP (condition, 0)) == PLUS |
358 | && REG_P (XEXP (XEXP (condition, 0), 0))) |
359 | reg = XEXP (XEXP (condition, 0), 0); |
360 | else |
361 | gcc_unreachable (); |
362 | |
363 | /* Check that the COUNT_REG has no other occurrences in the loop |
364 | until the decrement. We assume the control part consists of |
365 | either a single (parallel) branch-on-count or a (non-parallel) |
366 | branch immediately preceded by a single (decrement) insn. */ |
367 | first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail |
368 | : prev_nondebug_insn (tail)); |
369 | |
370 | for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn)) |
371 | if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn)) |
372 | { |
373 | if (dump_file) |
374 | { |
375 | fprintf (stream: dump_file, format: "SMS count_reg found " ); |
376 | print_rtl_single (dump_file, reg); |
377 | fprintf (stream: dump_file, format: " outside control in insn:\n" ); |
378 | print_rtl_single (dump_file, insn); |
379 | } |
380 | |
381 | return NULL_RTX; |
382 | } |
383 | |
384 | return reg; |
385 | } |
386 | |
387 | /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so |
388 | that the number of iterations is a compile-time constant. If so, |
389 | return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to |
390 | this constant. Otherwise return 0. */ |
391 | static rtx_insn * |
392 | const_iteration_count (rtx count_reg, basic_block , |
393 | int64_t *count, bool* adjust_inplace) |
394 | { |
395 | rtx_insn *insn; |
396 | rtx_insn *head, *tail; |
397 | |
398 | *adjust_inplace = false; |
399 | bool read_after = false; |
400 | |
401 | if (! pre_header) |
402 | return NULL; |
403 | |
404 | get_ebb_head_tail (pre_header, pre_header, &head, &tail); |
405 | |
406 | for (insn = tail; insn != PREV_INSN (insn: head); insn = PREV_INSN (insn)) |
407 | if (single_set (insn) && rtx_equal_p (count_reg, |
408 | SET_DEST (single_set (insn)))) |
409 | { |
410 | rtx pat = single_set (insn); |
411 | |
412 | if (CONST_INT_P (SET_SRC (pat))) |
413 | { |
414 | *count = INTVAL (SET_SRC (pat)); |
415 | *adjust_inplace = !read_after; |
416 | return insn; |
417 | } |
418 | |
419 | return NULL; |
420 | } |
421 | else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn)) |
422 | { |
423 | read_after = true; |
424 | if (reg_set_p (count_reg, insn)) |
425 | break; |
426 | } |
427 | |
428 | return NULL; |
429 | } |
430 | |
431 | /* A very simple resource-based lower bound on the initiation interval. |
432 | ??? Improve the accuracy of this bound by considering the |
433 | utilization of various units. */ |
434 | static int |
435 | res_MII (ddg_ptr g) |
436 | { |
437 | if (targetm.sched.sms_res_mii) |
438 | return targetm.sched.sms_res_mii (g); |
439 | |
440 | return g->num_nodes / issue_rate; |
441 | } |
442 | |
443 | |
444 | /* A vector that contains the sched data for each ps_insn. */ |
445 | static vec<node_sched_params> node_sched_param_vec; |
446 | |
447 | /* Allocate sched_params for each node and initialize it. */ |
448 | static void |
449 | set_node_sched_params (ddg_ptr g) |
450 | { |
451 | node_sched_param_vec.truncate (size: 0); |
452 | node_sched_param_vec.safe_grow_cleared (len: g->num_nodes, exact: true); |
453 | } |
454 | |
455 | /* Make sure that node_sched_param_vec has an entry for every move in PS. */ |
456 | static void |
457 | extend_node_sched_params (partial_schedule_ptr ps) |
458 | { |
459 | node_sched_param_vec.safe_grow_cleared (len: ps->g->num_nodes |
460 | + ps->reg_moves.length (), exact: true); |
461 | } |
462 | |
463 | /* Update the sched_params (time, row and stage) for node U using the II, |
464 | the CYCLE of U and MIN_CYCLE. |
465 | We're not simply taking the following |
466 | SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii); |
467 | because the stages may not be aligned on cycle 0. */ |
468 | static void |
469 | update_node_sched_params (int u, int ii, int cycle, int min_cycle) |
470 | { |
471 | int sc_until_cycle_zero; |
472 | int stage; |
473 | |
474 | SCHED_TIME (u) = cycle; |
475 | SCHED_ROW (u) = SMODULO (cycle, ii); |
476 | |
477 | /* The calculation of stage count is done adding the number |
478 | of stages before cycle zero and after cycle zero. */ |
479 | sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii); |
480 | |
481 | if (SCHED_TIME (u) < 0) |
482 | { |
483 | stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); |
484 | SCHED_STAGE (u) = sc_until_cycle_zero - stage; |
485 | } |
486 | else |
487 | { |
488 | stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); |
489 | SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; |
490 | } |
491 | } |
492 | |
493 | static void |
494 | print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps) |
495 | { |
496 | int i; |
497 | |
498 | if (! file) |
499 | return; |
500 | for (i = 0; i < num_nodes; i++) |
501 | { |
502 | node_sched_params_ptr nsp = SCHED_PARAMS (i); |
503 | |
504 | fprintf (stream: file, format: "Node = %d; INSN = %d\n" , i, |
505 | INSN_UID (insn: ps_rtl_insn (ps, id: i))); |
506 | fprintf (stream: file, format: " asap = %d:\n" , NODE_ASAP (&ps->g->nodes[i])); |
507 | fprintf (stream: file, format: " time = %d:\n" , nsp->time); |
508 | fprintf (stream: file, format: " stage = %d:\n" , nsp->stage); |
509 | } |
510 | } |
511 | |
512 | /* Set SCHED_COLUMN for each instruction in row ROW of PS. */ |
513 | static void |
514 | set_columns_for_row (partial_schedule_ptr ps, int row) |
515 | { |
516 | ps_insn_ptr cur_insn; |
517 | int column; |
518 | |
519 | column = 0; |
520 | for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row) |
521 | SCHED_COLUMN (cur_insn->id) = column++; |
522 | } |
523 | |
524 | /* Set SCHED_COLUMN for each instruction in PS. */ |
525 | static void |
526 | set_columns_for_ps (partial_schedule_ptr ps) |
527 | { |
528 | int row; |
529 | |
530 | for (row = 0; row < ps->ii; row++) |
531 | set_columns_for_row (ps, row); |
532 | } |
533 | |
534 | /* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS. |
535 | Its single predecessor has already been scheduled, as has its |
536 | ddg node successors. (The move may have also another move as its |
537 | successor, in which case that successor will be scheduled later.) |
538 | |
539 | The move is part of a chain that satisfies register dependencies |
540 | between a producing ddg node and various consuming ddg nodes. |
541 | If some of these dependencies have a distance of 1 (meaning that |
542 | the use is upward-exposed) then DISTANCE1_USES is nonnull and |
543 | contains the set of uses with distance-1 dependencies. |
544 | DISTANCE1_USES is null otherwise. |
545 | |
546 | MUST_FOLLOW is a scratch bitmap that is big enough to hold |
547 | all current ps_insn ids. |
548 | |
549 | Return true on success. */ |
550 | static bool |
551 | schedule_reg_move (partial_schedule_ptr ps, int i_reg_move, |
552 | sbitmap distance1_uses, sbitmap must_follow) |
553 | { |
554 | unsigned int u; |
555 | int this_time, this_distance, this_start, this_end, this_latency; |
556 | int start, end, c, ii; |
557 | sbitmap_iterator sbi; |
558 | ps_reg_move_info *move; |
559 | rtx_insn *this_insn; |
560 | ps_insn_ptr psi; |
561 | |
562 | move = ps_reg_move (ps, id: i_reg_move); |
563 | ii = ps->ii; |
564 | if (dump_file) |
565 | { |
566 | fprintf (stream: dump_file, format: "Scheduling register move INSN %d; ii = %d" |
567 | ", min cycle = %d\n\n" , INSN_UID (insn: move->insn), ii, |
568 | PS_MIN_CYCLE (ps)); |
569 | print_rtl_single (dump_file, move->insn); |
570 | fprintf (stream: dump_file, format: "\n%11s %11s %5s\n" , "start" , "end" , "time" ); |
571 | fprintf (stream: dump_file, format: "=========== =========== =====\n" ); |
572 | } |
573 | |
574 | start = INT_MIN; |
575 | end = INT_MAX; |
576 | |
577 | /* For dependencies of distance 1 between a producer ddg node A |
578 | and consumer ddg node B, we have a chain of dependencies: |
579 | |
580 | A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B |
581 | |
582 | where Mi is the ith move. For dependencies of distance 0 between |
583 | a producer ddg node A and consumer ddg node C, we have a chain of |
584 | dependencies: |
585 | |
586 | A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C |
587 | |
588 | where Mi' occupies the same position as Mi but occurs a stage later. |
589 | We can only schedule each move once, so if we have both types of |
590 | chain, we model the second as: |
591 | |
592 | A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C |
593 | |
594 | First handle the dependencies between the previously-scheduled |
595 | predecessor and the move. */ |
596 | this_insn = ps_rtl_insn (ps, id: move->def); |
597 | this_latency = insn_latency (this_insn, move->insn); |
598 | this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0; |
599 | this_time = SCHED_TIME (move->def) - this_distance * ii; |
600 | this_start = this_time + this_latency; |
601 | this_end = this_time + ii; |
602 | if (dump_file) |
603 | fprintf (stream: dump_file, format: "%11d %11d %5d %d --(T,%d,%d)--> %d\n" , |
604 | this_start, this_end, SCHED_TIME (move->def), |
605 | INSN_UID (insn: this_insn), this_latency, this_distance, |
606 | INSN_UID (insn: move->insn)); |
607 | |
608 | if (start < this_start) |
609 | start = this_start; |
610 | if (end > this_end) |
611 | end = this_end; |
612 | |
613 | /* Handle the dependencies between the move and previously-scheduled |
614 | successors. */ |
615 | EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi) |
616 | { |
617 | this_insn = ps_rtl_insn (ps, id: u); |
618 | this_latency = insn_latency (move->insn, this_insn); |
619 | if (distance1_uses && !bitmap_bit_p (map: distance1_uses, bitno: u)) |
620 | this_distance = -1; |
621 | else |
622 | this_distance = 0; |
623 | this_time = SCHED_TIME (u) + this_distance * ii; |
624 | this_start = this_time - ii; |
625 | this_end = this_time - this_latency; |
626 | if (dump_file) |
627 | fprintf (stream: dump_file, format: "%11d %11d %5d %d --(T,%d,%d)--> %d\n" , |
628 | this_start, this_end, SCHED_TIME (u), INSN_UID (insn: move->insn), |
629 | this_latency, this_distance, INSN_UID (insn: this_insn)); |
630 | |
631 | if (start < this_start) |
632 | start = this_start; |
633 | if (end > this_end) |
634 | end = this_end; |
635 | } |
636 | |
637 | if (dump_file) |
638 | { |
639 | fprintf (stream: dump_file, format: "----------- ----------- -----\n" ); |
640 | fprintf (stream: dump_file, format: "%11d %11d %5s %s\n" , start, end, "" , "(max, min)" ); |
641 | } |
642 | |
643 | bitmap_clear (must_follow); |
644 | bitmap_set_bit (map: must_follow, bitno: move->def); |
645 | |
646 | start = MAX (start, end - (ii - 1)); |
647 | for (c = end; c >= start; c--) |
648 | { |
649 | psi = ps_add_node_check_conflicts (ps, i_reg_move, c, |
650 | move->uses, must_follow); |
651 | if (psi) |
652 | { |
653 | update_node_sched_params (u: i_reg_move, ii, cycle: c, PS_MIN_CYCLE (ps)); |
654 | if (dump_file) |
655 | fprintf (stream: dump_file, format: "\nScheduled register move INSN %d at" |
656 | " time %d, row %d\n\n" , INSN_UID (insn: move->insn), c, |
657 | SCHED_ROW (i_reg_move)); |
658 | return true; |
659 | } |
660 | } |
661 | |
662 | if (dump_file) |
663 | fprintf (stream: dump_file, format: "\nNo available slot\n\n" ); |
664 | |
665 | return false; |
666 | } |
667 | |
668 | /* |
669 | Breaking intra-loop register anti-dependences: |
670 | Each intra-loop register anti-dependence implies a cross-iteration true |
671 | dependence of distance 1. Therefore, we can remove such false dependencies |
672 | and figure out if the partial schedule broke them by checking if (for a |
673 | true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and |
674 | if so generate a register move. The number of such moves is equal to: |
675 | SCHED_TIME (use) - SCHED_TIME (def) { 0 broken |
676 | nreg_moves = ----------------------------------- + 1 - { dependence. |
677 | ii { 1 if not. |
678 | */ |
679 | static bool |
680 | schedule_reg_moves (partial_schedule_ptr ps) |
681 | { |
682 | ddg_ptr g = ps->g; |
683 | int ii = ps->ii; |
684 | int i; |
685 | |
686 | for (i = 0; i < g->num_nodes; i++) |
687 | { |
688 | ddg_node_ptr u = &g->nodes[i]; |
689 | ddg_edge_ptr e; |
690 | int nreg_moves = 0, i_reg_move; |
691 | rtx prev_reg, old_reg; |
692 | int first_move; |
693 | int distances[2]; |
694 | sbitmap distance1_uses; |
695 | rtx set = single_set (insn: u->insn); |
696 | |
697 | /* Skip instructions that do not set a register. */ |
698 | if (set && !REG_P (SET_DEST (set))) |
699 | continue; |
700 | |
701 | /* Compute the number of reg_moves needed for u, by looking at life |
702 | ranges started at u (excluding self-loops). */ |
703 | distances[0] = distances[1] = false; |
704 | for (e = u->out; e; e = e->next_out) |
705 | if (e->type == TRUE_DEP && e->dest != e->src) |
706 | { |
707 | int nreg_moves4e = (SCHED_TIME (e->dest->cuid) |
708 | - SCHED_TIME (e->src->cuid)) / ii; |
709 | |
710 | if (e->distance == 1) |
711 | nreg_moves4e = (SCHED_TIME (e->dest->cuid) |
712 | - SCHED_TIME (e->src->cuid) + ii) / ii; |
713 | |
714 | /* If dest precedes src in the schedule of the kernel, then dest |
715 | will read before src writes and we can save one reg_copy. */ |
716 | if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid) |
717 | && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid)) |
718 | nreg_moves4e--; |
719 | |
720 | if (nreg_moves4e >= 1) |
721 | { |
722 | /* !single_set instructions are not supported yet and |
723 | thus we do not except to encounter them in the loop |
724 | except from the doloop part. For the latter case |
725 | we assume no regmoves are generated as the doloop |
726 | instructions are tied to the branch with an edge. */ |
727 | gcc_assert (set); |
728 | /* If the instruction contains auto-inc register then |
729 | validate that the regmov is being generated for the |
730 | target regsiter rather then the inc'ed register. */ |
731 | gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn)); |
732 | } |
733 | |
734 | if (nreg_moves4e) |
735 | { |
736 | gcc_assert (e->distance < 2); |
737 | distances[e->distance] = true; |
738 | } |
739 | nreg_moves = MAX (nreg_moves, nreg_moves4e); |
740 | } |
741 | |
742 | if (nreg_moves == 0) |
743 | continue; |
744 | |
745 | /* Create NREG_MOVES register moves. */ |
746 | first_move = ps->reg_moves.length (); |
747 | ps->reg_moves.safe_grow_cleared (len: first_move + nreg_moves, exact: true); |
748 | extend_node_sched_params (ps); |
749 | |
750 | /* Record the moves associated with this node. */ |
751 | first_move += ps->g->num_nodes; |
752 | |
753 | /* Generate each move. */ |
754 | old_reg = prev_reg = SET_DEST (set); |
755 | if (HARD_REGISTER_P (old_reg)) |
756 | return false; |
757 | |
758 | for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) |
759 | { |
760 | ps_reg_move_info *move = ps_reg_move (ps, id: first_move + i_reg_move); |
761 | |
762 | move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i; |
763 | move->uses = sbitmap_alloc (first_move + nreg_moves); |
764 | move->old_reg = old_reg; |
765 | move->new_reg = gen_reg_rtx (GET_MODE (prev_reg)); |
766 | move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1; |
767 | move->insn = gen_move_insn (move->new_reg, copy_rtx (prev_reg)); |
768 | bitmap_clear (move->uses); |
769 | |
770 | prev_reg = move->new_reg; |
771 | } |
772 | |
773 | distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL; |
774 | |
775 | if (distance1_uses) |
776 | bitmap_clear (distance1_uses); |
777 | |
778 | /* Every use of the register defined by node may require a different |
779 | copy of this register, depending on the time the use is scheduled. |
780 | Record which uses require which move results. */ |
781 | for (e = u->out; e; e = e->next_out) |
782 | if (e->type == TRUE_DEP && e->dest != e->src) |
783 | { |
784 | int dest_copy = (SCHED_TIME (e->dest->cuid) |
785 | - SCHED_TIME (e->src->cuid)) / ii; |
786 | |
787 | if (e->distance == 1) |
788 | dest_copy = (SCHED_TIME (e->dest->cuid) |
789 | - SCHED_TIME (e->src->cuid) + ii) / ii; |
790 | |
791 | if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid) |
792 | && SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid)) |
793 | dest_copy--; |
794 | |
795 | if (dest_copy) |
796 | { |
797 | ps_reg_move_info *move; |
798 | |
799 | move = ps_reg_move (ps, id: first_move + dest_copy - 1); |
800 | bitmap_set_bit (map: move->uses, bitno: e->dest->cuid); |
801 | if (e->distance == 1) |
802 | bitmap_set_bit (map: distance1_uses, bitno: e->dest->cuid); |
803 | } |
804 | } |
805 | |
806 | auto_sbitmap must_follow (first_move + nreg_moves); |
807 | for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) |
808 | if (!schedule_reg_move (ps, i_reg_move: first_move + i_reg_move, |
809 | distance1_uses, must_follow)) |
810 | break; |
811 | if (distance1_uses) |
812 | sbitmap_free (map: distance1_uses); |
813 | if (i_reg_move < nreg_moves) |
814 | return false; |
815 | } |
816 | return true; |
817 | } |
818 | |
819 | /* Emit the moves associated with PS. Apply the substitutions |
820 | associated with them. */ |
821 | static void |
822 | apply_reg_moves (partial_schedule_ptr ps) |
823 | { |
824 | ps_reg_move_info *move; |
825 | int i; |
826 | |
827 | FOR_EACH_VEC_ELT (ps->reg_moves, i, move) |
828 | { |
829 | unsigned int i_use; |
830 | sbitmap_iterator sbi; |
831 | |
832 | EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi) |
833 | { |
834 | replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg); |
835 | df_insn_rescan (ps->g->nodes[i_use].insn); |
836 | } |
837 | } |
838 | } |
839 | |
840 | /* Bump the SCHED_TIMEs of all nodes by AMOUNT. Set the values of |
841 | SCHED_ROW and SCHED_STAGE. Instruction scheduled on cycle AMOUNT |
842 | will move to cycle zero. */ |
843 | static void |
844 | reset_sched_times (partial_schedule_ptr ps, int amount) |
845 | { |
846 | int row; |
847 | int ii = ps->ii; |
848 | ps_insn_ptr crr_insn; |
849 | |
850 | for (row = 0; row < ii; row++) |
851 | for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) |
852 | { |
853 | int u = crr_insn->id; |
854 | int normalized_time = SCHED_TIME (u) - amount; |
855 | int new_min_cycle = PS_MIN_CYCLE (ps) - amount; |
856 | |
857 | if (dump_file) |
858 | { |
859 | /* Print the scheduling times after the rotation. */ |
860 | rtx_insn *insn = ps_rtl_insn (ps, id: u); |
861 | |
862 | fprintf (stream: dump_file, format: "crr_insn->node=%d (insn id %d), " |
863 | "crr_insn->cycle=%d, min_cycle=%d" , u, |
864 | INSN_UID (insn), normalized_time, new_min_cycle); |
865 | if (JUMP_P (insn)) |
866 | fprintf (stream: dump_file, format: " (branch)" ); |
867 | fprintf (stream: dump_file, format: "\n" ); |
868 | } |
869 | |
870 | gcc_assert (SCHED_TIME (u) >= ps->min_cycle); |
871 | gcc_assert (SCHED_TIME (u) <= ps->max_cycle); |
872 | |
873 | crr_insn->cycle = normalized_time; |
874 | update_node_sched_params (u, ii, cycle: normalized_time, min_cycle: new_min_cycle); |
875 | } |
876 | } |
877 | |
878 | /* Permute the insns according to their order in PS, from row 0 to |
879 | row ii-1, and position them right before LAST. This schedules |
880 | the insns of the loop kernel. */ |
881 | static void |
882 | permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last) |
883 | { |
884 | int ii = ps->ii; |
885 | int row; |
886 | ps_insn_ptr ps_ij; |
887 | |
888 | for (row = 0; row < ii ; row++) |
889 | for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) |
890 | { |
891 | rtx_insn *insn = ps_rtl_insn (ps, id: ps_ij->id); |
892 | |
893 | if (PREV_INSN (insn: last) != insn) |
894 | { |
895 | if (ps_ij->id < ps->g->num_nodes) |
896 | reorder_insns_nobb (ps_first_note (ps, id: ps_ij->id), insn, |
897 | PREV_INSN (insn: last)); |
898 | else |
899 | add_insn_before (insn, last, NULL); |
900 | } |
901 | } |
902 | } |
903 | |
904 | /* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE |
905 | respectively only if cycle C falls on the border of the scheduling |
906 | window boundaries marked by START and END cycles. STEP is the |
907 | direction of the window. */ |
908 | static inline void |
909 | set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow, |
910 | sbitmap *tmp_precede, sbitmap must_precede, int c, |
911 | int start, int end, int step) |
912 | { |
913 | *tmp_precede = NULL; |
914 | *tmp_follow = NULL; |
915 | |
916 | if (c == start) |
917 | { |
918 | if (step == 1) |
919 | *tmp_precede = must_precede; |
920 | else /* step == -1. */ |
921 | *tmp_follow = must_follow; |
922 | } |
923 | if (c == end - step) |
924 | { |
925 | if (step == 1) |
926 | *tmp_follow = must_follow; |
927 | else /* step == -1. */ |
928 | *tmp_precede = must_precede; |
929 | } |
930 | |
931 | } |
932 | |
933 | /* Return True if the branch can be moved to row ii-1 while |
934 | normalizing the partial schedule PS to start from cycle zero and thus |
935 | optimize the SC. Otherwise return False. */ |
936 | static bool |
937 | optimize_sc (partial_schedule_ptr ps, ddg_ptr g) |
938 | { |
939 | int amount = PS_MIN_CYCLE (ps); |
940 | int start, end, step; |
941 | int ii = ps->ii; |
942 | bool ok = false; |
943 | int stage_count, stage_count_curr; |
944 | |
945 | /* Compare the SC after normalization and SC after bringing the branch |
946 | to row ii-1. If they are equal just bail out. */ |
947 | stage_count = calculate_stage_count (ps, amount); |
948 | stage_count_curr = |
949 | calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1)); |
950 | |
951 | if (stage_count == stage_count_curr) |
952 | { |
953 | if (dump_file) |
954 | fprintf (stream: dump_file, format: "SMS SC already optimized.\n" ); |
955 | |
956 | return false; |
957 | } |
958 | |
959 | if (dump_file) |
960 | { |
961 | fprintf (stream: dump_file, format: "SMS Trying to optimize branch location\n" ); |
962 | fprintf (stream: dump_file, format: "SMS partial schedule before trial:\n" ); |
963 | print_partial_schedule (ps, dump_file); |
964 | } |
965 | |
966 | /* First, normalize the partial scheduling. */ |
967 | reset_sched_times (ps, amount); |
968 | rotate_partial_schedule (ps, amount); |
969 | if (dump_file) |
970 | { |
971 | fprintf (stream: dump_file, |
972 | format: "SMS partial schedule after normalization (ii, %d, SC %d):\n" , |
973 | ii, stage_count); |
974 | print_partial_schedule (ps, dump_file); |
975 | } |
976 | |
977 | if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1) |
978 | return true; |
979 | |
980 | auto_sbitmap sched_nodes (g->num_nodes); |
981 | bitmap_ones (sched_nodes); |
982 | |
983 | /* Calculate the new placement of the branch. It should be in row |
984 | ii-1 and fall into it's scheduling window. */ |
985 | if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start, |
986 | &step, &end) == 0) |
987 | { |
988 | bool success; |
989 | ps_insn_ptr next_ps_i; |
990 | int branch_cycle = SCHED_TIME (g->closing_branch->cuid); |
991 | int row = SMODULO (branch_cycle, ps->ii); |
992 | int num_splits = 0; |
993 | sbitmap tmp_precede, tmp_follow; |
994 | int min_cycle, c; |
995 | |
996 | if (dump_file) |
997 | fprintf (stream: dump_file, format: "\nTrying to schedule node %d " |
998 | "INSN = %d in (%d .. %d) step %d\n" , |
999 | g->closing_branch->cuid, |
1000 | (INSN_UID (insn: g->closing_branch->insn)), start, end, step); |
1001 | |
1002 | gcc_assert ((step > 0 && start < end) || (step < 0 && start > end)); |
1003 | if (step == 1) |
1004 | { |
1005 | c = start + ii - SMODULO (start, ii) - 1; |
1006 | gcc_assert (c >= start); |
1007 | if (c >= end) |
1008 | { |
1009 | if (dump_file) |
1010 | fprintf (stream: dump_file, |
1011 | format: "SMS failed to schedule branch at cycle: %d\n" , c); |
1012 | return false; |
1013 | } |
1014 | } |
1015 | else |
1016 | { |
1017 | c = start - SMODULO (start, ii) - 1; |
1018 | gcc_assert (c <= start); |
1019 | |
1020 | if (c <= end) |
1021 | { |
1022 | if (dump_file) |
1023 | fprintf (stream: dump_file, |
1024 | format: "SMS failed to schedule branch at cycle: %d\n" , c); |
1025 | return false; |
1026 | } |
1027 | } |
1028 | |
1029 | auto_sbitmap must_precede (g->num_nodes); |
1030 | auto_sbitmap must_follow (g->num_nodes); |
1031 | |
1032 | /* Try to schedule the branch is it's new cycle. */ |
1033 | calculate_must_precede_follow (g->closing_branch, start, end, |
1034 | step, ii, sched_nodes, |
1035 | must_precede, must_follow); |
1036 | |
1037 | set_must_precede_follow (tmp_follow: &tmp_follow, must_follow, tmp_precede: &tmp_precede, |
1038 | must_precede, c, start, end, step); |
1039 | |
1040 | /* Find the element in the partial schedule related to the closing |
1041 | branch so we can remove it from it's current cycle. */ |
1042 | for (next_ps_i = ps->rows[row]; |
1043 | next_ps_i; next_ps_i = next_ps_i->next_in_row) |
1044 | if (next_ps_i->id == g->closing_branch->cuid) |
1045 | break; |
1046 | |
1047 | min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii); |
1048 | remove_node_from_ps (ps, next_ps_i); |
1049 | success = |
1050 | try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c, |
1051 | sched_nodes, &num_splits, |
1052 | tmp_precede, tmp_follow); |
1053 | gcc_assert (num_splits == 0); |
1054 | if (!success) |
1055 | { |
1056 | if (dump_file) |
1057 | fprintf (stream: dump_file, |
1058 | format: "SMS failed to schedule branch at cycle: %d, " |
1059 | "bringing it back to cycle %d\n" , c, branch_cycle); |
1060 | |
1061 | /* The branch was failed to be placed in row ii - 1. |
1062 | Put it back in it's original place in the partial |
1063 | schedualing. */ |
1064 | set_must_precede_follow (tmp_follow: &tmp_follow, must_follow, tmp_precede: &tmp_precede, |
1065 | must_precede, c: branch_cycle, start, end, |
1066 | step); |
1067 | success = |
1068 | try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, |
1069 | branch_cycle, sched_nodes, |
1070 | &num_splits, tmp_precede, |
1071 | tmp_follow); |
1072 | gcc_assert (success && (num_splits == 0)); |
1073 | ok = false; |
1074 | } |
1075 | else |
1076 | { |
1077 | /* The branch is placed in row ii - 1. */ |
1078 | if (dump_file) |
1079 | fprintf (stream: dump_file, |
1080 | format: "SMS success in moving branch to cycle %d\n" , c); |
1081 | |
1082 | update_node_sched_params (u: g->closing_branch->cuid, ii, cycle: c, |
1083 | PS_MIN_CYCLE (ps)); |
1084 | ok = true; |
1085 | } |
1086 | |
1087 | /* This might have been added to a new first stage. */ |
1088 | if (PS_MIN_CYCLE (ps) < min_cycle) |
1089 | reset_sched_times (ps, amount: 0); |
1090 | } |
1091 | |
1092 | return ok; |
1093 | } |
1094 | |
1095 | static void |
1096 | duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, |
1097 | int to_stage, rtx count_reg, class loop *loop) |
1098 | { |
1099 | int row; |
1100 | ps_insn_ptr ps_ij; |
1101 | copy_bb_data id; |
1102 | |
1103 | for (row = 0; row < ps->ii; row++) |
1104 | for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) |
1105 | { |
1106 | int u = ps_ij->id; |
1107 | int first_u, last_u; |
1108 | rtx_insn *u_insn; |
1109 | |
1110 | /* Do not duplicate any insn which refers to count_reg as it |
1111 | belongs to the control part. |
1112 | The closing branch is scheduled as well and thus should |
1113 | be ignored. |
1114 | TODO: This should be done by analyzing the control part of |
1115 | the loop. */ |
1116 | u_insn = ps_rtl_insn (ps, id: u); |
1117 | if (reg_mentioned_p (count_reg, u_insn) |
1118 | || JUMP_P (u_insn)) |
1119 | continue; |
1120 | |
1121 | first_u = SCHED_STAGE (u); |
1122 | last_u = first_u + ps_num_consecutive_stages (ps, id: u) - 1; |
1123 | if (from_stage <= last_u && to_stage >= first_u) |
1124 | { |
1125 | if (u < ps->g->num_nodes) |
1126 | duplicate_insn_chain (ps_first_note (ps, id: u), u_insn, |
1127 | loop, &id); |
1128 | else |
1129 | emit_insn (copy_rtx (PATTERN (insn: u_insn))); |
1130 | } |
1131 | } |
1132 | } |
1133 | |
1134 | |
1135 | /* Generate the instructions (including reg_moves) for prolog & epilog. */ |
1136 | static void |
1137 | generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop, |
1138 | rtx count_reg, bool adjust_init) |
1139 | { |
1140 | int i; |
1141 | int last_stage = PS_STAGE_COUNT (ps) - 1; |
1142 | edge e; |
1143 | |
1144 | /* Generate the prolog, inserting its insns on the loop-entry edge. */ |
1145 | start_sequence (); |
1146 | |
1147 | if (adjust_init) |
1148 | { |
1149 | /* Generate instructions at the beginning of the prolog to |
1150 | adjust the loop count by STAGE_COUNT. If loop count is constant |
1151 | and it not used anywhere in prologue, this constant is adjusted by |
1152 | STAGE_COUNT outside of generate_prolog_epilog function. */ |
1153 | rtx sub_reg = NULL_RTX; |
1154 | |
1155 | sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg, |
1156 | gen_int_mode (last_stage, |
1157 | GET_MODE (count_reg)), |
1158 | count_reg, 1, OPTAB_DIRECT); |
1159 | gcc_assert (REG_P (sub_reg)); |
1160 | if (REGNO (sub_reg) != REGNO (count_reg)) |
1161 | emit_move_insn (count_reg, sub_reg); |
1162 | } |
1163 | |
1164 | for (i = 0; i < last_stage; i++) |
1165 | duplicate_insns_of_cycles (ps, from_stage: 0, to_stage: i, count_reg, loop); |
1166 | |
1167 | /* Put the prolog on the entry edge. */ |
1168 | e = loop_preheader_edge (loop); |
1169 | split_edge_and_insert (e, get_insns ()); |
1170 | if (!flag_resched_modulo_sched) |
1171 | e->dest->flags |= BB_DISABLE_SCHEDULE; |
1172 | |
1173 | end_sequence (); |
1174 | |
1175 | /* Generate the epilog, inserting its insns on the loop-exit edge. */ |
1176 | start_sequence (); |
1177 | |
1178 | for (i = 0; i < last_stage; i++) |
1179 | duplicate_insns_of_cycles (ps, from_stage: i + 1, to_stage: last_stage, count_reg, loop); |
1180 | |
1181 | /* Put the epilogue on the exit edge. */ |
1182 | gcc_assert (single_exit (loop)); |
1183 | e = single_exit (loop); |
1184 | split_edge_and_insert (e, get_insns ()); |
1185 | if (!flag_resched_modulo_sched) |
1186 | e->dest->flags |= BB_DISABLE_SCHEDULE; |
1187 | |
1188 | end_sequence (); |
1189 | } |
1190 | |
1191 | /* Mark LOOP as software pipelined so the later |
1192 | scheduling passes don't touch it. */ |
1193 | static void |
1194 | mark_loop_unsched (class loop *loop) |
1195 | { |
1196 | unsigned i; |
1197 | basic_block *bbs = get_loop_body (loop); |
1198 | |
1199 | for (i = 0; i < loop->num_nodes; i++) |
1200 | bbs[i]->flags |= BB_DISABLE_SCHEDULE; |
1201 | |
1202 | free (ptr: bbs); |
1203 | } |
1204 | |
1205 | /* Return true if all the BBs of the loop are empty except the |
1206 | loop header. */ |
1207 | static bool |
1208 | loop_single_full_bb_p (class loop *loop) |
1209 | { |
1210 | unsigned i; |
1211 | basic_block *bbs = get_loop_body (loop); |
1212 | |
1213 | for (i = 0; i < loop->num_nodes ; i++) |
1214 | { |
1215 | rtx_insn *head, *tail; |
1216 | bool empty_bb = true; |
1217 | |
1218 | if (bbs[i] == loop->header) |
1219 | continue; |
1220 | |
1221 | /* Make sure that basic blocks other than the header |
1222 | have only notes labels or jumps. */ |
1223 | get_ebb_head_tail (bbs[i], bbs[i], &head, &tail); |
1224 | for (; head != NEXT_INSN (insn: tail); head = NEXT_INSN (insn: head)) |
1225 | { |
1226 | if (NOTE_P (head) || LABEL_P (head) |
1227 | || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head)))) |
1228 | continue; |
1229 | empty_bb = false; |
1230 | break; |
1231 | } |
1232 | |
1233 | if (! empty_bb) |
1234 | { |
1235 | free (ptr: bbs); |
1236 | return false; |
1237 | } |
1238 | } |
1239 | free (ptr: bbs); |
1240 | return true; |
1241 | } |
1242 | |
1243 | /* Dump file:line from INSN's location info to dump_file. */ |
1244 | |
1245 | static void |
1246 | dump_insn_location (rtx_insn *insn) |
1247 | { |
1248 | if (dump_file && INSN_HAS_LOCATION (insn)) |
1249 | { |
1250 | expanded_location xloc = insn_location (insn); |
1251 | fprintf (stream: dump_file, format: " %s:%i" , xloc.file, xloc.line); |
1252 | } |
1253 | } |
1254 | |
1255 | /* A simple loop from SMS point of view; it is a loop that is composed of |
1256 | either a single basic block or two BBs - a header and a latch. */ |
1257 | #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \ |
1258 | && (EDGE_COUNT (loop->latch->preds) == 1) \ |
1259 | && (EDGE_COUNT (loop->latch->succs) == 1)) |
1260 | |
1261 | /* Return true if the loop is in its canonical form and false if not. |
1262 | i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit. */ |
1263 | static bool |
1264 | loop_canon_p (class loop *loop) |
1265 | { |
1266 | |
1267 | if (loop->inner || !loop_outer (loop)) |
1268 | { |
1269 | if (dump_file) |
1270 | fprintf (stream: dump_file, format: "SMS loop inner or !loop_outer\n" ); |
1271 | return false; |
1272 | } |
1273 | |
1274 | if (!single_exit (loop)) |
1275 | { |
1276 | if (dump_file) |
1277 | { |
1278 | rtx_insn *insn = BB_END (loop->header); |
1279 | |
1280 | fprintf (stream: dump_file, format: "SMS loop many exits" ); |
1281 | dump_insn_location (insn); |
1282 | fprintf (stream: dump_file, format: "\n" ); |
1283 | } |
1284 | return false; |
1285 | } |
1286 | |
1287 | if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop)) |
1288 | { |
1289 | if (dump_file) |
1290 | { |
1291 | rtx_insn *insn = BB_END (loop->header); |
1292 | |
1293 | fprintf (stream: dump_file, format: "SMS loop many BBs." ); |
1294 | dump_insn_location (insn); |
1295 | fprintf (stream: dump_file, format: "\n" ); |
1296 | } |
1297 | return false; |
1298 | } |
1299 | |
1300 | return true; |
1301 | } |
1302 | |
1303 | /* If there are more than one entry for the loop, |
1304 | make it one by splitting the first entry edge and |
1305 | redirecting the others to the new BB. */ |
1306 | static void |
1307 | canon_loop (class loop *loop) |
1308 | { |
1309 | edge e; |
1310 | edge_iterator i; |
1311 | |
1312 | /* Avoid annoying special cases of edges going to exit |
1313 | block. */ |
1314 | FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) |
1315 | if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1)) |
1316 | split_edge (e); |
1317 | |
1318 | if (loop->latch == loop->header |
1319 | || EDGE_COUNT (loop->latch->succs) > 1) |
1320 | { |
1321 | FOR_EACH_EDGE (e, i, loop->header->preds) |
1322 | if (e->src == loop->latch) |
1323 | break; |
1324 | split_edge (e); |
1325 | } |
1326 | } |
1327 | |
1328 | /* Setup infos. */ |
1329 | static void |
1330 | setup_sched_infos (void) |
1331 | { |
1332 | memcpy (dest: &sms_common_sched_info, src: &haifa_common_sched_info, |
1333 | n: sizeof (sms_common_sched_info)); |
1334 | sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS; |
1335 | common_sched_info = &sms_common_sched_info; |
1336 | |
1337 | sched_deps_info = &sms_sched_deps_info; |
1338 | current_sched_info = &sms_sched_info; |
1339 | } |
1340 | |
1341 | /* Probability in % that the sms-ed loop rolls enough so that optimized |
1342 | version may be entered. Just a guess. */ |
1343 | #define PROB_SMS_ENOUGH_ITERATIONS 80 |
1344 | |
1345 | /* Main entry point, perform SMS scheduling on the loops of the function |
1346 | that consist of single basic blocks. */ |
1347 | static void |
1348 | sms_schedule (void) |
1349 | { |
1350 | rtx_insn *insn; |
1351 | ddg_ptr *g_arr, g; |
1352 | int * node_order; |
1353 | int maxii, max_asap; |
1354 | partial_schedule_ptr ps; |
1355 | basic_block bb = NULL; |
1356 | basic_block condition_bb = NULL; |
1357 | edge latch_edge; |
1358 | HOST_WIDE_INT trip_count, max_trip_count; |
1359 | HARD_REG_SET prohibited_regs; |
1360 | |
1361 | loop_optimizer_init (LOOPS_HAVE_PREHEADERS |
1362 | | LOOPS_HAVE_RECORDED_EXITS); |
1363 | if (number_of_loops (cfun) <= 1) |
1364 | { |
1365 | loop_optimizer_finalize (); |
1366 | return; /* There are no loops to schedule. */ |
1367 | } |
1368 | |
1369 | /* Initialize issue_rate. */ |
1370 | if (targetm.sched.issue_rate) |
1371 | { |
1372 | int temp = reload_completed; |
1373 | |
1374 | reload_completed = 1; |
1375 | issue_rate = targetm.sched.issue_rate (); |
1376 | reload_completed = temp; |
1377 | } |
1378 | else |
1379 | issue_rate = 1; |
1380 | |
1381 | /* Initialize the scheduler. */ |
1382 | setup_sched_infos (); |
1383 | haifa_sched_init (); |
1384 | |
1385 | /* Allocate memory to hold the DDG array one entry for each loop. |
1386 | We use loop->num as index into this array. */ |
1387 | g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun)); |
1388 | |
1389 | REG_SET_TO_HARD_REG_SET (prohibited_regs, &df->regular_block_artificial_uses); |
1390 | |
1391 | if (dump_file) |
1392 | { |
1393 | fprintf (stream: dump_file, format: "\n\nSMS analysis phase\n" ); |
1394 | fprintf (stream: dump_file, format: "===================\n\n" ); |
1395 | } |
1396 | |
1397 | /* Build DDGs for all the relevant loops and hold them in G_ARR |
1398 | indexed by the loop index. */ |
1399 | for (auto loop : loops_list (cfun, 0)) |
1400 | { |
1401 | rtx_insn *head, *tail; |
1402 | rtx count_reg; |
1403 | |
1404 | /* For debugging. */ |
1405 | if (dbg_cnt (index: sms_sched_loop) == false) |
1406 | { |
1407 | if (dump_file) |
1408 | fprintf (stream: dump_file, format: "SMS reached max limit... \n" ); |
1409 | |
1410 | break; |
1411 | } |
1412 | |
1413 | if (dump_file) |
1414 | { |
1415 | rtx_insn *insn = BB_END (loop->header); |
1416 | |
1417 | fprintf (stream: dump_file, format: "SMS loop num: %d" , loop->num); |
1418 | dump_insn_location (insn); |
1419 | fprintf (stream: dump_file, format: "\n" ); |
1420 | } |
1421 | |
1422 | if (! loop_canon_p (loop)) |
1423 | continue; |
1424 | |
1425 | if (! loop_single_full_bb_p (loop)) |
1426 | { |
1427 | if (dump_file) |
1428 | fprintf (stream: dump_file, format: "SMS not loop_single_full_bb_p\n" ); |
1429 | continue; |
1430 | } |
1431 | |
1432 | bb = loop->header; |
1433 | |
1434 | get_ebb_head_tail (bb, bb, &head, &tail); |
1435 | latch_edge = loop_latch_edge (loop); |
1436 | gcc_assert (single_exit (loop)); |
1437 | trip_count = get_estimated_loop_iterations_int (loop); |
1438 | max_trip_count = get_max_loop_iterations_int (loop); |
1439 | |
1440 | /* Perform SMS only on loops that their average count is above threshold. */ |
1441 | |
1442 | if (latch_edge->count () > profile_count::zero () |
1443 | && (latch_edge->count () |
1444 | < (single_exit (loop)->count () |
1445 | * param_sms_loop_average_count_threshold))) |
1446 | { |
1447 | if (dump_file) |
1448 | { |
1449 | dump_insn_location (insn: tail); |
1450 | fprintf (stream: dump_file, format: "\nSMS single-bb-loop\n" ); |
1451 | if (profile_info && flag_branch_probabilities) |
1452 | { |
1453 | fprintf (stream: dump_file, format: "SMS loop-count " ); |
1454 | fprintf (stream: dump_file, format: "%" PRId64, |
1455 | (int64_t) bb->count.to_gcov_type ()); |
1456 | fprintf (stream: dump_file, format: "\n" ); |
1457 | fprintf (stream: dump_file, format: "SMS trip-count " ); |
1458 | fprintf (stream: dump_file, format: "%" PRId64 "max %" PRId64, |
1459 | (int64_t) trip_count, (int64_t) max_trip_count); |
1460 | fprintf (stream: dump_file, format: "\n" ); |
1461 | } |
1462 | } |
1463 | continue; |
1464 | } |
1465 | |
1466 | /* Make sure this is a doloop. */ |
1467 | if (!(count_reg = doloop_register_get (head, tail))) |
1468 | { |
1469 | if (dump_file) |
1470 | fprintf (stream: dump_file, format: "SMS doloop_register_get failed\n" ); |
1471 | continue; |
1472 | } |
1473 | |
1474 | /* Don't handle BBs with calls or barriers |
1475 | or !single_set with the exception of do-loop control part insns. |
1476 | ??? Should handle insns defining subregs. */ |
1477 | for (insn = head; insn != NEXT_INSN (insn: tail); insn = NEXT_INSN (insn)) |
1478 | { |
1479 | if (INSN_P (insn)) |
1480 | { |
1481 | HARD_REG_SET regs; |
1482 | CLEAR_HARD_REG_SET (set&: regs); |
1483 | note_stores (insn, record_hard_reg_sets, ®s); |
1484 | if (hard_reg_set_intersect_p (x: regs, y: prohibited_regs)) |
1485 | break; |
1486 | } |
1487 | |
1488 | if (CALL_P (insn) |
1489 | || BARRIER_P (insn) |
1490 | || (INSN_P (insn) && single_set (insn) |
1491 | && GET_CODE (SET_DEST (single_set (insn))) == SUBREG) |
1492 | /* Not a single set. */ |
1493 | || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn) |
1494 | && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE |
1495 | /* But non-single-set allowed in one special case. */ |
1496 | && (insn != prev_nondebug_insn (tail) |
1497 | || !reg_mentioned_p (count_reg, insn)))) |
1498 | break; |
1499 | } |
1500 | |
1501 | if (insn != NEXT_INSN (insn: tail)) |
1502 | { |
1503 | if (dump_file) |
1504 | { |
1505 | if (CALL_P (insn)) |
1506 | fprintf (stream: dump_file, format: "SMS loop-with-call\n" ); |
1507 | else if (BARRIER_P (insn)) |
1508 | fprintf (stream: dump_file, format: "SMS loop-with-barrier\n" ); |
1509 | else if (INSN_P (insn) && single_set (insn) |
1510 | && GET_CODE (SET_DEST (single_set (insn))) == SUBREG) |
1511 | fprintf (stream: dump_file, format: "SMS loop with subreg in lhs\n" ); |
1512 | else |
1513 | fprintf (stream: dump_file, |
1514 | format: "SMS loop-with-not-single-set-or-prohibited-reg\n" ); |
1515 | |
1516 | print_rtl_single (dump_file, insn); |
1517 | } |
1518 | |
1519 | continue; |
1520 | } |
1521 | |
1522 | /* Always schedule the closing branch with the rest of the |
1523 | instructions. The branch is rotated to be in row ii-1 at the |
1524 | end of the scheduling procedure to make sure it's the last |
1525 | instruction in the iteration. */ |
1526 | if (! (g = create_ddg (bb, closing_branch_deps: 1))) |
1527 | { |
1528 | if (dump_file) |
1529 | fprintf (stream: dump_file, format: "SMS create_ddg failed\n" ); |
1530 | continue; |
1531 | } |
1532 | |
1533 | g_arr[loop->num] = g; |
1534 | if (dump_file) |
1535 | fprintf (stream: dump_file, format: "...OK\n" ); |
1536 | |
1537 | } |
1538 | if (dump_file) |
1539 | { |
1540 | fprintf (stream: dump_file, format: "\nSMS transformation phase\n" ); |
1541 | fprintf (stream: dump_file, format: "=========================\n\n" ); |
1542 | } |
1543 | |
1544 | /* We don't want to perform SMS on new loops - created by versioning. */ |
1545 | for (auto loop : loops_list (cfun, 0)) |
1546 | { |
1547 | rtx_insn *head, *tail; |
1548 | rtx count_reg; |
1549 | rtx_insn *count_init; |
1550 | int mii, rec_mii, stage_count, min_cycle; |
1551 | int64_t loop_count = 0; |
1552 | bool opt_sc_p, adjust_inplace = false; |
1553 | basic_block ; |
1554 | |
1555 | if (! (g = g_arr[loop->num])) |
1556 | continue; |
1557 | |
1558 | if (dump_file) |
1559 | { |
1560 | rtx_insn *insn = BB_END (loop->header); |
1561 | |
1562 | fprintf (stream: dump_file, format: "SMS loop num: %d" , loop->num); |
1563 | dump_insn_location (insn); |
1564 | fprintf (stream: dump_file, format: "\n" ); |
1565 | |
1566 | print_ddg (dump_file, g); |
1567 | } |
1568 | |
1569 | get_ebb_head_tail (loop->header, loop->header, &head, &tail); |
1570 | |
1571 | latch_edge = loop_latch_edge (loop); |
1572 | gcc_assert (single_exit (loop)); |
1573 | trip_count = get_estimated_loop_iterations_int (loop); |
1574 | max_trip_count = get_max_loop_iterations_int (loop); |
1575 | |
1576 | if (dump_file) |
1577 | { |
1578 | dump_insn_location (insn: tail); |
1579 | fprintf (stream: dump_file, format: "\nSMS single-bb-loop\n" ); |
1580 | if (profile_info && flag_branch_probabilities) |
1581 | { |
1582 | fprintf (stream: dump_file, format: "SMS loop-count " ); |
1583 | fprintf (stream: dump_file, format: "%" PRId64, |
1584 | (int64_t) bb->count.to_gcov_type ()); |
1585 | fprintf (stream: dump_file, format: "\n" ); |
1586 | } |
1587 | fprintf (stream: dump_file, format: "SMS doloop\n" ); |
1588 | fprintf (stream: dump_file, format: "SMS built-ddg %d\n" , g->num_nodes); |
1589 | fprintf (stream: dump_file, format: "SMS num-loads %d\n" , g->num_loads); |
1590 | fprintf (stream: dump_file, format: "SMS num-stores %d\n" , g->num_stores); |
1591 | } |
1592 | |
1593 | |
1594 | count_reg = doloop_register_get (head, tail); |
1595 | gcc_assert (count_reg); |
1596 | |
1597 | pre_header = loop_preheader_edge (loop)->src; |
1598 | count_init = const_iteration_count (count_reg, pre_header, count: &loop_count, |
1599 | adjust_inplace: &adjust_inplace); |
1600 | |
1601 | if (dump_file && count_init) |
1602 | { |
1603 | fprintf (stream: dump_file, format: "SMS const-doloop " ); |
1604 | fprintf (stream: dump_file, format: "%" PRId64, |
1605 | loop_count); |
1606 | fprintf (stream: dump_file, format: "\n" ); |
1607 | } |
1608 | |
1609 | node_order = XNEWVEC (int, g->num_nodes); |
1610 | |
1611 | mii = 1; /* Need to pass some estimate of mii. */ |
1612 | rec_mii = sms_order_nodes (g, mii, node_order, &max_asap); |
1613 | mii = MAX (res_MII (g), rec_mii); |
1614 | mii = MAX (mii, 1); |
1615 | maxii = MAX (max_asap, param_sms_max_ii_factor * mii); |
1616 | |
1617 | if (dump_file) |
1618 | fprintf (stream: dump_file, format: "SMS iis %d %d %d (rec_mii, mii, maxii)\n" , |
1619 | rec_mii, mii, maxii); |
1620 | |
1621 | for (;;) |
1622 | { |
1623 | set_node_sched_params (g); |
1624 | |
1625 | stage_count = 0; |
1626 | opt_sc_p = false; |
1627 | ps = sms_schedule_by_order (g, mii, maxii, node_order); |
1628 | |
1629 | if (ps) |
1630 | { |
1631 | /* Try to achieve optimized SC by normalizing the partial |
1632 | schedule (having the cycles start from cycle zero). |
1633 | The branch location must be placed in row ii-1 in the |
1634 | final scheduling. If failed, shift all instructions to |
1635 | position the branch in row ii-1. */ |
1636 | opt_sc_p = optimize_sc (ps, g); |
1637 | if (opt_sc_p) |
1638 | stage_count = calculate_stage_count (ps, 0); |
1639 | else |
1640 | { |
1641 | /* Bring the branch to cycle ii-1. */ |
1642 | int amount = (SCHED_TIME (g->closing_branch->cuid) |
1643 | - (ps->ii - 1)); |
1644 | |
1645 | if (dump_file) |
1646 | fprintf (stream: dump_file, format: "SMS schedule branch at cycle ii-1\n" ); |
1647 | |
1648 | stage_count = calculate_stage_count (ps, amount); |
1649 | } |
1650 | |
1651 | gcc_assert (stage_count >= 1); |
1652 | } |
1653 | |
1654 | /* The default value of param_sms_min_sc is 2 as stage count of |
1655 | 1 means that there is no interleaving between iterations thus |
1656 | we let the scheduling passes do the job in this case. */ |
1657 | if (stage_count < param_sms_min_sc |
1658 | || (count_init && (loop_count <= stage_count)) |
1659 | || (max_trip_count >= 0 && max_trip_count <= stage_count) |
1660 | || (trip_count >= 0 && trip_count <= stage_count)) |
1661 | { |
1662 | if (dump_file) |
1663 | { |
1664 | fprintf (stream: dump_file, format: "SMS failed... \n" ); |
1665 | fprintf (stream: dump_file, format: "SMS sched-failed (stage-count=%d," |
1666 | " loop-count=" , stage_count); |
1667 | fprintf (stream: dump_file, format: "%" PRId64, loop_count); |
1668 | fprintf (stream: dump_file, format: ", trip-count=" ); |
1669 | fprintf (stream: dump_file, format: "%" PRId64 "max %" PRId64, |
1670 | (int64_t) trip_count, (int64_t) max_trip_count); |
1671 | fprintf (stream: dump_file, format: ")\n" ); |
1672 | } |
1673 | break; |
1674 | } |
1675 | |
1676 | if (!opt_sc_p) |
1677 | { |
1678 | /* Rotate the partial schedule to have the branch in row ii-1. */ |
1679 | int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1); |
1680 | |
1681 | reset_sched_times (ps, amount); |
1682 | rotate_partial_schedule (ps, amount); |
1683 | } |
1684 | |
1685 | set_columns_for_ps (ps); |
1686 | |
1687 | min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii); |
1688 | if (!schedule_reg_moves (ps)) |
1689 | { |
1690 | mii = ps->ii + 1; |
1691 | free_partial_schedule (ps); |
1692 | continue; |
1693 | } |
1694 | |
1695 | /* Moves that handle incoming values might have been added |
1696 | to a new first stage. Bump the stage count if so. |
1697 | |
1698 | ??? Perhaps we could consider rotating the schedule here |
1699 | instead? */ |
1700 | if (PS_MIN_CYCLE (ps) < min_cycle) |
1701 | { |
1702 | reset_sched_times (ps, amount: 0); |
1703 | stage_count++; |
1704 | } |
1705 | |
1706 | /* The stage count should now be correct without rotation. */ |
1707 | gcc_checking_assert (stage_count == calculate_stage_count (ps, 0)); |
1708 | PS_STAGE_COUNT (ps) = stage_count; |
1709 | |
1710 | canon_loop (loop); |
1711 | |
1712 | if (dump_file) |
1713 | { |
1714 | dump_insn_location (insn: tail); |
1715 | fprintf (stream: dump_file, format: " SMS succeeded %d %d (with ii, sc)\n" , |
1716 | ps->ii, stage_count); |
1717 | print_partial_schedule (ps, dump_file); |
1718 | } |
1719 | |
1720 | if (count_init) |
1721 | { |
1722 | if (adjust_inplace) |
1723 | { |
1724 | /* When possible, set new iteration count of loop kernel in |
1725 | place. Otherwise, generate_prolog_epilog creates an insn |
1726 | to adjust. */ |
1727 | SET_SRC (single_set (count_init)) = GEN_INT (loop_count |
1728 | - stage_count + 1); |
1729 | } |
1730 | } |
1731 | else |
1732 | { |
1733 | /* case the BCT count is not known , Do loop-versioning */ |
1734 | rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg, |
1735 | gen_int_mode (stage_count, |
1736 | GET_MODE (count_reg))); |
1737 | profile_probability prob = profile_probability::guessed_always () |
1738 | .apply_scale (PROB_SMS_ENOUGH_ITERATIONS, den: 100); |
1739 | |
1740 | loop_version (loop, comp_rtx, &condition_bb, |
1741 | prob, prob.invert (), |
1742 | prob, prob.invert (), true); |
1743 | } |
1744 | |
1745 | /* Now apply the scheduled kernel to the RTL of the loop. */ |
1746 | permute_partial_schedule (ps, last: g->closing_branch->first_note); |
1747 | |
1748 | /* Mark this loop as software pipelined so the later |
1749 | scheduling passes don't touch it. */ |
1750 | if (! flag_resched_modulo_sched) |
1751 | mark_loop_unsched (loop); |
1752 | |
1753 | /* The life-info is not valid any more. */ |
1754 | df_set_bb_dirty (g->bb); |
1755 | |
1756 | apply_reg_moves (ps); |
1757 | if (dump_file) |
1758 | print_node_sched_params (file: dump_file, num_nodes: g->num_nodes, ps); |
1759 | /* Generate prolog and epilog. */ |
1760 | generate_prolog_epilog (ps, loop, count_reg, adjust_init: !adjust_inplace); |
1761 | break; |
1762 | } |
1763 | |
1764 | free_partial_schedule (ps); |
1765 | node_sched_param_vec.release (); |
1766 | free (ptr: node_order); |
1767 | free_ddg (g); |
1768 | } |
1769 | |
1770 | free (ptr: g_arr); |
1771 | |
1772 | /* Release scheduler data, needed until now because of DFA. */ |
1773 | haifa_sched_finish (); |
1774 | loop_optimizer_finalize (); |
1775 | } |
1776 | |
1777 | /* The SMS scheduling algorithm itself |
1778 | ----------------------------------- |
1779 | Input: 'O' an ordered list of insns of a loop. |
1780 | Output: A scheduling of the loop - kernel, prolog, and epilogue. |
1781 | |
1782 | 'Q' is the empty Set |
1783 | 'PS' is the partial schedule; it holds the currently scheduled nodes with |
1784 | their cycle/slot. |
1785 | 'PSP' previously scheduled predecessors. |
1786 | 'PSS' previously scheduled successors. |
1787 | 't(u)' the cycle where u is scheduled. |
1788 | 'l(u)' is the latency of u. |
1789 | 'd(v,u)' is the dependence distance from v to u. |
1790 | 'ASAP(u)' the earliest time at which u could be scheduled as computed in |
1791 | the node ordering phase. |
1792 | 'check_hardware_resources_conflicts(u, PS, c)' |
1793 | run a trace around cycle/slot through DFA model |
1794 | to check resource conflicts involving instruction u |
1795 | at cycle c given the partial schedule PS. |
1796 | 'add_to_partial_schedule_at_time(u, PS, c)' |
1797 | Add the node/instruction u to the partial schedule |
1798 | PS at time c. |
1799 | 'calculate_register_pressure(PS)' |
1800 | Given a schedule of instructions, calculate the register |
1801 | pressure it implies. One implementation could be the |
1802 | maximum number of overlapping live ranges. |
1803 | 'maxRP' The maximum allowed register pressure, it is usually derived from the number |
1804 | registers available in the hardware. |
1805 | |
1806 | 1. II = MII. |
1807 | 2. PS = empty list |
1808 | 3. for each node u in O in pre-computed order |
1809 | 4. if (PSP(u) != Q && PSS(u) == Q) then |
1810 | 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u). |
1811 | 6. start = Early_start; end = Early_start + II - 1; step = 1 |
1812 | 11. else if (PSP(u) == Q && PSS(u) != Q) then |
1813 | 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u). |
1814 | 13. start = Late_start; end = Late_start - II + 1; step = -1 |
1815 | 14. else if (PSP(u) != Q && PSS(u) != Q) then |
1816 | 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u). |
1817 | 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u). |
1818 | 17. start = Early_start; |
1819 | 18. end = min(Early_start + II - 1 , Late_start); |
1820 | 19. step = 1 |
1821 | 20. else "if (PSP(u) == Q && PSS(u) == Q)" |
1822 | 21. start = ASAP(u); end = start + II - 1; step = 1 |
1823 | 22. endif |
1824 | |
1825 | 23. success = false |
1826 | 24. for (c = start ; c != end ; c += step) |
1827 | 25. if check_hardware_resources_conflicts(u, PS, c) then |
1828 | 26. add_to_partial_schedule_at_time(u, PS, c) |
1829 | 27. success = true |
1830 | 28. break |
1831 | 29. endif |
1832 | 30. endfor |
1833 | 31. if (success == false) then |
1834 | 32. II = II + 1 |
1835 | 33. if (II > maxII) then |
1836 | 34. finish - failed to schedule |
1837 | 35. endif |
1838 | 36. goto 2. |
1839 | 37. endif |
1840 | 38. endfor |
1841 | 39. if (calculate_register_pressure(PS) > maxRP) then |
1842 | 40. goto 32. |
1843 | 41. endif |
1844 | 42. compute epilogue & prologue |
1845 | 43. finish - succeeded to schedule |
1846 | |
1847 | ??? The algorithm restricts the scheduling window to II cycles. |
1848 | In rare cases, it may be better to allow windows of II+1 cycles. |
1849 | The window would then start and end on the same row, but with |
1850 | different "must precede" and "must follow" requirements. */ |
1851 | |
1852 | /* A threshold for the number of repeated unsuccessful attempts to insert |
1853 | an empty row, before we flush the partial schedule and start over. */ |
1854 | #define MAX_SPLIT_NUM 10 |
1855 | /* Given the partial schedule PS, this function calculates and returns the |
1856 | cycles in which we can schedule the node with the given index I. |
1857 | NOTE: Here we do the backtracking in SMS, in some special cases. We have |
1858 | noticed that there are several cases in which we fail to SMS the loop |
1859 | because the sched window of a node is empty due to tight data-deps. In |
1860 | such cases we want to unschedule some of the predecessors/successors |
1861 | until we get non-empty scheduling window. It returns -1 if the |
1862 | scheduling window is empty and zero otherwise. */ |
1863 | |
1864 | static int |
1865 | get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node, |
1866 | sbitmap sched_nodes, int ii, int *start_p, int *step_p, |
1867 | int *end_p) |
1868 | { |
1869 | int start, step, end; |
1870 | int early_start, late_start; |
1871 | ddg_edge_ptr e; |
1872 | auto_sbitmap psp (ps->g->num_nodes); |
1873 | auto_sbitmap pss (ps->g->num_nodes); |
1874 | sbitmap u_node_preds = NODE_PREDECESSORS (u_node); |
1875 | sbitmap u_node_succs = NODE_SUCCESSORS (u_node); |
1876 | int psp_not_empty; |
1877 | int pss_not_empty; |
1878 | int count_preds; |
1879 | int count_succs; |
1880 | |
1881 | /* 1. compute sched window for u (start, end, step). */ |
1882 | bitmap_clear (psp); |
1883 | bitmap_clear (pss); |
1884 | psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes); |
1885 | pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes); |
1886 | |
1887 | /* We first compute a forward range (start <= end), then decide whether |
1888 | to reverse it. */ |
1889 | early_start = INT_MIN; |
1890 | late_start = INT_MAX; |
1891 | start = INT_MIN; |
1892 | end = INT_MAX; |
1893 | step = 1; |
1894 | |
1895 | count_preds = 0; |
1896 | count_succs = 0; |
1897 | |
1898 | if (dump_file && (psp_not_empty || pss_not_empty)) |
1899 | { |
1900 | fprintf (stream: dump_file, format: "\nAnalyzing dependencies for node %d (INSN %d)" |
1901 | "; ii = %d\n\n" , u_node->cuid, INSN_UID (insn: u_node->insn), ii); |
1902 | fprintf (stream: dump_file, format: "%11s %11s %11s %11s %5s\n" , |
1903 | "start" , "early start" , "late start" , "end" , "time" ); |
1904 | fprintf (stream: dump_file, format: "=========== =========== =========== ===========" |
1905 | " =====\n" ); |
1906 | } |
1907 | /* Calculate early_start and limit end. Both bounds are inclusive. */ |
1908 | if (psp_not_empty) |
1909 | for (e = u_node->in; e != 0; e = e->next_in) |
1910 | { |
1911 | int v = e->src->cuid; |
1912 | |
1913 | if (bitmap_bit_p (map: sched_nodes, bitno: v)) |
1914 | { |
1915 | int p_st = SCHED_TIME (v); |
1916 | int earliest = p_st + e->latency - (e->distance * ii); |
1917 | int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX); |
1918 | |
1919 | if (dump_file) |
1920 | { |
1921 | fprintf (stream: dump_file, format: "%11s %11d %11s %11d %5d" , |
1922 | "" , earliest, "" , latest, p_st); |
1923 | print_ddg_edge (dump_file, e); |
1924 | fprintf (stream: dump_file, format: "\n" ); |
1925 | } |
1926 | |
1927 | early_start = MAX (early_start, earliest); |
1928 | end = MIN (end, latest); |
1929 | |
1930 | if (e->type == TRUE_DEP && e->data_type == REG_DEP) |
1931 | count_preds++; |
1932 | } |
1933 | } |
1934 | |
1935 | /* Calculate late_start and limit start. Both bounds are inclusive. */ |
1936 | if (pss_not_empty) |
1937 | for (e = u_node->out; e != 0; e = e->next_out) |
1938 | { |
1939 | int v = e->dest->cuid; |
1940 | |
1941 | if (bitmap_bit_p (map: sched_nodes, bitno: v)) |
1942 | { |
1943 | int s_st = SCHED_TIME (v); |
1944 | int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN); |
1945 | int latest = s_st - e->latency + (e->distance * ii); |
1946 | |
1947 | if (dump_file) |
1948 | { |
1949 | fprintf (stream: dump_file, format: "%11d %11s %11d %11s %5d" , |
1950 | earliest, "" , latest, "" , s_st); |
1951 | print_ddg_edge (dump_file, e); |
1952 | fprintf (stream: dump_file, format: "\n" ); |
1953 | } |
1954 | |
1955 | start = MAX (start, earliest); |
1956 | late_start = MIN (late_start, latest); |
1957 | |
1958 | if (e->type == TRUE_DEP && e->data_type == REG_DEP) |
1959 | count_succs++; |
1960 | } |
1961 | } |
1962 | |
1963 | if (dump_file && (psp_not_empty || pss_not_empty)) |
1964 | { |
1965 | fprintf (stream: dump_file, format: "----------- ----------- ----------- -----------" |
1966 | " -----\n" ); |
1967 | fprintf (stream: dump_file, format: "%11d %11d %11d %11d %5s %s\n" , |
1968 | start, early_start, late_start, end, "" , |
1969 | "(max, max, min, min)" ); |
1970 | } |
1971 | |
1972 | /* Get a target scheduling window no bigger than ii. */ |
1973 | if (early_start == INT_MIN && late_start == INT_MAX) |
1974 | early_start = NODE_ASAP (u_node); |
1975 | else if (early_start == INT_MIN) |
1976 | early_start = late_start - (ii - 1); |
1977 | late_start = MIN (late_start, early_start + (ii - 1)); |
1978 | |
1979 | /* Apply memory dependence limits. */ |
1980 | start = MAX (start, early_start); |
1981 | end = MIN (end, late_start); |
1982 | |
1983 | if (dump_file && (psp_not_empty || pss_not_empty)) |
1984 | fprintf (stream: dump_file, format: "%11s %11d %11d %11s %5s final window\n" , |
1985 | "" , start, end, "" , "" ); |
1986 | |
1987 | /* If there are at least as many successors as predecessors, schedule the |
1988 | node close to its successors. */ |
1989 | if (pss_not_empty && count_succs >= count_preds) |
1990 | { |
1991 | std::swap (a&: start, b&: end); |
1992 | step = -1; |
1993 | } |
1994 | |
1995 | /* Now that we've finalized the window, make END an exclusive rather |
1996 | than an inclusive bound. */ |
1997 | end += step; |
1998 | |
1999 | *start_p = start; |
2000 | *step_p = step; |
2001 | *end_p = end; |
2002 | |
2003 | if ((start >= end && step == 1) || (start <= end && step == -1)) |
2004 | { |
2005 | if (dump_file) |
2006 | fprintf (stream: dump_file, format: "\nEmpty window: start=%d, end=%d, step=%d\n" , |
2007 | start, end, step); |
2008 | return -1; |
2009 | } |
2010 | |
2011 | return 0; |
2012 | } |
2013 | |
2014 | /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the |
2015 | node currently been scheduled. At the end of the calculation |
2016 | MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of |
2017 | U_NODE which are (1) already scheduled in the first/last row of |
2018 | U_NODE's scheduling window, (2) whose dependence inequality with U |
2019 | becomes an equality when U is scheduled in this same row, and (3) |
2020 | whose dependence latency is zero. |
2021 | |
2022 | The first and last rows are calculated using the following parameters: |
2023 | START/END rows - The cycles that begins/ends the traversal on the window; |
2024 | searching for an empty cycle to schedule U_NODE. |
2025 | STEP - The direction in which we traverse the window. |
2026 | II - The initiation interval. */ |
2027 | |
2028 | static void |
2029 | calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end, |
2030 | int step, int ii, sbitmap sched_nodes, |
2031 | sbitmap must_precede, sbitmap must_follow) |
2032 | { |
2033 | ddg_edge_ptr e; |
2034 | int first_cycle_in_window, last_cycle_in_window; |
2035 | |
2036 | gcc_assert (must_precede && must_follow); |
2037 | |
2038 | /* Consider the following scheduling window: |
2039 | {first_cycle_in_window, first_cycle_in_window+1, ..., |
2040 | last_cycle_in_window}. If step is 1 then the following will be |
2041 | the order we traverse the window: {start=first_cycle_in_window, |
2042 | first_cycle_in_window+1, ..., end=last_cycle_in_window+1}, |
2043 | or {start=last_cycle_in_window, last_cycle_in_window-1, ..., |
2044 | end=first_cycle_in_window-1} if step is -1. */ |
2045 | first_cycle_in_window = (step == 1) ? start : end - step; |
2046 | last_cycle_in_window = (step == 1) ? end - step : start; |
2047 | |
2048 | bitmap_clear (must_precede); |
2049 | bitmap_clear (must_follow); |
2050 | |
2051 | if (dump_file) |
2052 | fprintf (stream: dump_file, format: "\nmust_precede: " ); |
2053 | |
2054 | /* Instead of checking if: |
2055 | (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window) |
2056 | && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) == |
2057 | first_cycle_in_window) |
2058 | && e->latency == 0 |
2059 | we use the fact that latency is non-negative: |
2060 | SCHED_TIME (e->src) - (e->distance * ii) <= |
2061 | SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <= |
2062 | first_cycle_in_window |
2063 | and check only if |
2064 | SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */ |
2065 | for (e = u_node->in; e != 0; e = e->next_in) |
2066 | if (bitmap_bit_p (map: sched_nodes, bitno: e->src->cuid) |
2067 | && ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) == |
2068 | first_cycle_in_window)) |
2069 | { |
2070 | if (dump_file) |
2071 | fprintf (stream: dump_file, format: "%d " , e->src->cuid); |
2072 | |
2073 | bitmap_set_bit (map: must_precede, bitno: e->src->cuid); |
2074 | } |
2075 | |
2076 | if (dump_file) |
2077 | fprintf (stream: dump_file, format: "\nmust_follow: " ); |
2078 | |
2079 | /* Instead of checking if: |
2080 | (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window) |
2081 | && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) == |
2082 | last_cycle_in_window) |
2083 | && e->latency == 0 |
2084 | we use the fact that latency is non-negative: |
2085 | SCHED_TIME (e->dest) + (e->distance * ii) >= |
2086 | SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >= |
2087 | last_cycle_in_window |
2088 | and check only if |
2089 | SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */ |
2090 | for (e = u_node->out; e != 0; e = e->next_out) |
2091 | if (bitmap_bit_p (map: sched_nodes, bitno: e->dest->cuid) |
2092 | && ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) == |
2093 | last_cycle_in_window)) |
2094 | { |
2095 | if (dump_file) |
2096 | fprintf (stream: dump_file, format: "%d " , e->dest->cuid); |
2097 | |
2098 | bitmap_set_bit (map: must_follow, bitno: e->dest->cuid); |
2099 | } |
2100 | |
2101 | if (dump_file) |
2102 | fprintf (stream: dump_file, format: "\n" ); |
2103 | } |
2104 | |
2105 | /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following |
2106 | parameters to decide if that's possible: |
2107 | PS - The partial schedule. |
2108 | U - The serial number of U_NODE. |
2109 | NUM_SPLITS - The number of row splits made so far. |
2110 | MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at |
2111 | the first row of the scheduling window) |
2112 | MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the |
2113 | last row of the scheduling window) */ |
2114 | |
2115 | static bool |
2116 | try_scheduling_node_in_cycle (partial_schedule_ptr ps, |
2117 | int u, int cycle, sbitmap sched_nodes, |
2118 | int *num_splits, sbitmap must_precede, |
2119 | sbitmap must_follow) |
2120 | { |
2121 | ps_insn_ptr psi; |
2122 | bool success = false; |
2123 | |
2124 | verify_partial_schedule (ps, sched_nodes); |
2125 | psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow); |
2126 | if (psi) |
2127 | { |
2128 | SCHED_TIME (u) = cycle; |
2129 | bitmap_set_bit (map: sched_nodes, bitno: u); |
2130 | success = true; |
2131 | *num_splits = 0; |
2132 | if (dump_file) |
2133 | fprintf (stream: dump_file, format: "Scheduled w/o split in %d\n" , cycle); |
2134 | |
2135 | } |
2136 | |
2137 | return success; |
2138 | } |
2139 | |
2140 | /* This function implements the scheduling algorithm for SMS according to the |
2141 | above algorithm. */ |
2142 | static partial_schedule_ptr |
2143 | sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order) |
2144 | { |
2145 | int ii = mii; |
2146 | int i, c, success, num_splits = 0; |
2147 | int flush_and_start_over = true; |
2148 | int num_nodes = g->num_nodes; |
2149 | int start, end, step; /* Place together into one struct? */ |
2150 | auto_sbitmap sched_nodes (num_nodes); |
2151 | auto_sbitmap must_precede (num_nodes); |
2152 | auto_sbitmap must_follow (num_nodes); |
2153 | auto_sbitmap tobe_scheduled (num_nodes); |
2154 | |
2155 | /* Value of param_sms_dfa_history is a limit on the number of cycles that |
2156 | resource conflicts can span. ??? Should be provided by DFA, and be |
2157 | dependent on the type of insn scheduled. Set to 0 by default to save |
2158 | compile time. */ |
2159 | partial_schedule_ptr ps = create_partial_schedule (ii, g, |
2160 | param_sms_dfa_history); |
2161 | |
2162 | bitmap_ones (tobe_scheduled); |
2163 | bitmap_clear (sched_nodes); |
2164 | |
2165 | while (flush_and_start_over && (ii < maxii)) |
2166 | { |
2167 | |
2168 | if (dump_file) |
2169 | fprintf (stream: dump_file, format: "Starting with ii=%d\n" , ii); |
2170 | flush_and_start_over = false; |
2171 | bitmap_clear (sched_nodes); |
2172 | |
2173 | for (i = 0; i < num_nodes; i++) |
2174 | { |
2175 | int u = nodes_order[i]; |
2176 | ddg_node_ptr u_node = &ps->g->nodes[u]; |
2177 | rtx_insn *insn = u_node->insn; |
2178 | |
2179 | gcc_checking_assert (NONDEBUG_INSN_P (insn)); |
2180 | |
2181 | if (bitmap_bit_p (map: sched_nodes, bitno: u)) |
2182 | continue; |
2183 | |
2184 | /* Try to get non-empty scheduling window. */ |
2185 | success = 0; |
2186 | if (get_sched_window (ps, u_node, sched_nodes, ii, start_p: &start, |
2187 | step_p: &step, end_p: &end) == 0) |
2188 | { |
2189 | if (dump_file) |
2190 | fprintf (stream: dump_file, format: "\nTrying to schedule node %d " |
2191 | "INSN = %d in (%d .. %d) step %d\n" , u, (INSN_UID |
2192 | (insn: g->nodes[u].insn)), start, end, step); |
2193 | |
2194 | gcc_assert ((step > 0 && start < end) |
2195 | || (step < 0 && start > end)); |
2196 | |
2197 | calculate_must_precede_follow (u_node, start, end, step, ii, |
2198 | sched_nodes, must_precede, |
2199 | must_follow); |
2200 | |
2201 | for (c = start; c != end; c += step) |
2202 | { |
2203 | sbitmap tmp_precede, tmp_follow; |
2204 | |
2205 | set_must_precede_follow (tmp_follow: &tmp_follow, must_follow, |
2206 | tmp_precede: &tmp_precede, must_precede, |
2207 | c, start, end, step); |
2208 | success = |
2209 | try_scheduling_node_in_cycle (ps, u, cycle: c, |
2210 | sched_nodes, |
2211 | num_splits: &num_splits, must_precede: tmp_precede, |
2212 | must_follow: tmp_follow); |
2213 | if (success) |
2214 | break; |
2215 | } |
2216 | |
2217 | verify_partial_schedule (ps, sched_nodes); |
2218 | } |
2219 | if (!success) |
2220 | { |
2221 | int split_row; |
2222 | |
2223 | if (ii++ == maxii) |
2224 | break; |
2225 | |
2226 | if (num_splits >= MAX_SPLIT_NUM) |
2227 | { |
2228 | num_splits = 0; |
2229 | flush_and_start_over = true; |
2230 | verify_partial_schedule (ps, sched_nodes); |
2231 | reset_partial_schedule (ps, new_ii: ii); |
2232 | verify_partial_schedule (ps, sched_nodes); |
2233 | break; |
2234 | } |
2235 | |
2236 | num_splits++; |
2237 | /* The scheduling window is exclusive of 'end' |
2238 | whereas compute_split_window() expects an inclusive, |
2239 | ordered range. */ |
2240 | if (step == 1) |
2241 | split_row = compute_split_row (sched_nodes, start, end - 1, |
2242 | ps->ii, u_node); |
2243 | else |
2244 | split_row = compute_split_row (sched_nodes, end + 1, start, |
2245 | ps->ii, u_node); |
2246 | |
2247 | ps_insert_empty_row (ps, split_row, sched_nodes); |
2248 | i--; /* Go back and retry node i. */ |
2249 | |
2250 | if (dump_file) |
2251 | fprintf (stream: dump_file, format: "num_splits=%d\n" , num_splits); |
2252 | } |
2253 | |
2254 | /* ??? If (success), check register pressure estimates. */ |
2255 | } /* Continue with next node. */ |
2256 | } /* While flush_and_start_over. */ |
2257 | if (ii >= maxii) |
2258 | { |
2259 | free_partial_schedule (ps); |
2260 | ps = NULL; |
2261 | } |
2262 | else |
2263 | gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes)); |
2264 | |
2265 | return ps; |
2266 | } |
2267 | |
2268 | /* This function inserts a new empty row into PS at the position |
2269 | according to SPLITROW, keeping all already scheduled instructions |
2270 | intact and updating their SCHED_TIME and cycle accordingly. */ |
2271 | static void |
2272 | ps_insert_empty_row (partial_schedule_ptr ps, int split_row, |
2273 | sbitmap sched_nodes) |
2274 | { |
2275 | ps_insn_ptr crr_insn; |
2276 | ps_insn_ptr *rows_new; |
2277 | int ii = ps->ii; |
2278 | int new_ii = ii + 1; |
2279 | int row; |
2280 | int *rows_length_new; |
2281 | |
2282 | verify_partial_schedule (ps, sched_nodes); |
2283 | |
2284 | /* We normalize sched_time and rotate ps to have only non-negative sched |
2285 | times, for simplicity of updating cycles after inserting new row. */ |
2286 | split_row -= ps->min_cycle; |
2287 | split_row = SMODULO (split_row, ii); |
2288 | if (dump_file) |
2289 | fprintf (stream: dump_file, format: "split_row=%d\n" , split_row); |
2290 | |
2291 | reset_sched_times (ps, PS_MIN_CYCLE (ps)); |
2292 | rotate_partial_schedule (ps, PS_MIN_CYCLE (ps)); |
2293 | |
2294 | rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr)); |
2295 | rows_length_new = (int *) xcalloc (new_ii, sizeof (int)); |
2296 | for (row = 0; row < split_row; row++) |
2297 | { |
2298 | rows_new[row] = ps->rows[row]; |
2299 | rows_length_new[row] = ps->rows_length[row]; |
2300 | ps->rows[row] = NULL; |
2301 | for (crr_insn = rows_new[row]; |
2302 | crr_insn; crr_insn = crr_insn->next_in_row) |
2303 | { |
2304 | int u = crr_insn->id; |
2305 | int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii); |
2306 | |
2307 | SCHED_TIME (u) = new_time; |
2308 | crr_insn->cycle = new_time; |
2309 | SCHED_ROW (u) = new_time % new_ii; |
2310 | SCHED_STAGE (u) = new_time / new_ii; |
2311 | } |
2312 | |
2313 | } |
2314 | |
2315 | rows_new[split_row] = NULL; |
2316 | |
2317 | for (row = split_row; row < ii; row++) |
2318 | { |
2319 | rows_new[row + 1] = ps->rows[row]; |
2320 | rows_length_new[row + 1] = ps->rows_length[row]; |
2321 | ps->rows[row] = NULL; |
2322 | for (crr_insn = rows_new[row + 1]; |
2323 | crr_insn; crr_insn = crr_insn->next_in_row) |
2324 | { |
2325 | int u = crr_insn->id; |
2326 | int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1; |
2327 | |
2328 | SCHED_TIME (u) = new_time; |
2329 | crr_insn->cycle = new_time; |
2330 | SCHED_ROW (u) = new_time % new_ii; |
2331 | SCHED_STAGE (u) = new_time / new_ii; |
2332 | } |
2333 | } |
2334 | |
2335 | /* Updating ps. */ |
2336 | ps->min_cycle = ps->min_cycle + ps->min_cycle / ii |
2337 | + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0); |
2338 | ps->max_cycle = ps->max_cycle + ps->max_cycle / ii |
2339 | + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0); |
2340 | free (ptr: ps->rows); |
2341 | ps->rows = rows_new; |
2342 | free (ptr: ps->rows_length); |
2343 | ps->rows_length = rows_length_new; |
2344 | ps->ii = new_ii; |
2345 | gcc_assert (ps->min_cycle >= 0); |
2346 | |
2347 | verify_partial_schedule (ps, sched_nodes); |
2348 | |
2349 | if (dump_file) |
2350 | fprintf (stream: dump_file, format: "min_cycle=%d, max_cycle=%d\n" , ps->min_cycle, |
2351 | ps->max_cycle); |
2352 | } |
2353 | |
2354 | /* Given U_NODE which is the node that failed to be scheduled; LOW and |
2355 | UP which are the boundaries of it's scheduling window; compute using |
2356 | SCHED_NODES and II a row in the partial schedule that can be split |
2357 | which will separate a critical predecessor from a critical successor |
2358 | thereby expanding the window, and return it. */ |
2359 | static int |
2360 | compute_split_row (sbitmap sched_nodes, int low, int up, int ii, |
2361 | ddg_node_ptr u_node) |
2362 | { |
2363 | ddg_edge_ptr e; |
2364 | int lower = INT_MIN, upper = INT_MAX; |
2365 | int crit_pred = -1; |
2366 | int crit_succ = -1; |
2367 | int crit_cycle; |
2368 | |
2369 | for (e = u_node->in; e != 0; e = e->next_in) |
2370 | { |
2371 | int v = e->src->cuid; |
2372 | |
2373 | if (bitmap_bit_p (map: sched_nodes, bitno: v) |
2374 | && (low == SCHED_TIME (v) + e->latency - (e->distance * ii))) |
2375 | if (SCHED_TIME (v) > lower) |
2376 | { |
2377 | crit_pred = v; |
2378 | lower = SCHED_TIME (v); |
2379 | } |
2380 | } |
2381 | |
2382 | if (crit_pred >= 0) |
2383 | { |
2384 | crit_cycle = SCHED_TIME (crit_pred) + 1; |
2385 | return SMODULO (crit_cycle, ii); |
2386 | } |
2387 | |
2388 | for (e = u_node->out; e != 0; e = e->next_out) |
2389 | { |
2390 | int v = e->dest->cuid; |
2391 | |
2392 | if (bitmap_bit_p (map: sched_nodes, bitno: v) |
2393 | && (up == SCHED_TIME (v) - e->latency + (e->distance * ii))) |
2394 | if (SCHED_TIME (v) < upper) |
2395 | { |
2396 | crit_succ = v; |
2397 | upper = SCHED_TIME (v); |
2398 | } |
2399 | } |
2400 | |
2401 | if (crit_succ >= 0) |
2402 | { |
2403 | crit_cycle = SCHED_TIME (crit_succ); |
2404 | return SMODULO (crit_cycle, ii); |
2405 | } |
2406 | |
2407 | if (dump_file) |
2408 | fprintf (stream: dump_file, format: "Both crit_pred and crit_succ are NULL\n" ); |
2409 | |
2410 | return SMODULO ((low + up + 1) / 2, ii); |
2411 | } |
2412 | |
2413 | static void |
2414 | verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes) |
2415 | { |
2416 | int row; |
2417 | ps_insn_ptr crr_insn; |
2418 | |
2419 | for (row = 0; row < ps->ii; row++) |
2420 | { |
2421 | int length = 0; |
2422 | |
2423 | for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) |
2424 | { |
2425 | int u = crr_insn->id; |
2426 | |
2427 | length++; |
2428 | gcc_assert (bitmap_bit_p (sched_nodes, u)); |
2429 | /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by |
2430 | popcount (sched_nodes) == number of insns in ps. */ |
2431 | gcc_assert (SCHED_TIME (u) >= ps->min_cycle); |
2432 | gcc_assert (SCHED_TIME (u) <= ps->max_cycle); |
2433 | } |
2434 | |
2435 | gcc_assert (ps->rows_length[row] == length); |
2436 | } |
2437 | } |
2438 | |
2439 | |
2440 | /* This page implements the algorithm for ordering the nodes of a DDG |
2441 | for modulo scheduling, activated through the |
2442 | "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */ |
2443 | |
2444 | #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info) |
2445 | #define ASAP(x) (ORDER_PARAMS ((x))->asap) |
2446 | #define ALAP(x) (ORDER_PARAMS ((x))->alap) |
2447 | #define HEIGHT(x) (ORDER_PARAMS ((x))->height) |
2448 | #define MOB(x) (ALAP ((x)) - ASAP ((x))) |
2449 | #define DEPTH(x) (ASAP ((x))) |
2450 | |
2451 | typedef struct node_order_params * nopa; |
2452 | |
2453 | static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result); |
2454 | static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int); |
2455 | static nopa calculate_order_params (ddg_ptr, int, int *); |
2456 | static int find_max_asap (ddg_ptr, sbitmap); |
2457 | static int find_max_hv_min_mob (ddg_ptr, sbitmap); |
2458 | static int find_max_dv_min_mob (ddg_ptr, sbitmap); |
2459 | |
2460 | enum sms_direction {BOTTOMUP, TOPDOWN}; |
2461 | |
2462 | struct node_order_params |
2463 | { |
2464 | int asap; |
2465 | int alap; |
2466 | int height; |
2467 | }; |
2468 | |
2469 | /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */ |
2470 | static void |
2471 | check_nodes_order (int *node_order, int num_nodes) |
2472 | { |
2473 | int i; |
2474 | auto_sbitmap tmp (num_nodes); |
2475 | |
2476 | bitmap_clear (tmp); |
2477 | |
2478 | if (dump_file) |
2479 | fprintf (stream: dump_file, format: "SMS final nodes order: \n" ); |
2480 | |
2481 | for (i = 0; i < num_nodes; i++) |
2482 | { |
2483 | int u = node_order[i]; |
2484 | |
2485 | if (dump_file) |
2486 | fprintf (stream: dump_file, format: "%d " , u); |
2487 | gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u)); |
2488 | |
2489 | bitmap_set_bit (map: tmp, bitno: u); |
2490 | } |
2491 | |
2492 | if (dump_file) |
2493 | fprintf (stream: dump_file, format: "\n" ); |
2494 | } |
2495 | |
2496 | /* Order the nodes of G for scheduling and pass the result in |
2497 | NODE_ORDER. Also set aux.count of each node to ASAP. |
2498 | Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */ |
2499 | static int |
2500 | sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap) |
2501 | { |
2502 | int i; |
2503 | int rec_mii = 0; |
2504 | ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g); |
2505 | |
2506 | nopa nops = calculate_order_params (g, mii, pmax_asap); |
2507 | |
2508 | if (dump_file) |
2509 | print_sccs (dump_file, sccs, g); |
2510 | |
2511 | order_nodes_of_sccs (sccs, result: node_order); |
2512 | |
2513 | if (sccs->num_sccs > 0) |
2514 | /* First SCC has the largest recurrence_length. */ |
2515 | rec_mii = sccs->sccs[0]->recurrence_length; |
2516 | |
2517 | /* Save ASAP before destroying node_order_params. */ |
2518 | for (i = 0; i < g->num_nodes; i++) |
2519 | { |
2520 | ddg_node_ptr v = &g->nodes[i]; |
2521 | v->aux.count = ASAP (v); |
2522 | } |
2523 | |
2524 | free (ptr: nops); |
2525 | free_ddg_all_sccs (sccs); |
2526 | check_nodes_order (node_order, num_nodes: g->num_nodes); |
2527 | |
2528 | return rec_mii; |
2529 | } |
2530 | |
2531 | static void |
2532 | order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order) |
2533 | { |
2534 | int i, pos = 0; |
2535 | ddg_ptr g = all_sccs->ddg; |
2536 | int num_nodes = g->num_nodes; |
2537 | auto_sbitmap prev_sccs (num_nodes); |
2538 | auto_sbitmap on_path (num_nodes); |
2539 | auto_sbitmap tmp (num_nodes); |
2540 | auto_sbitmap ones (num_nodes); |
2541 | |
2542 | bitmap_clear (prev_sccs); |
2543 | bitmap_ones (ones); |
2544 | |
2545 | /* Perform the node ordering starting from the SCC with the highest recMII. |
2546 | For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */ |
2547 | for (i = 0; i < all_sccs->num_sccs; i++) |
2548 | { |
2549 | ddg_scc_ptr scc = all_sccs->sccs[i]; |
2550 | |
2551 | /* Add nodes on paths from previous SCCs to the current SCC. */ |
2552 | find_nodes_on_paths (result: on_path, g, from: prev_sccs, to: scc->nodes); |
2553 | bitmap_ior (tmp, scc->nodes, on_path); |
2554 | |
2555 | /* Add nodes on paths from the current SCC to previous SCCs. */ |
2556 | find_nodes_on_paths (result: on_path, g, from: scc->nodes, to: prev_sccs); |
2557 | bitmap_ior (tmp, tmp, on_path); |
2558 | |
2559 | /* Remove nodes of previous SCCs from current extended SCC. */ |
2560 | bitmap_and_compl (tmp, tmp, prev_sccs); |
2561 | |
2562 | pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos); |
2563 | /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */ |
2564 | } |
2565 | |
2566 | /* Handle the remaining nodes that do not belong to any scc. Each call |
2567 | to order_nodes_in_scc handles a single connected component. */ |
2568 | while (pos < g->num_nodes) |
2569 | { |
2570 | bitmap_and_compl (tmp, ones, prev_sccs); |
2571 | pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos); |
2572 | } |
2573 | } |
2574 | |
2575 | /* MII is needed if we consider backarcs (that do not close recursive cycles). */ |
2576 | static struct node_order_params * |
2577 | calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap) |
2578 | { |
2579 | int u; |
2580 | int max_asap; |
2581 | int num_nodes = g->num_nodes; |
2582 | ddg_edge_ptr e; |
2583 | /* Allocate a place to hold ordering params for each node in the DDG. */ |
2584 | nopa node_order_params_arr; |
2585 | |
2586 | /* Initialize of ASAP/ALAP/HEIGHT to zero. */ |
2587 | node_order_params_arr = (nopa) xcalloc (num_nodes, |
2588 | sizeof (struct node_order_params)); |
2589 | |
2590 | /* Set the aux pointer of each node to point to its order_params structure. */ |
2591 | for (u = 0; u < num_nodes; u++) |
2592 | g->nodes[u].aux.info = &node_order_params_arr[u]; |
2593 | |
2594 | /* Disregarding a backarc from each recursive cycle to obtain a DAG, |
2595 | calculate ASAP, ALAP, mobility, distance, and height for each node |
2596 | in the dependence (direct acyclic) graph. */ |
2597 | |
2598 | /* We assume that the nodes in the array are in topological order. */ |
2599 | |
2600 | max_asap = 0; |
2601 | for (u = 0; u < num_nodes; u++) |
2602 | { |
2603 | ddg_node_ptr u_node = &g->nodes[u]; |
2604 | |
2605 | ASAP (u_node) = 0; |
2606 | for (e = u_node->in; e; e = e->next_in) |
2607 | if (e->distance == 0) |
2608 | ASAP (u_node) = MAX (ASAP (u_node), |
2609 | ASAP (e->src) + e->latency); |
2610 | max_asap = MAX (max_asap, ASAP (u_node)); |
2611 | } |
2612 | |
2613 | for (u = num_nodes - 1; u > -1; u--) |
2614 | { |
2615 | ddg_node_ptr u_node = &g->nodes[u]; |
2616 | |
2617 | ALAP (u_node) = max_asap; |
2618 | HEIGHT (u_node) = 0; |
2619 | for (e = u_node->out; e; e = e->next_out) |
2620 | if (e->distance == 0) |
2621 | { |
2622 | ALAP (u_node) = MIN (ALAP (u_node), |
2623 | ALAP (e->dest) - e->latency); |
2624 | HEIGHT (u_node) = MAX (HEIGHT (u_node), |
2625 | HEIGHT (e->dest) + e->latency); |
2626 | } |
2627 | } |
2628 | if (dump_file) |
2629 | { |
2630 | fprintf (stream: dump_file, format: "\nOrder params\n" ); |
2631 | for (u = 0; u < num_nodes; u++) |
2632 | { |
2633 | ddg_node_ptr u_node = &g->nodes[u]; |
2634 | |
2635 | fprintf (stream: dump_file, format: "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n" , u, |
2636 | ASAP (u_node), ALAP (u_node), HEIGHT (u_node)); |
2637 | } |
2638 | } |
2639 | |
2640 | *pmax_asap = max_asap; |
2641 | return node_order_params_arr; |
2642 | } |
2643 | |
2644 | static int |
2645 | find_max_asap (ddg_ptr g, sbitmap nodes) |
2646 | { |
2647 | unsigned int u = 0; |
2648 | int max_asap = -1; |
2649 | int result = -1; |
2650 | sbitmap_iterator sbi; |
2651 | |
2652 | EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi) |
2653 | { |
2654 | ddg_node_ptr u_node = &g->nodes[u]; |
2655 | |
2656 | if (max_asap < ASAP (u_node)) |
2657 | { |
2658 | max_asap = ASAP (u_node); |
2659 | result = u; |
2660 | } |
2661 | } |
2662 | return result; |
2663 | } |
2664 | |
2665 | static int |
2666 | find_max_hv_min_mob (ddg_ptr g, sbitmap nodes) |
2667 | { |
2668 | unsigned int u = 0; |
2669 | int max_hv = -1; |
2670 | int min_mob = INT_MAX; |
2671 | int result = -1; |
2672 | sbitmap_iterator sbi; |
2673 | |
2674 | EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi) |
2675 | { |
2676 | ddg_node_ptr u_node = &g->nodes[u]; |
2677 | |
2678 | if (max_hv < HEIGHT (u_node)) |
2679 | { |
2680 | max_hv = HEIGHT (u_node); |
2681 | min_mob = MOB (u_node); |
2682 | result = u; |
2683 | } |
2684 | else if ((max_hv == HEIGHT (u_node)) |
2685 | && (min_mob > MOB (u_node))) |
2686 | { |
2687 | min_mob = MOB (u_node); |
2688 | result = u; |
2689 | } |
2690 | } |
2691 | return result; |
2692 | } |
2693 | |
2694 | static int |
2695 | find_max_dv_min_mob (ddg_ptr g, sbitmap nodes) |
2696 | { |
2697 | unsigned int u = 0; |
2698 | int max_dv = -1; |
2699 | int min_mob = INT_MAX; |
2700 | int result = -1; |
2701 | sbitmap_iterator sbi; |
2702 | |
2703 | EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi) |
2704 | { |
2705 | ddg_node_ptr u_node = &g->nodes[u]; |
2706 | |
2707 | if (max_dv < DEPTH (u_node)) |
2708 | { |
2709 | max_dv = DEPTH (u_node); |
2710 | min_mob = MOB (u_node); |
2711 | result = u; |
2712 | } |
2713 | else if ((max_dv == DEPTH (u_node)) |
2714 | && (min_mob > MOB (u_node))) |
2715 | { |
2716 | min_mob = MOB (u_node); |
2717 | result = u; |
2718 | } |
2719 | } |
2720 | return result; |
2721 | } |
2722 | |
2723 | /* Places the nodes of SCC into the NODE_ORDER array starting |
2724 | at position POS, according to the SMS ordering algorithm. |
2725 | NODES_ORDERED (in&out parameter) holds the bitset of all nodes in |
2726 | the NODE_ORDER array, starting from position zero. */ |
2727 | static int |
2728 | order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc, |
2729 | int * node_order, int pos) |
2730 | { |
2731 | enum sms_direction dir; |
2732 | int num_nodes = g->num_nodes; |
2733 | auto_sbitmap workset (num_nodes); |
2734 | auto_sbitmap tmp (num_nodes); |
2735 | sbitmap zero_bitmap = sbitmap_alloc (num_nodes); |
2736 | auto_sbitmap predecessors (num_nodes); |
2737 | auto_sbitmap successors (num_nodes); |
2738 | |
2739 | bitmap_clear (predecessors); |
2740 | find_predecessors (result: predecessors, g, nodes_ordered); |
2741 | |
2742 | bitmap_clear (successors); |
2743 | find_successors (result: successors, g, nodes_ordered); |
2744 | |
2745 | bitmap_clear (tmp); |
2746 | if (bitmap_and (tmp, predecessors, scc)) |
2747 | { |
2748 | bitmap_copy (workset, tmp); |
2749 | dir = BOTTOMUP; |
2750 | } |
2751 | else if (bitmap_and (tmp, successors, scc)) |
2752 | { |
2753 | bitmap_copy (workset, tmp); |
2754 | dir = TOPDOWN; |
2755 | } |
2756 | else |
2757 | { |
2758 | int u; |
2759 | |
2760 | bitmap_clear (workset); |
2761 | if ((u = find_max_asap (g, nodes: scc)) >= 0) |
2762 | bitmap_set_bit (map: workset, bitno: u); |
2763 | dir = BOTTOMUP; |
2764 | } |
2765 | |
2766 | bitmap_clear (zero_bitmap); |
2767 | while (!bitmap_equal_p (workset, zero_bitmap)) |
2768 | { |
2769 | int v; |
2770 | ddg_node_ptr v_node; |
2771 | sbitmap v_node_preds; |
2772 | sbitmap v_node_succs; |
2773 | |
2774 | if (dir == TOPDOWN) |
2775 | { |
2776 | while (!bitmap_equal_p (workset, zero_bitmap)) |
2777 | { |
2778 | v = find_max_hv_min_mob (g, nodes: workset); |
2779 | v_node = &g->nodes[v]; |
2780 | node_order[pos++] = v; |
2781 | v_node_succs = NODE_SUCCESSORS (v_node); |
2782 | bitmap_and (tmp, v_node_succs, scc); |
2783 | |
2784 | /* Don't consider the already ordered successors again. */ |
2785 | bitmap_and_compl (tmp, tmp, nodes_ordered); |
2786 | bitmap_ior (workset, workset, tmp); |
2787 | bitmap_clear_bit (map: workset, bitno: v); |
2788 | bitmap_set_bit (map: nodes_ordered, bitno: v); |
2789 | } |
2790 | dir = BOTTOMUP; |
2791 | bitmap_clear (predecessors); |
2792 | find_predecessors (result: predecessors, g, nodes_ordered); |
2793 | bitmap_and (workset, predecessors, scc); |
2794 | } |
2795 | else |
2796 | { |
2797 | while (!bitmap_equal_p (workset, zero_bitmap)) |
2798 | { |
2799 | v = find_max_dv_min_mob (g, nodes: workset); |
2800 | v_node = &g->nodes[v]; |
2801 | node_order[pos++] = v; |
2802 | v_node_preds = NODE_PREDECESSORS (v_node); |
2803 | bitmap_and (tmp, v_node_preds, scc); |
2804 | |
2805 | /* Don't consider the already ordered predecessors again. */ |
2806 | bitmap_and_compl (tmp, tmp, nodes_ordered); |
2807 | bitmap_ior (workset, workset, tmp); |
2808 | bitmap_clear_bit (map: workset, bitno: v); |
2809 | bitmap_set_bit (map: nodes_ordered, bitno: v); |
2810 | } |
2811 | dir = TOPDOWN; |
2812 | bitmap_clear (successors); |
2813 | find_successors (result: successors, g, nodes_ordered); |
2814 | bitmap_and (workset, successors, scc); |
2815 | } |
2816 | } |
2817 | sbitmap_free (map: zero_bitmap); |
2818 | return pos; |
2819 | } |
2820 | |
2821 | |
2822 | /* This page contains functions for manipulating partial-schedules during |
2823 | modulo scheduling. */ |
2824 | |
2825 | /* Create a partial schedule and allocate a memory to hold II rows. */ |
2826 | |
2827 | static partial_schedule_ptr |
2828 | create_partial_schedule (int ii, ddg_ptr g, int history) |
2829 | { |
2830 | partial_schedule_ptr ps = XNEW (struct partial_schedule); |
2831 | ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr)); |
2832 | ps->rows_length = (int *) xcalloc (ii, sizeof (int)); |
2833 | ps->reg_moves.create (nelems: 0); |
2834 | ps->ii = ii; |
2835 | ps->history = history; |
2836 | ps->min_cycle = INT_MAX; |
2837 | ps->max_cycle = INT_MIN; |
2838 | ps->g = g; |
2839 | |
2840 | return ps; |
2841 | } |
2842 | |
2843 | /* Free the PS_INSNs in rows array of the given partial schedule. |
2844 | ??? Consider caching the PS_INSN's. */ |
2845 | static void |
2846 | free_ps_insns (partial_schedule_ptr ps) |
2847 | { |
2848 | int i; |
2849 | |
2850 | for (i = 0; i < ps->ii; i++) |
2851 | { |
2852 | while (ps->rows[i]) |
2853 | { |
2854 | ps_insn_ptr ps_insn = ps->rows[i]->next_in_row; |
2855 | |
2856 | free (ptr: ps->rows[i]); |
2857 | ps->rows[i] = ps_insn; |
2858 | } |
2859 | ps->rows[i] = NULL; |
2860 | } |
2861 | } |
2862 | |
2863 | /* Free all the memory allocated to the partial schedule. */ |
2864 | |
2865 | static void |
2866 | free_partial_schedule (partial_schedule_ptr ps) |
2867 | { |
2868 | ps_reg_move_info *move; |
2869 | unsigned int i; |
2870 | |
2871 | if (!ps) |
2872 | return; |
2873 | |
2874 | FOR_EACH_VEC_ELT (ps->reg_moves, i, move) |
2875 | sbitmap_free (map: move->uses); |
2876 | ps->reg_moves.release (); |
2877 | |
2878 | free_ps_insns (ps); |
2879 | free (ptr: ps->rows); |
2880 | free (ptr: ps->rows_length); |
2881 | free (ptr: ps); |
2882 | } |
2883 | |
2884 | /* Clear the rows array with its PS_INSNs, and create a new one with |
2885 | NEW_II rows. */ |
2886 | |
2887 | static void |
2888 | reset_partial_schedule (partial_schedule_ptr ps, int new_ii) |
2889 | { |
2890 | if (!ps) |
2891 | return; |
2892 | free_ps_insns (ps); |
2893 | if (new_ii == ps->ii) |
2894 | return; |
2895 | ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii |
2896 | * sizeof (ps_insn_ptr)); |
2897 | memset (s: ps->rows, c: 0, n: new_ii * sizeof (ps_insn_ptr)); |
2898 | ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int)); |
2899 | memset (s: ps->rows_length, c: 0, n: new_ii * sizeof (int)); |
2900 | ps->ii = new_ii; |
2901 | ps->min_cycle = INT_MAX; |
2902 | ps->max_cycle = INT_MIN; |
2903 | } |
2904 | |
2905 | /* Prints the partial schedule as an ii rows array, for each rows |
2906 | print the ids of the insns in it. */ |
2907 | void |
2908 | print_partial_schedule (partial_schedule_ptr ps, FILE *dump) |
2909 | { |
2910 | int i; |
2911 | |
2912 | for (i = 0; i < ps->ii; i++) |
2913 | { |
2914 | ps_insn_ptr ps_i = ps->rows[i]; |
2915 | |
2916 | fprintf (stream: dump, format: "\n[ROW %d ]: " , i); |
2917 | while (ps_i) |
2918 | { |
2919 | rtx_insn *insn = ps_rtl_insn (ps, id: ps_i->id); |
2920 | |
2921 | if (JUMP_P (insn)) |
2922 | fprintf (stream: dump, format: "%d (branch), " , INSN_UID (insn)); |
2923 | else |
2924 | fprintf (stream: dump, format: "%d, " , INSN_UID (insn)); |
2925 | |
2926 | ps_i = ps_i->next_in_row; |
2927 | } |
2928 | } |
2929 | } |
2930 | |
2931 | /* Creates an object of PS_INSN and initializes it to the given parameters. */ |
2932 | static ps_insn_ptr |
2933 | create_ps_insn (int id, int cycle) |
2934 | { |
2935 | ps_insn_ptr ps_i = XNEW (struct ps_insn); |
2936 | |
2937 | ps_i->id = id; |
2938 | ps_i->next_in_row = NULL; |
2939 | ps_i->prev_in_row = NULL; |
2940 | ps_i->cycle = cycle; |
2941 | |
2942 | return ps_i; |
2943 | } |
2944 | |
2945 | |
2946 | /* Removes the given PS_INSN from the partial schedule. */ |
2947 | static void |
2948 | remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i) |
2949 | { |
2950 | int row; |
2951 | |
2952 | gcc_assert (ps && ps_i); |
2953 | |
2954 | row = SMODULO (ps_i->cycle, ps->ii); |
2955 | if (! ps_i->prev_in_row) |
2956 | { |
2957 | gcc_assert (ps_i == ps->rows[row]); |
2958 | ps->rows[row] = ps_i->next_in_row; |
2959 | if (ps->rows[row]) |
2960 | ps->rows[row]->prev_in_row = NULL; |
2961 | } |
2962 | else |
2963 | { |
2964 | ps_i->prev_in_row->next_in_row = ps_i->next_in_row; |
2965 | if (ps_i->next_in_row) |
2966 | ps_i->next_in_row->prev_in_row = ps_i->prev_in_row; |
2967 | } |
2968 | |
2969 | ps->rows_length[row] -= 1; |
2970 | free (ptr: ps_i); |
2971 | return; |
2972 | } |
2973 | |
2974 | /* Unlike what literature describes for modulo scheduling (which focuses |
2975 | on VLIW machines) the order of the instructions inside a cycle is |
2976 | important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know |
2977 | where the current instruction should go relative to the already |
2978 | scheduled instructions in the given cycle. Go over these |
2979 | instructions and find the first possible column to put it in. */ |
2980 | static bool |
2981 | ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i, |
2982 | sbitmap must_precede, sbitmap must_follow) |
2983 | { |
2984 | ps_insn_ptr next_ps_i; |
2985 | ps_insn_ptr first_must_follow = NULL; |
2986 | ps_insn_ptr last_must_precede = NULL; |
2987 | ps_insn_ptr last_in_row = NULL; |
2988 | int row; |
2989 | |
2990 | if (! ps_i) |
2991 | return false; |
2992 | |
2993 | row = SMODULO (ps_i->cycle, ps->ii); |
2994 | |
2995 | /* Find the first must follow and the last must precede |
2996 | and insert the node immediately after the must precede |
2997 | but make sure that it there is no must follow after it. */ |
2998 | for (next_ps_i = ps->rows[row]; |
2999 | next_ps_i; |
3000 | next_ps_i = next_ps_i->next_in_row) |
3001 | { |
3002 | if (must_follow |
3003 | && bitmap_bit_p (map: must_follow, bitno: next_ps_i->id) |
3004 | && ! first_must_follow) |
3005 | first_must_follow = next_ps_i; |
3006 | if (must_precede && bitmap_bit_p (map: must_precede, bitno: next_ps_i->id)) |
3007 | { |
3008 | /* If we have already met a node that must follow, then |
3009 | there is no possible column. */ |
3010 | if (first_must_follow) |
3011 | return false; |
3012 | else |
3013 | last_must_precede = next_ps_i; |
3014 | } |
3015 | /* The closing branch must be the last in the row. */ |
3016 | if (JUMP_P (ps_rtl_insn (ps, next_ps_i->id))) |
3017 | return false; |
3018 | |
3019 | last_in_row = next_ps_i; |
3020 | } |
3021 | |
3022 | /* The closing branch is scheduled as well. Make sure there is no |
3023 | dependent instruction after it as the branch should be the last |
3024 | instruction in the row. */ |
3025 | if (JUMP_P (ps_rtl_insn (ps, ps_i->id))) |
3026 | { |
3027 | if (first_must_follow) |
3028 | return false; |
3029 | if (last_in_row) |
3030 | { |
3031 | /* Make the branch the last in the row. New instructions |
3032 | will be inserted at the beginning of the row or after the |
3033 | last must_precede instruction thus the branch is guaranteed |
3034 | to remain the last instruction in the row. */ |
3035 | last_in_row->next_in_row = ps_i; |
3036 | ps_i->prev_in_row = last_in_row; |
3037 | ps_i->next_in_row = NULL; |
3038 | } |
3039 | else |
3040 | ps->rows[row] = ps_i; |
3041 | return true; |
3042 | } |
3043 | |
3044 | /* Now insert the node after INSERT_AFTER_PSI. */ |
3045 | |
3046 | if (! last_must_precede) |
3047 | { |
3048 | ps_i->next_in_row = ps->rows[row]; |
3049 | ps_i->prev_in_row = NULL; |
3050 | if (ps_i->next_in_row) |
3051 | ps_i->next_in_row->prev_in_row = ps_i; |
3052 | ps->rows[row] = ps_i; |
3053 | } |
3054 | else |
3055 | { |
3056 | ps_i->next_in_row = last_must_precede->next_in_row; |
3057 | last_must_precede->next_in_row = ps_i; |
3058 | ps_i->prev_in_row = last_must_precede; |
3059 | if (ps_i->next_in_row) |
3060 | ps_i->next_in_row->prev_in_row = ps_i; |
3061 | } |
3062 | |
3063 | return true; |
3064 | } |
3065 | |
3066 | /* Advances the PS_INSN one column in its current row; returns false |
3067 | in failure and true in success. Bit N is set in MUST_FOLLOW if |
3068 | the node with cuid N must be come after the node pointed to by |
3069 | PS_I when scheduled in the same cycle. */ |
3070 | static bool |
3071 | ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i, |
3072 | sbitmap must_follow) |
3073 | { |
3074 | ps_insn_ptr prev, next; |
3075 | int row; |
3076 | |
3077 | if (!ps || !ps_i) |
3078 | return false; |
3079 | |
3080 | row = SMODULO (ps_i->cycle, ps->ii); |
3081 | |
3082 | if (! ps_i->next_in_row) |
3083 | return false; |
3084 | |
3085 | /* Check if next_in_row is dependent on ps_i, both having same sched |
3086 | times (typically ANTI_DEP). If so, ps_i cannot skip over it. */ |
3087 | if (must_follow && bitmap_bit_p (map: must_follow, bitno: ps_i->next_in_row->id)) |
3088 | return false; |
3089 | |
3090 | /* Advance PS_I over its next_in_row in the doubly linked list. */ |
3091 | prev = ps_i->prev_in_row; |
3092 | next = ps_i->next_in_row; |
3093 | |
3094 | if (ps_i == ps->rows[row]) |
3095 | ps->rows[row] = next; |
3096 | |
3097 | ps_i->next_in_row = next->next_in_row; |
3098 | |
3099 | if (next->next_in_row) |
3100 | next->next_in_row->prev_in_row = ps_i; |
3101 | |
3102 | next->next_in_row = ps_i; |
3103 | ps_i->prev_in_row = next; |
3104 | |
3105 | next->prev_in_row = prev; |
3106 | if (prev) |
3107 | prev->next_in_row = next; |
3108 | |
3109 | return true; |
3110 | } |
3111 | |
3112 | /* Inserts a DDG_NODE to the given partial schedule at the given cycle. |
3113 | Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is |
3114 | set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come |
3115 | before/after (respectively) the node pointed to by PS_I when scheduled |
3116 | in the same cycle. */ |
3117 | static ps_insn_ptr |
3118 | add_node_to_ps (partial_schedule_ptr ps, int id, int cycle, |
3119 | sbitmap must_precede, sbitmap must_follow) |
3120 | { |
3121 | ps_insn_ptr ps_i; |
3122 | int row = SMODULO (cycle, ps->ii); |
3123 | |
3124 | if (ps->rows_length[row] >= issue_rate) |
3125 | return NULL; |
3126 | |
3127 | ps_i = create_ps_insn (id, cycle); |
3128 | |
3129 | /* Finds and inserts PS_I according to MUST_FOLLOW and |
3130 | MUST_PRECEDE. */ |
3131 | if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow)) |
3132 | { |
3133 | free (ptr: ps_i); |
3134 | return NULL; |
3135 | } |
3136 | |
3137 | ps->rows_length[row] += 1; |
3138 | return ps_i; |
3139 | } |
3140 | |
3141 | /* Advance time one cycle. Assumes DFA is being used. */ |
3142 | static void |
3143 | advance_one_cycle (void) |
3144 | { |
3145 | if (targetm.sched.dfa_pre_cycle_insn) |
3146 | state_transition (curr_state, |
3147 | targetm.sched.dfa_pre_cycle_insn ()); |
3148 | |
3149 | state_transition (curr_state, NULL); |
3150 | |
3151 | if (targetm.sched.dfa_post_cycle_insn) |
3152 | state_transition (curr_state, |
3153 | targetm.sched.dfa_post_cycle_insn ()); |
3154 | } |
3155 | |
3156 | |
3157 | |
3158 | /* Checks if PS has resource conflicts according to DFA, starting from |
3159 | FROM cycle to TO cycle; returns true if there are conflicts and false |
3160 | if there are no conflicts. Assumes DFA is being used. */ |
3161 | static bool |
3162 | ps_has_conflicts (partial_schedule_ptr ps, int from, int to) |
3163 | { |
3164 | int cycle; |
3165 | |
3166 | state_reset (curr_state); |
3167 | |
3168 | for (cycle = from; cycle <= to; cycle++) |
3169 | { |
3170 | ps_insn_ptr crr_insn; |
3171 | /* Holds the remaining issue slots in the current row. */ |
3172 | int can_issue_more = issue_rate; |
3173 | |
3174 | /* Walk through the DFA for the current row. */ |
3175 | for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)]; |
3176 | crr_insn; |
3177 | crr_insn = crr_insn->next_in_row) |
3178 | { |
3179 | rtx_insn *insn = ps_rtl_insn (ps, id: crr_insn->id); |
3180 | |
3181 | /* Check if there is room for the current insn. */ |
3182 | if (!can_issue_more || state_dead_lock_p (curr_state)) |
3183 | return true; |
3184 | |
3185 | /* Update the DFA state and return with failure if the DFA found |
3186 | resource conflicts. */ |
3187 | if (state_transition (curr_state, insn) >= 0) |
3188 | return true; |
3189 | |
3190 | if (targetm.sched.variable_issue) |
3191 | can_issue_more = |
3192 | targetm.sched.variable_issue (sched_dump, sched_verbose, |
3193 | insn, can_issue_more); |
3194 | /* A naked CLOBBER or USE generates no instruction, so don't |
3195 | let them consume issue slots. */ |
3196 | else if (GET_CODE (PATTERN (insn)) != USE |
3197 | && GET_CODE (PATTERN (insn)) != CLOBBER) |
3198 | can_issue_more--; |
3199 | } |
3200 | |
3201 | /* Advance the DFA to the next cycle. */ |
3202 | advance_one_cycle (); |
3203 | } |
3204 | return false; |
3205 | } |
3206 | |
3207 | /* Checks if the given node causes resource conflicts when added to PS at |
3208 | cycle C. If not the node is added to PS and returned; otherwise zero |
3209 | is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with |
3210 | cuid N must be come before/after (respectively) the node pointed to by |
3211 | PS_I when scheduled in the same cycle. */ |
3212 | ps_insn_ptr |
3213 | ps_add_node_check_conflicts (partial_schedule_ptr ps, int n, |
3214 | int c, sbitmap must_precede, |
3215 | sbitmap must_follow) |
3216 | { |
3217 | int i, first, amount; |
3218 | bool has_conflicts = false; |
3219 | ps_insn_ptr ps_i; |
3220 | |
3221 | /* First add the node to the PS, if this succeeds check for |
3222 | conflicts, trying different issue slots in the same row. */ |
3223 | if (! (ps_i = add_node_to_ps (ps, id: n, cycle: c, must_precede, must_follow))) |
3224 | return NULL; /* Failed to insert the node at the given cycle. */ |
3225 | |
3226 | while (1) |
3227 | { |
3228 | has_conflicts = ps_has_conflicts (ps, from: c, to: c); |
3229 | if (ps->history > 0 && !has_conflicts) |
3230 | { |
3231 | /* Check all 2h+1 intervals, starting from c-2h..c up to c..2h, |
3232 | but not more than ii intervals. */ |
3233 | first = c - ps->history; |
3234 | amount = 2 * ps->history + 1; |
3235 | if (amount > ps->ii) |
3236 | amount = ps->ii; |
3237 | for (i = first; i < first + amount; i++) |
3238 | { |
3239 | has_conflicts = ps_has_conflicts (ps, |
3240 | from: i - ps->history, |
3241 | to: i + ps->history); |
3242 | if (has_conflicts) |
3243 | break; |
3244 | } |
3245 | } |
3246 | if (!has_conflicts) |
3247 | break; |
3248 | /* Try different issue slots to find one that the given node can be |
3249 | scheduled in without conflicts. */ |
3250 | if (! ps_insn_advance_column (ps, ps_i, must_follow)) |
3251 | break; |
3252 | } |
3253 | |
3254 | if (has_conflicts) |
3255 | { |
3256 | remove_node_from_ps (ps, ps_i); |
3257 | return NULL; |
3258 | } |
3259 | |
3260 | ps->min_cycle = MIN (ps->min_cycle, c); |
3261 | ps->max_cycle = MAX (ps->max_cycle, c); |
3262 | return ps_i; |
3263 | } |
3264 | |
3265 | /* Calculate the stage count of the partial schedule PS. The calculation |
3266 | takes into account the rotation amount passed in ROTATION_AMOUNT. */ |
3267 | int |
3268 | calculate_stage_count (partial_schedule_ptr ps, int rotation_amount) |
3269 | { |
3270 | int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount; |
3271 | int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount; |
3272 | int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii); |
3273 | |
3274 | /* The calculation of stage count is done adding the number of stages |
3275 | before cycle zero and after cycle zero. */ |
3276 | stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii); |
3277 | |
3278 | return stage_count; |
3279 | } |
3280 | |
3281 | /* Rotate the rows of PS such that insns scheduled at time |
3282 | START_CYCLE will appear in row 0. Updates max/min_cycles. */ |
3283 | void |
3284 | rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle) |
3285 | { |
3286 | int i, row, backward_rotates; |
3287 | int last_row = ps->ii - 1; |
3288 | |
3289 | if (start_cycle == 0) |
3290 | return; |
3291 | |
3292 | backward_rotates = SMODULO (start_cycle, ps->ii); |
3293 | |
3294 | /* Revisit later and optimize this into a single loop. */ |
3295 | for (i = 0; i < backward_rotates; i++) |
3296 | { |
3297 | ps_insn_ptr first_row = ps->rows[0]; |
3298 | int first_row_length = ps->rows_length[0]; |
3299 | |
3300 | for (row = 0; row < last_row; row++) |
3301 | { |
3302 | ps->rows[row] = ps->rows[row + 1]; |
3303 | ps->rows_length[row] = ps->rows_length[row + 1]; |
3304 | } |
3305 | |
3306 | ps->rows[last_row] = first_row; |
3307 | ps->rows_length[last_row] = first_row_length; |
3308 | } |
3309 | |
3310 | ps->max_cycle -= start_cycle; |
3311 | ps->min_cycle -= start_cycle; |
3312 | } |
3313 | |
3314 | #endif /* INSN_SCHEDULING */ |
3315 | |
3316 | /* Run instruction scheduler. */ |
3317 | /* Perform SMS module scheduling. */ |
3318 | |
3319 | namespace { |
3320 | |
3321 | const pass_data pass_data_sms = |
3322 | { |
3323 | .type: RTL_PASS, /* type */ |
3324 | .name: "sms" , /* name */ |
3325 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
3326 | .tv_id: TV_SMS, /* tv_id */ |
3327 | .properties_required: 0, /* properties_required */ |
3328 | .properties_provided: 0, /* properties_provided */ |
3329 | .properties_destroyed: 0, /* properties_destroyed */ |
3330 | .todo_flags_start: 0, /* todo_flags_start */ |
3331 | TODO_df_finish, /* todo_flags_finish */ |
3332 | }; |
3333 | |
3334 | class pass_sms : public rtl_opt_pass |
3335 | { |
3336 | public: |
3337 | pass_sms (gcc::context *ctxt) |
3338 | : rtl_opt_pass (pass_data_sms, ctxt) |
3339 | {} |
3340 | |
3341 | /* opt_pass methods: */ |
3342 | bool gate (function *) final override |
3343 | { |
3344 | return (optimize > 0 && flag_modulo_sched); |
3345 | } |
3346 | |
3347 | unsigned int execute (function *) final override; |
3348 | |
3349 | }; // class pass_sms |
3350 | |
3351 | unsigned int |
3352 | pass_sms::execute (function *fun ATTRIBUTE_UNUSED) |
3353 | { |
3354 | #ifdef INSN_SCHEDULING |
3355 | basic_block bb; |
3356 | |
3357 | /* Collect loop information to be used in SMS. */ |
3358 | cfg_layout_initialize (0); |
3359 | sms_schedule (); |
3360 | |
3361 | /* Update the life information, because we add pseudos. */ |
3362 | max_regno = max_reg_num (); |
3363 | |
3364 | /* Finalize layout changes. */ |
3365 | FOR_EACH_BB_FN (bb, fun) |
3366 | if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun)) |
3367 | bb->aux = bb->next_bb; |
3368 | free_dominance_info (CDI_DOMINATORS); |
3369 | cfg_layout_finalize (); |
3370 | #endif /* INSN_SCHEDULING */ |
3371 | return 0; |
3372 | } |
3373 | |
3374 | } // anon namespace |
3375 | |
3376 | rtl_opt_pass * |
3377 | make_pass_sms (gcc::context *ctxt) |
3378 | { |
3379 | return new pass_sms (ctxt); |
3380 | } |
3381 | |