1 | /* Instruction scheduling pass. |
2 | Copyright (C) 1992-2023 Free Software Foundation, Inc. |
3 | Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by, |
4 | and currently maintained by, Jim Wilson (wilson@cygnus.com) |
5 | |
6 | This file is part of GCC. |
7 | |
8 | GCC is free software; you can redistribute it and/or modify it under |
9 | the terms of the GNU General Public License as published by the Free |
10 | Software Foundation; either version 3, or (at your option) any later |
11 | version. |
12 | |
13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
16 | for more details. |
17 | |
18 | You should have received a copy of the GNU General Public License |
19 | along with GCC; see the file COPYING3. If not see |
20 | <http://www.gnu.org/licenses/>. */ |
21 | |
22 | /* Instruction scheduling pass. This file, along with sched-deps.cc, |
23 | contains the generic parts. The actual entry point for |
24 | the normal instruction scheduling pass is found in sched-rgn.cc. |
25 | |
   We compute insn priorities based on data dependencies.  Flow
   analysis only creates a fraction of the data dependencies we must
   observe: namely, only those dependencies which the combiner can be
   expected to use.  For this pass, we must therefore create the
   remaining dependencies we need to observe.  Register dependencies,
   memory dependencies, dependencies to keep function calls in order,
   and the dependence between a conditional branch and the setting of
   condition codes are all dealt with here.
34 | |
35 | The scheduler first traverses the data flow graph, starting with |
36 | the last instruction, and proceeding to the first, assigning values |
37 | to insn_priority as it goes. This sorts the instructions |
38 | topologically by data dependence. |
39 | |
40 | Once priorities have been established, we order the insns using |
41 | list scheduling. This works as follows: starting with a list of |
42 | all the ready insns, and sorted according to priority number, we |
43 | schedule the insn from the end of the list by placing its |
44 | predecessors in the list according to their priority order. We |
45 | consider this insn scheduled by setting the pointer to the "end" of |
46 | the list to point to the previous insn. When an insn has no |
47 | predecessors, we either queue it until sufficient time has elapsed |
48 | or add it to the ready list. As the instructions are scheduled or |
49 | when stalls are introduced, the queue advances and dumps insns into |
50 | the ready list. When all insns down to the lowest priority have |
51 | been scheduled, the critical path of the basic block has been made |
52 | as short as possible. The remaining insns are then scheduled in |
53 | remaining slots. |
54 | |
55 | The following list shows the order in which we want to break ties |
56 | among insns in the ready list: |
57 | |
58 | 1. choose insn with the longest path to end of bb, ties |
59 | broken by |
60 | 2. choose insn with least contribution to register pressure, |
61 | ties broken by |
   3. prefer in-block over interblock motion, ties broken by
   4. prefer useful over speculative motion, ties broken by
64 | 5. choose insn with largest control flow probability, ties |
65 | broken by |
66 | 6. choose insn with the least dependences upon the previously |
67 | scheduled insn, or finally |
   7. choose the insn which has the most insns dependent on it, or finally
69 | 8. choose insn with lowest UID. |
70 | |
71 | Memory references complicate matters. Only if we can be certain |
72 | that memory references are not part of the data dependency graph |
73 | (via true, anti, or output dependence), can we move operations past |
74 | memory references. To first approximation, reads can be done |
75 | independently, while writes introduce dependencies. Better |
76 | approximations will yield fewer dependencies. |
77 | |
78 | Before reload, an extended analysis of interblock data dependences |
79 | is required for interblock scheduling. This is performed in |
80 | compute_block_dependences (). |
81 | |
82 | Dependencies set up by memory references are treated in exactly the |
83 | same way as other dependencies, by using insn backward dependences |
84 | INSN_BACK_DEPS. INSN_BACK_DEPS are translated into forward dependences |
85 | INSN_FORW_DEPS for the purpose of forward list scheduling. |
86 | |
87 | Having optimized the critical path, we may have also unduly |
88 | extended the lifetimes of some registers. If an operation requires |
89 | that constants be loaded into registers, it is certainly desirable |
90 | to load those constants as early as necessary, but no earlier. |
91 | I.e., it will not do to load up a bunch of registers at the |
92 | beginning of a basic block only to use them at the end, if they |
93 | could be loaded later, since this may result in excessive register |
94 | utilization. |
95 | |
96 | Note that since branches are never in basic blocks, but only end |
97 | basic blocks, this pass will not move branches. But that is ok, |
98 | since we can use GNU's delayed branch scheduling pass to take care |
99 | of this case. |
100 | |
101 | Also note that no further optimizations based on algebraic |
102 | identities are performed, so this pass would be a good one to |
103 | perform instruction splitting, such as breaking up a multiply |
104 | instruction into shifts and adds where that is profitable. |
105 | |
106 | Given the memory aliasing analysis that this pass should perform, |
107 | it should be possible to remove redundant stores to memory, and to |
108 | load values from registers instead of hitting memory. |
109 | |
110 | Before reload, speculative insns are moved only if a 'proof' exists |
111 | that no exception will be caused by this, and if no live registers |
112 | exist that inhibit the motion (live registers constraints are not |
113 | represented by data dependence edges). |
114 | |
115 | This pass must update information that subsequent passes expect to |
116 | be correct. Namely: reg_n_refs, reg_n_sets, reg_n_deaths, |
117 | reg_n_calls_crossed, and reg_live_length. Also, BB_HEAD, BB_END. |
118 | |
119 | The information in the line number notes is carefully retained by |
120 | this pass. Notes that refer to the starting and ending of |
121 | exception regions are also carefully retained by this pass. All |
122 | other NOTE insns are grouped in their same relative order at the |
123 | beginning of basic blocks and regions that have been scheduled. */ |
124 | |
125 | #include "config.h" |
126 | #include "system.h" |
127 | #include "coretypes.h" |
128 | #include "backend.h" |
129 | #include "target.h" |
130 | #include "rtl.h" |
131 | #include "cfghooks.h" |
132 | #include "df.h" |
133 | #include "memmodel.h" |
134 | #include "tm_p.h" |
135 | #include "insn-config.h" |
136 | #include "regs.h" |
137 | #include "ira.h" |
138 | #include "recog.h" |
139 | #include "insn-attr.h" |
140 | #include "cfgrtl.h" |
141 | #include "cfgbuild.h" |
142 | #include "sched-int.h" |
143 | #include "common/common-target.h" |
144 | #include "dbgcnt.h" |
145 | #include "cfgloop.h" |
146 | #include "dumpfile.h" |
147 | #include "print-rtl.h" |
148 | #include "function-abi.h" |
149 | |
150 | #ifdef INSN_SCHEDULING |
151 | |
152 | /* True if we do register pressure relief through live-range |
153 | shrinkage. */ |
154 | static bool live_range_shrinkage_p; |
155 | |
156 | /* Switch on live range shrinkage. */ |
157 | void |
158 | initialize_live_range_shrinkage (void) |
159 | { |
160 | live_range_shrinkage_p = true; |
161 | } |
162 | |
163 | /* Switch off live range shrinkage. */ |
164 | void |
165 | finish_live_range_shrinkage (void) |
166 | { |
167 | live_range_shrinkage_p = false; |
168 | } |
169 | |
170 | /* issue_rate is the number of insns that can be scheduled in the same |
171 | machine cycle. It can be defined in the config/mach/mach.h file, |
172 | otherwise we set it to 1. */ |
173 | |
174 | int issue_rate; |
175 | |
176 | /* This can be set to true by a backend if the scheduler should not |
177 | enable a DCE pass. */ |
178 | bool sched_no_dce; |
179 | |
180 | /* The current initiation interval used when modulo scheduling. */ |
181 | static int modulo_ii; |
182 | |
183 | /* The maximum number of stages we are prepared to handle. */ |
184 | static int modulo_max_stages; |
185 | |
186 | /* The number of insns that exist in each iteration of the loop. We use this |
187 | to detect when we've scheduled all insns from the first iteration. */ |
188 | static int modulo_n_insns; |
189 | |
190 | /* The current count of insns in the first iteration of the loop that have |
191 | already been scheduled. */ |
192 | static int modulo_insns_scheduled; |
193 | |
194 | /* The maximum uid of insns from the first iteration of the loop. */ |
195 | static int modulo_iter0_max_uid; |
196 | |
197 | /* The number of times we should attempt to backtrack when modulo scheduling. |
198 | Decreased each time we have to backtrack. */ |
199 | static int modulo_backtracks_left; |
200 | |
201 | /* The stage in which the last insn from the original loop was |
202 | scheduled. */ |
203 | static int modulo_last_stage; |
204 | |
205 | /* sched-verbose controls the amount of debugging output the |
206 | scheduler prints. It is controlled by -fsched-verbose=N: |
207 | N=0: no debugging output. |
208 | N=1: default value. |
209 | N=2: bb's probabilities, detailed ready list info, unit/insn info. |
210 | N=3: rtl at abort point, control-flow, regions info. |
211 | N=5: dependences info. */ |
212 | int sched_verbose = 0; |
213 | |
/* Debugging file.  All printouts are sent to this stream.  */
215 | FILE *sched_dump = 0; |
216 | |
217 | /* This is a placeholder for the scheduler parameters common |
218 | to all schedulers. */ |
219 | struct common_sched_info_def *common_sched_info; |
220 | |
221 | #define INSN_TICK(INSN) (HID (INSN)->tick) |
222 | #define INSN_EXACT_TICK(INSN) (HID (INSN)->exact_tick) |
223 | #define INSN_TICK_ESTIMATE(INSN) (HID (INSN)->tick_estimate) |
224 | #define INTER_TICK(INSN) (HID (INSN)->inter_tick) |
225 | #define FEEDS_BACKTRACK_INSN(INSN) (HID (INSN)->feeds_backtrack_insn) |
226 | #define SHADOW_P(INSN) (HID (INSN)->shadow_p) |
227 | #define MUST_RECOMPUTE_SPEC_P(INSN) (HID (INSN)->must_recompute_spec) |
228 | /* Cached cost of the instruction. Use insn_sched_cost to get cost of the |
229 | insn. -1 here means that the field is not initialized. */ |
230 | #define INSN_COST(INSN) (HID (INSN)->cost) |
231 | |
232 | /* If INSN_TICK of an instruction is equal to INVALID_TICK, |
233 | then it should be recalculated from scratch. */ |
234 | #define INVALID_TICK (-(max_insn_queue_index + 1)) |
235 | /* The minimal value of the INSN_TICK of an instruction. */ |
236 | #define MIN_TICK (-max_insn_queue_index) |
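
/* For example, if max_insn_queue_index is 63, INVALID_TICK is -64 and
   valid INSN_TICK values range upward from MIN_TICK == -63.  */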
237 | |
238 | /* Original order of insns in the ready list. |
239 | Used to keep order of normal insns while separating DEBUG_INSNs. */ |
240 | #define INSN_RFS_DEBUG_ORIG_ORDER(INSN) (HID (INSN)->rfs_debug_orig_order) |
241 | |
242 | /* The deciding reason for INSN's place in the ready list. */ |
243 | #define INSN_LAST_RFS_WIN(INSN) (HID (INSN)->last_rfs_win) |
244 | |
245 | /* List of important notes we must keep around. This is a pointer to the |
246 | last element in the list. */ |
247 | rtx_insn *note_list; |
248 | |
249 | static struct spec_info_def spec_info_var; |
250 | /* Description of the speculative part of the scheduling. |
251 | If NULL - no speculation. */ |
252 | spec_info_t spec_info = NULL; |
253 | |
/* True if a recovery block was added during scheduling of the current
   block.  Used to determine whether we need to fix INSN_TICKs.  */
256 | static bool haifa_recovery_bb_recently_added_p; |
257 | |
/* True if a recovery block was added during this scheduling pass.
   Used to determine whether we should have empty memory pools of
   dependencies after finishing the current region.  */
261 | bool haifa_recovery_bb_ever_added_p; |
262 | |
263 | /* Counters of different types of speculative instructions. */ |
264 | static int nr_begin_data, nr_be_in_data, nr_begin_control, nr_be_in_control; |
265 | |
266 | /* Array used in {unlink, restore}_bb_notes. */ |
static rtx_insn **bb_header = 0;
268 | |
269 | /* Basic block after which recovery blocks will be created. */ |
270 | static basic_block before_recovery; |
271 | |
272 | /* Basic block just before the EXIT_BLOCK and after recovery, if we have |
273 | created it. */ |
274 | basic_block after_recovery; |
275 | |
276 | /* FALSE if we add bb to another region, so we don't need to initialize it. */ |
277 | bool adding_bb_to_current_region_p = true; |
278 | |
279 | /* Queues, etc. */ |
280 | |
281 | /* An instruction is ready to be scheduled when all insns preceding it |
282 | have already been scheduled. It is important to ensure that all |
283 | insns which use its result will not be executed until its result |
284 | has been computed. An insn is maintained in one of four structures: |
285 | |
286 | (P) the "Pending" set of insns which cannot be scheduled until |
287 | their dependencies have been satisfied. |
288 | (Q) the "Queued" set of insns that can be scheduled when sufficient |
289 | time has passed. |
290 | (R) the "Ready" list of unscheduled, uncommitted insns. |
291 | (S) the "Scheduled" list of insns. |
292 | |
293 | Initially, all insns are either "Pending" or "Ready" depending on |
294 | whether their dependencies are satisfied. |
295 | |
296 | Insns move from the "Ready" list to the "Scheduled" list as they |
297 | are committed to the schedule. As this occurs, the insns in the |
298 | "Pending" list have their dependencies satisfied and move to either |
299 | the "Ready" list or the "Queued" set depending on whether |
300 | sufficient time has passed to make them ready. As time passes, |
301 | insns move from the "Queued" set to the "Ready" list. |
302 | |
   The "Pending" list (P) consists of the insns in the INSN_FORW_DEPS
   of the unscheduled insns, i.e., of those that are ready, queued, and
   pending.
305 | The "Queued" set (Q) is implemented by the variable `insn_queue'. |
306 | The "Ready" list (R) is implemented by the variables `ready' and |
307 | `n_ready'. |
308 | The "Scheduled" list (S) is the new insn chain built by this pass. |
309 | |
310 | The transition (R->S) is implemented in the scheduling loop in |
311 | `schedule_block' when the best insn to schedule is chosen. |
312 | The transitions (P->R and P->Q) are implemented in `schedule_insn' as |
313 | insns move from the ready list to the scheduled list. |
   The transition (Q->R) is implemented in `queue_to_ready' as time
   passes or stalls are introduced.  */
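
/* As an illustrative sketch (not the code itself), one cycle of the
   main scheduling loop in schedule_block looks roughly like:

       queue_to_ready (&ready);            // Q -> R: stalls expired
       while (an insn can still issue on this cycle)
         {
           insn = best insn in ready;      // see rank_for_schedule
           scheduled_insns.safe_push (insn);    // R -> S
           schedule_insn (insn);     // resolves deps: P -> R or P -> Q
         }
       advance_one_cycle ();

   The sketch mixes real identifiers from this file with informal
   placeholders.  */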
316 | |
317 | /* Implement a circular buffer to delay instructions until sufficient |
318 | time has passed. For the new pipeline description interface, |
   MAX_INSN_QUEUE_INDEX is a power of two minus one which is not less
   than the maximal time of instruction execution computed by
   genattr.cc, based on the maximal time of functional unit
   reservations and of getting a result.  This is the longest time an
   insn may be queued.  */
323 | |
324 | static rtx_insn_list **insn_queue; |
325 | static int q_ptr = 0; |
326 | static int q_size = 0; |
327 | #define NEXT_Q(X) (((X)+1) & max_insn_queue_index) |
328 | #define NEXT_Q_AFTER(X, C) (((X)+C) & max_insn_queue_index) |
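
/* For example, with max_insn_queue_index == 7 the queue has 8 slots:
   NEXT_Q (7) wraps around to 0, and NEXT_Q_AFTER (6, 3) == (6 + 3) & 7
   == 1.  */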
329 | |
330 | #define QUEUE_SCHEDULED (-3) |
331 | #define QUEUE_NOWHERE (-2) |
332 | #define QUEUE_READY (-1) |
333 | /* QUEUE_SCHEDULED - INSN is scheduled. |
334 | QUEUE_NOWHERE - INSN isn't scheduled yet and is neither in |
335 | queue or ready list. |
336 | QUEUE_READY - INSN is in ready list. |
337 | N >= 0 - INSN queued for X [where NEXT_Q_AFTER (q_ptr, X) == N] cycles. */ |
338 | |
339 | #define QUEUE_INDEX(INSN) (HID (INSN)->queue_index) |
340 | |
/* The following variable holds the state describing all current and
   future reservations of the processor units.  */
343 | state_t curr_state; |
344 | |
/* The following variable is the size of the memory representing all
   current and future reservations of the processor units.  */
347 | size_t dfa_state_size; |
348 | |
349 | /* The following array is used to find the best insn from ready when |
350 | the automaton pipeline interface is used. */ |
351 | signed char *ready_try = NULL; |
352 | |
353 | /* The ready list. */ |
struct ready_list ready = {NULL, 0, 0, 0, 0};
355 | |
356 | /* The pointer to the ready list (to be removed). */ |
357 | static struct ready_list *readyp = &ready; |
358 | |
359 | /* Scheduling clock. */ |
360 | static int clock_var; |
361 | |
362 | /* Clock at which the previous instruction was issued. */ |
363 | static int last_clock_var; |
364 | |
365 | /* Set to true if, when queuing a shadow insn, we discover that it would be |
366 | scheduled too late. */ |
367 | static bool must_backtrack; |
368 | |
/* The following variable is the number of essential insns issued on
   the current cycle.  An insn is essential if it changes the
   processor's state.  */
372 | int cycle_issued_insns; |
373 | |
374 | /* This records the actual schedule. It is built up during the main phase |
375 | of schedule_block, and afterwards used to reorder the insns in the RTL. */ |
376 | static vec<rtx_insn *> scheduled_insns; |
377 | |
378 | static int may_trap_exp (const_rtx, int); |
379 | |
/* Nonzero iff the address is based on at most one register.  */
381 | #define CONST_BASED_ADDRESS_P(x) \ |
382 | (REG_P (x) \ |
383 | || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS \ |
384 | || (GET_CODE (x) == LO_SUM)) \ |
385 | && (CONSTANT_P (XEXP (x, 0)) \ |
386 | || CONSTANT_P (XEXP (x, 1))))) |
387 | |
/* Returns a class that an insn with GET_DEST(insn)=x may belong to,
   as found by analyzing the insn's expression (see haifa_classify_rtx
   below).  */
390 | |
391 | |
392 | static int haifa_luid_for_non_insn (rtx x); |
393 | |
394 | /* Haifa version of sched_info hooks common to all headers. */ |
395 | const struct common_sched_info_def haifa_common_sched_info = |
396 | { |
397 | NULL, /* fix_recovery_cfg */ |
398 | NULL, /* add_block */ |
399 | NULL, /* estimate_number_of_insns */ |
  haifa_luid_for_non_insn, /* luid_for_non_insn */
  SCHED_PASS_UNKNOWN /* sched_pass_id */
402 | }; |
403 | |
404 | /* Mapping from instruction UID to its Logical UID. */ |
405 | vec<int> sched_luids; |
406 | |
407 | /* Next LUID to assign to an instruction. */ |
408 | int sched_max_luid = 1; |
409 | |
410 | /* Haifa Instruction Data. */ |
411 | vec<haifa_insn_data_def> h_i_d; |
412 | |
413 | void (* sched_init_only_bb) (basic_block, basic_block); |
414 | |
/* Split block function.  Different schedulers might use different functions
   to keep their internal data consistent.  */
417 | basic_block (* sched_split_block) (basic_block, rtx); |
418 | |
419 | /* Create empty basic block after the specified block. */ |
420 | basic_block (* sched_create_empty_bb) (basic_block); |
421 | |
422 | /* Return the number of cycles until INSN is expected to be ready. |
423 | Return zero if it already is. */ |
424 | static int |
425 | insn_delay (rtx_insn *insn) |
426 | { |
427 | return MAX (INSN_TICK (insn) - clock_var, 0); |
428 | } |
429 | |
430 | static int |
431 | may_trap_exp (const_rtx x, int is_store) |
432 | { |
433 | enum rtx_code code; |
434 | |
435 | if (x == 0) |
436 | return TRAP_FREE; |
437 | code = GET_CODE (x); |
438 | if (is_store) |
439 | { |
440 | if (code == MEM && may_trap_p (x)) |
441 | return TRAP_RISKY; |
442 | else |
443 | return TRAP_FREE; |
444 | } |
445 | if (code == MEM) |
446 | { |
447 | /* The insn uses memory: a volatile load. */ |
448 | if (MEM_VOLATILE_P (x)) |
449 | return IRISKY; |
450 | /* An exception-free load. */ |
451 | if (!may_trap_p (x)) |
452 | return IFREE; |
453 | /* A load with 1 base register, to be further checked. */ |
454 | if (CONST_BASED_ADDRESS_P (XEXP (x, 0))) |
455 | return PFREE_CANDIDATE; |
456 | /* No info on the load, to be further checked. */ |
457 | return PRISKY_CANDIDATE; |
458 | } |
459 | else |
460 | { |
461 | const char *fmt; |
462 | int i, insn_class = TRAP_FREE; |
463 | |
464 | /* Neither store nor load, check if it may cause a trap. */ |
465 | if (may_trap_p (x)) |
466 | return TRAP_RISKY; |
467 | /* Recursive step: walk the insn... */ |
468 | fmt = GET_RTX_FORMAT (code); |
469 | for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) |
470 | { |
471 | if (fmt[i] == 'e') |
472 | { |
473 | int tmp_class = may_trap_exp (XEXP (x, i), is_store); |
474 | insn_class = WORST_CLASS (insn_class, tmp_class); |
475 | } |
476 | else if (fmt[i] == 'E') |
477 | { |
478 | int j; |
479 | for (j = 0; j < XVECLEN (x, i); j++) |
480 | { |
481 | int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store); |
482 | insn_class = WORST_CLASS (insn_class, tmp_class); |
483 | if (insn_class == TRAP_RISKY || insn_class == IRISKY) |
484 | break; |
485 | } |
486 | } |
487 | if (insn_class == TRAP_RISKY || insn_class == IRISKY) |
488 | break; |
489 | } |
490 | return insn_class; |
491 | } |
492 | } |
493 | |
494 | /* Classifies rtx X of an insn for the purpose of verifying that X can be |
495 | executed speculatively (and consequently the insn can be moved |
496 | speculatively), by examining X, returning: |
497 | TRAP_RISKY: store, or risky non-load insn (e.g. division by variable). |
498 | TRAP_FREE: non-load insn. |
499 | IFREE: load from a globally safe location. |
500 | IRISKY: volatile load. |
   PFREE_CANDIDATE, PRISKY_CANDIDATE: loads that need to be checked for
502 | being either PFREE or PRISKY. */ |
503 | |
504 | static int |
505 | haifa_classify_rtx (const_rtx x) |
506 | { |
507 | int tmp_class = TRAP_FREE; |
508 | int insn_class = TRAP_FREE; |
509 | enum rtx_code code; |
510 | |
511 | if (GET_CODE (x) == PARALLEL) |
512 | { |
513 | int i, len = XVECLEN (x, 0); |
514 | |
515 | for (i = len - 1; i >= 0; i--) |
516 | { |
517 | tmp_class = haifa_classify_rtx (XVECEXP (x, 0, i)); |
518 | insn_class = WORST_CLASS (insn_class, tmp_class); |
519 | if (insn_class == TRAP_RISKY || insn_class == IRISKY) |
520 | break; |
521 | } |
522 | } |
523 | else |
524 | { |
525 | code = GET_CODE (x); |
526 | switch (code) |
527 | { |
528 | case CLOBBER: |
529 | /* Test if it is a 'store'. */ |
	  tmp_class = may_trap_exp (XEXP (x, 0), 1);
531 | break; |
532 | case SET: |
533 | /* Test if it is a store. */ |
	  tmp_class = may_trap_exp (SET_DEST (x), 1);
535 | if (tmp_class == TRAP_RISKY) |
536 | break; |
537 | /* Test if it is a load. */ |
538 | tmp_class = |
539 | WORST_CLASS (tmp_class, |
540 | may_trap_exp (SET_SRC (x), 0)); |
541 | break; |
542 | case COND_EXEC: |
543 | tmp_class = haifa_classify_rtx (COND_EXEC_CODE (x)); |
544 | if (tmp_class == TRAP_RISKY) |
545 | break; |
546 | tmp_class = WORST_CLASS (tmp_class, |
547 | may_trap_exp (COND_EXEC_TEST (x), 0)); |
548 | break; |
549 | case TRAP_IF: |
550 | tmp_class = TRAP_RISKY; |
551 | break; |
552 | default:; |
553 | } |
554 | insn_class = tmp_class; |
555 | } |
556 | |
557 | return insn_class; |
558 | } |
559 | |
560 | int |
561 | haifa_classify_insn (const_rtx insn) |
562 | { |
  return haifa_classify_rtx (PATTERN (insn));
564 | } |
565 | |
566 | /* After the scheduler initialization function has been called, this function |
567 | can be called to enable modulo scheduling. II is the initiation interval |
568 | we should use, it affects the delays for delay_pairs that were recorded as |
569 | separated by a given number of stages. |
570 | |
571 | MAX_STAGES provides us with a limit |
572 | after which we give up scheduling; the caller must have unrolled at least |
573 | as many copies of the loop body and recorded delay_pairs for them. |
574 | |
575 | INSNS is the number of real (non-debug) insns in one iteration of |
576 | the loop. MAX_UID can be used to test whether an insn belongs to |
577 | the first iteration of the loop; all of them have a uid lower than |
578 | MAX_UID. */ |
579 | void |
580 | set_modulo_params (int ii, int max_stages, int insns, int max_uid) |
581 | { |
582 | modulo_ii = ii; |
583 | modulo_max_stages = max_stages; |
584 | modulo_n_insns = insns; |
585 | modulo_iter0_max_uid = max_uid; |
586 | modulo_backtracks_left = param_max_modulo_backtrack_attempts; |
587 | } |
588 | |
589 | /* A structure to record a pair of insns where the first one is a real |
590 | insn that has delay slots, and the second is its delayed shadow. |
591 | I1 is scheduled normally and will emit an assembly instruction, |
592 | while I2 describes the side effect that takes place at the |
593 | transition between cycles CYCLES and (CYCLES + 1) after I1. */ |
594 | struct delay_pair |
595 | { |
596 | struct delay_pair *next_same_i1; |
597 | rtx_insn *i1, *i2; |
598 | int cycles; |
  /* When doing modulo scheduling, a delay_pair can also be used to
     show that I1 and I2 are the same insn in a different stage.  If that
     is the case, STAGES will be nonzero.  */
602 | int stages; |
603 | }; |
604 | |
605 | /* Helpers for delay hashing. */ |
606 | |
607 | struct delay_i1_hasher : nofree_ptr_hash <delay_pair> |
608 | { |
609 | typedef void *compare_type; |
610 | static inline hashval_t hash (const delay_pair *); |
611 | static inline bool equal (const delay_pair *, const void *); |
612 | }; |
613 | |
614 | /* Returns a hash value for X, based on hashing just I1. */ |
615 | |
616 | inline hashval_t |
617 | delay_i1_hasher::hash (const delay_pair *x) |
618 | { |
619 | return htab_hash_pointer (x->i1); |
620 | } |
621 | |
622 | /* Return true if I1 of pair X is the same as that of pair Y. */ |
623 | |
624 | inline bool |
625 | delay_i1_hasher::equal (const delay_pair *x, const void *y) |
626 | { |
627 | return x->i1 == y; |
628 | } |
629 | |
630 | struct delay_i2_hasher : free_ptr_hash <delay_pair> |
631 | { |
632 | typedef void *compare_type; |
633 | static inline hashval_t hash (const delay_pair *); |
634 | static inline bool equal (const delay_pair *, const void *); |
635 | }; |
636 | |
637 | /* Returns a hash value for X, based on hashing just I2. */ |
638 | |
639 | inline hashval_t |
640 | delay_i2_hasher::hash (const delay_pair *x) |
641 | { |
642 | return htab_hash_pointer (x->i2); |
643 | } |
644 | |
645 | /* Return true if I2 of pair X is the same as that of pair Y. */ |
646 | |
647 | inline bool |
648 | delay_i2_hasher::equal (const delay_pair *x, const void *y) |
649 | { |
650 | return x->i2 == y; |
651 | } |
652 | |
653 | /* Two hash tables to record delay_pairs, one indexed by I1 and the other |
654 | indexed by I2. */ |
655 | static hash_table<delay_i1_hasher> *delay_htab; |
656 | static hash_table<delay_i2_hasher> *delay_htab_i2; |
657 | |
658 | /* Called through htab_traverse. Walk the hashtable using I2 as |
659 | index, and delete all elements involving an UID higher than |
660 | that pointed to by *DATA. */ |
661 | int |
662 | haifa_htab_i2_traverse (delay_pair **slot, int *data) |
663 | { |
664 | int maxuid = *data; |
665 | struct delay_pair *p = *slot; |
  if (INSN_UID (p->i2) >= maxuid || INSN_UID (p->i1) >= maxuid)
667 | { |
668 | delay_htab_i2->clear_slot (slot); |
669 | } |
670 | return 1; |
671 | } |
672 | |
/* Called through htab_traverse.  Walk the hashtable using I1 as
   index, and delete all elements involving an UID higher than
   that pointed to by *DATA.  */
676 | int |
677 | haifa_htab_i1_traverse (delay_pair **pslot, int *data) |
678 | { |
679 | int maxuid = *data; |
680 | struct delay_pair *p, *first, **pprev; |
681 | |
  if (INSN_UID ((*pslot)->i1) >= maxuid)
    {
      delay_htab->clear_slot (pslot);
685 | return 1; |
686 | } |
687 | pprev = &first; |
688 | for (p = *pslot; p; p = p->next_same_i1) |
689 | { |
      if (INSN_UID (p->i2) < maxuid)
691 | { |
692 | *pprev = p; |
693 | pprev = &p->next_same_i1; |
694 | } |
695 | } |
696 | *pprev = NULL; |
697 | if (first == NULL) |
    delay_htab->clear_slot (pslot);
699 | else |
700 | *pslot = first; |
701 | return 1; |
702 | } |
703 | |
704 | /* Discard all delay pairs which involve an insn with an UID higher |
705 | than MAX_UID. */ |
706 | void |
707 | discard_delay_pairs_above (int max_uid) |
708 | { |
  delay_htab->traverse <int *, haifa_htab_i1_traverse> (&max_uid);
  delay_htab_i2->traverse <int *, haifa_htab_i2_traverse> (&max_uid);
711 | } |
712 | |
713 | /* This function can be called by a port just before it starts the final |
714 | scheduling pass. It records the fact that an instruction with delay |
715 | slots has been split into two insns, I1 and I2. The first one will be |
716 | scheduled normally and initiates the operation. The second one is a |
717 | shadow which must follow a specific number of cycles after I1; its only |
718 | purpose is to show the side effect that occurs at that cycle in the RTL. |
719 | If a JUMP_INSN or a CALL_INSN has been split, I1 should be a normal INSN, |
720 | while I2 retains the original insn type. |
721 | |
722 | There are two ways in which the number of cycles can be specified, |
723 | involving the CYCLES and STAGES arguments to this function. If STAGES |
724 | is zero, we just use the value of CYCLES. Otherwise, STAGES is a factor |
725 | which is multiplied by MODULO_II to give the number of cycles. This is |
726 | only useful if the caller also calls set_modulo_params to enable modulo |
727 | scheduling. */ |
728 | |
729 | void |
730 | record_delay_slot_pair (rtx_insn *i1, rtx_insn *i2, int cycles, int stages) |
731 | { |
732 | struct delay_pair *p = XNEW (struct delay_pair); |
733 | struct delay_pair **slot; |
734 | |
735 | p->i1 = i1; |
736 | p->i2 = i2; |
737 | p->cycles = cycles; |
738 | p->stages = stages; |
739 | |
740 | if (!delay_htab) |
741 | { |
742 | delay_htab = new hash_table<delay_i1_hasher> (10); |
743 | delay_htab_i2 = new hash_table<delay_i2_hasher> (10); |
744 | } |
  slot = delay_htab->find_slot_with_hash (i1, htab_hash_pointer (i1), INSERT);
746 | p->next_same_i1 = *slot; |
747 | *slot = p; |
  slot = delay_htab_i2->find_slot (p, INSERT);
749 | *slot = p; |
750 | } |
751 | |
752 | /* Examine the delay pair hashtable to see if INSN is a shadow for another, |
753 | and return the other insn if so. Return NULL otherwise. */ |
754 | rtx_insn * |
755 | real_insn_for_shadow (rtx_insn *insn) |
756 | { |
757 | struct delay_pair *pair; |
758 | |
759 | if (!delay_htab) |
760 | return NULL; |
761 | |
  pair = delay_htab_i2->find_with_hash (insn, htab_hash_pointer (insn));
763 | if (!pair || pair->stages > 0) |
764 | return NULL; |
765 | return pair->i1; |
766 | } |
767 | |
768 | /* For a pair P of insns, return the fixed distance in cycles from the first |
769 | insn after which the second must be scheduled. */ |
770 | static int |
771 | pair_delay (struct delay_pair *p) |
772 | { |
773 | if (p->stages == 0) |
774 | return p->cycles; |
775 | else |
776 | return p->stages * modulo_ii; |
777 | } |
778 | |
779 | /* Given an insn INSN, add a dependence on its delayed shadow if it |
780 | has one. Also try to find situations where shadows depend on each other |
781 | and add dependencies to the real insns to limit the amount of backtracking |
782 | needed. */ |
783 | void |
784 | add_delay_dependencies (rtx_insn *insn) |
785 | { |
786 | struct delay_pair *pair; |
787 | sd_iterator_def sd_it; |
788 | dep_t dep; |
789 | |
790 | if (!delay_htab) |
791 | return; |
792 | |
  pair = delay_htab_i2->find_with_hash (insn, htab_hash_pointer (insn));
794 | if (!pair) |
795 | return; |
796 | add_dependence (insn, pair->i1, REG_DEP_ANTI); |
797 | if (pair->stages) |
798 | return; |
799 | |
800 | FOR_EACH_DEP (pair->i2, SD_LIST_BACK, sd_it, dep) |
801 | { |
802 | rtx_insn *pro = DEP_PRO (dep); |
803 | struct delay_pair *other_pair |
	= delay_htab_i2->find_with_hash (pro, htab_hash_pointer (pro));
805 | if (!other_pair || other_pair->stages) |
806 | continue; |
      if (pair_delay (other_pair) >= pair_delay (pair))
808 | { |
809 | if (sched_verbose >= 4) |
810 | { |
	      fprintf (sched_dump, ";;\tadding dependence %d <- %d\n",
		       INSN_UID (other_pair->i1),
		       INSN_UID (pair->i1));
	      fprintf (sched_dump, ";;\tpair1 %d <- %d, cost %d\n",
		       INSN_UID (pair->i1),
		       INSN_UID (pair->i2),
		       pair_delay (pair));
	      fprintf (sched_dump, ";;\tpair2 %d <- %d, cost %d\n",
		       INSN_UID (other_pair->i1),
		       INSN_UID (other_pair->i2),
		       pair_delay (other_pair));
822 | } |
823 | add_dependence (pair->i1, other_pair->i1, REG_DEP_ANTI); |
824 | } |
825 | } |
826 | } |
827 | |
828 | /* Forward declarations. */ |
829 | |
830 | static int priority (rtx_insn *, bool force_recompute = false); |
831 | static int autopref_rank_for_schedule (const rtx_insn *, const rtx_insn *); |
832 | static int rank_for_schedule (const void *, const void *); |
833 | static void swap_sort (rtx_insn **, int); |
834 | static void queue_insn (rtx_insn *, int, const char *); |
835 | static int schedule_insn (rtx_insn *); |
836 | static void adjust_priority (rtx_insn *); |
837 | static void advance_one_cycle (void); |
838 | static void extend_h_i_d (void); |
839 | |
840 | |
841 | /* Notes handling mechanism: |
842 | ========================= |
843 | Generally, NOTES are saved before scheduling and restored after scheduling. |
844 | The scheduler distinguishes between two types of notes: |
845 | |
846 | (1) LOOP_BEGIN, LOOP_END, SETJMP, EHREGION_BEG, EHREGION_END notes: |
847 | Before scheduling a region, a pointer to the note is added to the insn |
848 | that follows or precedes it. (This happens as part of the data dependence |
849 | computation). After scheduling an insn, the pointer contained in it is |
850 | used for regenerating the corresponding note (in reemit_notes). |
851 | |
852 | (2) All other notes (e.g. INSN_DELETED): Before scheduling a block, |
853 | these notes are put in a list (in rm_other_notes() and |
854 | unlink_other_notes ()). After scheduling the block, these notes are |
855 | inserted at the beginning of the block (in schedule_block()). */ |
856 | |
857 | static void ready_add (struct ready_list *, rtx_insn *, bool); |
858 | static rtx_insn *ready_remove_first (struct ready_list *); |
859 | static rtx_insn *ready_remove_first_dispatch (struct ready_list *ready); |
860 | |
861 | static void queue_to_ready (struct ready_list *); |
862 | static int early_queue_to_ready (state_t, struct ready_list *); |
863 | |
864 | /* The following functions are used to implement multi-pass scheduling |
865 | on the first cycle. */ |
866 | static rtx_insn *ready_remove (struct ready_list *, int); |
867 | static void ready_remove_insn (rtx_insn *); |
868 | |
869 | static void fix_inter_tick (rtx_insn *, rtx_insn *); |
870 | static int fix_tick_ready (rtx_insn *); |
871 | static void change_queue_index (rtx_insn *, int); |
872 | |
873 | /* The following functions are used to implement scheduling of data/control |
874 | speculative instructions. */ |
875 | |
876 | static void extend_h_i_d (void); |
877 | static void init_h_i_d (rtx_insn *); |
878 | static int haifa_speculate_insn (rtx_insn *, ds_t, rtx *); |
879 | static void generate_recovery_code (rtx_insn *); |
880 | static void process_insn_forw_deps_be_in_spec (rtx_insn *, rtx_insn *, ds_t); |
881 | static void begin_speculative_block (rtx_insn *); |
882 | static void add_to_speculative_block (rtx_insn *); |
883 | static void init_before_recovery (basic_block *); |
884 | static void create_check_block_twin (rtx_insn *, bool); |
885 | static void fix_recovery_deps (basic_block); |
886 | static bool haifa_change_pattern (rtx_insn *, rtx); |
887 | static void dump_new_block_header (int, basic_block, rtx_insn *, rtx_insn *); |
888 | static void restore_bb_notes (basic_block); |
889 | static void fix_jump_move (rtx_insn *); |
890 | static void move_block_after_check (rtx_insn *); |
891 | static void move_succs (vec<edge, va_gc> **, basic_block); |
892 | static void sched_remove_insn (rtx_insn *); |
893 | static void clear_priorities (rtx_insn *, rtx_vec_t *); |
894 | static void calc_priorities (const rtx_vec_t &); |
895 | static void add_jump_dependencies (rtx_insn *, rtx_insn *); |
896 | |
897 | #endif /* INSN_SCHEDULING */ |
898 | |
899 | /* Point to state used for the current scheduling pass. */ |
900 | struct haifa_sched_info *current_sched_info; |
901 | |
902 | #ifndef INSN_SCHEDULING |
903 | void |
904 | schedule_insns (void) |
905 | { |
906 | } |
907 | #else |
908 | |
909 | /* Do register pressure sensitive insn scheduling if the flag is set |
910 | up. */ |
911 | enum sched_pressure_algorithm sched_pressure; |
912 | |
/* Map regno -> its pressure class.  The map is defined only when
   SCHED_PRESSURE != SCHED_PRESSURE_NONE.  */
915 | enum reg_class *sched_regno_pressure_class; |
916 | |
/* The current register pressure.  Only elements corresponding to
   pressure classes are defined.  */
919 | static int curr_reg_pressure[N_REG_CLASSES]; |
920 | |
921 | /* Saved value of the previous array. */ |
922 | static int saved_reg_pressure[N_REG_CLASSES]; |
923 | |
/* Registers living at the given scheduling point.  */
925 | static bitmap curr_reg_live; |
926 | |
927 | /* Saved value of the previous array. */ |
928 | static bitmap saved_reg_live; |
929 | |
930 | /* Registers mentioned in the current region. */ |
931 | static bitmap region_ref_regs; |
932 | |
933 | /* Temporary bitmap used for SCHED_PRESSURE_MODEL. */ |
934 | static bitmap tmp_bitmap; |
935 | |
936 | /* Effective number of available registers of a given class (see comment |
937 | in sched_pressure_start_bb). */ |
938 | static int sched_class_regs_num[N_REG_CLASSES]; |
/* The number of registers that the function would need to save before it
   uses them, and the number of fixed_regs.  Helpers for calculating
   sched_class_regs_num.  */
942 | static int call_saved_regs_num[N_REG_CLASSES]; |
943 | static int fixed_regs_num[N_REG_CLASSES]; |
944 | |
/* Initiate register pressure related info for scheduling the current
   region.  Currently it only clears the registers mentioned in the
   current region.  */
948 | void |
949 | sched_init_region_reg_pressure_info (void) |
950 | { |
951 | bitmap_clear (region_ref_regs); |
952 | } |
953 | |
954 | /* PRESSURE[CL] describes the pressure on register class CL. Update it |
955 | for the birth (if BIRTH_P) or death (if !BIRTH_P) of register REGNO. |
956 | LIVE tracks the set of live registers; if it is null, assume that |
957 | every birth or death is genuine. */ |
958 | static inline void |
959 | mark_regno_birth_or_death (bitmap live, int *pressure, int regno, bool birth_p) |
960 | { |
961 | enum reg_class pressure_class; |
962 | |
963 | pressure_class = sched_regno_pressure_class[regno]; |
964 | if (regno >= FIRST_PSEUDO_REGISTER) |
965 | { |
966 | if (pressure_class != NO_REGS) |
967 | { |
968 | if (birth_p) |
969 | { |
970 | if (!live || bitmap_set_bit (live, regno)) |
971 | pressure[pressure_class] |
972 | += (ira_reg_class_max_nregs |
973 | [pressure_class][PSEUDO_REGNO_MODE (regno)]); |
974 | } |
975 | else |
976 | { |
977 | if (!live || bitmap_clear_bit (live, regno)) |
978 | pressure[pressure_class] |
979 | -= (ira_reg_class_max_nregs |
980 | [pressure_class][PSEUDO_REGNO_MODE (regno)]); |
981 | } |
982 | } |
983 | } |
984 | else if (pressure_class != NO_REGS |
	   && ! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno))
986 | { |
987 | if (birth_p) |
988 | { |
989 | if (!live || bitmap_set_bit (live, regno)) |
990 | pressure[pressure_class]++; |
991 | } |
992 | else |
993 | { |
994 | if (!live || bitmap_clear_bit (live, regno)) |
995 | pressure[pressure_class]--; |
996 | } |
997 | } |
998 | } |
999 | |
1000 | /* Initiate current register pressure related info from living |
1001 | registers given by LIVE. */ |
1002 | static void |
1003 | initiate_reg_pressure_info (bitmap live) |
1004 | { |
1005 | int i; |
1006 | unsigned int j; |
1007 | bitmap_iterator bi; |
1008 | |
1009 | for (i = 0; i < ira_pressure_classes_num; i++) |
1010 | curr_reg_pressure[ira_pressure_classes[i]] = 0; |
1011 | bitmap_clear (curr_reg_live); |
1012 | EXECUTE_IF_SET_IN_BITMAP (live, 0, j, bi) |
1013 | if (sched_pressure == SCHED_PRESSURE_MODEL |
1014 | || current_nr_blocks == 1 |
1015 | || bitmap_bit_p (region_ref_regs, j)) |
      mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure, j, true);
1017 | } |
1018 | |
1019 | /* Mark registers in X as mentioned in the current region. */ |
1020 | static void |
1021 | setup_ref_regs (rtx x) |
1022 | { |
1023 | int i, j; |
1024 | const RTX_CODE code = GET_CODE (x); |
1025 | const char *fmt; |
1026 | |
1027 | if (REG_P (x)) |
1028 | { |
1029 | bitmap_set_range (region_ref_regs, REGNO (x), REG_NREGS (x)); |
1030 | return; |
1031 | } |
1032 | fmt = GET_RTX_FORMAT (code); |
1033 | for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) |
1034 | if (fmt[i] == 'e') |
1035 | setup_ref_regs (XEXP (x, i)); |
1036 | else if (fmt[i] == 'E') |
1037 | { |
1038 | for (j = 0; j < XVECLEN (x, i); j++) |
1039 | setup_ref_regs (XVECEXP (x, i, j)); |
1040 | } |
1041 | } |
1042 | |
1043 | /* Initiate current register pressure related info at the start of |
1044 | basic block BB. */ |
1045 | static void |
1046 | initiate_bb_reg_pressure_info (basic_block bb) |
1047 | { |
1048 | unsigned int i ATTRIBUTE_UNUSED; |
1049 | rtx_insn *insn; |
1050 | |
1051 | if (current_nr_blocks > 1) |
1052 | FOR_BB_INSNS (bb, insn) |
1053 | if (NONDEBUG_INSN_P (insn)) |
1054 | setup_ref_regs (PATTERN (insn)); |
  initiate_reg_pressure_info (df_get_live_in (bb));
1056 | if (bb_has_eh_pred (bb)) |
1057 | for (i = 0; ; ++i) |
1058 | { |
1059 | unsigned int regno = EH_RETURN_DATA_REGNO (i); |
1060 | |
1061 | if (regno == INVALID_REGNUM) |
1062 | break; |
1063 | if (! bitmap_bit_p (df_get_live_in (bb), regno)) |
	  mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure,
				     regno, true);
1066 | } |
1067 | } |
1068 | |
1069 | /* Save current register pressure related info. */ |
1070 | static void |
1071 | save_reg_pressure (void) |
1072 | { |
1073 | int i; |
1074 | |
1075 | for (i = 0; i < ira_pressure_classes_num; i++) |
1076 | saved_reg_pressure[ira_pressure_classes[i]] |
1077 | = curr_reg_pressure[ira_pressure_classes[i]]; |
1078 | bitmap_copy (saved_reg_live, curr_reg_live); |
1079 | } |
1080 | |
1081 | /* Restore saved register pressure related info. */ |
1082 | static void |
1083 | restore_reg_pressure (void) |
1084 | { |
1085 | int i; |
1086 | |
1087 | for (i = 0; i < ira_pressure_classes_num; i++) |
1088 | curr_reg_pressure[ira_pressure_classes[i]] |
1089 | = saved_reg_pressure[ira_pressure_classes[i]]; |
1090 | bitmap_copy (curr_reg_live, saved_reg_live); |
1091 | } |
1092 | |
1093 | /* Return TRUE if the register is dying after its USE. */ |
1094 | static bool |
1095 | dying_use_p (struct reg_use_data *use) |
1096 | { |
1097 | struct reg_use_data *next; |
1098 | |
1099 | for (next = use->next_regno_use; next != use; next = next->next_regno_use) |
1100 | if (NONDEBUG_INSN_P (next->insn) |
1101 | && QUEUE_INDEX (next->insn) != QUEUE_SCHEDULED) |
1102 | return false; |
1103 | return true; |
1104 | } |
1105 | |
1106 | /* Print info about the current register pressure and its excess for |
1107 | each pressure class. */ |
1108 | static void |
1109 | print_curr_reg_pressure (void) |
1110 | { |
1111 | int i; |
1112 | enum reg_class cl; |
1113 | |
  fprintf (sched_dump, ";;\t");
1115 | for (i = 0; i < ira_pressure_classes_num; i++) |
1116 | { |
1117 | cl = ira_pressure_classes[i]; |
1118 | gcc_assert (curr_reg_pressure[cl] >= 0); |
      fprintf (sched_dump, " %s:%d(%d)", reg_class_names[cl],
1120 | curr_reg_pressure[cl], |
1121 | curr_reg_pressure[cl] - sched_class_regs_num[cl]); |
1122 | } |
  fprintf (sched_dump, "\n");
1124 | } |
1125 | |
1126 | /* Determine if INSN has a condition that is clobbered if a register |
1127 | in SET_REGS is modified. */ |
1128 | static bool |
1129 | cond_clobbered_p (rtx_insn *insn, HARD_REG_SET set_regs) |
1130 | { |
1131 | rtx pat = PATTERN (insn); |
1132 | gcc_assert (GET_CODE (pat) == COND_EXEC); |
  if (TEST_HARD_REG_BIT (set_regs, REGNO (XEXP (COND_EXEC_TEST (pat), 0))))
1134 | { |
1135 | sd_iterator_def sd_it; |
1136 | dep_t dep; |
1137 | haifa_change_pattern (insn, ORIG_PAT (insn)); |
1138 | FOR_EACH_DEP (insn, SD_LIST_BACK, sd_it, dep) |
1139 | DEP_STATUS (dep) &= ~DEP_CANCELLED; |
1140 | TODO_SPEC (insn) = HARD_DEP; |
1141 | if (sched_verbose >= 2) |
	fprintf (sched_dump,
		 ";;\t\tdequeue insn %s because of clobbered condition\n",
1144 | (*current_sched_info->print_insn) (insn, 0)); |
1145 | return true; |
1146 | } |
1147 | |
1148 | return false; |
1149 | } |
1150 | |
1151 | /* This function should be called after modifying the pattern of INSN, |
1152 | to update scheduler data structures as needed. */ |
1153 | static void |
1154 | update_insn_after_change (rtx_insn *insn) |
1155 | { |
1156 | sd_iterator_def sd_it; |
1157 | dep_t dep; |
1158 | |
1159 | dfa_clear_single_insn_cache (insn); |
1160 | |
1161 | sd_it = sd_iterator_start (insn, |
1162 | SD_LIST_FORW | SD_LIST_BACK | SD_LIST_RES_BACK); |
  while (sd_iterator_cond (&sd_it, &dep))
1164 | { |
1165 | DEP_COST (dep) = UNKNOWN_DEP_COST; |
      sd_iterator_next (&sd_it);
1167 | } |
1168 | |
1169 | /* Invalidate INSN_COST, so it'll be recalculated. */ |
1170 | INSN_COST (insn) = -1; |
1171 | /* Invalidate INSN_TICK, so it'll be recalculated. */ |
1172 | INSN_TICK (insn) = INVALID_TICK; |
1173 | |
1174 | /* Invalidate autoprefetch data entry. */ |
1175 | INSN_AUTOPREF_MULTIPASS_DATA (insn)[0].status |
1176 | = AUTOPREF_MULTIPASS_DATA_UNINITIALIZED; |
1177 | INSN_AUTOPREF_MULTIPASS_DATA (insn)[1].status |
1178 | = AUTOPREF_MULTIPASS_DATA_UNINITIALIZED; |
1179 | } |
1180 | |
1181 | |
1182 | /* Two VECs, one to hold dependencies for which pattern replacements |
1183 | need to be applied or restored at the start of the next cycle, and |
1184 | another to hold an integer that is either one, to apply the |
1185 | corresponding replacement, or zero to restore it. */ |
1186 | static vec<dep_t> next_cycle_replace_deps; |
1187 | static vec<int> next_cycle_apply; |
1188 | |
1189 | static void apply_replacement (dep_t, bool); |
1190 | static void restore_pattern (dep_t, bool); |
1191 | |
1192 | /* Look at the remaining dependencies for insn NEXT, and compute and return |
1193 | the TODO_SPEC value we should use for it. This is called after one of |
1194 | NEXT's dependencies has been resolved. |
1195 | We also perform pattern replacements for predication, and for broken |
1196 | replacement dependencies. The latter is only done if FOR_BACKTRACK is |
1197 | false. */ |
1198 | |
1199 | static ds_t |
1200 | recompute_todo_spec (rtx_insn *next, bool for_backtrack) |
1201 | { |
1202 | ds_t new_ds; |
1203 | sd_iterator_def sd_it; |
1204 | dep_t dep, modify_dep = NULL; |
1205 | int n_spec = 0; |
1206 | int n_control = 0; |
1207 | int n_replace = 0; |
1208 | bool first_p = true; |
1209 | |
1210 | if (sd_lists_empty_p (next, SD_LIST_BACK)) |
1211 | /* NEXT has all its dependencies resolved. */ |
1212 | return 0; |
1213 | |
1214 | if (!sd_lists_empty_p (next, SD_LIST_HARD_BACK)) |
1215 | return HARD_DEP; |
1216 | |
1217 | /* If NEXT is intended to sit adjacent to this instruction, we don't |
1218 | want to try to break any dependencies. Treat it as a HARD_DEP. */ |
1219 | if (SCHED_GROUP_P (next)) |
1220 | return HARD_DEP; |
1221 | |
1222 | /* Now we've got NEXT with speculative deps only. |
1223 | 1. Look at the deps to see what we have to do. |
1224 | 2. Check if we can do 'todo'. */ |
1225 | new_ds = 0; |
1226 | |
1227 | FOR_EACH_DEP (next, SD_LIST_BACK, sd_it, dep) |
1228 | { |
1229 | rtx_insn *pro = DEP_PRO (dep); |
1230 | ds_t ds = DEP_STATUS (dep) & SPECULATIVE; |
1231 | |
1232 | if (DEBUG_INSN_P (pro) && !DEBUG_INSN_P (next)) |
1233 | continue; |
1234 | |
1235 | if (ds) |
1236 | { |
1237 | n_spec++; |
1238 | if (first_p) |
1239 | { |
1240 | first_p = false; |
1241 | |
1242 | new_ds = ds; |
1243 | } |
1244 | else |
1245 | new_ds = ds_merge (new_ds, ds); |
1246 | } |
1247 | else if (DEP_TYPE (dep) == REG_DEP_CONTROL) |
1248 | { |
1249 | if (QUEUE_INDEX (pro) != QUEUE_SCHEDULED) |
1250 | { |
1251 | n_control++; |
1252 | modify_dep = dep; |
1253 | } |
1254 | DEP_STATUS (dep) &= ~DEP_CANCELLED; |
1255 | } |
1256 | else if (DEP_REPLACE (dep) != NULL) |
1257 | { |
1258 | if (QUEUE_INDEX (pro) != QUEUE_SCHEDULED) |
1259 | { |
1260 | n_replace++; |
1261 | modify_dep = dep; |
1262 | } |
1263 | DEP_STATUS (dep) &= ~DEP_CANCELLED; |
1264 | } |
1265 | } |
1266 | |
1267 | if (n_replace > 0 && n_control == 0 && n_spec == 0) |
1268 | { |
      if (!dbg_cnt (sched_breakdep))
1270 | return HARD_DEP; |
1271 | FOR_EACH_DEP (next, SD_LIST_BACK, sd_it, dep) |
1272 | { |
1273 | struct dep_replacement *desc = DEP_REPLACE (dep); |
1274 | if (desc != NULL) |
1275 | { |
1276 | if (desc->insn == next && !for_backtrack) |
1277 | { |
1278 | gcc_assert (n_replace == 1); |
1279 | apply_replacement (dep, true); |
1280 | } |
1281 | DEP_STATUS (dep) |= DEP_CANCELLED; |
1282 | } |
1283 | } |
1284 | return 0; |
1285 | } |
1286 | |
1287 | else if (n_control == 1 && n_replace == 0 && n_spec == 0) |
1288 | { |
1289 | rtx_insn *pro, *other; |
1290 | rtx new_pat; |
1291 | rtx cond = NULL_RTX; |
1292 | bool success; |
1293 | rtx_insn *prev = NULL; |
1294 | int i; |
1295 | unsigned regno; |
1296 | |
1297 | if ((current_sched_info->flags & DO_PREDICATION) == 0 |
1298 | || (ORIG_PAT (next) != NULL_RTX |
1299 | && PREDICATED_PAT (next) == NULL_RTX)) |
1300 | return HARD_DEP; |
1301 | |
1302 | pro = DEP_PRO (modify_dep); |
      other = real_insn_for_shadow (pro);
1304 | if (other != NULL_RTX) |
1305 | pro = other; |
1306 | |
1307 | cond = sched_get_reverse_condition_uncached (pro); |
1308 | regno = REGNO (XEXP (cond, 0)); |
1309 | |
1310 | /* Find the last scheduled insn that modifies the condition register. |
1311 | We can stop looking once we find the insn we depend on through the |
1312 | REG_DEP_CONTROL; if the condition register isn't modified after it, |
1313 | we know that it still has the right value. */ |
1314 | if (QUEUE_INDEX (pro) == QUEUE_SCHEDULED) |
1315 | FOR_EACH_VEC_ELT_REVERSE (scheduled_insns, i, prev) |
1316 | { |
1317 | HARD_REG_SET t; |
1318 | |
1319 | find_all_hard_reg_sets (prev, &t, true); |
	    if (TEST_HARD_REG_BIT (t, regno))
1321 | return HARD_DEP; |
1322 | if (prev == pro) |
1323 | break; |
1324 | } |
1325 | if (ORIG_PAT (next) == NULL_RTX) |
1326 | { |
	  ORIG_PAT (next) = PATTERN (next);
1328 | |
1329 | new_pat = gen_rtx_COND_EXEC (VOIDmode, cond, PATTERN (next)); |
1330 | success = haifa_change_pattern (next, new_pat); |
1331 | if (!success) |
1332 | return HARD_DEP; |
1333 | PREDICATED_PAT (next) = new_pat; |
1334 | } |
      else if (PATTERN (next) != PREDICATED_PAT (next))
1336 | { |
1337 | bool success = haifa_change_pattern (next, |
1338 | PREDICATED_PAT (next)); |
1339 | gcc_assert (success); |
1340 | } |
1341 | DEP_STATUS (modify_dep) |= DEP_CANCELLED; |
1342 | return DEP_CONTROL; |
1343 | } |
1344 | |
1345 | if (PREDICATED_PAT (next) != NULL_RTX) |
1346 | { |
1347 | int tick = INSN_TICK (next); |
1348 | bool success = haifa_change_pattern (next, |
1349 | ORIG_PAT (next)); |
1350 | INSN_TICK (next) = tick; |
1351 | gcc_assert (success); |
1352 | } |
1353 | |
1354 | /* We can't handle the case where there are both speculative and control |
1355 | dependencies, so we return HARD_DEP in such a case. Also fail if |
1356 | we have speculative dependencies with not enough points, or more than |
1357 | one control dependency. */ |
1358 | if ((n_spec > 0 && (n_control > 0 || n_replace > 0)) |
1359 | || (n_spec > 0 |
1360 | /* Too few points? */ |
1361 | && ds_weak (new_ds) < spec_info->data_weakness_cutoff) |
1362 | || n_control > 0 |
1363 | || n_replace > 0) |
1364 | return HARD_DEP; |
1365 | |
1366 | return new_ds; |
1367 | } |
1368 | |
1369 | /* Pointer to the last instruction scheduled. */ |
1370 | static rtx_insn *last_scheduled_insn; |
1371 | |
1372 | /* Pointer to the last nondebug instruction scheduled within the |
1373 | block, or the prev_head of the scheduling block. Used by |
1374 | rank_for_schedule, so that insns independent of the last scheduled |
1375 | insn will be preferred over dependent instructions. */ |
1376 | static rtx_insn *last_nondebug_scheduled_insn; |
1377 | |
1378 | /* Pointer that iterates through the list of unscheduled insns if we |
1379 | have a dbg_cnt enabled. It always points at an insn prior to the |
1380 | first unscheduled one. */ |
1381 | static rtx_insn *nonscheduled_insns_begin; |
1382 | |
1383 | /* Compute cost of executing INSN. |
1384 | This is the number of cycles between instruction issue and |
1385 | instruction results. */ |
1386 | int |
1387 | insn_sched_cost (rtx_insn *insn) |
1388 | { |
1389 | int cost; |
1390 | |
1391 | if (sched_fusion) |
1392 | return 0; |
1393 | |
1394 | if (sel_sched_p ()) |
1395 | { |
1396 | if (recog_memoized (insn) < 0) |
1397 | return 0; |
1398 | |
1399 | cost = insn_default_latency (insn); |
1400 | if (cost < 0) |
1401 | cost = 0; |
1402 | |
1403 | return cost; |
1404 | } |
1405 | |
1406 | cost = INSN_COST (insn); |
1407 | |
1408 | if (cost < 0) |
1409 | { |
1410 | /* A USE insn, or something else we don't need to |
1411 | understand. We can't pass these directly to |
1412 | result_ready_cost or insn_default_latency because it will |
1413 | trigger a fatal error for unrecognizable insns. */ |
1414 | if (recog_memoized (insn) < 0) |
1415 | { |
1416 | INSN_COST (insn) = 0; |
1417 | return 0; |
1418 | } |
1419 | else |
1420 | { |
1421 | cost = insn_default_latency (insn); |
1422 | if (cost < 0) |
1423 | cost = 0; |
1424 | |
1425 | INSN_COST (insn) = cost; |
1426 | } |
1427 | } |
1428 | |
1429 | return cost; |
1430 | } |
1431 | |
1432 | /* Compute cost of dependence LINK. |
1433 | This is the number of cycles between instruction issue and |
1434 | instruction results. |
1435 | ??? We also use this function to call recog_memoized on all insns. */ |
1436 | int |
1437 | dep_cost_1 (dep_t link, dw_t dw) |
1438 | { |
1439 | rtx_insn *insn = DEP_PRO (link); |
1440 | rtx_insn *used = DEP_CON (link); |
1441 | int cost; |
1442 | |
1443 | if (DEP_COST (link) != UNKNOWN_DEP_COST) |
1444 | return DEP_COST (link); |
1445 | |
1446 | if (delay_htab) |
1447 | { |
1448 | struct delay_pair *delay_entry; |
1449 | delay_entry |
	= delay_htab_i2->find_with_hash (used, htab_hash_pointer (used));
1451 | if (delay_entry) |
1452 | { |
1453 | if (delay_entry->i1 == insn) |
1454 | { |
	      DEP_COST (link) = pair_delay (delay_entry);
1456 | return DEP_COST (link); |
1457 | } |
1458 | } |
1459 | } |
1460 | |
1461 | /* A USE insn should never require the value used to be computed. |
1462 | This allows the computation of a function's result and parameter |
1463 | values to overlap the return and call. We don't care about the |
1464 | dependence cost when only decreasing register pressure. */ |
  if (recog_memoized (used) < 0)
1466 | { |
1467 | cost = 0; |
1468 | recog_memoized (insn); |
1469 | } |
1470 | else |
1471 | { |
1472 | enum reg_note dep_type = DEP_TYPE (link); |
1473 | |
1474 | cost = insn_sched_cost (insn); |
1475 | |
1476 | if (INSN_CODE (insn) >= 0) |
1477 | { |
1478 | if (dep_type == REG_DEP_ANTI) |
1479 | cost = 0; |
1480 | else if (dep_type == REG_DEP_OUTPUT) |
1481 | { |
1482 | cost = (insn_default_latency (insn) |
1483 | - insn_default_latency (used)); |
1484 | if (cost <= 0) |
1485 | cost = 1; |
1486 | } |
1487 | else if (bypass_p (insn)) |
1488 | cost = insn_latency (insn, used); |
1489 | } |
1490 | |
1491 | |
1492 | if (targetm.sched.adjust_cost) |
1493 | cost = targetm.sched.adjust_cost (used, (int) dep_type, insn, cost, |
1494 | dw); |
1495 | |
1496 | if (cost < 0) |
1497 | cost = 0; |
1498 | } |
1499 | |
1500 | DEP_COST (link) = cost; |
1501 | return cost; |
1502 | } |
1503 | |
1504 | /* Compute cost of dependence LINK. |
1505 | This is the number of cycles between instruction issue and |
1506 | instruction results. */ |
1507 | int |
1508 | dep_cost (dep_t link) |
1509 | { |
return dep_cost_1 (link, 0);
1511 | } |
1512 | |
1513 | /* Use this sel-sched.cc friendly function in reorder2 instead of increasing |
1514 | INSN_PRIORITY explicitly. */ |
1515 | void |
1516 | increase_insn_priority (rtx_insn *insn, int amount) |
1517 | { |
1518 | if (!sel_sched_p ()) |
1519 | { |
1520 | /* We're dealing with haifa-sched.cc INSN_PRIORITY. */ |
1521 | if (INSN_PRIORITY_KNOWN (insn)) |
1522 | INSN_PRIORITY (insn) += amount; |
1523 | } |
1524 | else |
1525 | { |
1526 | /* In sel-sched.cc INSN_PRIORITY is not kept up to date. |
1527 | Use EXPR_PRIORITY instead. */ |
1528 | sel_add_to_insn_priority (insn, amount); |
1529 | } |
1530 | } |
1531 | |
1532 | /* Return 'true' if DEP should be included in priority calculations. */ |
1533 | static bool |
1534 | contributes_to_priority_p (dep_t dep) |
1535 | { |
1536 | if (DEBUG_INSN_P (DEP_CON (dep)) |
1537 | || DEBUG_INSN_P (DEP_PRO (dep))) |
1538 | return false; |
1539 | |
1540 | /* Critical path is meaningful in block boundaries only. */ |
1541 | if (!current_sched_info->contributes_to_priority (DEP_CON (dep), |
1542 | DEP_PRO (dep))) |
1543 | return false; |
1544 | |
1545 | if (DEP_REPLACE (dep) != NULL) |
1546 | return false; |
1547 | |
/* If flag COUNT_SPEC_IN_CRITICAL_PATH is set,
   then speculative instructions are less likely to be
   scheduled.  That is because the priority of
   their producers will increase, so the producers are
   more likely to be scheduled first, thus
   resolving the dependence.  */
1554 | if (sched_deps_info->generate_spec_deps |
1555 | && !(spec_info->flags & COUNT_SPEC_IN_CRITICAL_PATH) |
1556 | && (DEP_STATUS (dep) & SPECULATIVE)) |
1557 | return false; |
1558 | |
1559 | return true; |
1560 | } |
1561 | |
1562 | /* Compute the number of nondebug deps in list LIST for INSN. */ |
1563 | |
1564 | static int |
1565 | dep_list_size (rtx_insn *insn, sd_list_types_def list) |
1566 | { |
1567 | sd_iterator_def sd_it; |
1568 | dep_t dep; |
1569 | int dbgcount = 0, nodbgcount = 0; |
1570 | |
1571 | if (!MAY_HAVE_DEBUG_INSNS) |
1572 | return sd_lists_size (insn, list); |
1573 | |
1574 | FOR_EACH_DEP (insn, list, sd_it, dep) |
1575 | { |
1576 | if (DEBUG_INSN_P (DEP_CON (dep))) |
1577 | dbgcount++; |
1578 | else if (!DEBUG_INSN_P (DEP_PRO (dep))) |
1579 | nodbgcount++; |
1580 | } |
1581 | |
1582 | gcc_assert (dbgcount + nodbgcount == sd_lists_size (insn, list)); |
1583 | |
1584 | return nodbgcount; |
1585 | } |
1586 | |
1587 | bool sched_fusion; |
1588 | |
1589 | /* Compute the priority number for INSN. */ |
1590 | static int |
1591 | priority (rtx_insn *insn, bool force_recompute) |
1592 | { |
1593 | if (! INSN_P (insn)) |
1594 | return 0; |
1595 | |
1596 | /* We should not be interested in priority of an already scheduled insn. */ |
1597 | gcc_assert (QUEUE_INDEX (insn) != QUEUE_SCHEDULED); |
1598 | |
1599 | if (force_recompute || !INSN_PRIORITY_KNOWN (insn)) |
1600 | { |
1601 | int this_priority = -1; |
1602 | |
1603 | if (sched_fusion) |
1604 | { |
1605 | int this_fusion_priority; |
1606 | |
1607 | targetm.sched.fusion_priority (insn, FUSION_MAX_PRIORITY, |
1608 | &this_fusion_priority, &this_priority); |
1609 | INSN_FUSION_PRIORITY (insn) = this_fusion_priority; |
1610 | } |
1611 | else if (dep_list_size (insn, SD_LIST_FORW) == 0) |
/* ??? We should set INSN_PRIORITY to insn_sched_cost when an insn
   has some forward deps but all of them are ignored by the
   contributes_to_priority hook.  At the moment we set the priority
   of such an insn to 0.  */
1616 | this_priority = insn_sched_cost (insn); |
1617 | else |
1618 | { |
1619 | rtx_insn *prev_first, *twin; |
1620 | basic_block rec; |
1621 | |
/* For recovery check instructions we calculate priority slightly
   differently from that of normal instructions.  Instead of walking
   through the INSN_FORW_DEPS (check) list, we walk through the
   INSN_FORW_DEPS list of each instruction in the corresponding
   recovery block.  */
1627 | |
1628 | /* Selective scheduling does not define RECOVERY_BLOCK macro. */ |
1629 | rec = sel_sched_p () ? NULL : RECOVERY_BLOCK (insn); |
1630 | if (!rec || rec == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
1631 | { |
1632 | prev_first = PREV_INSN (insn); |
1633 | twin = insn; |
1634 | } |
1635 | else |
1636 | { |
1637 | prev_first = NEXT_INSN (BB_HEAD (rec)); |
1638 | twin = PREV_INSN (BB_END (rec)); |
1639 | } |
1640 | |
1641 | do |
1642 | { |
1643 | sd_iterator_def sd_it; |
1644 | dep_t dep; |
1645 | |
1646 | FOR_EACH_DEP (twin, SD_LIST_FORW, sd_it, dep) |
1647 | { |
1648 | rtx_insn *next; |
1649 | int next_priority; |
1650 | |
1651 | next = DEP_CON (dep); |
1652 | |
if (BLOCK_FOR_INSN (next) != rec)
1654 | { |
1655 | int cost; |
1656 | |
1657 | if (!contributes_to_priority_p (dep)) |
1658 | continue; |
1659 | |
1660 | if (twin == insn) |
cost = dep_cost (dep);
1662 | else |
1663 | { |
1664 | struct _dep _dep1, *dep1 = &_dep1; |
1665 | |
1666 | init_dep (dep1, insn, next, REG_DEP_ANTI); |
1667 | |
cost = dep_cost (dep1);
1669 | } |
1670 | |
next_priority = cost + priority (next);
1672 | |
1673 | if (next_priority > this_priority) |
1674 | this_priority = next_priority; |
1675 | } |
1676 | } |
1677 | |
twin = PREV_INSN (twin);
1679 | } |
1680 | while (twin != prev_first); |
1681 | } |
1682 | |
1683 | if (this_priority < 0) |
1684 | { |
1685 | gcc_assert (this_priority == -1); |
1686 | |
1687 | this_priority = insn_sched_cost (insn); |
1688 | } |
1689 | |
1690 | INSN_PRIORITY (insn) = this_priority; |
1691 | INSN_PRIORITY_STATUS (insn) = 1; |
1692 | } |
1693 | |
1694 | return INSN_PRIORITY (insn); |
1695 | } |
1696 | |
1697 | /* Macros and functions for keeping the priority queue sorted, and |
1698 | dealing with queuing and dequeuing of instructions. */ |
1699 | |
1700 | /* For each pressure class CL, set DEATH[CL] to the number of registers |
1701 | in that class that die in INSN. */ |
1702 | |
1703 | static void |
1704 | calculate_reg_deaths (rtx_insn *insn, int *death) |
1705 | { |
1706 | int i; |
1707 | struct reg_use_data *use; |
1708 | |
1709 | for (i = 0; i < ira_pressure_classes_num; i++) |
1710 | death[ira_pressure_classes[i]] = 0; |
1711 | for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use) |
1712 | if (dying_use_p (use)) |
mark_regno_birth_or_death (0, death, use->regno, true);
1714 | } |
1715 | |
1716 | /* Setup info about the current register pressure impact of scheduling |
1717 | INSN at the current scheduling point. */ |
1718 | static void |
1719 | setup_insn_reg_pressure_info (rtx_insn *insn) |
1720 | { |
1721 | int i, change, before, after, hard_regno; |
1722 | int excess_cost_change; |
1723 | machine_mode mode; |
1724 | enum reg_class cl; |
1725 | struct reg_pressure_data *pressure_info; |
1726 | int *max_reg_pressure; |
1727 | static int death[N_REG_CLASSES]; |
1728 | |
1729 | gcc_checking_assert (!DEBUG_INSN_P (insn)); |
1730 | |
1731 | excess_cost_change = 0; |
1732 | calculate_reg_deaths (insn, death); |
1733 | pressure_info = INSN_REG_PRESSURE (insn); |
1734 | max_reg_pressure = INSN_MAX_REG_PRESSURE (insn); |
1735 | gcc_assert (pressure_info != NULL && max_reg_pressure != NULL); |
1736 | for (i = 0; i < ira_pressure_classes_num; i++) |
1737 | { |
1738 | cl = ira_pressure_classes[i]; |
1739 | gcc_assert (curr_reg_pressure[cl] >= 0); |
1740 | change = (int) pressure_info[i].set_increase - death[cl]; |
1741 | before = MAX (0, max_reg_pressure[i] - sched_class_regs_num[cl]); |
1742 | after = MAX (0, max_reg_pressure[i] + change |
1743 | - sched_class_regs_num[cl]); |
1744 | hard_regno = ira_class_hard_regs[cl][0]; |
1745 | gcc_assert (hard_regno >= 0); |
1746 | mode = reg_raw_mode[hard_regno]; |
1747 | excess_cost_change += ((after - before) |
1748 | * (ira_memory_move_cost[mode][cl][0] |
1749 | + ira_memory_move_cost[mode][cl][1])); |
1750 | } |
1751 | INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insn) = excess_cost_change; |
1752 | } |
1753 | |
1754 | /* This is the first page of code related to SCHED_PRESSURE_MODEL. |
1755 | It tries to make the scheduler take register pressure into account |
1756 | without introducing too many unnecessary stalls. It hooks into the |
1757 | main scheduling algorithm at several points: |
1758 | |
1759 | - Before scheduling starts, model_start_schedule constructs a |
1760 | "model schedule" for the current block. This model schedule is |
1761 | chosen solely to keep register pressure down. It does not take the |
1762 | target's pipeline or the original instruction order into account, |
1763 | except as a tie-breaker. It also doesn't work to a particular |
1764 | pressure limit. |
1765 | |
1766 | This model schedule gives us an idea of what pressure can be |
1767 | achieved for the block and gives us an example of a schedule that |
1768 | keeps to that pressure. It also makes the final schedule less |
1769 | dependent on the original instruction order. This is important |
1770 | because the original order can either be "wide" (many values live |
1771 | at once, such as in user-scheduled code) or "narrow" (few values |
1772 | live at once, such as after loop unrolling, where several |
1773 | iterations are executed sequentially). |
1774 | |
1775 | We do not apply this model schedule to the rtx stream. We simply |
1776 | record it in model_schedule. We also compute the maximum pressure, |
1777 | MP, that was seen during this schedule. |
1778 | |
1779 | - Instructions are added to the ready queue even if they require |
1780 | a stall. The length of the stall is instead computed as: |
1781 | |
1782 | MAX (INSN_TICK (INSN) - clock_var, 0) |
1783 | |
1784 | (= insn_delay). This allows rank_for_schedule to choose between |
1785 | introducing a deliberate stall or increasing pressure. |
1786 | |
1787 | - Before sorting the ready queue, model_set_excess_costs assigns |
1788 | a pressure-based cost to each ready instruction in the queue. |
1789 | This is the instruction's INSN_REG_PRESSURE_EXCESS_COST_CHANGE |
1790 | (ECC for short) and is effectively measured in cycles. |
1791 | |
1792 | - rank_for_schedule ranks instructions based on: |
1793 | |
1794 | ECC (insn) + insn_delay (insn) |
1795 | |
1796 | then as: |
1797 | |
1798 | insn_delay (insn) |
1799 | |
1800 | So, for example, an instruction X1 with an ECC of 1 that can issue |
1801 | now will win over an instruction X0 with an ECC of zero that would |
1802 | introduce a stall of one cycle. However, an instruction X2 with an |
1803 | ECC of 2 that can issue now will lose to both X0 and X1. |
1804 | |
1805 | - When an instruction is scheduled, model_recompute updates the model |
1806 | schedule with the new pressures (some of which might now exceed the |
1807 | original maximum pressure MP). model_update_limit_points then searches |
1808 | for the new point of maximum pressure, if not already known. */ |
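
/* A small worked example of the ranking above, with illustrative
   numbers only: suppose clock_var is 10 and insn A has INSN_TICK 12,
   so insn_delay (A) == 2.  If A's ECC is 0, its primary key is
   0 + 2 = 2.  An insn B that is ready now (insn_delay 0) with an ECC
   of 2 has the same primary key, so the tie is broken by insn_delay
   alone and B is preferred: we would rather accept the pressure
   increase than deliberately stall for two cycles.  */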
1809 | |
1810 | /* Used to separate high-verbosity debug information for SCHED_PRESSURE_MODEL |
1811 | from surrounding debug information. */ |
1812 | #define MODEL_BAR \ |
1813 | ";;\t\t+------------------------------------------------------\n" |
1814 | |
1815 | /* Information about the pressure on a particular register class at a |
1816 | particular point of the model schedule. */ |
1817 | struct model_pressure_data { |
1818 | /* The pressure at this point of the model schedule, or -1 if the |
1819 | point is associated with an instruction that has already been |
1820 | scheduled. */ |
1821 | int ref_pressure; |
1822 | |
1823 | /* The maximum pressure during or after this point of the model schedule. */ |
1824 | int max_pressure; |
1825 | }; |
1826 | |
1827 | /* Per-instruction information that is used while building the model |
1828 | schedule. Here, "schedule" refers to the model schedule rather |
1829 | than the main schedule. */ |
1830 | struct model_insn_info { |
1831 | /* The instruction itself. */ |
1832 | rtx_insn *insn; |
1833 | |
1834 | /* If this instruction is in model_worklist, these fields link to the |
1835 | previous (higher-priority) and next (lower-priority) instructions |
1836 | in the list. */ |
1837 | struct model_insn_info *prev; |
1838 | struct model_insn_info *next; |
1839 | |
1840 | /* While constructing the schedule, QUEUE_INDEX describes whether an |
1841 | instruction has already been added to the schedule (QUEUE_SCHEDULED), |
1842 | is in model_worklist (QUEUE_READY), or neither (QUEUE_NOWHERE). |
1843 | old_queue records the value that QUEUE_INDEX had before scheduling |
1844 | started, so that we can restore it once the schedule is complete. */ |
1845 | int old_queue; |
1846 | |
1847 | /* The relative importance of an unscheduled instruction. Higher |
1848 | values indicate greater importance. */ |
1849 | unsigned int model_priority; |
1850 | |
1851 | /* The length of the longest path of satisfied true dependencies |
1852 | that leads to this instruction. */ |
1853 | unsigned int depth; |
1854 | |
1855 | /* The length of the longest path of dependencies of any kind |
1856 | that leads from this instruction. */ |
1857 | unsigned int alap; |
1858 | |
1859 | /* The number of predecessor nodes that must still be scheduled. */ |
1860 | int unscheduled_preds; |
1861 | }; |
1862 | |
1863 | /* Information about the pressure limit for a particular register class. |
1864 | This structure is used when applying a model schedule to the main |
1865 | schedule. */ |
1866 | struct model_pressure_limit { |
1867 | /* The maximum register pressure seen in the original model schedule. */ |
1868 | int orig_pressure; |
1869 | |
1870 | /* The maximum register pressure seen in the current model schedule |
1871 | (which excludes instructions that have already been scheduled). */ |
1872 | int pressure; |
1873 | |
1874 | /* The point of the current model schedule at which PRESSURE is first |
1875 | reached. It is set to -1 if the value needs to be recomputed. */ |
1876 | int point; |
1877 | }; |
1878 | |
1879 | /* Describes a particular way of measuring register pressure. */ |
1880 | struct model_pressure_group { |
1881 | /* Index PCI describes the maximum pressure on ira_pressure_classes[PCI]. */ |
1882 | struct model_pressure_limit limits[N_REG_CLASSES]; |
1883 | |
1884 | /* Index (POINT * ira_num_pressure_classes + PCI) describes the pressure |
1885 | on register class ira_pressure_classes[PCI] at point POINT of the |
1886 | current model schedule. A POINT of model_num_insns describes the |
1887 | pressure at the end of the schedule. */ |
1888 | struct model_pressure_data *model; |
1889 | }; |
1890 | |
1891 | /* Index POINT gives the instruction at point POINT of the model schedule. |
1892 | This array doesn't change during main scheduling. */ |
1893 | static vec<rtx_insn *> model_schedule; |
1894 | |
1895 | /* The list of instructions in the model worklist, sorted in order of |
1896 | decreasing priority. */ |
1897 | static struct model_insn_info *model_worklist; |
1898 | |
1899 | /* Index I describes the instruction with INSN_LUID I. */ |
1900 | static struct model_insn_info *model_insns; |
1901 | |
1902 | /* The number of instructions in the model schedule. */ |
1903 | static int model_num_insns; |
1904 | |
1905 | /* The index of the first instruction in model_schedule that hasn't yet been |
1906 | added to the main schedule, or model_num_insns if all of them have. */ |
1907 | static int model_curr_point; |
1908 | |
1909 | /* Describes the pressure before each instruction in the model schedule. */ |
1910 | static struct model_pressure_group model_before_pressure; |
1911 | |
1912 | /* The first unused model_priority value (as used in model_insn_info). */ |
1913 | static unsigned int model_next_priority; |
1914 | |
1915 | |
1916 | /* The model_pressure_data for ira_pressure_classes[PCI] in GROUP |
1917 | at point POINT of the model schedule. */ |
1918 | #define MODEL_PRESSURE_DATA(GROUP, POINT, PCI) \ |
1919 | (&(GROUP)->model[(POINT) * ira_pressure_classes_num + (PCI)]) |
1920 | |
1921 | /* The maximum pressure on ira_pressure_classes[PCI] in GROUP at or |
1922 | after point POINT of the model schedule. */ |
1923 | #define MODEL_MAX_PRESSURE(GROUP, POINT, PCI) \ |
1924 | (MODEL_PRESSURE_DATA (GROUP, POINT, PCI)->max_pressure) |
1925 | |
1926 | /* The pressure on ira_pressure_classes[PCI] in GROUP at point POINT |
1927 | of the model schedule. */ |
1928 | #define MODEL_REF_PRESSURE(GROUP, POINT, PCI) \ |
1929 | (MODEL_PRESSURE_DATA (GROUP, POINT, PCI)->ref_pressure) |
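
/* For example, with ira_pressure_classes_num == 3 the model array is
   laid out point-major as [P0C0 P0C1 P0C2 P1C0 ...], so
   MODEL_PRESSURE_DATA (GROUP, 2, 1) addresses GROUP->model[2 * 3 + 1],
   i.e. element 7.  */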
1930 | |
1931 | /* Information about INSN that is used when creating the model schedule. */ |
1932 | #define MODEL_INSN_INFO(INSN) \ |
1933 | (&model_insns[INSN_LUID (INSN)]) |
1934 | |
1935 | /* The instruction at point POINT of the model schedule. */ |
1936 | #define MODEL_INSN(POINT) \ |
1937 | (model_schedule[POINT]) |
1938 | |
1939 | |
1940 | /* Return INSN's index in the model schedule, or model_num_insns if it |
1941 | doesn't belong to that schedule. */ |
1942 | |
1943 | static int |
1944 | model_index (rtx_insn *insn) |
1945 | { |
1946 | if (INSN_MODEL_INDEX (insn) == 0) |
1947 | return model_num_insns; |
1948 | return INSN_MODEL_INDEX (insn) - 1; |
1949 | } |
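
/* Note that INSN_MODEL_INDEX stores the index biased by one, so that
   zero can mean "not in the model schedule": the first insn of the
   model schedule has INSN_MODEL_INDEX 1 and model_index returns 0
   for it.  */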
1950 | |
1951 | /* Make sure that GROUP->limits is up-to-date for the current point |
1952 | of the model schedule. */ |
1953 | |
1954 | static void |
1955 | model_update_limit_points_in_group (struct model_pressure_group *group) |
1956 | { |
1957 | int pci, max_pressure, point; |
1958 | |
1959 | for (pci = 0; pci < ira_pressure_classes_num; pci++) |
1960 | { |
1961 | /* We may have passed the final point at which the pressure in |
1962 | group->limits[pci].pressure was reached. Update the limit if so. */ |
1963 | max_pressure = MODEL_MAX_PRESSURE (group, model_curr_point, pci); |
1964 | group->limits[pci].pressure = max_pressure; |
1965 | |
1966 | /* Find the point at which MAX_PRESSURE is first reached. We need |
1967 | to search in three cases: |
1968 | |
1969 | - We've already moved past the previous pressure point. |
1970 | In this case we search forward from model_curr_point. |
1971 | |
1972 | - We scheduled the previous point of maximum pressure ahead of |
1973 | its position in the model schedule, but doing so didn't bring |
1974 | the pressure point earlier. In this case we search forward |
1975 | from that previous pressure point. |
1976 | |
1977 | - Scheduling an instruction early caused the maximum pressure |
1978 | to decrease. In this case we will have set the pressure |
1979 | point to -1, and we search forward from model_curr_point. */ |
1980 | point = MAX (group->limits[pci].point, model_curr_point); |
1981 | while (point < model_num_insns |
1982 | && MODEL_REF_PRESSURE (group, point, pci) < max_pressure) |
1983 | point++; |
1984 | group->limits[pci].point = point; |
1985 | |
1986 | gcc_assert (MODEL_REF_PRESSURE (group, point, pci) == max_pressure); |
1987 | gcc_assert (MODEL_MAX_PRESSURE (group, point, pci) == max_pressure); |
1988 | } |
1989 | } |
1990 | |
1991 | /* Make sure that all register-pressure limits are up-to-date for the |
1992 | current position in the model schedule. */ |
1993 | |
1994 | static void |
1995 | model_update_limit_points (void) |
1996 | { |
model_update_limit_points_in_group (&model_before_pressure);
1998 | } |
1999 | |
2000 | /* Return the model_index of the last unscheduled use in chain USE |
2001 | outside of USE's instruction. Return -1 if there are no other uses, |
2002 | or model_num_insns if the register is live at the end of the block. */ |
2003 | |
2004 | static int |
2005 | model_last_use_except (struct reg_use_data *use) |
2006 | { |
2007 | struct reg_use_data *next; |
2008 | int last, index; |
2009 | |
2010 | last = -1; |
2011 | for (next = use->next_regno_use; next != use; next = next->next_regno_use) |
2012 | if (NONDEBUG_INSN_P (next->insn) |
2013 | && QUEUE_INDEX (next->insn) != QUEUE_SCHEDULED) |
2014 | { |
index = model_index (next->insn);
2016 | if (index == model_num_insns) |
2017 | return model_num_insns; |
2018 | if (last < index) |
2019 | last = index; |
2020 | } |
2021 | return last; |
2022 | } |
2023 | |
2024 | /* An instruction with model_index POINT has just been scheduled, and it |
2025 | adds DELTA to the pressure on ira_pressure_classes[PCI] after POINT - 1. |
2026 | Update MODEL_REF_PRESSURE (GROUP, POINT, PCI) and |
2027 | MODEL_MAX_PRESSURE (GROUP, POINT, PCI) accordingly. */ |
2028 | |
2029 | static void |
2030 | model_start_update_pressure (struct model_pressure_group *group, |
2031 | int point, int pci, int delta) |
2032 | { |
2033 | int next_max_pressure; |
2034 | |
2035 | if (point == model_num_insns) |
2036 | { |
2037 | /* The instruction wasn't part of the model schedule; it was moved |
2038 | from a different block. Update the pressure for the end of |
2039 | the model schedule. */ |
2040 | MODEL_REF_PRESSURE (group, point, pci) += delta; |
2041 | MODEL_MAX_PRESSURE (group, point, pci) += delta; |
2042 | } |
2043 | else |
2044 | { |
2045 | /* Record that this instruction has been scheduled. Nothing now |
2046 | changes between POINT and POINT + 1, so get the maximum pressure |
2047 | from the latter. If the maximum pressure decreases, the new |
2048 | pressure point may be before POINT. */ |
2049 | MODEL_REF_PRESSURE (group, point, pci) = -1; |
2050 | next_max_pressure = MODEL_MAX_PRESSURE (group, point + 1, pci); |
2051 | if (MODEL_MAX_PRESSURE (group, point, pci) > next_max_pressure) |
2052 | { |
2053 | MODEL_MAX_PRESSURE (group, point, pci) = next_max_pressure; |
2054 | if (group->limits[pci].point == point) |
2055 | group->limits[pci].point = -1; |
2056 | } |
2057 | } |
2058 | } |
2059 | |
2060 | /* Record that scheduling a later instruction has changed the pressure |
2061 | at point POINT of the model schedule by DELTA (which might be 0). |
2062 | Update GROUP accordingly. Return nonzero if these changes might |
2063 | trigger changes to previous points as well. */ |
2064 | |
2065 | static int |
2066 | model_update_pressure (struct model_pressure_group *group, |
2067 | int point, int pci, int delta) |
2068 | { |
2069 | int ref_pressure, max_pressure, next_max_pressure; |
2070 | |
2071 | /* If POINT hasn't yet been scheduled, update its pressure. */ |
2072 | ref_pressure = MODEL_REF_PRESSURE (group, point, pci); |
2073 | if (ref_pressure >= 0 && delta != 0) |
2074 | { |
2075 | ref_pressure += delta; |
2076 | MODEL_REF_PRESSURE (group, point, pci) = ref_pressure; |
2077 | |
2078 | /* Check whether the maximum pressure in the overall schedule |
2079 | has increased. (This means that the MODEL_MAX_PRESSURE of |
2080 | every point <= POINT will need to increase too; see below.) */ |
2081 | if (group->limits[pci].pressure < ref_pressure) |
2082 | group->limits[pci].pressure = ref_pressure; |
2083 | |
2084 | /* If we are at maximum pressure, and the maximum pressure |
2085 | point was previously unknown or later than POINT, |
2086 | bring it forward. */ |
2087 | if (group->limits[pci].pressure == ref_pressure |
2088 | && !IN_RANGE (group->limits[pci].point, 0, point)) |
2089 | group->limits[pci].point = point; |
2090 | |
2091 | /* If POINT used to be the point of maximum pressure, but isn't |
2092 | any longer, we need to recalculate it using a forward walk. */ |
2093 | if (group->limits[pci].pressure > ref_pressure |
2094 | && group->limits[pci].point == point) |
2095 | group->limits[pci].point = -1; |
2096 | } |
2097 | |
2098 | /* Update the maximum pressure at POINT. Changes here might also |
2099 | affect the maximum pressure at POINT - 1. */ |
2100 | next_max_pressure = MODEL_MAX_PRESSURE (group, point + 1, pci); |
2101 | max_pressure = MAX (ref_pressure, next_max_pressure); |
2102 | if (MODEL_MAX_PRESSURE (group, point, pci) != max_pressure) |
2103 | { |
2104 | MODEL_MAX_PRESSURE (group, point, pci) = max_pressure; |
2105 | return 1; |
2106 | } |
2107 | return 0; |
2108 | } |
2109 | |
2110 | /* INSN has just been scheduled. Update the model schedule accordingly. */ |
2111 | |
2112 | static void |
2113 | model_recompute (rtx_insn *insn) |
2114 | { |
2115 | struct { |
2116 | int last_use; |
2117 | int regno; |
2118 | } uses[FIRST_PSEUDO_REGISTER + MAX_RECOG_OPERANDS]; |
2119 | struct reg_use_data *use; |
2120 | struct reg_pressure_data *reg_pressure; |
2121 | int delta[N_REG_CLASSES]; |
2122 | int pci, point, mix, new_last, cl, ref_pressure, queue; |
2123 | unsigned int i, num_uses, num_pending_births; |
2124 | bool print_p; |
2125 | |
2126 | /* The destinations of INSN were previously live from POINT onwards, but are |
2127 | now live from model_curr_point onwards. Set up DELTA accordingly. */ |
2128 | point = model_index (insn); |
2129 | reg_pressure = INSN_REG_PRESSURE (insn); |
2130 | for (pci = 0; pci < ira_pressure_classes_num; pci++) |
2131 | { |
2132 | cl = ira_pressure_classes[pci]; |
2133 | delta[cl] = reg_pressure[pci].set_increase; |
2134 | } |
2135 | |
2136 | /* Record which registers previously died at POINT, but which now die |
2137 | before POINT. Adjust DELTA so that it represents the effect of |
2138 | this change after POINT - 1. Set NUM_PENDING_BIRTHS to the number of |
2139 | registers that will be born in the range [model_curr_point, POINT). */ |
2140 | num_uses = 0; |
2141 | num_pending_births = 0; |
2142 | bitmap_clear (tmp_bitmap); |
2143 | for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use) |
2144 | { |
2145 | new_last = model_last_use_except (use); |
2146 | if (new_last < point && bitmap_set_bit (tmp_bitmap, use->regno)) |
2147 | { |
2148 | gcc_assert (num_uses < ARRAY_SIZE (uses)); |
2149 | uses[num_uses].last_use = new_last; |
2150 | uses[num_uses].regno = use->regno; |
2151 | /* This register is no longer live after POINT - 1. */ |
mark_regno_birth_or_death (NULL, delta, use->regno, false);
2153 | num_uses++; |
2154 | if (new_last >= 0) |
2155 | num_pending_births++; |
2156 | } |
2157 | } |
2158 | |
2159 | /* Update the MODEL_REF_PRESSURE and MODEL_MAX_PRESSURE for POINT. |
2160 | Also set each group pressure limit for POINT. */ |
2161 | for (pci = 0; pci < ira_pressure_classes_num; pci++) |
2162 | { |
2163 | cl = ira_pressure_classes[pci]; |
model_start_update_pressure (&model_before_pressure,
point, pci, delta[cl]);
2166 | } |
2167 | |
2168 | /* Walk the model schedule backwards, starting immediately before POINT. */ |
2169 | print_p = false; |
2170 | if (point != model_curr_point) |
2171 | do |
2172 | { |
2173 | point--; |
2174 | insn = MODEL_INSN (point); |
2175 | queue = QUEUE_INDEX (insn); |
2176 | |
2177 | if (queue != QUEUE_SCHEDULED) |
2178 | { |
2179 | /* DELTA describes the effect of the move on the register pressure |
2180 | after POINT. Make it describe the effect on the pressure |
2181 | before POINT. */ |
2182 | i = 0; |
2183 | while (i < num_uses) |
2184 | { |
2185 | if (uses[i].last_use == point) |
2186 | { |
2187 | /* This register is now live again. */ |
mark_regno_birth_or_death (NULL, delta,
uses[i].regno, true);
2190 | |
2191 | /* Remove this use from the array. */ |
2192 | uses[i] = uses[num_uses - 1]; |
2193 | num_uses--; |
2194 | num_pending_births--; |
2195 | } |
2196 | else |
2197 | i++; |
2198 | } |
2199 | |
2200 | if (sched_verbose >= 5) |
2201 | { |
2202 | if (!print_p) |
2203 | { |
fprintf (sched_dump, MODEL_BAR);
fprintf (sched_dump, ";;\t\t| New pressure for model"
" schedule\n");
fprintf (sched_dump, MODEL_BAR);
2208 | print_p = true; |
2209 | } |
2210 | |
fprintf (sched_dump, ";;\t\t| %3d %4d %-30s ",
2212 | point, INSN_UID (insn), |
2213 | str_pattern_slim (PATTERN (insn))); |
2214 | for (pci = 0; pci < ira_pressure_classes_num; pci++) |
2215 | { |
2216 | cl = ira_pressure_classes[pci]; |
2217 | ref_pressure = MODEL_REF_PRESSURE (&model_before_pressure, |
2218 | point, pci); |
fprintf (sched_dump, " %s:[%d->%d]",
2220 | reg_class_names[ira_pressure_classes[pci]], |
2221 | ref_pressure, ref_pressure + delta[cl]); |
2222 | } |
fprintf (sched_dump, "\n");
2224 | } |
2225 | } |
2226 | |
2227 | /* Adjust the pressure at POINT. Set MIX to nonzero if POINT - 1 |
2228 | might have changed as well. */ |
2229 | mix = num_pending_births; |
2230 | for (pci = 0; pci < ira_pressure_classes_num; pci++) |
2231 | { |
2232 | cl = ira_pressure_classes[pci]; |
2233 | mix |= delta[cl]; |
mix |= model_update_pressure (&model_before_pressure,
point, pci, delta[cl]);
2236 | } |
2237 | } |
2238 | while (mix && point > model_curr_point); |
2239 | |
2240 | if (print_p) |
fprintf (sched_dump, MODEL_BAR);
2242 | } |
2243 | |
2244 | /* After DEP, which was cancelled, has been resolved for insn NEXT, |
2245 | check whether the insn's pattern needs restoring. */ |
2246 | static bool |
2247 | must_restore_pattern_p (rtx_insn *next, dep_t dep) |
2248 | { |
2249 | if (QUEUE_INDEX (next) == QUEUE_SCHEDULED) |
2250 | return false; |
2251 | |
2252 | if (DEP_TYPE (dep) == REG_DEP_CONTROL) |
2253 | { |
2254 | gcc_assert (ORIG_PAT (next) != NULL_RTX); |
2255 | gcc_assert (next == DEP_CON (dep)); |
2256 | } |
2257 | else |
2258 | { |
2259 | struct dep_replacement *desc = DEP_REPLACE (dep); |
2260 | if (desc->insn != next) |
2261 | { |
2262 | gcc_assert (*desc->loc == desc->orig); |
2263 | return false; |
2264 | } |
2265 | } |
2266 | return true; |
2267 | } |
2268 | |
2269 | /* model_spill_cost (CL, P, P') returns the cost of increasing the |
2270 | pressure on CL from P to P'. We use this to calculate a "base ECC", |
2271 | baseECC (CL, X), for each pressure class CL and each instruction X. |
2272 | Supposing X changes the pressure on CL from P to P', and that the |
2273 | maximum pressure on CL in the current model schedule is MP', then: |
2274 | |
2275 | * if X occurs before or at the next point of maximum pressure in |
2276 | the model schedule and P' > MP', then: |
2277 | |
2278 | baseECC (CL, X) = model_spill_cost (CL, MP, P') |
2279 | |
2280 | The idea is that the pressure after scheduling a fixed set of |
2281 | instructions -- in this case, the set up to and including the |
2282 | next maximum pressure point -- is going to be the same regardless |
2283 | of the order; we simply want to keep the intermediate pressure |
2284 | under control. Thus X has a cost of zero unless scheduling it |
2285 | now would exceed MP'. |
2286 | |
2287 | If all increases in the set are by the same amount, no zero-cost |
2288 | instruction will ever cause the pressure to exceed MP'. However, |
2289 | if X is instead moved past an instruction X' with pressure in the |
2290 | range (MP' - (P' - P), MP'), the pressure at X' will increase |
2291 | beyond MP'. Since baseECC is very much a heuristic anyway, |
2292 | it doesn't seem worth the overhead of tracking cases like these. |
2293 | |
2294 | The cost of exceeding MP' is always based on the original maximum |
2295 | pressure MP. This is so that going 2 registers over the original |
2296 | limit has the same cost regardless of whether it comes from two |
2297 | separate +1 deltas or from a single +2 delta. |
2298 | |
2299 | * if X occurs after the next point of maximum pressure in the model |
2300 | schedule and P' > P, then: |
2301 | |
2302 | baseECC (CL, X) = model_spill_cost (CL, MP, MP' + (P' - P)) |
2303 | |
2304 | That is, if we move X forward across a point of maximum pressure, |
2305 | and if X increases the pressure by P' - P, then we conservatively |
2306 | assume that scheduling X next would increase the maximum pressure |
2307 | by P' - P. Again, the cost of doing this is based on the original |
2308 | maximum pressure MP, for the same reason as above. |
2309 | |
2310 | * if P' < P, P > MP, and X occurs at or after the next point of |
2311 | maximum pressure, then: |
2312 | |
2313 | baseECC (CL, X) = -model_spill_cost (CL, MAX (MP, P'), P) |
2314 | |
2315 | That is, if we have already exceeded the original maximum pressure MP, |
2316 | and if X might reduce the maximum pressure again -- or at least push |
2317 | it further back, and thus allow more scheduling freedom -- it is given |
2318 | a negative cost to reflect the improvement. |
2319 | |
2320 | * otherwise, |
2321 | |
2322 | baseECC (CL, X) = 0 |
2323 | |
2324 | In this case, X is not expected to affect the maximum pressure MP', |
2325 | so it has zero cost. |
2326 | |
2327 | We then create a combined value baseECC (X) that is the sum of |
2328 | baseECC (CL, X) for each pressure class CL. |
2329 | |
2330 | baseECC (X) could itself be used as the ECC value described above. |
2331 | However, this is often too conservative, in the sense that it |
2332 | tends to make high-priority instructions that increase pressure |
2333 | wait too long in cases where introducing a spill would be better. |
2334 | For this reason the final ECC is a priority-adjusted form of |
2335 | baseECC (X). Specifically, we calculate: |
2336 | |
2337 | P (X) = INSN_PRIORITY (X) - insn_delay (X) - baseECC (X) |
2338 | baseP = MAX { P (X) | baseECC (X) <= 0 } |
2339 | |
2340 | Then: |
2341 | |
2342 | ECC (X) = MAX (MIN (baseP - P (X), baseECC (X)), 0) |
2343 | |
2344 | Thus an instruction's effect on pressure is ignored if it has a high |
2345 | enough priority relative to the ones that don't increase pressure. |
2346 | Negative values of baseECC (X) do not increase the priority of X |
2347 | itself, but they do make it harder for other instructions to |
2348 | increase the pressure further. |
2349 | |
2350 | This pressure cost is deliberately timid. The intention has been |
2351 | to choose a heuristic that rarely interferes with the normal list |
2352 | scheduler in cases where that scheduler would produce good code. |
2353 | We simply want to curb some of its worst excesses. */ |
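
/* To make the ECC formulas above concrete, here is a worked example
   with illustrative numbers.  Suppose X has INSN_PRIORITY 10,
   insn_delay 0 and baseECC 3, and suppose the best instruction with
   baseECC <= 0 gives baseP == 8.  Then:

     P (X) = 10 - 0 - 3 = 7
     ECC (X) = MAX (MIN (baseP - P (X), baseECC (X)), 0)
             = MAX (MIN (8 - 7, 3), 0) = 1

   so X pays a pressure cost of only one cycle: its priority is almost
   high enough to justify the pressure increase.  An otherwise
   identical instruction with INSN_PRIORITY 4 would instead pay the
   full base cost, MIN (8 - 1, 3) = 3.  */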
2354 | |
2355 | /* Return the cost of increasing the pressure in class CL from FROM to TO. |
2356 | |
2357 | Here we use the very simplistic cost model that every register above |
2358 | sched_class_regs_num[CL] has a spill cost of 1. We could use other |
2359 | measures instead, such as one based on MEMORY_MOVE_COST. However: |
2360 | |
2361 | (1) In order for an instruction to be scheduled, the higher cost |
2362 | would need to be justified in a single saving of that many stalls. |
2363 | This is overly pessimistic, because the benefit of spilling is |
2364 | often to avoid a sequence of several short stalls rather than |
2365 | a single long one. |
2366 | |
2367 | (2) The cost is still arbitrary. Because we are not allocating |
2368 | registers during scheduling, we have no way of knowing for |
2369 | sure how many memory accesses will be required by each spill, |
2370 | where the spills will be placed within the block, or even |
2371 | which block(s) will contain the spills. |
2372 | |
2373 | So a higher cost than 1 is often too conservative in practice, |
2374 | forcing blocks to contain unnecessary stalls instead of spill code. |
2375 | The simple cost below seems to be the best compromise. It reduces |
2376 | the interference with the normal list scheduler, which helps make |
2377 | it more suitable for a default-on option. */ |
2378 | |
2379 | static int |
2380 | model_spill_cost (int cl, int from, int to) |
2381 | { |
2382 | from = MAX (from, sched_class_regs_num[cl]); |
2383 | return MAX (to, from) - from; |
2384 | } |
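
/* For instance, if sched_class_regs_num[CL] is 4, then
   model_spill_cost (CL, 3, 7) clamps FROM up to 4 and returns
   7 - 4 = 3, i.e. three registers' worth of assumed spill cost,
   while a transition that does not increase the pressure, such as
   model_spill_cost (CL, 6, 5), costs 0.  */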
2385 | |
2386 | /* Return baseECC (ira_pressure_classes[PCI], POINT), given that |
2387 | P = curr_reg_pressure[ira_pressure_classes[PCI]] and that |
2388 | P' = P + DELTA. */ |
2389 | |
2390 | static int |
2391 | model_excess_group_cost (struct model_pressure_group *group, |
2392 | int point, int pci, int delta) |
2393 | { |
2394 | int pressure, cl; |
2395 | |
2396 | cl = ira_pressure_classes[pci]; |
2397 | if (delta < 0 && point >= group->limits[pci].point) |
2398 | { |
2399 | pressure = MAX (group->limits[pci].orig_pressure, |
2400 | curr_reg_pressure[cl] + delta); |
return -model_spill_cost (cl, pressure, curr_reg_pressure[cl]);
2402 | } |
2403 | |
2404 | if (delta > 0) |
2405 | { |
2406 | if (point > group->limits[pci].point) |
2407 | pressure = group->limits[pci].pressure + delta; |
2408 | else |
2409 | pressure = curr_reg_pressure[cl] + delta; |
2410 | |
2411 | if (pressure > group->limits[pci].pressure) |
return model_spill_cost (cl, group->limits[pci].orig_pressure,
pressure);
2414 | } |
2415 | |
2416 | return 0; |
2417 | } |
2418 | |
2419 | /* Return baseECC (MODEL_INSN (INSN)). Dump the costs to sched_dump |
2420 | if PRINT_P. */ |
2421 | |
2422 | static int |
2423 | model_excess_cost (rtx_insn *insn, bool print_p) |
2424 | { |
2425 | int point, pci, cl, cost, this_cost, delta; |
2426 | struct reg_pressure_data *insn_reg_pressure; |
2427 | int insn_death[N_REG_CLASSES]; |
2428 | |
calculate_reg_deaths (insn, insn_death);
2430 | point = model_index (insn); |
2431 | insn_reg_pressure = INSN_REG_PRESSURE (insn); |
2432 | cost = 0; |
2433 | |
2434 | if (print_p) |
fprintf (sched_dump, ";;\t\t| %3d %4d | %4d %+3d |", point,
2436 | INSN_UID (insn), INSN_PRIORITY (insn), insn_delay (insn)); |
2437 | |
2438 | /* Sum up the individual costs for each register class. */ |
2439 | for (pci = 0; pci < ira_pressure_classes_num; pci++) |
2440 | { |
2441 | cl = ira_pressure_classes[pci]; |
2442 | delta = insn_reg_pressure[pci].set_increase - insn_death[cl]; |
this_cost = model_excess_group_cost (&model_before_pressure,
2444 | point, pci, delta); |
2445 | cost += this_cost; |
2446 | if (print_p) |
fprintf (sched_dump, " %s:[%d base cost %d]",
2448 | reg_class_names[cl], delta, this_cost); |
2449 | } |
2450 | |
2451 | if (print_p) |
fprintf (sched_dump, "\n");
2453 | |
2454 | return cost; |
2455 | } |
2456 | |
2457 | /* Dump the next points of maximum pressure for GROUP. */ |
2458 | |
2459 | static void |
2460 | model_dump_pressure_points (struct model_pressure_group *group) |
2461 | { |
2462 | int pci, cl; |
2463 | |
fprintf (sched_dump, ";;\t\t| pressure points");
2465 | for (pci = 0; pci < ira_pressure_classes_num; pci++) |
2466 | { |
2467 | cl = ira_pressure_classes[pci]; |
fprintf (sched_dump, " %s:[%d->%d at ", reg_class_names[cl],
2469 | curr_reg_pressure[cl], group->limits[pci].pressure); |
2470 | if (group->limits[pci].point < model_num_insns) |
fprintf (sched_dump, "%d:%d]", group->limits[pci].point,
2472 | INSN_UID (MODEL_INSN (group->limits[pci].point))); |
2473 | else |
fprintf (sched_dump, "end]");
2475 | } |
fprintf (sched_dump, "\n");
2477 | } |
2478 | |
2479 | /* Set INSN_REG_PRESSURE_EXCESS_COST_CHANGE for INSNS[0...COUNT-1]. */ |
2480 | |
2481 | static void |
2482 | model_set_excess_costs (rtx_insn **insns, int count) |
2483 | { |
2484 | int i, cost, priority_base, priority; |
2485 | bool print_p; |
2486 | |
2487 | /* Record the baseECC value for each instruction in the model schedule, |
2488 | except that negative costs are converted to zero ones now rather than |
2489 | later. Do not assign a cost to debug instructions, since they must |
2490 | not change code-generation decisions. Experiments suggest we also |
2491 | get better results by not assigning a cost to instructions from |
2492 | a different block. |
2493 | |
2494 | Set PRIORITY_BASE to baseP in the block comment above. This is the |
2495 | maximum priority of the "cheap" instructions, which should always |
2496 | include the next model instruction. */ |
2497 | priority_base = 0; |
2498 | print_p = false; |
2499 | for (i = 0; i < count; i++) |
2500 | if (INSN_MODEL_INDEX (insns[i])) |
2501 | { |
2502 | if (sched_verbose >= 6 && !print_p) |
2503 | { |
fprintf (sched_dump, MODEL_BAR);
fprintf (sched_dump, ";;\t\t| Pressure costs for ready queue\n");
model_dump_pressure_points (&model_before_pressure);
fprintf (sched_dump, MODEL_BAR);
2508 | print_p = true; |
2509 | } |
cost = model_excess_cost (insns[i], print_p);
2511 | if (cost <= 0) |
2512 | { |
priority = INSN_PRIORITY (insns[i]) - insn_delay (insns[i]) - cost;
2514 | priority_base = MAX (priority_base, priority); |
2515 | cost = 0; |
2516 | } |
2517 | INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]) = cost; |
2518 | } |
2519 | if (print_p) |
fprintf (sched_dump, MODEL_BAR);
2521 | |
/* Use MAX (baseECC, 0) and baseP to calculate ECC for each
   instruction.  */
2524 | for (i = 0; i < count; i++) |
2525 | { |
2526 | cost = INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]); |
priority = INSN_PRIORITY (insns[i]) - insn_delay (insns[i]);
2528 | if (cost > 0 && priority > priority_base) |
2529 | { |
2530 | cost += priority_base - priority; |
2531 | INSN_REG_PRESSURE_EXCESS_COST_CHANGE (insns[i]) = MAX (cost, 0); |
2532 | } |
2533 | } |
2534 | } |
2535 | |
2536 | |
2537 | /* Enum of rank_for_schedule heuristic decisions. */ |
2538 | enum rfs_decision { |
2539 | RFS_LIVE_RANGE_SHRINK1, RFS_LIVE_RANGE_SHRINK2, |
2540 | RFS_SCHED_GROUP, RFS_PRESSURE_DELAY, RFS_PRESSURE_TICK, |
2541 | RFS_FEEDS_BACKTRACK_INSN, RFS_PRIORITY, RFS_AUTOPREF, RFS_SPECULATION, |
2542 | RFS_SCHED_RANK, RFS_LAST_INSN, RFS_PRESSURE_INDEX, |
2543 | RFS_DEP_COUNT, RFS_TIE, RFS_FUSION, RFS_COST, RFS_N }; |
2544 | |
2545 | /* Corresponding strings for print outs. */ |
2546 | static const char *rfs_str[RFS_N] = { |
2547 | "RFS_LIVE_RANGE_SHRINK1" , "RFS_LIVE_RANGE_SHRINK2" , |
2548 | "RFS_SCHED_GROUP" , "RFS_PRESSURE_DELAY" , "RFS_PRESSURE_TICK" , |
2549 | "RFS_FEEDS_BACKTRACK_INSN" , "RFS_PRIORITY" , "RFS_AUTOPREF" , "RFS_SPECULATION" , |
2550 | "RFS_SCHED_RANK" , "RFS_LAST_INSN" , "RFS_PRESSURE_INDEX" , |
2551 | "RFS_DEP_COUNT" , "RFS_TIE" , "RFS_FUSION" , "RFS_COST" }; |
2552 | |
2553 | /* Statistical breakdown of rank_for_schedule decisions. */ |
2554 | struct rank_for_schedule_stats_t { unsigned stats[RFS_N]; }; |
2555 | static rank_for_schedule_stats_t rank_for_schedule_stats; |
2556 | |
2557 | /* Return the result of comparing insns TMP and TMP2 and update |
2558 | Rank_For_Schedule statistics. */ |
2559 | static int |
2560 | rfs_result (enum rfs_decision decision, int result, rtx tmp, rtx tmp2) |
2561 | { |
2562 | ++rank_for_schedule_stats.stats[decision]; |
2563 | if (result < 0) |
2564 | INSN_LAST_RFS_WIN (tmp) = decision; |
2565 | else if (result > 0) |
2566 | INSN_LAST_RFS_WIN (tmp2) = decision; |
2567 | else |
2568 | gcc_unreachable (); |
2569 | return result; |
2570 | } |
2571 | |
2572 | /* Sorting predicate to move DEBUG_INSNs to the top of ready list, while |
2573 | keeping normal insns in original order. */ |
2574 | |
2575 | static int |
2576 | rank_for_schedule_debug (const void *x, const void *y) |
2577 | { |
2578 | rtx_insn *tmp = *(rtx_insn * const *) y; |
2579 | rtx_insn *tmp2 = *(rtx_insn * const *) x; |
2580 | |
2581 | /* Schedule debug insns as early as possible. */ |
2582 | if (DEBUG_INSN_P (tmp) && !DEBUG_INSN_P (tmp2)) |
2583 | return -1; |
2584 | else if (!DEBUG_INSN_P (tmp) && DEBUG_INSN_P (tmp2)) |
2585 | return 1; |
2586 | else if (DEBUG_INSN_P (tmp) && DEBUG_INSN_P (tmp2)) |
2587 | return INSN_LUID (tmp) - INSN_LUID (tmp2); |
2588 | else |
2589 | return INSN_RFS_DEBUG_ORIG_ORDER (tmp2) - INSN_RFS_DEBUG_ORIG_ORDER (tmp); |
2590 | } |
2591 | |
2592 | /* Returns a positive value if x is preferred; returns a negative value if |
2593 | y is preferred. Should never return 0, since that will make the sort |
2594 | unstable. */ |
2595 | |
2596 | static int |
2597 | rank_for_schedule (const void *x, const void *y) |
2598 | { |
2599 | rtx_insn *tmp = *(rtx_insn * const *) y; |
2600 | rtx_insn *tmp2 = *(rtx_insn * const *) x; |
2601 | int tmp_class, tmp2_class; |
2602 | int val, priority_val, info_val, diff; |
2603 | |
2604 | if (live_range_shrinkage_p) |
2605 | { |
2606 | /* Don't use SCHED_PRESSURE_MODEL -- it results in much worse |
2607 | code. */ |
2608 | gcc_assert (sched_pressure == SCHED_PRESSURE_WEIGHTED); |
2609 | if ((INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp) < 0 |
2610 | || INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp2) < 0) |
2611 | && (diff = (INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp) |
2612 | - INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp2))) != 0) |
return rfs_result (RFS_LIVE_RANGE_SHRINK1, diff, tmp, tmp2);
2614 | /* Sort by INSN_LUID (original insn order), so that we make the |
2615 | sort stable. This minimizes instruction movement, thus |
2616 | minimizing sched's effect on debugging and cross-jumping. */ |
return rfs_result (RFS_LIVE_RANGE_SHRINK2,
2618 | INSN_LUID (tmp) - INSN_LUID (tmp2), tmp, tmp2); |
2619 | } |
2620 | |
/* The insn in a schedule group should be issued first.  */
if (flag_sched_group_heuristic &&
SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
return rfs_result (RFS_SCHED_GROUP, SCHED_GROUP_P (tmp2) ? 1 : -1,
tmp, tmp2);
2626 | |
2627 | /* Make sure that priority of TMP and TMP2 are initialized. */ |
2628 | gcc_assert (INSN_PRIORITY_KNOWN (tmp) && INSN_PRIORITY_KNOWN (tmp2)); |
2629 | |
2630 | if (sched_fusion) |
2631 | { |
/* An instruction that has the same fusion priority as the last
   scheduled instruction is the one we pick next.  If that is not
   the case, we sort the ready list first by fusion priority, then
   by priority, and finally by INSN_LUID.  */
2636 | int a = INSN_FUSION_PRIORITY (tmp); |
2637 | int b = INSN_FUSION_PRIORITY (tmp2); |
2638 | int last = -1; |
2639 | |
2640 | if (last_nondebug_scheduled_insn |
2641 | && !NOTE_P (last_nondebug_scheduled_insn) |
2642 | && BLOCK_FOR_INSN (insn: tmp) |
2643 | == BLOCK_FOR_INSN (insn: last_nondebug_scheduled_insn)) |
2644 | last = INSN_FUSION_PRIORITY (last_nondebug_scheduled_insn); |
2645 | |
2646 | if (a != last && b != last) |
2647 | { |
2648 | if (a == b) |
2649 | { |
2650 | a = INSN_PRIORITY (tmp); |
2651 | b = INSN_PRIORITY (tmp2); |
2652 | } |
2653 | if (a != b) |
return rfs_result (RFS_FUSION, b - a, tmp, tmp2);
2655 | else |
return rfs_result (RFS_FUSION,
2657 | INSN_LUID (tmp) - INSN_LUID (tmp2), tmp, tmp2); |
2658 | } |
2659 | else if (a == b) |
2660 | { |
2661 | gcc_assert (last_nondebug_scheduled_insn |
2662 | && !NOTE_P (last_nondebug_scheduled_insn)); |
2663 | last = INSN_PRIORITY (last_nondebug_scheduled_insn); |
2664 | |
2665 | a = abs (INSN_PRIORITY (tmp) - last); |
2666 | b = abs (INSN_PRIORITY (tmp2) - last); |
2667 | if (a != b) |
return rfs_result (RFS_FUSION, a - b, tmp, tmp2);
2669 | else |
return rfs_result (RFS_FUSION,
2671 | INSN_LUID (tmp) - INSN_LUID (tmp2), tmp, tmp2); |
2672 | } |
2673 | else if (a == last) |
return rfs_result (RFS_FUSION, -1, tmp, tmp2);
2675 | else |
return rfs_result (RFS_FUSION, 1, tmp, tmp2);
2677 | } |
2678 | |
2679 | if (sched_pressure != SCHED_PRESSURE_NONE) |
2680 | { |
2681 | /* Prefer insn whose scheduling results in the smallest register |
2682 | pressure excess. */ |
2683 | if ((diff = (INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp) |
+ insn_delay (tmp)
- INSN_REG_PRESSURE_EXCESS_COST_CHANGE (tmp2)
- insn_delay (tmp2))))
return rfs_result (RFS_PRESSURE_DELAY, diff, tmp, tmp2);
2688 | } |
2689 | |
2690 | if (sched_pressure != SCHED_PRESSURE_NONE |
2691 | && (INSN_TICK (tmp2) > clock_var || INSN_TICK (tmp) > clock_var) |
2692 | && INSN_TICK (tmp2) != INSN_TICK (tmp)) |
2693 | { |
2694 | diff = INSN_TICK (tmp) - INSN_TICK (tmp2); |
return rfs_result (RFS_PRESSURE_TICK, diff, tmp, tmp2);
2696 | } |
2697 | |
2698 | /* If we are doing backtracking in this schedule, prefer insns that |
2699 | have forward dependencies with negative cost against an insn that |
2700 | was already scheduled. */ |
2701 | if (current_sched_info->flags & DO_BACKTRACKING) |
2702 | { |
2703 | priority_val = FEEDS_BACKTRACK_INSN (tmp2) - FEEDS_BACKTRACK_INSN (tmp); |
2704 | if (priority_val) |
return rfs_result (RFS_FEEDS_BACKTRACK_INSN, priority_val, tmp, tmp2);
2706 | } |
2707 | |
2708 | /* Prefer insn with higher priority. */ |
2709 | priority_val = INSN_PRIORITY (tmp2) - INSN_PRIORITY (tmp); |
2710 | |
2711 | if (flag_sched_critical_path_heuristic && priority_val) |
return rfs_result (RFS_PRIORITY, priority_val, tmp, tmp2);
2713 | |
2714 | if (param_sched_autopref_queue_depth >= 0) |
2715 | { |
2716 | int autopref = autopref_rank_for_schedule (tmp, tmp2); |
2717 | if (autopref != 0) |
return rfs_result (RFS_AUTOPREF, autopref, tmp, tmp2);
2719 | } |
2720 | |
2721 | /* Prefer speculative insn with greater dependencies weakness. */ |
2722 | if (flag_sched_spec_insn_heuristic && spec_info) |
2723 | { |
2724 | ds_t ds1, ds2; |
2725 | dw_t dw1, dw2; |
2726 | int dw; |
2727 | |
2728 | ds1 = TODO_SPEC (tmp) & SPECULATIVE; |
2729 | if (ds1) |
2730 | dw1 = ds_weak (ds1); |
2731 | else |
2732 | dw1 = NO_DEP_WEAK; |
2733 | |
2734 | ds2 = TODO_SPEC (tmp2) & SPECULATIVE; |
2735 | if (ds2) |
2736 | dw2 = ds_weak (ds2); |
2737 | else |
2738 | dw2 = NO_DEP_WEAK; |
2739 | |
2740 | dw = dw2 - dw1; |
2741 | if (dw > (NO_DEP_WEAK / 8) || dw < -(NO_DEP_WEAK / 8)) |
return rfs_result (RFS_SPECULATION, dw, tmp, tmp2);
2743 | } |
2744 | |
2745 | info_val = (*current_sched_info->rank) (tmp, tmp2); |
2746 | if (flag_sched_rank_heuristic && info_val) |
return rfs_result (RFS_SCHED_RANK, info_val, tmp, tmp2);
2748 | |
2749 | /* Compare insns based on their relation to the last scheduled |
2750 | non-debug insn. */ |
2751 | if (flag_sched_last_insn_heuristic && last_nondebug_scheduled_insn) |
2752 | { |
2753 | dep_t dep1; |
2754 | dep_t dep2; |
2755 | rtx_insn *last = last_nondebug_scheduled_insn; |
2756 | |
2757 | /* Classify the instructions into three classes: |
1) Data dependent on last scheduled insn.
2759 | 2) Anti/Output dependent on last scheduled insn. |
2760 | 3) Independent of last scheduled insn, or has latency of one. |
2761 | Choose the insn from the highest numbered class if different. */ |
2762 | dep1 = sd_find_dep_between (last, tmp, true); |
2763 | |
if (dep1 == NULL || dep_cost (dep1) == 1)
2765 | tmp_class = 3; |
2766 | else if (/* Data dependence. */ |
2767 | DEP_TYPE (dep1) == REG_DEP_TRUE) |
2768 | tmp_class = 1; |
2769 | else |
2770 | tmp_class = 2; |
2771 | |
2772 | dep2 = sd_find_dep_between (last, tmp2, true); |
2773 | |
if (dep2 == NULL || dep_cost (dep2) == 1)
2775 | tmp2_class = 3; |
2776 | else if (/* Data dependence. */ |
2777 | DEP_TYPE (dep2) == REG_DEP_TRUE) |
2778 | tmp2_class = 1; |
2779 | else |
2780 | tmp2_class = 2; |
2781 | |
2782 | if ((val = tmp2_class - tmp_class)) |
return rfs_result (RFS_LAST_INSN, val, tmp, tmp2);
2784 | } |
2785 | |
2786 | /* Prefer instructions that occur earlier in the model schedule. */ |
2787 | if (sched_pressure == SCHED_PRESSURE_MODEL) |
2788 | { |
diff = model_index (tmp) - model_index (tmp2);
2790 | if (diff != 0) |
return rfs_result (RFS_PRESSURE_INDEX, diff, tmp, tmp2);
2792 | } |
2793 | |
2794 | /* Prefer the insn which has more later insns that depend on it. |
2795 | This gives the scheduler more freedom when scheduling later |
2796 | instructions at the expense of added register pressure. */ |
2797 | |
val = (dep_list_size (tmp2, SD_LIST_FORW)
- dep_list_size (tmp, SD_LIST_FORW));
2800 | |
2801 | if (flag_sched_dep_count_heuristic && val != 0) |
return rfs_result (RFS_DEP_COUNT, val, tmp, tmp2);
2803 | |
2804 | /* Sort by INSN_COST rather than INSN_LUID. This means that instructions |
2805 | which take longer to execute are prioritised and it leads to more |
2806 | dual-issue opportunities on in-order cores which have this feature. */ |
2807 | |
2808 | if (INSN_COST (tmp) != INSN_COST (tmp2)) |
return rfs_result (RFS_COST, INSN_COST (tmp2) - INSN_COST (tmp),
2810 | tmp, tmp2); |
2811 | |
2812 | /* If insns are equally good, sort by INSN_LUID (original insn order), |
2813 | so that we make the sort stable. This minimizes instruction movement, |
2814 | thus minimizing sched's effect on debugging and cross-jumping. */ |
return rfs_result (RFS_TIE, INSN_LUID (tmp) - INSN_LUID (tmp2), tmp, tmp2);
2816 | } |
2817 | |
2818 | /* Resort the array A in which only element at index N may be out of order. */ |
2819 | |
2820 | HAIFA_INLINE static void |
2821 | swap_sort (rtx_insn **a, int n) |
2822 | { |
2823 | rtx_insn *insn = a[n - 1]; |
2824 | int i = n - 2; |
2825 | |
while (i >= 0 && rank_for_schedule (a + i, &insn) >= 0)
2827 | { |
2828 | a[i + 1] = a[i]; |
2829 | i -= 1; |
2830 | } |
2831 | a[i + 1] = insn; |
2832 | } |
2833 | |
2834 | /* Add INSN to the insn queue so that it can be executed at least |
2835 | N_CYCLES after the currently executing insn. Preserve insns |
2836 | chain for debugging purposes. REASON will be printed in debugging |
2837 | output. */ |
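
/* As an illustrative call, queue_insn (insn, 2, "resource conflict")
   links INSN into insn_queue[NEXT_Q_AFTER (q_ptr, 2)], so the queue
   releases it into the ready list once two more cycles have advanced;
   the REASON string appears only in the dump.  */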
2838 | |
2839 | HAIFA_INLINE static void |
2840 | queue_insn (rtx_insn *insn, int n_cycles, const char *reason) |
2841 | { |
2842 | int next_q = NEXT_Q_AFTER (q_ptr, n_cycles); |
2843 | rtx_insn_list *link = alloc_INSN_LIST (insn, insn_queue[next_q]); |
2844 | int new_tick; |
2845 | |
2846 | gcc_assert (n_cycles <= max_insn_queue_index); |
2847 | gcc_assert (!DEBUG_INSN_P (insn)); |
2848 | |
2849 | insn_queue[next_q] = link; |
2850 | q_size += 1; |
2851 | |
2852 | if (sched_verbose >= 2) |
2853 | { |
fprintf (sched_dump, ";;\t\tReady-->Q: insn %s: ",
2855 | (*current_sched_info->print_insn) (insn, 0)); |
2856 | |
fprintf (sched_dump, "queued for %d cycles (%s).\n", n_cycles, reason);
2858 | } |
2859 | |
2860 | QUEUE_INDEX (insn) = next_q; |
2861 | |
2862 | if (current_sched_info->flags & DO_BACKTRACKING) |
2863 | { |
2864 | new_tick = clock_var + n_cycles; |
2865 | if (INSN_TICK (insn) == INVALID_TICK || INSN_TICK (insn) < new_tick) |
2866 | INSN_TICK (insn) = new_tick; |
2867 | |
2868 | if (INSN_EXACT_TICK (insn) != INVALID_TICK |
2869 | && INSN_EXACT_TICK (insn) < clock_var + n_cycles) |
2870 | { |
2871 | must_backtrack = true; |
2872 | if (sched_verbose >= 2) |
fprintf (sched_dump, ";;\t\tcausing a backtrack.\n");
2874 | } |
2875 | } |
2876 | } |
2877 | |
2878 | /* Remove INSN from queue. */ |
2879 | static void |
2880 | queue_remove (rtx_insn *insn) |
2881 | { |
2882 | gcc_assert (QUEUE_INDEX (insn) >= 0); |
2883 | remove_free_INSN_LIST_elem (insn, &insn_queue[QUEUE_INDEX (insn)]); |
2884 | q_size--; |
2885 | QUEUE_INDEX (insn) = QUEUE_NOWHERE; |
2886 | } |
2887 | |
2888 | /* Return a pointer to the bottom of the ready list, i.e. the insn |
2889 | with the lowest priority. */ |
2890 | |
2891 | rtx_insn ** |
2892 | ready_lastpos (struct ready_list *ready) |
2893 | { |
2894 | gcc_assert (ready->n_ready >= 1); |
2895 | return ready->vec + ready->first - ready->n_ready + 1; |
2896 | } |
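
/* The ready list thus occupies a contiguous block of READY->vec
   ending at index READY->first, which holds the highest-priority
   insn; the lowest-priority insn sits at index
   READY->first - READY->n_ready + 1.  For example (illustrative
   numbers), with first == 5 and n_ready == 3 the insns occupy
   vec[3..5], ready_lastpos returns &vec[3], and ready_element
   (ready, 0), defined below, returns vec[5].  */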
2897 | |
2898 | /* Add an element INSN to the ready list so that it ends up with the |
2899 | lowest/highest priority depending on FIRST_P. */ |
2900 | |
2901 | HAIFA_INLINE static void |
2902 | ready_add (struct ready_list *ready, rtx_insn *insn, bool first_p) |
2903 | { |
2904 | if (!first_p) |
2905 | { |
2906 | if (ready->first == ready->n_ready) |
2907 | { |
memmove (ready->vec + ready->veclen - ready->n_ready,
ready_lastpos (ready),
ready->n_ready * sizeof (rtx));
2911 | ready->first = ready->veclen - 1; |
2912 | } |
2913 | ready->vec[ready->first - ready->n_ready] = insn; |
2914 | } |
2915 | else |
2916 | { |
2917 | if (ready->first == ready->veclen - 1) |
2918 | { |
2919 | if (ready->n_ready) |
2920 | /* ready_lastpos() fails when called with (ready->n_ready == 0). */ |
memmove (ready->vec + ready->veclen - ready->n_ready - 1,
ready_lastpos (ready),
ready->n_ready * sizeof (rtx));
2924 | ready->first = ready->veclen - 2; |
2925 | } |
2926 | ready->vec[++(ready->first)] = insn; |
2927 | } |
2928 | |
2929 | ready->n_ready++; |
2930 | if (DEBUG_INSN_P (insn)) |
2931 | ready->n_debug++; |
2932 | |
2933 | gcc_assert (QUEUE_INDEX (insn) != QUEUE_READY); |
2934 | QUEUE_INDEX (insn) = QUEUE_READY; |
2935 | |
2936 | if (INSN_EXACT_TICK (insn) != INVALID_TICK |
2937 | && INSN_EXACT_TICK (insn) < clock_var) |
2938 | { |
2939 | must_backtrack = true; |
2940 | } |
2941 | } |
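
/* Illustrative note (not in the original source): the memmove calls in
   ready_add slide the whole window to the far end of the vector when it
   is about to run off an edge.  E.g. with veclen == 8, n_ready == 3 and
   first == 3, the live window is vec[1..3]; adding a lowest-priority insn
   first copies the window to vec[5..7], sets first = 7, and then stores
   the new insn in vec[4].  */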

/* Remove the element with the highest priority from the ready list and
   return it.  */

HAIFA_INLINE static rtx_insn *
ready_remove_first (struct ready_list *ready)
{
  rtx_insn *t;

  gcc_assert (ready->n_ready);
  t = ready->vec[ready->first--];
  ready->n_ready--;
  if (DEBUG_INSN_P (t))
    ready->n_debug--;
  /* If the queue becomes empty, reset it.  */
  if (ready->n_ready == 0)
    ready->first = ready->veclen - 1;

  gcc_assert (QUEUE_INDEX (t) == QUEUE_READY);
  QUEUE_INDEX (t) = QUEUE_NOWHERE;

  return t;
}

/* The following code implements multi-pass scheduling for the first
   cycle.  In other words, we try to choose the ready insn that
   permits starting the maximum number of insns on the same cycle.  */

/* Return a pointer to the element INDEX from the ready list.  INDEX for
   the insn with the highest priority is 0, and the lowest priority has
   N_READY - 1.  */

rtx_insn *
ready_element (struct ready_list *ready, int index)
{
  gcc_assert (ready->n_ready && index < ready->n_ready);

  return ready->vec[ready->first - index];
}

/* Remove the element INDEX from the ready list and return it.  INDEX
   for the insn with the highest priority is 0, and the lowest priority
   has N_READY - 1.  */

HAIFA_INLINE static rtx_insn *
ready_remove (struct ready_list *ready, int index)
{
  rtx_insn *t;
  int i;

  if (index == 0)
    return ready_remove_first (ready);
  gcc_assert (ready->n_ready && index < ready->n_ready);
  t = ready->vec[ready->first - index];
  ready->n_ready--;
  if (DEBUG_INSN_P (t))
    ready->n_debug--;
  for (i = index; i < ready->n_ready; i++)
    ready->vec[ready->first - i] = ready->vec[ready->first - i - 1];
  QUEUE_INDEX (t) = QUEUE_NOWHERE;
  return t;
}

/* Remove INSN from the ready list.  */
static void
ready_remove_insn (rtx_insn *insn)
{
  int i;

  for (i = 0; i < readyp->n_ready; i++)
    if (ready_element (readyp, i) == insn)
      {
        ready_remove (readyp, i);
        return;
      }
  gcc_unreachable ();
}

/* Calculate the difference of two statistics sets WAS and NOW.
   The result is returned in WAS.  */
static void
rank_for_schedule_stats_diff (rank_for_schedule_stats_t *was,
                              const rank_for_schedule_stats_t *now)
{
  for (int i = 0; i < RFS_N; ++i)
    was->stats[i] = now->stats[i] - was->stats[i];
}

/* Print rank_for_schedule statistics.  */
static void
print_rank_for_schedule_stats (const char *prefix,
                               const rank_for_schedule_stats_t *stats,
                               struct ready_list *ready)
{
  for (int i = 0; i < RFS_N; ++i)
    if (stats->stats[i])
      {
        fprintf (sched_dump, "%s%20s: %u", prefix, rfs_str[i],
                 stats->stats[i]);

        if (ready != NULL)
          /* Print out insns that won due to RFS_<I>.  */
          {
            rtx_insn **p = ready_lastpos (ready);

            fprintf (sched_dump, ":");
            /* Start with 1 since least-priority insn didn't have any wins.  */
            for (int j = 1; j < ready->n_ready; ++j)
              if (INSN_LAST_RFS_WIN (p[j]) == i)
                fprintf (sched_dump, " %s",
                         (*current_sched_info->print_insn) (p[j], 0));
          }
        fprintf (sched_dump, "\n");
      }
}

/* Separate DEBUG_INSNS from normal insns.  DEBUG_INSNs go to the end
   of the array.  */
static void
ready_sort_debug (struct ready_list *ready)
{
  int i;
  rtx_insn **first = ready_lastpos (ready);

  for (i = 0; i < ready->n_ready; ++i)
    if (!DEBUG_INSN_P (first[i]))
      INSN_RFS_DEBUG_ORIG_ORDER (first[i]) = i;

  qsort (first, ready->n_ready, sizeof (rtx), rank_for_schedule_debug);
}

/* Sort non-debug insns in the ready list READY by ascending priority.
   Assumes that all debug insns are separated from the real insns.  */
static void
ready_sort_real (struct ready_list *ready)
{
  int i;
  rtx_insn **first = ready_lastpos (ready);
  int n_ready_real = ready->n_ready - ready->n_debug;

  if (sched_pressure == SCHED_PRESSURE_WEIGHTED)
    for (i = 0; i < n_ready_real; ++i)
      setup_insn_reg_pressure_info (first[i]);
  else if (sched_pressure == SCHED_PRESSURE_MODEL
           && model_curr_point < model_num_insns)
    model_set_excess_costs (first, n_ready_real);

  rank_for_schedule_stats_t stats1;
  if (sched_verbose >= 4)
    stats1 = rank_for_schedule_stats;

  if (n_ready_real == 2)
    swap_sort (first, n_ready_real);
  else if (n_ready_real > 2)
    qsort (first, n_ready_real, sizeof (rtx), rank_for_schedule);

  if (sched_verbose >= 4)
    {
      rank_for_schedule_stats_diff (&stats1, &rank_for_schedule_stats);
      print_rank_for_schedule_stats (";;\t\t", &stats1, ready);
    }
}

/* Sort the ready list READY by ascending priority.  */
static void
ready_sort (struct ready_list *ready)
{
  if (ready->n_debug > 0)
    ready_sort_debug (ready);
  else
    ready_sort_real (ready);
}

/* PREV is an insn that is ready to execute.  Adjust its priority if that
   will help shorten or lengthen register lifetimes as appropriate.  Also
   provide a hook for the target to tweak itself.  */

HAIFA_INLINE static void
adjust_priority (rtx_insn *prev)
{
  /* ??? There used to be code here to try and estimate how an insn
     affected register lifetimes, but it did it by looking at REG_DEAD
     notes, which we removed in schedule_region.  Nor did it try to
     take into account register pressure or anything useful like that.

     Revisit when we have a machine model to work with and not before.  */

  if (targetm.sched.adjust_priority)
    INSN_PRIORITY (prev) =
      targetm.sched.adjust_priority (prev, INSN_PRIORITY (prev));
}

/* Advance DFA state STATE on one cycle.  */
void
advance_state (state_t state)
{
  if (targetm.sched.dfa_pre_advance_cycle)
    targetm.sched.dfa_pre_advance_cycle ();

  if (targetm.sched.dfa_pre_cycle_insn)
    state_transition (state,
                      targetm.sched.dfa_pre_cycle_insn ());

  state_transition (state, NULL);

  if (targetm.sched.dfa_post_cycle_insn)
    state_transition (state,
                      targetm.sched.dfa_post_cycle_insn ());

  if (targetm.sched.dfa_post_advance_cycle)
    targetm.sched.dfa_post_advance_cycle ();
}

/* Advance time on one cycle.  */
HAIFA_INLINE static void
advance_one_cycle (void)
{
  int i;

  advance_state (curr_state);
  for (i = 4; i <= sched_verbose; ++i)
    fprintf (sched_dump, ";;\tAdvance the current state: %d.\n", clock_var);
}

/* Update register pressure after scheduling INSN.  */
static void
update_register_pressure (rtx_insn *insn)
{
  struct reg_use_data *use;
  struct reg_set_data *set;

  gcc_checking_assert (!DEBUG_INSN_P (insn));

  for (use = INSN_REG_USE_LIST (insn); use != NULL; use = use->next_insn_use)
    if (dying_use_p (use))
      mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure,
                                 use->regno, false);
  for (set = INSN_REG_SET_LIST (insn); set != NULL; set = set->next_insn_set)
    mark_regno_birth_or_death (curr_reg_live, curr_reg_pressure,
                               set->regno, true);
}
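
/* Illustrative example (not in the original source): for an insn
   (set (reg R3) (plus (reg R1) (reg R2))) whose uses of R1 and R2 are
   both dying, the first loop above lowers the pressure of their class
   by two and the second loop raises it by one for R3, a net change of
   -1 if all three registers share a pressure class.  */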

/* Set up or update (if UPDATE_P) max register pressure (see its
   meaning in sched-int.h::_haifa_insn_data) for all current BB insns
   after insn AFTER.  */
static void
setup_insn_max_reg_pressure (rtx_insn *after, bool update_p)
{
  int i, p;
  bool eq_p;
  rtx_insn *insn;
  static int max_reg_pressure[N_REG_CLASSES];

  save_reg_pressure ();
  for (i = 0; i < ira_pressure_classes_num; i++)
    max_reg_pressure[ira_pressure_classes[i]]
      = curr_reg_pressure[ira_pressure_classes[i]];
  for (insn = NEXT_INSN (after);
       insn != NULL_RTX && ! BARRIER_P (insn)
       && BLOCK_FOR_INSN (insn) == BLOCK_FOR_INSN (after);
       insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn))
      {
        eq_p = true;
        for (i = 0; i < ira_pressure_classes_num; i++)
          {
            p = max_reg_pressure[ira_pressure_classes[i]];
            if (INSN_MAX_REG_PRESSURE (insn)[i] != p)
              {
                eq_p = false;
                INSN_MAX_REG_PRESSURE (insn)[i]
                  = max_reg_pressure[ira_pressure_classes[i]];
              }
          }
        if (update_p && eq_p)
          break;
        update_register_pressure (insn);
        for (i = 0; i < ira_pressure_classes_num; i++)
          if (max_reg_pressure[ira_pressure_classes[i]]
              < curr_reg_pressure[ira_pressure_classes[i]])
            max_reg_pressure[ira_pressure_classes[i]]
              = curr_reg_pressure[ira_pressure_classes[i]];
      }
  restore_reg_pressure ();
}

/* Update the current register pressure after scheduling INSN.  Also
   update the max register pressure for unscheduled insns of the
   current BB.  */
static void
update_reg_and_insn_max_reg_pressure (rtx_insn *insn)
{
  int i;
  int before[N_REG_CLASSES];

  for (i = 0; i < ira_pressure_classes_num; i++)
    before[i] = curr_reg_pressure[ira_pressure_classes[i]];
  update_register_pressure (insn);
  for (i = 0; i < ira_pressure_classes_num; i++)
    if (curr_reg_pressure[ira_pressure_classes[i]] != before[i])
      break;
  if (i < ira_pressure_classes_num)
    setup_insn_max_reg_pressure (insn, true);
}

/* Set up register pressure at the beginning of basic block BB whose
   insns start after insn AFTER.  Also set up max register pressure
   for all insns of the basic block.  */
void
sched_setup_bb_reg_pressure_info (basic_block bb, rtx_insn *after)
{
  gcc_assert (sched_pressure == SCHED_PRESSURE_WEIGHTED);
  initiate_bb_reg_pressure_info (bb);
  setup_insn_max_reg_pressure (after, false);
}

/* If doing predication while scheduling, verify whether INSN, which
   has just been scheduled, clobbers the conditions of any
   instructions that must be predicated in order to break their
   dependencies.  If so, remove them from the queues so that they will
   only be scheduled once their control dependency is resolved.  */

static void
check_clobbered_conditions (rtx_insn *insn)
{
  HARD_REG_SET t;
  int i;

  if ((current_sched_info->flags & DO_PREDICATION) == 0)
    return;

  find_all_hard_reg_sets (insn, &t, true);

 restart:
  for (i = 0; i < ready.n_ready; i++)
    {
      rtx_insn *x = ready_element (&ready, i);
      if (TODO_SPEC (x) == DEP_CONTROL && cond_clobbered_p (x, t))
        {
          ready_remove_insn (x);
          goto restart;
        }
    }
  for (i = 0; i <= max_insn_queue_index; i++)
    {
      rtx_insn_list *link;
      int q = NEXT_Q_AFTER (q_ptr, i);

    restart_queue:
      for (link = insn_queue[q]; link; link = link->next ())
        {
          rtx_insn *x = link->insn ();
          if (TODO_SPEC (x) == DEP_CONTROL && cond_clobbered_p (x, t))
            {
              queue_remove (x);
              goto restart_queue;
            }
        }
    }
}

/* Return (in order):

   - positive if INSN adversely affects the pressure on one
     register class

   - negative if INSN reduces the pressure on one register class

   - 0 if INSN doesn't affect the pressure on any register class.  */

static int
model_classify_pressure (struct model_insn_info *insn)
{
  struct reg_pressure_data *reg_pressure;
  int death[N_REG_CLASSES];
  int pci, cl, sum;

  calculate_reg_deaths (insn->insn, death);
  reg_pressure = INSN_REG_PRESSURE (insn->insn);
  sum = 0;
  for (pci = 0; pci < ira_pressure_classes_num; pci++)
    {
      cl = ira_pressure_classes[pci];
      if (death[cl] < reg_pressure[pci].set_increase)
        return 1;
      sum += reg_pressure[pci].set_increase - death[cl];
    }
  return sum;
}
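
/* Illustrative example (not in the original source): if INSN sets one
   register of a class (set_increase == 1) while two registers of that
   class die in it (death == 2), no class worsens and the sum is
   1 - 2 == -1, so INSN reduces pressure.  Had set_increase been 3 with
   the same deaths, the early "return 1" would fire for that class.  */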

/* Return true if INSN1 should come before INSN2 in the model schedule.  */

static int
model_order_p (struct model_insn_info *insn1, struct model_insn_info *insn2)
{
  unsigned int height1, height2;
  unsigned int priority1, priority2;

  /* Prefer instructions with a higher model priority.  */
  if (insn1->model_priority != insn2->model_priority)
    return insn1->model_priority > insn2->model_priority;

  /* Combine the length of the longest path of satisfied true dependencies
     that leads to each instruction (depth) with the length of the longest
     path of any dependencies that leads from the instruction (alap).
     Prefer instructions with the greatest combined length.  If the combined
     lengths are equal, prefer instructions with the greatest depth.

     The idea is that, if we have a set S of "equal" instructions that each
     have ALAP value X, and we pick one such instruction I, any true-dependent
     successors of I that have ALAP value X - 1 should be preferred over S.
     This encourages the schedule to be "narrow" rather than "wide".
     However, if I is a low-priority instruction that we decided to
     schedule because of its model_classify_pressure, and if there
     is a set of higher-priority instructions T, the aforementioned
     successors of I should not have the edge over T.  */
  height1 = insn1->depth + insn1->alap;
  height2 = insn2->depth + insn2->alap;
  if (height1 != height2)
    return height1 > height2;
  if (insn1->depth != insn2->depth)
    return insn1->depth > insn2->depth;

  /* We have no real preference between INSN1 and INSN2 as far as attempts
     to reduce pressure go.  Prefer instructions with higher priorities.  */
  priority1 = INSN_PRIORITY (insn1->insn);
  priority2 = INSN_PRIORITY (insn2->insn);
  if (priority1 != priority2)
    return priority1 > priority2;

  /* Use the original rtl sequence as a tie-breaker.  */
  return insn1 < insn2;
}
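
/* Illustrative example (not in the original source) of the height rule
   above: if I1 has depth 4 and alap 2 while I2 has depth 3 and alap 3,
   both have height 6, so the tie falls through to the depth test and I1
   wins -- preferring the deeper insn keeps the schedule "narrow".  */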

/* Add INSN to the model worklist immediately after PREV.  Add it to the
   beginning of the list if PREV is null.  */

static void
model_add_to_worklist_at (struct model_insn_info *insn,
                          struct model_insn_info *prev)
{
  gcc_assert (QUEUE_INDEX (insn->insn) == QUEUE_NOWHERE);
  QUEUE_INDEX (insn->insn) = QUEUE_READY;

  insn->prev = prev;
  if (prev)
    {
      insn->next = prev->next;
      prev->next = insn;
    }
  else
    {
      insn->next = model_worklist;
      model_worklist = insn;
    }
  if (insn->next)
    insn->next->prev = insn;
}

/* Remove INSN from the model worklist.  */

static void
model_remove_from_worklist (struct model_insn_info *insn)
{
  gcc_assert (QUEUE_INDEX (insn->insn) == QUEUE_READY);
  QUEUE_INDEX (insn->insn) = QUEUE_NOWHERE;

  if (insn->prev)
    insn->prev->next = insn->next;
  else
    model_worklist = insn->next;
  if (insn->next)
    insn->next->prev = insn->prev;
}

/* Add INSN to the model worklist.  Start looking for a suitable position
   between neighbors PREV and NEXT, testing at most param_max_sched_ready_insns
   insns either side.  A null PREV indicates the beginning of the list and
   a null NEXT indicates the end.  */

static void
model_add_to_worklist (struct model_insn_info *insn,
                       struct model_insn_info *prev,
                       struct model_insn_info *next)
{
  int count;

  count = param_max_sched_ready_insns;
  if (count > 0 && prev && model_order_p (insn, prev))
    do
      {
        count--;
        prev = prev->prev;
      }
    while (count > 0 && prev && model_order_p (insn, prev));
  else
    while (count > 0 && next && model_order_p (next, insn))
      {
        count--;
        prev = next;
        next = next->next;
      }
  model_add_to_worklist_at (insn, prev);
}

/* INSN may now have a higher priority (in the model_order_p sense)
   than before.  Move it up the worklist if necessary.  */

static void
model_promote_insn (struct model_insn_info *insn)
{
  struct model_insn_info *prev;
  int count;

  prev = insn->prev;
  count = param_max_sched_ready_insns;
  while (count > 0 && prev && model_order_p (insn, prev))
    {
      count--;
      prev = prev->prev;
    }
  if (prev != insn->prev)
    {
      model_remove_from_worklist (insn);
      model_add_to_worklist_at (insn, prev);
    }
}

/* Add INSN to the end of the model schedule.  */

static void
model_add_to_schedule (rtx_insn *insn)
{
  unsigned int point;

  gcc_assert (QUEUE_INDEX (insn) == QUEUE_NOWHERE);
  QUEUE_INDEX (insn) = QUEUE_SCHEDULED;

  point = model_schedule.length ();
  model_schedule.quick_push (insn);
  INSN_MODEL_INDEX (insn) = point + 1;
}
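
/* Note (an inference, not stated here): the index is stored as point + 1,
   presumably so that a zero-initialized INSN_MODEL_INDEX can mean "not in
   the model schedule"; a reader such as model_index would then subtract
   the 1 back off.  */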

/* Analyze the instructions that are to be scheduled, setting up
   MODEL_INSN_INFO (...) and model_num_insns accordingly.  Add ready
   instructions to model_worklist.  */

static void
model_analyze_insns (void)
{
  rtx_insn *start, *end, *iter;
  sd_iterator_def sd_it;
  dep_t dep;
  struct model_insn_info *insn, *con;

  model_num_insns = 0;
  start = PREV_INSN (current_sched_info->next_tail);
  end = current_sched_info->prev_head;
  for (iter = start; iter != end; iter = PREV_INSN (iter))
    if (NONDEBUG_INSN_P (iter))
      {
        insn = MODEL_INSN_INFO (iter);
        insn->insn = iter;
        FOR_EACH_DEP (iter, SD_LIST_FORW, sd_it, dep)
          {
            con = MODEL_INSN_INFO (DEP_CON (dep));
            if (con->insn && insn->alap < con->alap + 1)
              insn->alap = con->alap + 1;
          }

        insn->old_queue = QUEUE_INDEX (iter);
        QUEUE_INDEX (iter) = QUEUE_NOWHERE;

        insn->unscheduled_preds = dep_list_size (iter, SD_LIST_HARD_BACK);
        if (insn->unscheduled_preds == 0)
          model_add_to_worklist (insn, NULL, model_worklist);

        model_num_insns++;
      }
}

/* The global state describes the register pressure at the start of the
   model schedule.  Initialize GROUP accordingly.  */

static void
model_init_pressure_group (struct model_pressure_group *group)
{
  int pci, cl;

  for (pci = 0; pci < ira_pressure_classes_num; pci++)
    {
      cl = ira_pressure_classes[pci];
      group->limits[pci].pressure = curr_reg_pressure[cl];
      group->limits[pci].point = 0;
    }
  /* Use index model_num_insns to record the state after the last
     instruction in the model schedule.  */
  group->model = XNEWVEC (struct model_pressure_data,
                          (model_num_insns + 1) * ira_pressure_classes_num);
}
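
/* Illustrative note (an assumption about the accessor layout, consistent
   with the allocation above): group->model is a flat
   (model_num_insns + 1) x ira_pressure_classes_num matrix, so the
   MODEL_REF_PRESSURE / MODEL_MAX_PRESSURE entry for point POINT and
   pressure class index PCI would live at a fixed stride such as
   group->model[POINT * ira_pressure_classes_num + PCI].  */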

/* Record that MODEL_REF_PRESSURE (GROUP, POINT, PCI) is PRESSURE.
   Update the maximum pressure for the whole schedule.  */

static void
model_record_pressure (struct model_pressure_group *group,
                       int point, int pci, int pressure)
{
  MODEL_REF_PRESSURE (group, point, pci) = pressure;
  if (group->limits[pci].pressure < pressure)
    {
      group->limits[pci].pressure = pressure;
      group->limits[pci].point = point;
    }
}

/* INSN has just been added to the end of the model schedule.  Record its
   register-pressure information.  */

static void
model_record_pressures (struct model_insn_info *insn)
{
  struct reg_pressure_data *reg_pressure;
  int point, pci, cl, delta;
  int death[N_REG_CLASSES];

  point = model_index (insn->insn);
  if (sched_verbose >= 2)
    {
      if (point == 0)
        {
          fprintf (sched_dump, "\n;;\tModel schedule:\n;;\n");
          fprintf (sched_dump, ";;\t| idx insn | mpri hght dpth prio |\n");
        }
      fprintf (sched_dump, ";;\t| %3d %4d | %4d %4d %4d %4d | %-30s ",
               point, INSN_UID (insn->insn), insn->model_priority,
               insn->depth + insn->alap, insn->depth,
               INSN_PRIORITY (insn->insn),
               str_pattern_slim (PATTERN (insn->insn)));
    }
  calculate_reg_deaths (insn->insn, death);
  reg_pressure = INSN_REG_PRESSURE (insn->insn);
  for (pci = 0; pci < ira_pressure_classes_num; pci++)
    {
      cl = ira_pressure_classes[pci];
      delta = reg_pressure[pci].set_increase - death[cl];
      if (sched_verbose >= 2)
        fprintf (sched_dump, " %s:[%d,%+d]", reg_class_names[cl],
                 curr_reg_pressure[cl], delta);
      model_record_pressure (&model_before_pressure, point, pci,
                             curr_reg_pressure[cl]);
    }
  if (sched_verbose >= 2)
    fprintf (sched_dump, "\n");
}

/* All instructions have been added to the model schedule.  Record the
   final register pressure in GROUP and set up all MODEL_MAX_PRESSUREs.  */

static void
model_record_final_pressures (struct model_pressure_group *group)
{
  int point, pci, max_pressure, ref_pressure, cl;

  for (pci = 0; pci < ira_pressure_classes_num; pci++)
    {
      /* Record the final pressure for this class.  */
      cl = ira_pressure_classes[pci];
      point = model_num_insns;
      ref_pressure = curr_reg_pressure[cl];
      model_record_pressure (group, point, pci, ref_pressure);

      /* Record the original maximum pressure.  */
      group->limits[pci].orig_pressure = group->limits[pci].pressure;

      /* Update the MODEL_MAX_PRESSURE for every point of the schedule.  */
      max_pressure = ref_pressure;
      MODEL_MAX_PRESSURE (group, point, pci) = max_pressure;
      while (point > 0)
        {
          point--;
          ref_pressure = MODEL_REF_PRESSURE (group, point, pci);
          max_pressure = MAX (max_pressure, ref_pressure);
          MODEL_MAX_PRESSURE (group, point, pci) = max_pressure;
        }
    }
}
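
/* Illustrative example (not in the original source) of the backward sweep
   above: if a class's reference pressures at points 0..3 are 2, 5, 3, 4,
   the resulting MODEL_MAX_PRESSURE values are 5, 5, 4, 4 -- each point
   records the worst pressure from that point to the end of the model
   schedule.  */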

/* Update all successors of INSN, given that INSN has just been scheduled.  */

static void
model_add_successors_to_worklist (struct model_insn_info *insn)
{
  sd_iterator_def sd_it;
  struct model_insn_info *con;
  dep_t dep;

  FOR_EACH_DEP (insn->insn, SD_LIST_FORW, sd_it, dep)
    {
      con = MODEL_INSN_INFO (DEP_CON (dep));
      /* Ignore debug instructions, and instructions from other blocks.  */
      if (con->insn)
        {
          con->unscheduled_preds--;

          /* Update the depth field of each true-dependent successor.
             Increasing the depth gives them a higher priority than
             before.  */
          if (DEP_TYPE (dep) == REG_DEP_TRUE && con->depth < insn->depth + 1)
            {
              con->depth = insn->depth + 1;
              if (QUEUE_INDEX (con->insn) == QUEUE_READY)
                model_promote_insn (con);
            }

          /* If this is a true dependency, or if there are no remaining
             dependencies for CON (meaning that CON only had non-true
             dependencies), make sure that CON is on the worklist.
             We don't bother otherwise because it would tend to fill the
             worklist with a lot of low-priority instructions that are not
             yet ready to issue.  */
          if ((con->depth > 0 || con->unscheduled_preds == 0)
              && QUEUE_INDEX (con->insn) == QUEUE_NOWHERE)
            model_add_to_worklist (con, insn, insn->next);
        }
    }
}

/* Give INSN a higher priority than any current instruction, then give
   unscheduled predecessors of INSN a higher priority still.  If any of
   those predecessors are not on the model worklist, do the same for its
   predecessors, and so on.  */

static void
model_promote_predecessors (struct model_insn_info *insn)
{
  struct model_insn_info *pro, *first;
  sd_iterator_def sd_it;
  dep_t dep;

  if (sched_verbose >= 7)
    fprintf (sched_dump, ";;\t+--- priority of %d = %d, priority of",
             INSN_UID (insn->insn), model_next_priority);
  insn->model_priority = model_next_priority++;
  model_remove_from_worklist (insn);
  model_add_to_worklist_at (insn, NULL);

  first = NULL;
  for (;;)
    {
      FOR_EACH_DEP (insn->insn, SD_LIST_HARD_BACK, sd_it, dep)
        {
          pro = MODEL_INSN_INFO (DEP_PRO (dep));
          /* The first test is to ignore debug instructions, and instructions
             from other blocks.  */
          if (pro->insn
              && pro->model_priority != model_next_priority
              && QUEUE_INDEX (pro->insn) != QUEUE_SCHEDULED)
            {
              pro->model_priority = model_next_priority;
              if (sched_verbose >= 7)
                fprintf (sched_dump, " %d", INSN_UID (pro->insn));
              if (QUEUE_INDEX (pro->insn) == QUEUE_READY)
                {
                  /* PRO is already in the worklist, but it now has
                     a higher priority than before.  Move it to the
                     appropriate place.  */
                  model_remove_from_worklist (pro);
                  model_add_to_worklist (pro, NULL, model_worklist);
                }
              else
                {
                  /* PRO isn't in the worklist.  Recursively process
                     its predecessors until we find one that is.  */
                  pro->next = first;
                  first = pro;
                }
            }
        }
      if (!first)
        break;
      insn = first;
      first = insn->next;
    }
  if (sched_verbose >= 7)
    fprintf (sched_dump, " = %d\n", model_next_priority);
  model_next_priority++;
}

/* Pick one instruction from model_worklist and process it.  */

static void
model_choose_insn (void)
{
  struct model_insn_info *insn, *fallback;
  int count;

  if (sched_verbose >= 7)
    {
      fprintf (sched_dump, ";;\t+--- worklist:\n");
      insn = model_worklist;
      count = param_max_sched_ready_insns;
      while (count > 0 && insn)
        {
          fprintf (sched_dump, ";;\t+--- %d [%d, %d, %d, %d]\n",
                   INSN_UID (insn->insn), insn->model_priority,
                   insn->depth + insn->alap, insn->depth,
                   INSN_PRIORITY (insn->insn));
          count--;
          insn = insn->next;
        }
    }

  /* Look for a ready instruction whose model_classify_pressure is zero
     or negative, picking the highest-priority one.  Adding such an
     instruction to the schedule now should do no harm, and may actually
     do some good.

     Failing that, see whether there is an instruction with the highest
     extant model_priority that is not yet ready, but which would reduce
     pressure if it became ready.  This is designed to catch cases like:

       (set (mem (reg R1)) (reg R2))

     where the instruction is the last remaining use of R1 and where the
     value of R2 is not yet available (or vice versa).  The death of R1
     means that this instruction already reduces pressure.  It is of
     course possible that the computation of R2 involves other registers
     that are hard to kill, but such cases are rare enough for this
     heuristic to be a win in general.

     Failing that, just pick the highest-priority instruction in the
     worklist.  */
  count = param_max_sched_ready_insns;
  insn = model_worklist;
  fallback = 0;
  for (;;)
    {
      if (count == 0 || !insn)
        {
          insn = fallback ? fallback : model_worklist;
          break;
        }
      if (insn->unscheduled_preds)
        {
          if (model_worklist->model_priority == insn->model_priority
              && !fallback
              && model_classify_pressure (insn) < 0)
            fallback = insn;
        }
      else
        {
          if (model_classify_pressure (insn) <= 0)
            break;
        }
      count--;
      insn = insn->next;
    }

  if (sched_verbose >= 7 && insn != model_worklist)
    {
      if (insn->unscheduled_preds)
        fprintf (sched_dump, ";;\t+--- promoting insn %d, with dependencies\n",
                 INSN_UID (insn->insn));
      else
        fprintf (sched_dump, ";;\t+--- promoting insn %d, which is ready\n",
                 INSN_UID (insn->insn));
    }
  if (insn->unscheduled_preds)
    /* INSN isn't yet ready to issue.  Give all its predecessors the
       highest priority.  */
    model_promote_predecessors (insn);
  else
    {
      /* INSN is ready.  Add it to the end of model_schedule and
         process its successors.  */
      model_add_successors_to_worklist (insn);
      model_remove_from_worklist (insn);
      model_add_to_schedule (insn->insn);
      model_record_pressures (insn);
      update_register_pressure (insn->insn);
    }
}

/* Restore all QUEUE_INDEXs to the values that they had before
   model_start_schedule was called.  */

static void
model_reset_queue_indices (void)
{
  unsigned int i;
  rtx_insn *insn;

  FOR_EACH_VEC_ELT (model_schedule, i, insn)
    QUEUE_INDEX (insn) = MODEL_INSN_INFO (insn)->old_queue;
}

/* We have calculated the model schedule and spill costs.  Print a summary
   to sched_dump.  */

static void
model_dump_pressure_summary (void)
{
  int pci, cl;

  fprintf (sched_dump, ";; Pressure summary:");
  for (pci = 0; pci < ira_pressure_classes_num; pci++)
    {
      cl = ira_pressure_classes[pci];
      fprintf (sched_dump, " %s:%d", reg_class_names[cl],
               model_before_pressure.limits[pci].pressure);
    }
  fprintf (sched_dump, "\n\n");
}

/* Initialize the SCHED_PRESSURE_MODEL information for the current
   scheduling region.  */

static void
model_start_schedule (basic_block bb)
{
  model_next_priority = 1;
  model_schedule.create (sched_max_luid);
  model_insns = XCNEWVEC (struct model_insn_info, sched_max_luid);

  gcc_assert (bb == BLOCK_FOR_INSN (NEXT_INSN (current_sched_info->prev_head)));
  initiate_reg_pressure_info (df_get_live_in (bb));

  model_analyze_insns ();
  model_init_pressure_group (&model_before_pressure);
  while (model_worklist)
    model_choose_insn ();
  gcc_assert (model_num_insns == (int) model_schedule.length ());
  if (sched_verbose >= 2)
    fprintf (sched_dump, "\n");

  model_record_final_pressures (&model_before_pressure);
  model_reset_queue_indices ();

  XDELETEVEC (model_insns);

  model_curr_point = 0;
  initiate_reg_pressure_info (df_get_live_in (bb));
  if (sched_verbose >= 1)
    model_dump_pressure_summary ();
}

/* Free the information associated with GROUP.  */

static void
model_finalize_pressure_group (struct model_pressure_group *group)
{
  XDELETEVEC (group->model);
}

/* Free the information created by model_start_schedule.  */

static void
model_end_schedule (void)
{
  model_finalize_pressure_group (&model_before_pressure);
  model_schedule.release ();
}

/* Prepare reg pressure scheduling for basic block BB.  */
static void
sched_pressure_start_bb (basic_block bb)
{
  /* Set the number of available registers for each class taking into account
     relative probability of current basic block versus function prologue and
     epilogue.
     * If the basic block executes much more often than the prologue/epilogue
     (e.g., inside a hot loop), then the cost of a spill in the prologue is
     close to nil, so the effective number of available registers is
     (ira_class_hard_regs_num[cl] - fixed_regs_num[cl] - 0).
     * If the basic block executes as often as the prologue/epilogue,
     then a spill in the block is as costly as in the prologue, so the
     effective number of available registers is
     (ira_class_hard_regs_num[cl] - fixed_regs_num[cl]
      - call_saved_regs_num[cl]).
     Note that all-else-equal, we prefer to spill in the prologue, since that
     allows "extra" registers for other basic blocks of the function.
     * If the basic block is on the cold path of the function and executes
     rarely, then we should always prefer to spill in the block, rather than
     in the prologue/epilogue.  The effective number of available registers is
     (ira_class_hard_regs_num[cl] - fixed_regs_num[cl]
      - call_saved_regs_num[cl]).  */
  {
    int i;
    int entry_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun);
    int bb_freq = bb->count.to_frequency (cfun);

    if (bb_freq == 0)
      {
        if (entry_freq == 0)
          entry_freq = bb_freq = 1;
      }
    if (bb_freq < entry_freq)
      bb_freq = entry_freq;

    for (i = 0; i < ira_pressure_classes_num; ++i)
      {
        enum reg_class cl = ira_pressure_classes[i];
        sched_class_regs_num[cl] = ira_class_hard_regs_num[cl]
                                   - fixed_regs_num[cl];
        sched_class_regs_num[cl]
          -= (call_saved_regs_num[cl] * entry_freq) / bb_freq;
      }
  }
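
  /* Worked example (not in the original source): with
     ira_class_hard_regs_num[cl] == 32, fixed_regs_num[cl] == 2 and
     call_saved_regs_num[cl] == 8, a block ten times hotter than the entry
     (entry_freq == 100, bb_freq == 1000) gets 32 - 2 - (8 * 100) / 1000
     == 30 effective registers, while a block no hotter than the entry
     (bb_freq clamped to entry_freq) gets 30 - 8 == 22.  */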

  if (sched_pressure == SCHED_PRESSURE_MODEL)
    model_start_schedule (bb);
}

/* A structure that holds local state for the loop in schedule_block.  */
struct sched_block_state
{
  /* True if no real insns have been scheduled in the current cycle.  */
  bool first_cycle_insn_p;
  /* True if a shadow insn has been scheduled in the current cycle, which
     means that no more normal insns can be issued.  */
  bool shadows_only_p;
  /* True if we're winding down a modulo schedule, which means that we only
     issue insns with INSN_EXACT_TICK set.  */
  bool modulo_epilogue;
  /* Initialized with the machine's issue rate every cycle, and updated
     by calls to the variable_issue hook.  */
  int can_issue_more;
};

/* INSN is the "currently executing insn".  Launch each insn which was
   waiting on INSN.  READY is the ready list which contains the insns
   that are ready to fire.  CLOCK is the current cycle.  The function
   returns the necessary cycle advance after issuing the insn (it is not
   zero for insns in a schedule group).  */
static int
schedule_insn (rtx_insn *insn)
{
  sd_iterator_def sd_it;
  dep_t dep;
  int i;
  int advance = 0;

  if (sched_verbose >= 1)
    {
      struct reg_pressure_data *pressure_info;
      fprintf (sched_dump, ";;\t%3i--> %s %-40s:",
               clock_var, (*current_sched_info->print_insn) (insn, 1),
               str_pattern_slim (PATTERN (insn)));

      if (recog_memoized (insn) < 0)
        fprintf (sched_dump, "nothing");
      else
        print_reservation (sched_dump, insn);
      pressure_info = INSN_REG_PRESSURE (insn);
      if (pressure_info != NULL)
        {
          fputc (':', sched_dump);
          for (i = 0; i < ira_pressure_classes_num; i++)
            fprintf (sched_dump, "%s%s%+d(%d)",
                     scheduled_insns.length () > 1
                     && INSN_LUID (insn)
                        < INSN_LUID (scheduled_insns[scheduled_insns.length () - 2])
                     ? "@" : "",
                     reg_class_names[ira_pressure_classes[i]],
                     pressure_info[i].set_increase, pressure_info[i].change);
        }
      if (sched_pressure == SCHED_PRESSURE_MODEL
          && model_curr_point < model_num_insns
          && model_index (insn) == model_curr_point)
        fprintf (sched_dump, ":model %d", model_curr_point);
      fputc ('\n', sched_dump);
    }

  if (sched_pressure == SCHED_PRESSURE_WEIGHTED && !DEBUG_INSN_P (insn))
    update_reg_and_insn_max_reg_pressure (insn);

  /* Scheduling instruction should have all its dependencies resolved and
     should have been removed from the ready list.  */
  gcc_assert (sd_lists_empty_p (insn, SD_LIST_HARD_BACK));

  /* Reset debug insns invalidated by moving this insn.  */
  if (MAY_HAVE_DEBUG_BIND_INSNS && !DEBUG_INSN_P (insn))
    for (sd_it = sd_iterator_start (insn, SD_LIST_BACK);
         sd_iterator_cond (&sd_it, &dep);)
      {
        rtx_insn *dbg = DEP_PRO (dep);
        struct reg_use_data *use, *next;

        if (DEP_STATUS (dep) & DEP_CANCELLED)
          {
            sd_iterator_next (&sd_it);
            continue;
          }

        gcc_assert (DEBUG_BIND_INSN_P (dbg));

        if (sched_verbose >= 6)
          fprintf (sched_dump, ";;\t\tresetting: debug insn %d\n",
                   INSN_UID (dbg));

        /* ??? Rather than resetting the debug insn, we might be able
           to emit a debug temp before the just-scheduled insn, but
           this would involve checking that the expression at the
           point of the debug insn is equivalent to the expression
           before the just-scheduled insn.  They might not be: the
           expression in the debug insn may depend on other insns not
           yet scheduled that set MEMs, REGs or even other debug
           insns.  It's not clear that attempting to preserve debug
           information in these cases is worth the effort, given how
           uncommon these resets are and the likelihood that the debug
           temps introduced won't survive the schedule change.  */
        INSN_VAR_LOCATION_LOC (dbg) = gen_rtx_UNKNOWN_VAR_LOC ();
        df_insn_rescan (dbg);

        /* Unknown location doesn't use any registers.  */
        for (use = INSN_REG_USE_LIST (dbg); use != NULL; use = next)
          {
            struct reg_use_data *prev = use;

            /* Remove use from the cyclic next_regno_use chain first.  */
            while (prev->next_regno_use != use)
              prev = prev->next_regno_use;
            prev->next_regno_use = use->next_regno_use;
            next = use->next_insn_use;
            free (use);
          }
        INSN_REG_USE_LIST (dbg) = NULL;

        /* We delete rather than resolve these deps, otherwise we
           crash in sched_free_deps(), because forward deps are
           expected to be released before backward deps.  */
        sd_delete_dep (sd_it);
      }

  gcc_assert (QUEUE_INDEX (insn) == QUEUE_NOWHERE);
  QUEUE_INDEX (insn) = QUEUE_SCHEDULED;

  if (sched_pressure == SCHED_PRESSURE_MODEL
      && model_curr_point < model_num_insns
      && NONDEBUG_INSN_P (insn))
    {
      if (model_index (insn) == model_curr_point)
        do
          model_curr_point++;
        while (model_curr_point < model_num_insns
               && (QUEUE_INDEX (MODEL_INSN (model_curr_point))
                   == QUEUE_SCHEDULED));
      else
        model_recompute (insn);
      model_update_limit_points ();
      update_register_pressure (insn);
      if (sched_verbose >= 2)
        print_curr_reg_pressure ();
    }

  gcc_assert (INSN_TICK (insn) >= MIN_TICK);
  if (INSN_TICK (insn) > clock_var)
    /* INSN has been prematurely moved from the queue to the ready list.
       This is possible only if the following flags are set.  */
    gcc_assert (flag_sched_stalled_insns || sched_fusion);

  /* ??? Probably, if INSN is scheduled prematurely, we should leave
     INSN_TICK untouched.  This is a machine-dependent issue, actually.  */
  INSN_TICK (insn) = clock_var;

  check_clobbered_conditions (insn);

  /* Update dependent instructions.  First, see if by scheduling this insn
     now we broke a dependence in a way that requires us to change another
     insn.  */
  for (sd_it = sd_iterator_start (insn, SD_LIST_SPEC_BACK);
       sd_iterator_cond (&sd_it, &dep); sd_iterator_next (&sd_it))
    {
      struct dep_replacement *desc = DEP_REPLACE (dep);
      rtx_insn *pro = DEP_PRO (dep);
      if (QUEUE_INDEX (pro) != QUEUE_SCHEDULED
          && desc != NULL && desc->insn == pro)
        apply_replacement (dep, false);
    }

  /* Go through and resolve forward dependencies.  */
  for (sd_it = sd_iterator_start (insn, SD_LIST_FORW);
       sd_iterator_cond (&sd_it, &dep);)
    {
      rtx_insn *next = DEP_CON (dep);
      bool cancelled = (DEP_STATUS (dep) & DEP_CANCELLED) != 0;

      /* Resolve the dependence between INSN and NEXT.
         sd_resolve_dep () moves current dep to another list thus
         advancing the iterator.  */
      sd_resolve_dep (sd_it);

      if (cancelled)
        {
          if (must_restore_pattern_p (next, dep))
            restore_pattern (dep, false);
          continue;
        }

      /* Don't bother trying to mark next as ready if insn is a debug
         insn.  If insn is the last hard dependency, it will have
         already been discounted.  */
      if (DEBUG_INSN_P (insn) && !DEBUG_INSN_P (next))
        continue;

      if (!IS_SPECULATION_BRANCHY_CHECK_P (insn))
        {
          int effective_cost;

          effective_cost = try_ready (next);

          if (effective_cost >= 0
              && SCHED_GROUP_P (next)
              && advance < effective_cost)
            advance = effective_cost;
        }
      else
        /* Check always has only one forward dependence (to the first insn in
           the recovery block), therefore, this will be executed only once.  */
        {
          gcc_assert (sd_lists_empty_p (insn, SD_LIST_FORW));
          fix_recovery_deps (RECOVERY_BLOCK (insn));
        }
    }

  /* Annotate the instruction with issue information -- TImode
     indicates that the instruction is expected not to be able
     to issue on the same cycle as the previous insn.  A machine
     may use this information to decide how the instruction should
     be aligned.  */
  if (issue_rate > 1
      && GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER
      && !DEBUG_INSN_P (insn))
    {
      if (reload_completed)
        PUT_MODE (insn, clock_var > last_clock_var ? TImode : VOIDmode);
      last_clock_var = clock_var;
    }

  if (nonscheduled_insns_begin != NULL_RTX)
    /* Indicate to debug counters that INSN is scheduled.  */
    nonscheduled_insns_begin = insn;

  return advance;
}

/* Functions for handling of notes.  */

/* Add note list that ends on FROM_END to the end of TO_ENDP.  */
void
concat_note_lists (rtx_insn *from_end, rtx_insn **to_endp)
{
  rtx_insn *from_start;

  /* It's easy when we have nothing to concat.  */
  if (from_end == NULL)
    return;

  /* It's also easy when the destination is empty.  */
  if (*to_endp == NULL)
    {
      *to_endp = from_end;
      return;
    }

  from_start = from_end;
  while (PREV_INSN (from_start) != NULL)
    from_start = PREV_INSN (from_start);

  SET_PREV_INSN (from_start) = *to_endp;
  SET_NEXT_INSN (*to_endp) = from_start;
  *to_endp = from_end;
}
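
/* Illustrative note (not in the original source): both lists are chained
   through PREV_INSN/NEXT_INSN and are identified by their *last* insn, so
   the splice above walks FROM's list back to its head and links that head
   after *TO_ENDP:

     before:  t1 <-> t2 (= *to_endp)     f1 <-> f2 (= from_end)
     after:   t1 <-> t2 <-> f1 <-> f2, with *to_endp = f2.  */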

/* Delete notes between HEAD and TAIL and put them in the chain
   of notes ended by NOTE_LIST.  */
void
remove_notes (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *next_tail, *insn, *next;

  note_list = 0;
  if (head == tail && !INSN_P (head))
    return;

  next_tail = NEXT_INSN (tail);
  for (insn = head; insn != next_tail; insn = next)
    {
      next = NEXT_INSN (insn);
      if (!NOTE_P (insn))
        continue;

      switch (NOTE_KIND (insn))
        {
        case NOTE_INSN_BASIC_BLOCK:
          continue;

        case NOTE_INSN_EPILOGUE_BEG:
          if (insn != tail)
            {
              remove_insn (insn);
              /* If an insn was split just before the EPILOGUE_BEG note and
                 that split created new basic blocks, we could have a
                 BASIC_BLOCK note here.  Safely advance over it in that case
                 and assert that we land on a real insn.  */
              if (NOTE_P (next)
                  && NOTE_KIND (next) == NOTE_INSN_BASIC_BLOCK
                  && next != next_tail)
                next = NEXT_INSN (next);
              gcc_assert (INSN_P (next));
              add_reg_note (next, REG_SAVE_NOTE,
                            GEN_INT (NOTE_INSN_EPILOGUE_BEG));
              break;
            }
          /* FALLTHRU */

        default:
          remove_insn (insn);

          /* Add the note to list that ends at NOTE_LIST.  */
          SET_PREV_INSN (insn) = note_list;
          SET_NEXT_INSN (insn) = NULL_RTX;
          if (note_list)
            SET_NEXT_INSN (note_list) = insn;
          note_list = insn;
          break;
        }

      gcc_assert ((sel_sched_p () || insn != tail) && insn != head);
    }
}

/* A structure to record enough data to allow us to backtrack the scheduler to
   a previous state.  */
struct haifa_saved_data
{
  /* Next entry on the list.  */
  struct haifa_saved_data *next;

  /* Backtracking is associated with scheduling insns that have delay slots.
     DELAY_PAIR points to the structure that contains the insns involved, and
     the number of cycles between them.  */
  struct delay_pair *delay_pair;

  /* Data used by the frontend (e.g. sched-ebb or sched-rgn).  */
  void *fe_saved_data;
  /* Data used by the backend.  */
  void *be_saved_data;

  /* Copies of global state.  */
  int clock_var, last_clock_var;
  struct ready_list ready;
  state_t curr_state;

  rtx_insn *last_scheduled_insn;
  rtx_insn *last_nondebug_scheduled_insn;
  rtx_insn *nonscheduled_insns_begin;
  int cycle_issued_insns;

  /* Copies of state used in the inner loop of schedule_block.  */
  struct sched_block_state sched_block;

  /* We don't need to save q_ptr, as its value is arbitrary and we can set it
     to 0 when restoring.  */
  int q_size;
  rtx_insn_list **insn_queue;

  /* Describe pattern replacements that occurred since this backtrack point
     was queued.  */
  vec<dep_t> replacement_deps;
  vec<int> replace_apply;

  /* A copy of the next-cycle replacement vectors at the time of the backtrack
     point.  */
  vec<dep_t> next_cycle_deps;
  vec<int> next_cycle_apply;
};

/* A record, in reverse order, of all scheduled insns which have delay slots
   and may require backtracking.  */
static struct haifa_saved_data *backtrack_queue;

/* For every dependency of INSN, set the FEEDS_BACKTRACK_INSN bit according
   to SET_P.  */
static void
mark_backtrack_feeds (rtx_insn *insn, int set_p)
{
  sd_iterator_def sd_it;
  dep_t dep;
  FOR_EACH_DEP (insn, SD_LIST_HARD_BACK, sd_it, dep)
    {
      FEEDS_BACKTRACK_INSN (DEP_PRO (dep)) = set_p;
    }
}

/* Save the current scheduler state so that we can backtrack to it
   later if necessary.  PAIR gives the insns that make it necessary to
   save this point.  SCHED_BLOCK is the local state of schedule_block
   that needs to be saved.  */
static void
save_backtrack_point (struct delay_pair *pair,
                      struct sched_block_state sched_block)
{
  int i;
  struct haifa_saved_data *save = XNEW (struct haifa_saved_data);

  save->curr_state = xmalloc (dfa_state_size);
  memcpy (save->curr_state, curr_state, dfa_state_size);

  save->ready.first = ready.first;
  save->ready.n_ready = ready.n_ready;
  save->ready.n_debug = ready.n_debug;
  save->ready.veclen = ready.veclen;
  save->ready.vec = XNEWVEC (rtx_insn *, ready.veclen);
  memcpy (save->ready.vec, ready.vec, ready.veclen * sizeof (rtx));

  save->insn_queue = XNEWVEC (rtx_insn_list *, max_insn_queue_index + 1);
  save->q_size = q_size;
  for (i = 0; i <= max_insn_queue_index; i++)
    {
      int q = NEXT_Q_AFTER (q_ptr, i);
      save->insn_queue[i] = copy_INSN_LIST (insn_queue[q]);
    }

  save->clock_var = clock_var;
  save->last_clock_var = last_clock_var;
  save->cycle_issued_insns = cycle_issued_insns;
  save->last_scheduled_insn = last_scheduled_insn;
  save->last_nondebug_scheduled_insn = last_nondebug_scheduled_insn;
  save->nonscheduled_insns_begin = nonscheduled_insns_begin;

  save->sched_block = sched_block;

  save->replacement_deps.create (0);
  save->replace_apply.create (0);
  save->next_cycle_deps = next_cycle_replace_deps.copy ();
  save->next_cycle_apply = next_cycle_apply.copy ();

  if (current_sched_info->save_state)
    save->fe_saved_data = (*current_sched_info->save_state) ();

  if (targetm.sched.alloc_sched_context)
    {
      save->be_saved_data = targetm.sched.alloc_sched_context ();
      targetm.sched.init_sched_context (save->be_saved_data, false);
    }
  else
    save->be_saved_data = NULL;

  save->delay_pair = pair;

  save->next = backtrack_queue;
  backtrack_queue = save;

  while (pair)
    {
      mark_backtrack_feeds (pair->i2, 1);
      INSN_TICK (pair->i2) = INVALID_TICK;
      INSN_EXACT_TICK (pair->i2) = clock_var + pair_delay (pair);
      SHADOW_P (pair->i2) = pair->stages == 0;
      pair = pair->next_same_i1;
    }
}

/* Walk the ready list and all queues.  If any insns have unresolved backwards
   dependencies, these must be cancelled deps, broken by predication.  Set or
   clear (depending on SET) the DEP_CANCELLED bit in DEP_STATUS.  */

static void
toggle_cancelled_flags (bool set)
{
  int i;
  sd_iterator_def sd_it;
  dep_t dep;

  if (ready.n_ready > 0)
    {
      rtx_insn **first = ready_lastpos (&ready);
      for (i = 0; i < ready.n_ready; i++)
        FOR_EACH_DEP (first[i], SD_LIST_BACK, sd_it, dep)
          if (!DEBUG_INSN_P (DEP_PRO (dep)))
            {
              if (set)
                DEP_STATUS (dep) |= DEP_CANCELLED;
              else
                DEP_STATUS (dep) &= ~DEP_CANCELLED;
            }
    }
  for (i = 0; i <= max_insn_queue_index; i++)
    {
      int q = NEXT_Q_AFTER (q_ptr, i);
      rtx_insn_list *link;
      for (link = insn_queue[q]; link; link = link->next ())
        {
          rtx_insn *insn = link->insn ();
          FOR_EACH_DEP (insn, SD_LIST_BACK, sd_it, dep)
            if (!DEBUG_INSN_P (DEP_PRO (dep)))
              {
                if (set)
                  DEP_STATUS (dep) |= DEP_CANCELLED;
                else
                  DEP_STATUS (dep) &= ~DEP_CANCELLED;
              }
        }
    }
}

/* Undo the replacements that have occurred after backtrack point SAVE
   was placed.  */
static void
undo_replacements_for_backtrack (struct haifa_saved_data *save)
{
  while (!save->replacement_deps.is_empty ())
    {
      dep_t dep = save->replacement_deps.pop ();
      int apply_p = save->replace_apply.pop ();

      if (apply_p)
        restore_pattern (dep, true);
      else
        apply_replacement (dep, true);
    }
  save->replacement_deps.release ();
  save->replace_apply.release ();
}
4468 | |
4469 | /* Pop entries from the SCHEDULED_INSNS vector up to and including INSN. |
4470 | Restore their dependencies to an unresolved state, and mark them as |
4471 | queued nowhere. */ |
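/* Worked example (hypothetical insns): if SCHEDULED_INSNS holds
   [A, B, C, D] and we call unschedule_insns_until (B), then D, C and B
   are popped in that order, each of their resolved forward deps is
   unresolved again, and every consumer of such a dep is recorded so its
   TODO_SPEC can be recomputed in the second pass below.  */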
4472 | |
4473 | static void |
4474 | unschedule_insns_until (rtx_insn *insn) |
4475 | { |
4476 | auto_vec<rtx_insn *> recompute_vec; |
4477 | |
4478 | /* Make two passes over the insns to be unscheduled. First, we clear out |
4479 | dependencies and other trivial bookkeeping. */ |
4480 | for (;;) |
4481 | { |
4482 | rtx_insn *last; |
4483 | sd_iterator_def sd_it; |
4484 | dep_t dep; |
4485 | |
4486 | last = scheduled_insns.pop (); |
4487 | |
4488 | /* This will be changed by restore_backtrack_point if the insn is in |
4489 | any queue. */ |
4490 | QUEUE_INDEX (last) = QUEUE_NOWHERE; |
4491 | if (last != insn) |
4492 | INSN_TICK (last) = INVALID_TICK; |
4493 | |
4494 | if (modulo_ii > 0 && INSN_UID (last) < modulo_iter0_max_uid) |
4495 | modulo_insns_scheduled--; |
4496 | |
4497 | for (sd_it = sd_iterator_start (last, SD_LIST_RES_FORW); |
4498 | sd_iterator_cond (&sd_it, &dep);) |
4499 | { |
4500 | rtx_insn *con = DEP_CON (dep); |
4501 | sd_unresolve_dep (sd_it); |
4502 | if (!MUST_RECOMPUTE_SPEC_P (con)) |
4503 | { |
4504 | MUST_RECOMPUTE_SPEC_P (con) = 1; |
4505 | recompute_vec.safe_push (con); |
4506 | } |
4507 | } |
4508 | |
4509 | if (last == insn) |
4510 | break; |
4511 | } |
4512 | |
4513 | /* A second pass, to update ready and speculation status for insns |
4514 | depending on the unscheduled ones. The first pass must have |
4515 | popped the scheduled_insns vector up to the point where we |
4516 | restart scheduling, as recompute_todo_spec requires it to be |
4517 | up-to-date. */ |
4518 | while (!recompute_vec.is_empty ()) |
4519 | { |
4520 | rtx_insn *con; |
4521 | |
4522 | con = recompute_vec.pop (); |
4523 | MUST_RECOMPUTE_SPEC_P (con) = 0; |
4524 | if (!sd_lists_empty_p (con, SD_LIST_HARD_BACK)) |
4525 | { |
4526 | TODO_SPEC (con) = HARD_DEP; |
4527 | INSN_TICK (con) = INVALID_TICK; |
4528 | if (PREDICATED_PAT (con) != NULL_RTX) |
4529 | haifa_change_pattern (con, ORIG_PAT (con)); |
4530 | } |
4531 | else if (QUEUE_INDEX (con) != QUEUE_SCHEDULED) |
4532 | TODO_SPEC (con) = recompute_todo_spec (con, true); |
4533 | } |
4534 | } |
4535 | |
4536 | /* Restore scheduler state from the topmost entry on the backtracking queue. |
4537 | PSCHED_BLOCK_P points to the local data of schedule_block that we must |
4538 | overwrite with the saved data. |
4539 | The caller must already have called unschedule_insns_until. */ |
4540 | |
4541 | static void |
4542 | restore_last_backtrack_point (struct sched_block_state *psched_block) |
4543 | { |
4544 | int i; |
4545 | struct haifa_saved_data *save = backtrack_queue; |
4546 | |
4547 | backtrack_queue = save->next; |
4548 | |
4549 | if (current_sched_info->restore_state) |
4550 | (*current_sched_info->restore_state) (save->fe_saved_data); |
4551 | |
4552 | if (targetm.sched.alloc_sched_context) |
4553 | { |
4554 | targetm.sched.set_sched_context (save->be_saved_data); |
4555 | targetm.sched.free_sched_context (save->be_saved_data); |
4556 | } |
4557 | |
4558 | /* Do this first since it clobbers INSN_TICK of the involved |
4559 | instructions. */ |
4560 | undo_replacements_for_backtrack (save); |
4561 | |
4562 | /* Clear the QUEUE_INDEX of everything in the ready list or one |
4563 | of the queues. */ |
4564 | if (ready.n_ready > 0) |
4565 | { |
4566 | rtx_insn **first = ready_lastpos (&ready); |
4567 | for (i = 0; i < ready.n_ready; i++) |
4568 | { |
4569 | rtx_insn *insn = first[i]; |
4570 | QUEUE_INDEX (insn) = QUEUE_NOWHERE; |
4571 | INSN_TICK (insn) = INVALID_TICK; |
4572 | } |
4573 | } |
4574 | for (i = 0; i <= max_insn_queue_index; i++) |
4575 | { |
4576 | int q = NEXT_Q_AFTER (q_ptr, i); |
4577 | |
4578 | for (rtx_insn_list *link = insn_queue[q]; link; link = link->next ()) |
4579 | { |
4580 | rtx_insn *x = link->insn (); |
4581 | QUEUE_INDEX (x) = QUEUE_NOWHERE; |
4582 | INSN_TICK (x) = INVALID_TICK; |
4583 | } |
4584 | free_INSN_LIST_list (&insn_queue[q]); |
4585 | } |
4586 | |
4587 | free (ready.vec); |
4588 | ready = save->ready; |
4589 | |
4590 | if (ready.n_ready > 0) |
4591 | { |
4592 | rtx_insn **first = ready_lastpos (&ready); |
4593 | for (i = 0; i < ready.n_ready; i++) |
4594 | { |
4595 | rtx_insn *insn = first[i]; |
4596 | QUEUE_INDEX (insn) = QUEUE_READY; |
4597 | TODO_SPEC (insn) = recompute_todo_spec (insn, true); |
4598 | INSN_TICK (insn) = save->clock_var; |
4599 | } |
4600 | } |
4601 | |
4602 | q_ptr = 0; |
4603 | q_size = save->q_size; |
4604 | for (i = 0; i <= max_insn_queue_index; i++) |
4605 | { |
4606 | int q = NEXT_Q_AFTER (q_ptr, i); |
4607 | |
4608 | insn_queue[q] = save->insn_queue[q]; |
4609 | |
4610 | for (rtx_insn_list *link = insn_queue[q]; link; link = link->next ()) |
4611 | { |
4612 | rtx_insn *x = link->insn (); |
4613 | QUEUE_INDEX (x) = i; |
4614 | TODO_SPEC (x) = recompute_todo_spec (x, true); |
4615 | INSN_TICK (x) = save->clock_var + i; |
4616 | } |
4617 | } |
4618 | free (save->insn_queue); |
4619 | |
4620 | toggle_cancelled_flags (true); |
4621 | |
4622 | clock_var = save->clock_var; |
4623 | last_clock_var = save->last_clock_var; |
4624 | cycle_issued_insns = save->cycle_issued_insns; |
4625 | last_scheduled_insn = save->last_scheduled_insn; |
4626 | last_nondebug_scheduled_insn = save->last_nondebug_scheduled_insn; |
4627 | nonscheduled_insns_begin = save->nonscheduled_insns_begin; |
4628 | |
4629 | *psched_block = save->sched_block; |
4630 | |
4631 | memcpy (curr_state, save->curr_state, dfa_state_size); |
4632 | free (save->curr_state); |
4633 | |
4634 | mark_backtrack_feeds (save->delay_pair->i2, 0); |
4635 | |
4636 | gcc_assert (next_cycle_replace_deps.is_empty ()); |
4637 | next_cycle_replace_deps = save->next_cycle_deps.copy (); |
4638 | next_cycle_apply = save->next_cycle_apply.copy (); |
4639 | |
4640 | free (save); |
4641 | |
4642 | for (save = backtrack_queue; save; save = save->next) |
4643 | { |
4644 | mark_backtrack_feeds (save->delay_pair->i2, 1); |
4645 | } |
4646 | } |
4647 | |
4648 | /* Discard all data associated with the topmost entry in the backtrack |
4649 | queue. If RESET_TICK is false, we just want to free the data. If true, |
4650 | we are doing this because we discovered a reason to backtrack. In the |
4651 | latter case, also reset the INSN_TICK for the shadow insn. */ |
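/* E.g. when the scheduler finds that a shadow insn cannot be placed at
   its exact tick, it backtracks and calls this with RESET_TICK true, so
   every i2 in the delay-pair chain rooted at SAVE->delay_pair has its
   ticks invalidated and will be scheduled afresh.  */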
4652 | static void |
4653 | free_topmost_backtrack_point (bool reset_tick) |
4654 | { |
4655 | struct haifa_saved_data *save = backtrack_queue; |
4656 | int i; |
4657 | |
4658 | backtrack_queue = save->next; |
4659 | |
4660 | if (reset_tick) |
4661 | { |
4662 | struct delay_pair *pair = save->delay_pair; |
4663 | while (pair) |
4664 | { |
4665 | INSN_TICK (pair->i2) = INVALID_TICK; |
4666 | INSN_EXACT_TICK (pair->i2) = INVALID_TICK; |
4667 | pair = pair->next_same_i1; |
4668 | } |
4669 | undo_replacements_for_backtrack (save); |
4670 | } |
4671 | else |
4672 | { |
4673 | save->replacement_deps.release (); |
4674 | save->replace_apply.release (); |
4675 | } |
4676 | |
4677 | if (targetm.sched.free_sched_context) |
4678 | targetm.sched.free_sched_context (save->be_saved_data); |
4679 | if (current_sched_info->restore_state) |
4680 | free (save->fe_saved_data); |
4681 | for (i = 0; i <= max_insn_queue_index; i++) |
4682 | free_INSN_LIST_list (&save->insn_queue[i]); |
4683 | free (save->insn_queue); |
4684 | free (save->curr_state); |
4685 | free (save->ready.vec); |
4686 | free (save); |
4687 | } |
4688 | |
4689 | /* Free the entire backtrack queue. */ |
4690 | static void |
4691 | free_backtrack_queue (void) |
4692 | { |
4693 | while (backtrack_queue) |
4694 | free_topmost_backtrack_point (false); |
4695 | } |
4696 | |
4697 | /* Apply a replacement described by DESC. If IMMEDIATELY is false, we |
4698 | may have to postpone the replacement until the start of the next cycle, |
4699 | at which point we will be called again with IMMEDIATELY true. This is |
4700 | only done for machines whose instruction packets have explicit |
4701 | parallelism, however.  */ |
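/* A sketch of the deferral path (all names below appear in this file):
   on an exposed-pipeline target after reload, a non-immediate request
   only records the work,

     next_cycle_replace_deps.safe_push (dep);
     next_cycle_apply.safe_push (1);

   and perform_replacements_new_cycle replays it with IMMEDIATELY true
   when the next cycle starts.  */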
4702 | static void |
4703 | apply_replacement (dep_t dep, bool immediately) |
4704 | { |
4705 | struct dep_replacement *desc = DEP_REPLACE (dep); |
4706 | if (!immediately && targetm.sched.exposed_pipeline && reload_completed) |
4707 | { |
4708 | next_cycle_replace_deps.safe_push (dep); |
4709 | next_cycle_apply.safe_push (1); |
4710 | } |
4711 | else |
4712 | { |
4713 | bool success; |
4714 | |
4715 | if (QUEUE_INDEX (desc->insn) == QUEUE_SCHEDULED) |
4716 | return; |
4717 | |
4718 | if (sched_verbose >= 5) |
4719 | fprintf (sched_dump, "applying replacement for insn %d\n", |
4720 | INSN_UID (desc->insn)); |
4721 | |
4722 | success = validate_change (desc->insn, desc->loc, desc->newval, 0); |
4723 | gcc_assert (success); |
4724 | |
4725 | rtx_insn *insn = DEP_PRO (dep); |
4726 | |
4727 | /* Recompute priority since dependent priorities may have changed. */ |
4728 | priority (insn, true); |
4729 | update_insn_after_change (desc->insn); |
4730 | |
4731 | if ((TODO_SPEC (desc->insn) & (HARD_DEP | DEP_POSTPONED)) == 0) |
4732 | fix_tick_ready (desc->insn); |
4733 | |
4734 | if (backtrack_queue != NULL) |
4735 | { |
4736 | backtrack_queue->replacement_deps.safe_push (dep); |
4737 | backtrack_queue->replace_apply.safe_push (1); |
4738 | } |
4739 | } |
4740 | } |
4741 | |
4742 | /* We have determined that a pattern involved in DEP must be restored. |
4743 | If IMMEDIATELY is false, we may have to postpone the replacement |
4744 | until the start of the next cycle, at which point we will be called |
4745 | again with IMMEDIATELY true. */ |
4746 | static void |
4747 | restore_pattern (dep_t dep, bool immediately) |
4748 | { |
4749 | rtx_insn *next = DEP_CON (dep); |
4750 | int tick = INSN_TICK (next); |
4751 | |
4752 | /* If we already scheduled the insn, the modified version is |
4753 | correct. */ |
4754 | if (QUEUE_INDEX (next) == QUEUE_SCHEDULED) |
4755 | return; |
4756 | |
4757 | if (!immediately && targetm.sched.exposed_pipeline && reload_completed) |
4758 | { |
4759 | next_cycle_replace_deps.safe_push (dep); |
4760 | next_cycle_apply.safe_push (0); |
4761 | return; |
4762 | } |
4763 | |
4765 | if (DEP_TYPE (dep) == REG_DEP_CONTROL) |
4766 | { |
4767 | if (sched_verbose >= 5) |
4768 | fprintf (sched_dump, "restoring pattern for insn %d\n", |
4769 | INSN_UID (next)); |
4770 | haifa_change_pattern (next, ORIG_PAT (next)); |
4771 | } |
4772 | else |
4773 | { |
4774 | struct dep_replacement *desc = DEP_REPLACE (dep); |
4775 | bool success; |
4776 | |
4777 | if (sched_verbose >= 5) |
4778 | fprintf (sched_dump, "restoring pattern for insn %d\n", |
4779 | INSN_UID (desc->insn)); |
4780 | tick = INSN_TICK (desc->insn); |
4781 | |
4782 | success = validate_change (desc->insn, desc->loc, desc->orig, 0); |
4783 | gcc_assert (success); |
4784 | |
4785 | rtx_insn *insn = DEP_PRO (dep); |
4786 | |
4787 | if (QUEUE_INDEX (insn) != QUEUE_SCHEDULED) |
4788 | { |
4789 | /* Recompute priority since dependent priorities may have changed. */ |
4790 | priority (insn, true); |
4791 | } |
4792 | |
4793 | update_insn_after_change (desc->insn); |
4794 | |
4795 | if (backtrack_queue != NULL) |
4796 | { |
4797 | backtrack_queue->replacement_deps.safe_push (dep); |
4798 | backtrack_queue->replace_apply.safe_push (0); |
4799 | } |
4800 | } |
4801 | INSN_TICK (next) = tick; |
4802 | if (TODO_SPEC (next) == DEP_POSTPONED) |
4803 | return; |
4804 | |
4805 | if (sd_lists_empty_p (next, SD_LIST_BACK)) |
4806 | TODO_SPEC (next) = 0; |
4807 | else if (!sd_lists_empty_p (next, SD_LIST_HARD_BACK)) |
4808 | TODO_SPEC (next) = HARD_DEP; |
4809 | } |
4810 | |
4811 | /* Perform pattern replacements that were queued up until the next |
4812 | cycle. */ |
4813 | static void |
4814 | perform_replacements_new_cycle (void) |
4815 | { |
4816 | int i; |
4817 | dep_t dep; |
4818 | FOR_EACH_VEC_ELT (next_cycle_replace_deps, i, dep) |
4819 | { |
4820 | int apply_p = next_cycle_apply[i]; |
4821 | if (apply_p) |
4822 | apply_replacement (dep, true); |
4823 | else |
4824 | restore_pattern (dep, true); |
4825 | } |
4826 | next_cycle_replace_deps.truncate (0); |
4827 | next_cycle_apply.truncate (0); |
4828 | } |
4829 | |
4830 | /* Compute INSN_TICK_ESTIMATE for INSN. PROCESSED is a bitmap of |
4831 | instructions we've previously encountered, a set bit prevents |
4832 | recursion. BUDGET is a limit on how far ahead we look, it is |
4833 | reduced on recursive calls. Return true if we produced a good |
4834 | estimate, or false if we exceeded the budget. */ |
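/* Worked example (made-up costs): for a dependence chain
   P1 --2--> P2 --3--> INSN and BUDGET == 4, the recursive call for P2
   receives budget 4 - 3 == 1; the P1->P2 edge then has cost 2 >= 1, so
   the walk stops and false is returned instead of searching arbitrarily
   far back through the dependence graph.  */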
4835 | static bool |
4836 | estimate_insn_tick (bitmap processed, rtx_insn *insn, int budget) |
4837 | { |
4838 | sd_iterator_def sd_it; |
4839 | dep_t dep; |
4840 | int earliest = INSN_TICK (insn); |
4841 | |
4842 | FOR_EACH_DEP (insn, SD_LIST_BACK, sd_it, dep) |
4843 | { |
4844 | rtx_insn *pro = DEP_PRO (dep); |
4845 | int t; |
4846 | |
4847 | if (DEP_STATUS (dep) & DEP_CANCELLED) |
4848 | continue; |
4849 | |
4850 | if (QUEUE_INDEX (pro) == QUEUE_SCHEDULED) |
4851 | gcc_assert (INSN_TICK (pro) + dep_cost (dep) <= INSN_TICK (insn)); |
4852 | else |
4853 | { |
4854 | int cost = dep_cost (dep); |
4855 | if (cost >= budget) |
4856 | return false; |
4857 | if (!bitmap_bit_p (processed, INSN_LUID (pro))) |
4858 | { |
4859 | if (!estimate_insn_tick (processed, pro, budget - cost)) |
4860 | return false; |
4861 | } |
4862 | gcc_assert (INSN_TICK_ESTIMATE (pro) != INVALID_TICK); |
4863 | t = INSN_TICK_ESTIMATE (pro) + cost; |
4864 | if (earliest == INVALID_TICK || t > earliest) |
4865 | earliest = t; |
4866 | } |
4867 | } |
4868 | bitmap_set_bit (processed, INSN_LUID (insn)); |
4869 | INSN_TICK_ESTIMATE (insn) = earliest; |
4870 | return true; |
4871 | } |
4872 | |
4873 | /* Examine the pair of insns in P, and estimate (optimistically, assuming |
4874 | infinite resources) the cycle in which the delayed shadow can be issued. |
4875 | Return the number of cycles that must pass before the real insn can be |
4876 | issued in order to meet this constraint. */ |
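/* For instance (made-up numbers): with clock_var == 10, a pair delay of
   3 and an estimated shadow tick of 16, the real insn must wait
   16 - (10 + 3 + 1) == 2 more cycles; if the estimate runs out of
   budget, max_insn_queue_index is returned as a conservative answer.  */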
4877 | static int |
4878 | estimate_shadow_tick (struct delay_pair *p) |
4879 | { |
4880 | auto_bitmap processed; |
4881 | int t; |
4882 | bool cutoff; |
4883 | |
4884 | cutoff = !estimate_insn_tick (processed, p->i2, |
4885 | max_insn_queue_index + pair_delay (p)); |
4886 | if (cutoff) |
4887 | return max_insn_queue_index; |
4888 | t = INSN_TICK_ESTIMATE (p->i2) - (clock_var + pair_delay (p) + 1); |
4889 | if (t > 0) |
4890 | return t; |
4891 | return 0; |
4892 | } |
4893 | |
4894 | /* If INSN has no unresolved backwards dependencies, add it to the schedule and |
4895 | recursively resolve all its forward dependencies. */ |
4896 | static void |
4897 | resolve_dependencies (rtx_insn *insn) |
4898 | { |
4899 | sd_iterator_def sd_it; |
4900 | dep_t dep; |
4901 | |
4902 | /* Don't use sd_lists_empty_p; it ignores debug insns. */ |
4903 | if (DEPS_LIST_FIRST (INSN_HARD_BACK_DEPS (insn)) != NULL |
4904 | || DEPS_LIST_FIRST (INSN_SPEC_BACK_DEPS (insn)) != NULL) |
4905 | return; |
4906 | |
4907 | if (sched_verbose >= 4) |
4908 | fprintf (sched_dump, ";;\tquickly resolving %d\n", INSN_UID (insn)); |
4909 | |
4910 | if (QUEUE_INDEX (insn) >= 0) |
4911 | queue_remove (insn); |
4912 | |
4913 | scheduled_insns.safe_push (insn); |
4914 | |
4915 | /* Update dependent instructions. */ |
4916 | for (sd_it = sd_iterator_start (insn, SD_LIST_FORW); |
4917 | sd_iterator_cond (&sd_it, &dep);) |
4918 | { |
4919 | rtx_insn *next = DEP_CON (dep); |
4920 | |
4921 | if (sched_verbose >= 4) |
4922 | fprintf (sched_dump, ";;\t\tdep %d against %d\n", INSN_UID (insn), |
4923 | INSN_UID (next)); |
4924 | |
4925 | /* Resolve the dependence between INSN and NEXT. |
4926 | sd_resolve_dep () moves current dep to another list thus |
4927 | advancing the iterator. */ |
4928 | sd_resolve_dep (sd_it); |
4929 | |
4930 | if (!IS_SPECULATION_BRANCHY_CHECK_P (insn)) |
4931 | { |
4932 | resolve_dependencies (next); |
4933 | } |
4934 | else |
4935 | /* Check always has only one forward dependence (to the first insn in |
4936 | the recovery block), therefore, this will be executed only once. */ |
4937 | { |
4938 | gcc_assert (sd_lists_empty_p (insn, SD_LIST_FORW)); |
4939 | } |
4940 | } |
4941 | } |
4942 | |
4943 | |
4944 | /* Return the head and tail pointers of ebb starting at BEG and ending |
4945 | at END. */ |
4946 | void |
4947 | get_ebb_head_tail (basic_block beg, basic_block end, |
4948 | rtx_insn **headp, rtx_insn **tailp) |
4949 | { |
4950 | rtx_insn *beg_head = BB_HEAD (beg); |
4951 | rtx_insn *beg_tail = BB_END (beg); |
4952 | rtx_insn *end_head = BB_HEAD (end); |
4953 | rtx_insn *end_tail = BB_END (end); |
4954 | |
4955 | /* Don't include any notes or labels at the beginning of the BEG |
4956 | basic block, or notes at the end of the END basic blocks. */ |
4957 | |
4958 | if (LABEL_P (beg_head)) |
4959 | beg_head = NEXT_INSN (beg_head); |
4960 | |
4961 | while (beg_head != beg_tail) |
4962 | if (NOTE_P (beg_head)) |
4963 | beg_head = NEXT_INSN (beg_head); |
4964 | else if (DEBUG_INSN_P (beg_head)) |
4965 | { |
4966 | rtx_insn *note, *next; |
4967 | |
4968 | for (note = NEXT_INSN (beg_head); |
4969 | note != beg_tail; |
4970 | note = next) |
4971 | { |
4972 | next = NEXT_INSN (note); |
4973 | if (NOTE_P (note)) |
4974 | { |
4975 | if (sched_verbose >= 9) |
4976 | fprintf (sched_dump, "reorder %i\n", INSN_UID (note)); |
4977 | |
4978 | reorder_insns_nobb (note, note, PREV_INSN (beg_head)); |
4979 | |
4980 | if (BLOCK_FOR_INSN (note) != beg) |
4981 | df_insn_change_bb (note, beg); |
4982 | } |
4983 | else if (!DEBUG_INSN_P (note)) |
4984 | break; |
4985 | } |
4986 | |
4987 | break; |
4988 | } |
4989 | else |
4990 | break; |
4991 | |
4992 | *headp = beg_head; |
4993 | |
4994 | if (beg == end) |
4995 | end_head = beg_head; |
4996 | else if (LABEL_P (end_head)) |
4997 | end_head = NEXT_INSN (end_head); |
4998 | |
4999 | while (end_head != end_tail) |
5000 | if (NOTE_P (end_tail)) |
5001 | end_tail = PREV_INSN (end_tail); |
5002 | else if (DEBUG_INSN_P (end_tail)) |
5003 | { |
5004 | rtx_insn *note, *prev; |
5005 | |
5006 | for (note = PREV_INSN (end_tail); |
5007 | note != end_head; |
5008 | note = prev) |
5009 | { |
5010 | prev = PREV_INSN (note); |
5011 | if (NOTE_P (note)) |
5012 | { |
5013 | if (sched_verbose >= 9) |
5014 | fprintf (sched_dump, "reorder %i\n", INSN_UID (note)); |
5015 | |
5016 | reorder_insns_nobb (note, note, end_tail); |
5017 | |
5018 | if (end_tail == BB_END (end)) |
5019 | BB_END (end) = note; |
5020 | |
5021 | if (BLOCK_FOR_INSN (note) != end) |
5022 | df_insn_change_bb (note, end); |
5023 | } |
5024 | else if (!DEBUG_INSN_P (note)) |
5025 | break; |
5026 | } |
5027 | |
5028 | break; |
5029 | } |
5030 | else |
5031 | break; |
5032 | |
5033 | *tailp = end_tail; |
5034 | } |
5035 | |
5036 | /* Return true if there are no real insns in the range [ HEAD, TAIL ]. */ |
5037 | |
5038 | bool |
5039 | no_real_insns_p (const rtx_insn *head, const rtx_insn *tail) |
5040 | { |
5041 | while (head != NEXT_INSN (tail)) |
5042 | { |
5043 | if (!NOTE_P (head) && !LABEL_P (head)) |
5044 | return false; |
5045 | head = NEXT_INSN (head); |
5046 | } |
5047 | return true; |
5048 | } |
5049 | |
5050 | /* Restore-other-notes: NOTE_LIST is the end of a chain of notes |
5051 | previously found among the insns. Insert them just before HEAD. */ |
5052 | rtx_insn * |
5053 | restore_other_notes (rtx_insn *head, basic_block head_bb) |
5054 | { |
5055 | if (note_list != 0) |
5056 | { |
5057 | rtx_insn *note_head = note_list; |
5058 | |
5059 | if (head) |
5060 | head_bb = BLOCK_FOR_INSN (head); |
5061 | else |
5062 | head = NEXT_INSN (bb_note (head_bb)); |
5063 | |
5064 | while (PREV_INSN (note_head)) |
5065 | { |
5066 | set_block_for_insn (note_head, head_bb); |
5067 | note_head = PREV_INSN (note_head); |
5068 | } |
5069 | /* The loop above leaves this first note unhandled.  */ |
5070 | set_block_for_insn (note_head, head_bb); |
5071 | |
5072 | SET_PREV_INSN (note_head) = PREV_INSN (head); |
5073 | SET_NEXT_INSN (PREV_INSN (head)) = note_head; |
5074 | SET_PREV_INSN (head) = note_list; |
5075 | SET_NEXT_INSN (note_list) = head; |
5076 | |
5077 | if (BLOCK_FOR_INSN (head) != head_bb) |
5078 | BB_END (head_bb) = note_list; |
5079 | |
5080 | head = note_head; |
5081 | } |
5082 | |
5083 | return head; |
5084 | } |
5085 | |
5086 | /* When we know we are going to discard the schedule due to a failed attempt |
5087 | at modulo scheduling, undo all replacements. */ |
5088 | static void |
5089 | undo_all_replacements (void) |
5090 | { |
5091 | rtx_insn *insn; |
5092 | int i; |
5093 | |
5094 | FOR_EACH_VEC_ELT (scheduled_insns, i, insn) |
5095 | { |
5096 | sd_iterator_def sd_it; |
5097 | dep_t dep; |
5098 | |
5099 | /* See if we must undo a replacement. */ |
5100 | for (sd_it = sd_iterator_start (insn, SD_LIST_RES_FORW); |
5101 | sd_iterator_cond (&sd_it, &dep); sd_iterator_next (&sd_it)) |
5102 | { |
5103 | struct dep_replacement *desc = DEP_REPLACE (dep); |
5104 | if (desc != NULL) |
5105 | validate_change (desc->insn, desc->loc, desc->orig, 0); |
5106 | } |
5107 | } |
5108 | } |
5109 | |
5110 | /* Return first non-scheduled insn in the current scheduling block. |
5111 | This is mostly used for debug-counter purposes. */ |
5112 | static rtx_insn * |
5113 | first_nonscheduled_insn (void) |
5114 | { |
5115 | rtx_insn *insn = (nonscheduled_insns_begin != NULL_RTX |
5116 | ? nonscheduled_insns_begin |
5117 | : current_sched_info->prev_head); |
5118 | |
5119 | do |
5120 | { |
5121 | insn = next_nonnote_nondebug_insn (insn); |
5122 | } |
5123 | while (QUEUE_INDEX (insn) == QUEUE_SCHEDULED); |
5124 | |
5125 | return insn; |
5126 | } |
5127 | |
5128 | /* Move insns that became ready to fire from queue to ready list. */ |
5129 | |
5130 | static void |
5131 | queue_to_ready (struct ready_list *ready) |
5132 | { |
5133 | rtx_insn *insn; |
5134 | rtx_insn_list *link; |
5135 | rtx_insn *skip_insn; |
5136 | |
5137 | q_ptr = NEXT_Q (q_ptr); |
5138 | |
5139 | if (dbg_cnt (sched_insn) == false) |
5140 | /* If debug counter is activated do not requeue the first |
5141 | nonscheduled insn. */ |
5142 | skip_insn = first_nonscheduled_insn (); |
5143 | else |
5144 | skip_insn = NULL; |
5145 | |
5146 | /* Add all pending insns that can be scheduled without stalls to the |
5147 | ready list. */ |
5148 | for (link = insn_queue[q_ptr]; link; link = link->next ()) |
5149 | { |
5150 | insn = link->insn (); |
5151 | q_size -= 1; |
5152 | |
5153 | if (sched_verbose >= 2) |
5154 | fprintf (sched_dump, ";;\t\tQ-->Ready: insn %s: ", |
5155 | (*current_sched_info->print_insn) (insn, 0)); |
5156 | |
5157 | /* If the ready list is full, delay the insn for 1 cycle. |
5158 | See the comment in schedule_block for the rationale. */ |
5159 | if (!reload_completed |
5160 | && (ready->n_ready - ready->n_debug > param_max_sched_ready_insns |
5161 | || (sched_pressure == SCHED_PRESSURE_MODEL |
5162 | /* Limit pressure recalculations to |
5163 | param_max_sched_ready_insns instructions too. */ |
5164 | && model_index (insn) > (model_curr_point |
5165 | + param_max_sched_ready_insns))) |
5166 | && !(sched_pressure == SCHED_PRESSURE_MODEL |
5167 | && model_curr_point < model_num_insns |
5168 | /* Always allow the next model instruction to issue. */ |
5169 | && model_index (insn) == model_curr_point) |
5170 | && !SCHED_GROUP_P (insn) |
5171 | && insn != skip_insn) |
5172 | { |
5173 | if (sched_verbose >= 2) |
5174 | fprintf (sched_dump, "keeping in queue, ready full\n"); |
5175 | queue_insn (insn, 1, "ready full"); |
5176 | } |
5177 | else |
5178 | { |
5179 | ready_add (ready, insn, false); |
5180 | if (sched_verbose >= 2) |
5181 | fprintf (sched_dump, "moving to ready without stalls\n"); |
5182 | } |
5183 | } |
5184 | free_INSN_LIST_list (&insn_queue[q_ptr]); |
5185 | |
5186 | /* If there are no ready insns, stall until one is ready and add all |
5187 | of the pending insns at that point to the ready list. */ |
5188 | if (ready->n_ready == 0) |
5189 | { |
5190 | int stalls; |
5191 | |
5192 | for (stalls = 1; stalls <= max_insn_queue_index; stalls++) |
5193 | { |
5194 | if ((link = insn_queue[NEXT_Q_AFTER (q_ptr, stalls)])) |
5195 | { |
5196 | for (; link; link = link->next ()) |
5197 | { |
5198 | insn = link->insn (); |
5199 | q_size -= 1; |
5200 | |
5201 | if (sched_verbose >= 2) |
5202 | fprintf (sched_dump, ";;\t\tQ-->Ready: insn %s: ", |
5203 | (*current_sched_info->print_insn) (insn, 0)); |
5204 | |
5205 | ready_add (ready, insn, false); |
5206 | if (sched_verbose >= 2) |
5207 | fprintf (sched_dump, "moving to ready with %d stalls\n", stalls); |
5208 | } |
5209 | free_INSN_LIST_list (&insn_queue[NEXT_Q_AFTER (q_ptr, stalls)]); |
5210 | |
5211 | advance_one_cycle (); |
5212 | |
5213 | break; |
5214 | } |
5215 | |
5216 | advance_one_cycle (); |
5217 | } |
5218 | |
5219 | q_ptr = NEXT_Q_AFTER (q_ptr, stalls); |
5220 | clock_var += stalls; |
5221 | if (sched_verbose >= 2) |
5222 | fprintf (sched_dump, ";;\tAdvancing clock by %d cycle[s] to %d\n", |
5223 | stalls, clock_var); |
5224 | } |
5225 | } |
5226 | |
5227 | /* Used by early_queue_to_ready. Determines whether it is "ok" to |
5228 | prematurely move INSN from the queue to the ready list. Currently, |
5229 | if a target defines the hook 'is_costly_dependence', this function |
5230 | uses the hook to check whether there exist any dependences which are |
5231 | considered costly by the target, between INSN and other insns that |
5232 | have already been scheduled. Dependences are checked up to Y cycles |
5233 | back, with default Y=1; the flag -fsched-stalled-insns-dep=Y allows |
5234 | controlling this value. |
5235 | (Other considerations could be taken into account instead, or in |
5236 | addition, depending on user flags and target hooks.)  */ |
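/* E.g. with -fsched-stalled-insns-dep=2 the loop below walks backwards
   over at most two dispatch groups of already scheduled insns (an insn
   in TImode marks a group boundary) and rejects INSN as soon as the
   target hook judges one of the dependences found there costly.  */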
5237 | |
5238 | static bool |
5239 | ok_for_early_queue_removal (rtx_insn *insn) |
5240 | { |
5241 | if (targetm.sched.is_costly_dependence) |
5242 | { |
5243 | int n_cycles; |
5244 | int i = scheduled_insns.length (); |
5245 | for (n_cycles = flag_sched_stalled_insns_dep; n_cycles; n_cycles--) |
5246 | { |
5247 | while (i-- > 0) |
5248 | { |
5249 | int cost; |
5250 | |
5251 | rtx_insn *prev_insn = scheduled_insns[i]; |
5252 | |
5253 | if (!NOTE_P (prev_insn)) |
5254 | { |
5255 | dep_t dep; |
5256 | |
5257 | dep = sd_find_dep_between (prev_insn, insn, true); |
5258 | |
5259 | if (dep != NULL) |
5260 | { |
5261 | cost = dep_cost (dep); |
5262 | |
5263 | if (targetm.sched.is_costly_dependence (dep, cost, |
5264 | flag_sched_stalled_insns_dep - n_cycles)) |
5265 | return false; |
5266 | } |
5267 | } |
5268 | |
5269 | if (GET_MODE (prev_insn) == TImode) /* end of dispatch group */ |
5270 | break; |
5271 | } |
5272 | |
5273 | if (i == 0) |
5274 | break; |
5275 | } |
5276 | } |
5277 | |
5278 | return true; |
5279 | } |
5280 | |
5281 | |
5282 | /* Remove insns from the queue, before they become "ready" with respect |
5283 | to FU latency considerations. */ |
5284 | |
5285 | static int |
5286 | early_queue_to_ready (state_t state, struct ready_list *ready) |
5287 | { |
5288 | rtx_insn *insn; |
5289 | rtx_insn_list *link; |
5290 | rtx_insn_list *next_link; |
5291 | rtx_insn_list *prev_link; |
5292 | bool move_to_ready; |
5293 | int cost; |
5294 | state_t temp_state = alloca (dfa_state_size); |
5295 | int stalls; |
5296 | int insns_removed = 0; |
5297 | |
5298 | /* |
5299 | Flag '-fsched-stalled-insns=X' determines the aggressiveness of this |
5300 | function: |
5301 | |
5302 | X == 0: There is no limit on how many queued insns can be removed |
5303 | prematurely. (flag_sched_stalled_insns = -1). |
5304 | |
5305 | X >= 1: Only X queued insns can be removed prematurely in each |
5306 | invocation. (flag_sched_stalled_insns = X). |
5307 | |
5308 | Otherwise: Early queue removal is disabled. |
5309 | (flag_sched_stalled_insns = 0) |
5310 | */ |
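/* So, hypothetically, -fsched-stalled-insns=3 lets at most three insns
   per invocation leave the queue early, -fsched-stalled-insns=0 removes
   the limit entirely (the variable becomes -1), and when the option is
   not given at all the variable stays 0 and early removal is disabled. */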
5311 | |
5312 | if (! flag_sched_stalled_insns) |
5313 | return 0; |
5314 | |
5315 | for (stalls = 0; stalls <= max_insn_queue_index; stalls++) |
5316 | { |
5317 | if ((link = insn_queue[NEXT_Q_AFTER (q_ptr, stalls)])) |
5318 | { |
5319 | if (sched_verbose > 6) |
5320 | fprintf (sched_dump, ";; look at index %d + %d\n", q_ptr, stalls); |
5321 | |
5322 | prev_link = 0; |
5323 | while (link) |
5324 | { |
5325 | next_link = link->next (); |
5326 | insn = link->insn (); |
5327 | if (insn && sched_verbose > 6) |
5328 | print_rtl_single (sched_dump, insn); |
5329 | |
5330 | memcpy (temp_state, state, dfa_state_size); |
5331 | if (recog_memoized (insn) < 0) |
5332 | /* Use a non-negative cost to mark the insn as not ready, |
5333 | avoiding an infinite Q->R->Q->R... cycle.  */ |
5334 | cost = 0; |
5335 | else |
5336 | cost = state_transition (temp_state, insn); |
5337 | |
5338 | if (sched_verbose >= 6) |
5339 | fprintf (sched_dump, "transition cost = %d\n", cost); |
5340 | |
5341 | move_to_ready = false; |
5342 | if (cost < 0) |
5343 | { |
5344 | move_to_ready = ok_for_early_queue_removal (insn); |
5345 | if (move_to_ready == true) |
5346 | { |
5347 | /* move from Q to R */ |
5348 | q_size -= 1; |
5349 | ready_add (ready, insn, false); |
5350 | |
5351 | if (prev_link) |
5352 | XEXP (prev_link, 1) = next_link; |
5353 | else |
5354 | insn_queue[NEXT_Q_AFTER (q_ptr, stalls)] = next_link; |
5355 | |
5356 | free_INSN_LIST_node (link); |
5357 | |
5358 | if (sched_verbose >= 2) |
5359 | fprintf (sched_dump, ";;\t\tEarly Q-->Ready: insn %s\n", |
5360 | (*current_sched_info->print_insn) (insn, 0)); |
5361 | |
5362 | insns_removed++; |
5363 | if (insns_removed == flag_sched_stalled_insns) |
5364 | /* Remove no more than flag_sched_stalled_insns insns |
5365 | from Q at a time. */ |
5366 | return insns_removed; |
5367 | } |
5368 | } |
5369 | |
5370 | if (move_to_ready == false) |
5371 | prev_link = link; |
5372 | |
5373 | link = next_link; |
5374 | } /* while link */ |
5375 | } /* if link */ |
5376 | |
5377 | } /* for stalls.. */ |
5378 | |
5379 | return insns_removed; |
5380 | } |
5381 | |
5382 | |
5383 | /* Print the ready list for debugging purposes. |
5384 | If READY_TRY is non-zero then only print insns that max_issue |
5385 | will consider. */ |
5386 | static void |
5387 | debug_ready_list_1 (struct ready_list *ready, signed char *ready_try) |
5388 | { |
5389 | rtx_insn **p; |
5390 | int i; |
5391 | |
5392 | if (ready->n_ready == 0) |
5393 | { |
5394 | fprintf (sched_dump, "\n"); |
5395 | return; |
5396 | } |
5397 | |
5398 | p = ready_lastpos (ready); |
5399 | for (i = 0; i < ready->n_ready; i++) |
5400 | { |
5401 | if (ready_try != NULL && ready_try[ready->n_ready - i - 1]) |
5402 | continue; |
5403 | |
5404 | fprintf (sched_dump, " %s:%d", |
5405 | (*current_sched_info->print_insn) (p[i], 0), |
5406 | INSN_LUID (p[i])); |
5407 | if (sched_pressure != SCHED_PRESSURE_NONE) |
5408 | fprintf (sched_dump, "(cost=%d", |
5409 | INSN_REG_PRESSURE_EXCESS_COST_CHANGE (p[i])); |
5410 | fprintf (sched_dump, ":prio=%d", INSN_PRIORITY (p[i])); |
5411 | if (INSN_TICK (p[i]) > clock_var) |
5412 | fprintf (sched_dump, ":delay=%d", INSN_TICK (p[i]) - clock_var); |
5413 | if (sched_pressure == SCHED_PRESSURE_MODEL) |
5414 | fprintf (sched_dump, ":idx=%d", |
5415 | model_index (p[i])); |
5416 | if (sched_pressure != SCHED_PRESSURE_NONE) |
5417 | fprintf (sched_dump, ")"); |
5418 | } |
5419 | fprintf (sched_dump, "\n"); |
5420 | } |
5421 | |
5422 | /* Print the ready list. Callable from debugger. */ |
5423 | static void |
5424 | debug_ready_list (struct ready_list *ready) |
5425 | { |
5426 | debug_ready_list_1 (ready, NULL); |
5427 | } |
5428 | |
5429 | /* Search INSN for REG_SAVE_NOTE notes and convert them back into insn |
5430 | NOTEs. This is used for NOTE_INSN_EPILOGUE_BEG, so that sched-ebb |
5431 | replaces the epilogue note in the correct basic block. */ |
5432 | void |
5433 | reemit_notes (rtx_insn *insn) |
5434 | { |
5435 | rtx note; |
5436 | rtx_insn *last = insn; |
5437 | |
5438 | for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) |
5439 | { |
5440 | if (REG_NOTE_KIND (note) == REG_SAVE_NOTE) |
5441 | { |
5442 | enum insn_note note_type = (enum insn_note) INTVAL (XEXP (note, 0)); |
5443 | |
5444 | last = emit_note_before (note_type, last); |
5445 | remove_note (insn, note); |
5446 | df_insn_create_insn_record (last); |
5447 | } |
5448 | } |
5449 | } |
5450 | |
5451 | /* Move INSN. Reemit notes if needed. Update CFG, if needed. */ |
5452 | static void |
5453 | move_insn (rtx_insn *insn, rtx_insn *last, rtx nt) |
5454 | { |
5455 | if (PREV_INSN (insn) != last) |
5456 | { |
5457 | basic_block bb; |
5458 | rtx_insn *note; |
5459 | int jump_p = 0; |
5460 | |
5461 | bb = BLOCK_FOR_INSN (insn); |
5462 | |
5463 | /* BB_HEAD is either LABEL or NOTE. */ |
5464 | gcc_assert (BB_HEAD (bb) != insn); |
5465 | |
5466 | if (BB_END (bb) == insn) |
5467 | /* If this is last instruction in BB, move end marker one |
5468 | instruction up. */ |
5469 | { |
5470 | /* Jumps are always placed at the end of basic block. */ |
5471 | jump_p = control_flow_insn_p (insn); |
5472 | |
5473 | gcc_assert (!jump_p |
5474 | || ((common_sched_info->sched_pass_id == SCHED_RGN_PASS) |
5475 | && IS_SPECULATION_BRANCHY_CHECK_P (insn)) |
5476 | || (common_sched_info->sched_pass_id |
5477 | == SCHED_EBB_PASS)); |
5478 | |
5479 | gcc_assert (BLOCK_FOR_INSN (PREV_INSN (insn)) == bb); |
5480 | |
5481 | BB_END (bb) = PREV_INSN (insn); |
5482 | } |
5483 | |
5484 | gcc_assert (BB_END (bb) != last); |
5485 | |
5486 | if (jump_p) |
5487 | /* We move the block note along with jump. */ |
5488 | { |
5489 | gcc_assert (nt); |
5490 | |
5491 | note = NEXT_INSN (insn); |
5492 | while (NOTE_NOT_BB_P (note) && note != nt) |
5493 | note = NEXT_INSN (note); |
5494 | |
5495 | if (note != nt |
5496 | && (LABEL_P (note) |
5497 | || BARRIER_P (note))) |
5498 | note = NEXT_INSN (note); |
5499 | |
5500 | gcc_assert (NOTE_INSN_BASIC_BLOCK_P (note)); |
5501 | } |
5502 | else |
5503 | note = insn; |
5504 | |
5505 | SET_NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (note); |
5506 | SET_PREV_INSN (NEXT_INSN (note)) = PREV_INSN (insn); |
5507 | |
5508 | SET_NEXT_INSN (note) = NEXT_INSN (last); |
5509 | SET_PREV_INSN (NEXT_INSN (last)) = note; |
5510 | |
5511 | SET_NEXT_INSN (last) = insn; |
5512 | SET_PREV_INSN (insn) = last; |
5513 | |
5514 | bb = BLOCK_FOR_INSN (last); |
5515 | |
5516 | if (jump_p) |
5517 | { |
5518 | fix_jump_move (insn); |
5519 | |
5520 | if (BLOCK_FOR_INSN (insn) != bb) |
5521 | move_block_after_check (insn); |
5522 | |
5523 | gcc_assert (BB_END (bb) == last); |
5524 | } |
5525 | |
5526 | df_insn_change_bb (insn, bb); |
5527 | |
5528 | /* Update BB_END, if needed. */ |
5529 | if (BB_END (bb) == last) |
5530 | BB_END (bb) = insn; |
5531 | } |
5532 | |
5533 | SCHED_GROUP_P (insn) = 0; |
5534 | } |
5535 | |
5536 | /* Return true if scheduling INSN will finish current clock cycle. */ |
5537 | static bool |
5538 | insn_finishes_cycle_p (rtx_insn *insn) |
5539 | { |
5540 | if (SCHED_GROUP_P (insn)) |
5541 | /* After issuing INSN, rest of the sched_group will be forced to issue |
5542 | in order. Don't make any plans for the rest of cycle. */ |
5543 | return true; |
5544 | |
5545 | /* Finishing the block will, apparently, finish the cycle. */ |
5546 | if (current_sched_info->insn_finishes_block_p |
5547 | && current_sched_info->insn_finishes_block_p (insn)) |
5548 | return true; |
5549 | |
5550 | return false; |
5551 | } |
5552 | |
5553 | /* Helper for autopref_multipass_init. Given a SET in PAT and whether |
5554 | we're expecting a memory WRITE or not, check that the insn is relevant to |
5555 | the autoprefetcher modelling code. Return true iff that is the case. |
5556 | If it is relevant, record the base register of the memory op in BASE and |
5557 | the offset in OFFSET. */ |
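/* E.g. a load (set (reg r0) (mem (plus (reg r1) (const_int 8)))) with
   WRITE false yields *BASE == r1 and *OFFSET == 8, while an indexed
   address like (plus (reg r1) (reg r2)) is rejected because only
   base+const addressing is handled (see the TODO in the body).  */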
5558 | |
5559 | static bool |
5560 | analyze_set_insn_for_autopref (rtx pat, bool write, rtx *base, int *offset) |
5561 | { |
5562 | if (GET_CODE (pat) != SET) |
5563 | return false; |
5564 | |
5565 | rtx mem = write ? SET_DEST (pat) : SET_SRC (pat); |
5566 | if (!MEM_P (mem)) |
5567 | return false; |
5568 | |
5569 | struct address_info info; |
5570 | decompose_mem_address (&info, mem); |
5571 | |
5572 | /* TODO: Currently only (base+const) addressing is supported. */ |
5573 | if (info.base == NULL || !REG_P (*info.base) |
5574 | || (info.disp != NULL && !CONST_INT_P (*info.disp))) |
5575 | return false; |
5576 | |
5577 | *base = *info.base; |
5578 | *offset = info.disp ? INTVAL (*info.disp) : 0; |
5579 | return true; |
5580 | } |
5581 | |
5582 | /* Functions to model cache auto-prefetcher. |
5583 | |
5584 | Some CPUs have a cache auto-prefetcher, which /seems/ to initiate |
5585 | memory prefetches if it sees instructions with consecutive memory accesses |
5586 | in the instruction stream. Details of such hardware units are not published, |
5587 | so we can only guess what exactly is going on there. |
5588 | In the scheduler, we model abstract auto-prefetcher. If there are memory |
5589 | insns in the ready list (or the queue) that have same memory base, but |
5590 | different offsets, then we delay the insns with larger offsets until insns |
5591 | with smaller offsets get scheduled. If PARAM_SCHED_AUTOPREF_QUEUE_DEPTH |
5592 | is "1", then we look at the ready list; if it is N>1, then we also look |
5593 | through N-1 queue entries. |
5594 | If the param is N>=0, then rank_for_schedule will consider auto-prefetching |
5595 | among its heuristics. |
5596 | Param value of "-1" disables modelling of the auto-prefetcher. */ |
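/* Illustration (hypothetical insns): if loads from [r1+0] and [r1+8]
   are both ready, the lookahead guard below delays the [r1+8] access
   until [r1+0] has issued, keeping the access stream consecutive; a
   load from [r2+16] uses a different base register and is never delayed
   against either of them.  */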
5597 | |
5598 | /* Initialize autoprefetcher model data for INSN. */ |
5599 | static void |
5600 | autopref_multipass_init (const rtx_insn *insn, int write) |
5601 | { |
5602 | autopref_multipass_data_t data = &INSN_AUTOPREF_MULTIPASS_DATA (insn)[write]; |
5603 | |
5604 | gcc_assert (data->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED); |
5605 | data->base = NULL_RTX; |
5606 | data->offset = 0; |
5607 | /* Set insn entry initialized, but not relevant for auto-prefetcher. */ |
5608 | data->status = AUTOPREF_MULTIPASS_DATA_IRRELEVANT; |
5609 | |
5610 | rtx pat = PATTERN (insn); |
5611 | |
5612 | /* We have a multi-set insn like a load-multiple or store-multiple. |
5613 | We care about these as long as all the memory ops inside the PARALLEL |
5614 | have the same base register. We care about the minimum and maximum |
5615 | offsets from that base but don't check for the order of those offsets |
5616 | within the PARALLEL insn itself. */ |
5617 | if (GET_CODE (pat) == PARALLEL) |
5618 | { |
5619 | int n_elems = XVECLEN (pat, 0); |
5620 | |
5621 | int i, offset; |
5622 | rtx base, prev_base = NULL_RTX; |
5623 | int min_offset = INT_MAX; |
5624 | |
5625 | for (i = 0; i < n_elems; i++) |
5626 | { |
5627 | rtx set = XVECEXP (pat, 0, i); |
5628 | if (GET_CODE (set) != SET) |
5629 | return; |
5630 | |
5631 | if (!analyze_set_insn_for_autopref (set, write, &base, &offset)) |
5632 | return; |
5633 | |
5634 | /* Ensure that all memory operations in the PARALLEL use the same |
5635 | base register. */ |
5636 | if (i > 0 && REGNO (base) != REGNO (prev_base)) |
5637 | return; |
5638 | prev_base = base; |
5639 | min_offset = MIN (min_offset, offset); |
5640 | } |
5641 | |
5642 | /* If we reached here then we have a valid PARALLEL of multiple memory ops |
5643 | with prev_base as the base and min_offset containing the offset. */ |
5644 | gcc_assert (prev_base); |
5645 | data->base = prev_base; |
5646 | data->offset = min_offset; |
5647 | data->status = AUTOPREF_MULTIPASS_DATA_NORMAL; |
5648 | return; |
5649 | } |
5650 | |
5651 | /* Otherwise this is a single set memory operation. */ |
5652 | rtx set = single_set (insn); |
5653 | if (set == NULL_RTX) |
5654 | return; |
5655 | |
5656 | if (!analyze_set_insn_for_autopref (set, write, &data->base, |
5657 | &data->offset)) |
5658 | return; |
5659 | |
5660 | /* This insn is relevant for the auto-prefetcher. |
5661 | The base and offset fields will have been filled in the |
5662 | analyze_set_insn_for_autopref call above. */ |
5663 | data->status = AUTOPREF_MULTIPASS_DATA_NORMAL; |
5664 | } |
5665 | |
5666 | /* Helper function for rank_for_schedule sorting. */ |
5667 | static int |
5668 | autopref_rank_for_schedule (const rtx_insn *insn1, const rtx_insn *insn2) |
5669 | { |
5670 | int r = 0; |
5671 | for (int write = 0; write < 2 && !r; ++write) |
5672 | { |
5673 | autopref_multipass_data_t data1 |
5674 | = &INSN_AUTOPREF_MULTIPASS_DATA (insn1)[write]; |
5675 | autopref_multipass_data_t data2 |
5676 | = &INSN_AUTOPREF_MULTIPASS_DATA (insn2)[write]; |
5677 | |
5678 | if (data1->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED) |
5679 | autopref_multipass_init (insn1, write); |
5680 | |
5681 | if (data2->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED) |
5682 | autopref_multipass_init (insn2, write); |
5683 | |
5684 | int irrel1 = data1->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT; |
5685 | int irrel2 = data2->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT; |
5686 | |
5687 | if (!irrel1 && !irrel2) |
5688 | /* Sort memory references from lowest offset to the largest. */ |
5689 | r = (data1->offset > data2->offset) - (data1->offset < data2->offset); |
5690 | else if (write) |
5691 | /* Schedule "irrelevant" insns before memory stores to resolve |
5692 | as many producer dependencies of stores as possible. */ |
5693 | r = irrel2 - irrel1; |
5694 | else |
5695 | /* Schedule "irrelevant" insns after memory reads to avoid breaking |
5696 | memory read sequences. */ |
5697 | r = irrel1 - irrel2; |
5698 | } |
5699 | |
5700 | return r; |
5701 | } |
5702 | |
5703 | /* True if header of debug dump was printed. */ |
5704 | static bool autopref_multipass_dfa_lookahead_guard_started_dump_p; |
5705 | |
5706 | /* Helper for autopref_multipass_dfa_lookahead_guard. |
5707 | Return "1" if INSN1 should be delayed in favor of INSN2. */ |
5708 | static int |
5709 | autopref_multipass_dfa_lookahead_guard_1 (const rtx_insn *insn1, |
5710 | const rtx_insn *insn2, int write) |
5711 | { |
5712 | autopref_multipass_data_t data1 |
5713 | = &INSN_AUTOPREF_MULTIPASS_DATA (insn1)[write]; |
5714 | autopref_multipass_data_t data2 |
5715 | = &INSN_AUTOPREF_MULTIPASS_DATA (insn2)[write]; |
5716 | |
5717 | if (data2->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED) |
5718 | autopref_multipass_init (insn2, write); |
5719 | if (data2->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT) |
5720 | return 0; |
5721 | |
5722 | if (rtx_equal_p (data1->base, data2->base) |
5723 | && data1->offset > data2->offset) |
5724 | { |
5725 | if (sched_verbose >= 2) |
5726 | { |
5727 | if (!autopref_multipass_dfa_lookahead_guard_started_dump_p) |
5728 | { |
5729 | fprintf (sched_dump, |
5730 | ";;\t\tnot trying in max_issue due to autoprefetch " |
5731 | "model: "); |
5732 | autopref_multipass_dfa_lookahead_guard_started_dump_p = true; |
5733 | } |
5734 | |
5735 | fprintf (sched_dump, " %d(%d)", INSN_UID (insn1), INSN_UID (insn2)); |
5736 | } |
5737 | |
5738 | return 1; |
5739 | } |
5740 | |
5741 | return 0; |
5742 | } |
5743 | |
5744 | /* General note: |
5745 | |
5746 | We could have also hooked autoprefetcher model into |
5747 | first_cycle_multipass_backtrack / first_cycle_multipass_issue hooks |
5748 | to enable intelligent selection of "[r1+0]=r2; [r1+4]=r3" on the same cycle |
5749 | (e.g., once "[r1+0]=r2" is issued in max_issue(), "[r1+4]=r3" gets |
5750 | unblocked).  We don't bother about this yet because the target of |
5751 | interest (ARM Cortex-A15) can issue only 1 memory operation per cycle. */ |
5752 | |
5753 | /* Implementation of first_cycle_multipass_dfa_lookahead_guard hook. |
5754 | Return "1" if INSN1 should not be considered in max_issue due to |
5755 | auto-prefetcher considerations. */ |
5756 | int |
5757 | autopref_multipass_dfa_lookahead_guard (rtx_insn *insn1, int ready_index) |
5758 | { |
5759 | int r = 0; |
5760 | |
5761 | /* Exit early if the param forbids this or if we're not entering here through |
5762 | normal haifa scheduling. This can happen if selective scheduling is |
5763 | explicitly enabled. */ |
5764 | if (!insn_queue || param_sched_autopref_queue_depth <= 0) |
5765 | return 0; |
5766 | |
5767 | if (sched_verbose >= 2 && ready_index == 0) |
5768 | autopref_multipass_dfa_lookahead_guard_started_dump_p = false; |
5769 | |
5770 | for (int write = 0; write < 2; ++write) |
5771 | { |
5772 | autopref_multipass_data_t data1 |
5773 | = &INSN_AUTOPREF_MULTIPASS_DATA (insn1)[write]; |
5774 | |
5775 | if (data1->status == AUTOPREF_MULTIPASS_DATA_UNINITIALIZED) |
5776 | autopref_multipass_init (insn1, write); |
5777 | if (data1->status == AUTOPREF_MULTIPASS_DATA_IRRELEVANT) |
5778 | continue; |
5779 | |
5780 | if (ready_index == 0 |
5781 | && data1->status == AUTOPREF_MULTIPASS_DATA_DONT_DELAY) |
5782 | /* We allow only a single delay on privileged instructions. |
5783 | Doing otherwise would cause an infinite loop.  */ |
5784 | { |
5785 | if (sched_verbose >= 2) |
5786 | { |
5787 | if (!autopref_multipass_dfa_lookahead_guard_started_dump_p) |
5788 | { |
5789 | fprintf (sched_dump, |
5790 | ";;\t\tnot trying in max_issue due to autoprefetch " |
5791 | "model: "); |
5792 | autopref_multipass_dfa_lookahead_guard_started_dump_p = true; |
5793 | } |
5794 | |
5795 | fprintf (sched_dump, " *%d*", INSN_UID (insn1)); |
5796 | } |
5797 | continue; |
5798 | } |
5799 | |
5800 | for (int i2 = 0; i2 < ready.n_ready; ++i2) |
5801 | { |
5802 | rtx_insn *insn2 = get_ready_element (i2); |
5803 | if (insn1 == insn2) |
5804 | continue; |
5805 | r = autopref_multipass_dfa_lookahead_guard_1 (insn1, insn2, write); |
5806 | if (r) |
5807 | { |
5808 | if (ready_index == 0) |
5809 | { |
5810 | r = -1; |
5811 | data1->status = AUTOPREF_MULTIPASS_DATA_DONT_DELAY; |
5812 | } |
5813 | goto finish; |
5814 | } |
5815 | } |
5816 | |
5817 | if (param_sched_autopref_queue_depth == 1) |
5818 | continue; |
5819 | |
5820 | /* Everything from the current queue slot should have been moved to |
5821 | the ready list. */ |
5822 | gcc_assert (insn_queue[NEXT_Q_AFTER (q_ptr, 0)] == NULL_RTX); |
5823 | |
5824 | int n_stalls = param_sched_autopref_queue_depth - 1; |
5825 | if (n_stalls > max_insn_queue_index) |
5826 | n_stalls = max_insn_queue_index; |
5827 | |
5828 | for (int stalls = 1; stalls <= n_stalls; ++stalls) |
5829 | { |
5830 | for (rtx_insn_list *link = insn_queue[NEXT_Q_AFTER (q_ptr, stalls)]; |
5831 | link != NULL_RTX; |
5832 | link = link->next ()) |
5833 | { |
5834 | rtx_insn *insn2 = link->insn (); |
5835 | r = autopref_multipass_dfa_lookahead_guard_1 (insn1, insn2, |
5836 | write); |
5837 | if (r) |
5838 | { |
5839 | /* Queue INSN1 until INSN2 can issue. */ |
5840 | r = -stalls; |
5841 | if (ready_index == 0) |
5842 | data1->status = AUTOPREF_MULTIPASS_DATA_DONT_DELAY; |
5843 | goto finish; |
5844 | } |
5845 | } |
5846 | } |
5847 | } |
5848 | |
5849 | finish: |
5850 | if (sched_verbose >= 2 |
5851 | && autopref_multipass_dfa_lookahead_guard_started_dump_p |
5852 | && (ready_index == ready.n_ready - 1 || r < 0)) |
5853 | /* This does not /always/ trigger. We don't output EOL if the last |
5854 | insn is not recognized (INSN_CODE < 0) and lookahead_guard is not |
5855 | called. We can live with this. */ |
5856 | fprintf (sched_dump, "\n"); |
5857 | |
5858 | return r; |
5859 | } |
5860 | |
5861 | /* Define type for target data used in multipass scheduling. */ |
5862 | #ifndef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DATA_T |
5863 | # define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DATA_T int |
5864 | #endif |
5865 | typedef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DATA_T first_cycle_multipass_data_t; |
5866 | |
5867 | /* The following structure describes an entry of the stack of choices. */ |
5868 | struct choice_entry |
5869 | { |
5870 | /* Ordinal number of the issued insn in the ready queue. */ |
5871 | int index; |
5872 | /* The number of remaining insns whose issue we should still try.  */ |
5873 | int rest; |
5874 | /* The number of issued essential insns. */ |
5875 | int n; |
5876 | /* State after issuing the insn. */ |
5877 | state_t state; |
5878 | /* Target-specific data. */ |
5879 | first_cycle_multipass_data_t target_data; |
5880 | }; |
5881 | |
5882 | /* The following array is used to implement a stack of choices used in |
5883 | function max_issue. */ |
5884 | static struct choice_entry *choice_stack; |
5885 | |
5886 | /* This holds the value of the target dfa_lookahead hook. */ |
5887 | int dfa_lookahead; |
5888 | |
5889 | /* The following variable holds the maximal number of tries of issuing |
5890 | insns for the first cycle multipass insn scheduling.  We define |
5891 | this value as constant*(DFA_LOOKAHEAD**ISSUE_RATE).  We would not |
5892 | need this constraint if all real insns (with non-negative codes) |
5893 | had reservations because in this case the algorithm complexity is |
5894 | O(DFA_LOOKAHEAD**ISSUE_RATE).  Unfortunately, the dfa descriptions |
5895 | might be incomplete and such an insn might occur.  For such |
5896 | descriptions, the complexity of the algorithm (without the constraint) |
5897 | could reach DFA_LOOKAHEAD ** N, where N is the queue length.  */ |
5898 | static int max_lookahead_tries; |
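/* E.g. (made-up parameters) with dfa_lookahead == 4 and issue_rate == 2
   the search in max_issue gives up after 100 * 4 * 4 == 1600 tries.  */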
5899 | |
5900 | /* The following function returns the maximal (or close to maximal) |
5901 | number of insns which can be issued on the same cycle, one of which |
5902 | is the insn with the best rank (the first insn in READY).  To do |
5903 | this, the function tries different samples of ready insns.  READY |
5904 | is the current queue `ready'.  The global array READY_TRY reflects |
5905 | which insns are already issued in this try.  The function stops |
5906 | immediately once it finds a solution in which all insns can be issued. |
5907 | INDEX will contain the index of the best insn in READY.  The following |
5908 | function is used only for first cycle multipass scheduling. |
5909 | |
5910 | PRIVILEGED_N >= 0 |
5911 | |
5912 | This function expects recognized insns only. All USEs, |
5913 | CLOBBERs, etc must be filtered elsewhere. */ |
5914 | int |
5915 | max_issue (struct ready_list *ready, int privileged_n, state_t state, |
5916 | bool first_cycle_insn_p, int *index) |
5917 | { |
5918 | int n, i, all, n_ready, best, delay, tries_num; |
5919 | int more_issue; |
5920 | struct choice_entry *top; |
5921 | rtx_insn *insn; |
5922 | |
5923 | if (sched_fusion) |
5924 | return 0; |
5925 | |
5926 | n_ready = ready->n_ready; |
5927 | gcc_assert (dfa_lookahead >= 1 && privileged_n >= 0 |
5928 | && privileged_n <= n_ready); |
5929 | |
5930 | /* Init MAX_LOOKAHEAD_TRIES. */ |
5931 | if (max_lookahead_tries == 0) |
5932 | { |
5933 | max_lookahead_tries = 100; |
5934 | for (i = 0; i < issue_rate; i++) |
5935 | max_lookahead_tries *= dfa_lookahead; |
5936 | } |
5937 | |
5938 | /* Init MORE_ISSUE. */ |
5939 | more_issue = issue_rate - cycle_issued_insns; |
5940 | gcc_assert (more_issue >= 0); |
5941 | |
5942 | /* The number of the issued insns in the best solution. */ |
5943 | best = 0; |
5944 | |
5945 | top = choice_stack; |
5946 | |
5947 | /* Set initial state of the search. */ |
5948 | memcpy (top->state, state, dfa_state_size); |
5949 | top->rest = dfa_lookahead; |
5950 | top->n = 0; |
5951 | if (targetm.sched.first_cycle_multipass_begin) |
5952 | targetm.sched.first_cycle_multipass_begin (&top->target_data, |
5953 | ready_try, n_ready, |
5954 | first_cycle_insn_p); |
5955 | |
5956 | /* Count the number of the insns to search among. */ |
5957 | for (all = i = 0; i < n_ready; i++) |
5958 | if (!ready_try [i]) |
5959 | all++; |
5960 | |
5961 | if (sched_verbose >= 2) |
5962 | { |
5963 | fprintf (sched_dump, ";;\t\tmax_issue among %d insns:", all); |
5964 | debug_ready_list_1 (ready, ready_try); |
5965 | } |
5966 | |
5967 | /* I is the index of the insn to try next. */ |
5968 | i = 0; |
5969 | tries_num = 0; |
5970 | for (;;) |
5971 | { |
5972 | if (/* If we've reached a dead end or searched enough of what we have |
5973 | been asked... */ |
5974 | top->rest == 0 |
5975 | /* or have nothing else to try... */ |
5976 | || i >= n_ready |
5977 | /* or should not issue more. */ |
5978 | || top->n >= more_issue) |
5979 | { |
5980 | /* ??? (... || i == n_ready). */ |
5981 | gcc_assert (i <= n_ready); |
5982 | |
5983 | /* We should not issue more than issue_rate instructions. */ |
5984 | gcc_assert (top->n <= more_issue); |
5985 | |
5986 | if (top == choice_stack) |
5987 | break; |
5988 | |
5989 | if (best < top - choice_stack) |
5990 | { |
5991 | if (privileged_n) |
5992 | { |
5993 | n = privileged_n; |
5994 | /* Try to find issued privileged insn. */ |
5995 | while (n && !ready_try[--n]) |
5996 | ; |
5997 | } |
5998 | |
5999 | if (/* If all insns are equally good... */ |
6000 | privileged_n == 0 |
6001 | /* Or a privileged insn will be issued. */ |
6002 | || ready_try[n]) |
6003 | /* Then we have a solution. */ |
6004 | { |
6005 | best = top - choice_stack; |
6006 | /* This is the index of the insn issued first in this |
6007 | solution. */ |
6008 | *index = choice_stack [1].index; |
6009 | if (top->n == more_issue || best == all) |
6010 | break; |
6011 | } |
6012 | } |
6013 | |
6014 | /* Set ready-list index to point to the last insn |
6015 | ('i++' below will advance it to the next insn). */ |
6016 | i = top->index; |
6017 | |
6018 | /* Backtrack. */ |
6019 | ready_try [i] = 0; |
6020 | |
6021 | if (targetm.sched.first_cycle_multipass_backtrack) |
6022 | targetm.sched.first_cycle_multipass_backtrack (&top->target_data, |
6023 | ready_try, n_ready); |
6024 | |
6025 | top--; |
6026 | memcpy (state, top->state, dfa_state_size); |
6027 | } |
6028 | else if (!ready_try [i]) |
6029 | { |
6030 | tries_num++; |
6031 | if (tries_num > max_lookahead_tries) |
6032 | break; |
6033 | insn = ready_element (ready, i); |
6034 | delay = state_transition (state, insn); |
6035 | if (delay < 0) |
6036 | { |
6037 | if (state_dead_lock_p (state) |
6038 | || insn_finishes_cycle_p (insn)) |
6039 | /* We won't issue any more instructions in the next |
6040 | choice_state. */ |
6041 | top->rest = 0; |
6042 | else |
6043 | top->rest--; |
6044 | |
6045 | n = top->n; |
6046 | if (memcmp (top->state, state, dfa_state_size) != 0) |
6047 | n++; |
6048 | |
6049 | /* Advance to the next choice_entry. */ |
6050 | top++; |
6051 | /* Initialize it. */ |
6052 | top->rest = dfa_lookahead; |
6053 | top->index = i; |
6054 | top->n = n; |
6055 | memcpy (top->state, state, dfa_state_size); |
6056 | ready_try [i] = 1; |
6057 | |
6058 | if (targetm.sched.first_cycle_multipass_issue) |
6059 | targetm.sched.first_cycle_multipass_issue (&top->target_data, |
6060 | ready_try, n_ready, |
6061 | insn, |
6062 | &((top - 1) |
6063 | ->target_data)); |
6064 | |
6065 | i = -1; |
6066 | } |
6067 | } |
6068 | |
6069 | /* Increase ready-list index. */ |
6070 | i++; |
6071 | } |
6072 | |
6073 | if (targetm.sched.first_cycle_multipass_end) |
6074 | targetm.sched.first_cycle_multipass_end (best != 0 |
6075 | ? &choice_stack[1].target_data |
6076 | : NULL); |
6077 | |
6078 | /* Restore the original state of the DFA. */ |
6079 | memcpy (state, choice_stack->state, dfa_state_size); |
6080 | |
6081 | return best; |
6082 | } |
6083 | |
6084 | /* The following function chooses an insn from READY and modifies |
6085 | READY.  It is used only for first cycle multipass |
6086 | scheduling. |
6087 | Return: |
6088 | -1 if cycle should be advanced, |
6089 | 0 if INSN_PTR is set to point to the desirable insn, |
6090 | 1 if choose_ready () should be restarted without advancing the cycle. */ |
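/* For example, the return value 1 arises when the target's lookahead
   guard asks for an insn to be requeued (ready_try[i] < 0 below): the
   ready list has changed under us, so the caller must restart
   choose_ready within the same cycle.  */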
6091 | static int |
6092 | choose_ready (struct ready_list *ready, bool first_cycle_insn_p, |
6093 | rtx_insn **insn_ptr) |
6094 | { |
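     |   /* When the sched_insn debug counter is exhausted, schedule insns in |
     |      their original program order instead of by priority; this is used |
     |      to bisect scheduler decisions when debugging.  */ |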
6095 |   if (dbg_cnt (sched_insn) == false) |
6096 | { |
6097 | if (nonscheduled_insns_begin == NULL_RTX) |
6098 | nonscheduled_insns_begin = current_sched_info->prev_head; |
6099 | |
6100 | rtx_insn *insn = first_nonscheduled_insn (); |
6101 | |
6102 | if (QUEUE_INDEX (insn) == QUEUE_READY) |
6103 | /* INSN is in the ready_list. */ |
6104 | { |
6105 | ready_remove_insn (insn); |
6106 | *insn_ptr = insn; |
6107 | return 0; |
6108 | } |
6109 | |
6110 | /* INSN is in the queue. Advance cycle to move it to the ready list. */ |
6111 | gcc_assert (QUEUE_INDEX (insn) >= 0); |
6112 | return -1; |
6113 | } |
6114 | |
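     |   /* Fast path: without lookahead, or when the first ready insn must |
     |      stay first (a SCHED_GROUP_P insn or a debug insn), there is no |
     |      choice to make - just take the head of the ready list.  */ |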
6115 | if (dfa_lookahead <= 0 || SCHED_GROUP_P (ready_element (ready, 0)) |
6116 | || DEBUG_INSN_P (ready_element (ready, 0))) |
6117 | { |
6118 | if (targetm.sched.dispatch (NULL, IS_DISPATCH_ON)) |
6119 | *insn_ptr = ready_remove_first_dispatch (ready); |
6120 | else |
6121 | *insn_ptr = ready_remove_first (ready); |
6122 | |
6123 | return 0; |
6124 | } |
6125 | else |
6126 | { |
6127 | /* Try to choose the best insn. */ |
6128 | int index = 0, i; |
6129 | rtx_insn *insn; |
6130 | |
6131 |       insn = ready_element (ready, 0); |
6132 | if (INSN_CODE (insn) < 0) |
6133 | { |
6134 | *insn_ptr = ready_remove_first (ready); |
6135 | return 0; |
6136 | } |
6137 | |
6138 | /* Filter the search space. */ |
6139 | for (i = 0; i < ready->n_ready; i++) |
6140 | { |
6141 | ready_try[i] = 0; |
6142 | |
6143 | 	  insn = ready_element (ready, i); |
6144 | |
6145 | /* If this insn is recognizable we should have already |
6146 | recognized it earlier. |
6147 | ??? Not very clear where this is supposed to be done. |
6148 | See dep_cost_1. */ |
6149 | gcc_checking_assert (INSN_CODE (insn) >= 0 |
6150 | || recog_memoized (insn) < 0); |
6151 | if (INSN_CODE (insn) < 0) |
6152 | { |
6153 | /* Non-recognized insns at position 0 are handled above. */ |
6154 | gcc_assert (i > 0); |
6155 | ready_try[i] = 1; |
6156 | continue; |
6157 | } |
6158 | |
6159 | if (targetm.sched.first_cycle_multipass_dfa_lookahead_guard) |
6160 | { |
6161 | ready_try[i] |
6162 | = (targetm.sched.first_cycle_multipass_dfa_lookahead_guard |
6163 | (insn, i)); |
6164 | |
6165 | if (ready_try[i] < 0) |
6166 | /* Queue instruction for several cycles. |
6167 | We need to restart choose_ready as we have changed |
6168 | the ready list. */ |
6169 | { |
6170 | change_queue_index (insn, -ready_try[i]); |
6171 | return 1; |
6172 | } |
6173 | |
6174 | /* Make sure that we didn't end up with 0'th insn filtered out. |
6175 | Don't be tempted to make life easier for backends and just |
6176 | requeue 0'th insn if (ready_try[0] == 0) and restart |
6177 | choose_ready. Backends should be very considerate about |
6178 | requeueing instructions -- especially the highest priority |
6179 | one at position 0. */ |
6180 | gcc_assert (ready_try[i] == 0 || i > 0); |
6181 | if (ready_try[i]) |
6182 | continue; |
6183 | } |
6184 | |
6185 | gcc_assert (ready_try[i] == 0); |
6186 | /* INSN made it through the scrutiny of filters! */ |
6187 | } |
6188 | |
6189 |       if (max_issue (ready, 1, curr_state, first_cycle_insn_p, &index) == 0) |
6190 | { |
6191 | *insn_ptr = ready_remove_first (ready); |
6192 | if (sched_verbose >= 4) |
6193 | 	    fprintf (sched_dump, ";;\t\tChosen insn (but can't issue) : %s \n", |
6194 | 		     (*current_sched_info->print_insn) (*insn_ptr, 0)); |
6195 | return 0; |
6196 | } |
6197 | else |
6198 | { |
6199 | if (sched_verbose >= 4) |
6200 | 	    fprintf (sched_dump, ";;\t\tChosen insn : %s\n", |
6201 | 		     (*current_sched_info->print_insn) |
6202 | 		     (ready_element (ready, index), 0)); |
6203 | |
6204 | *insn_ptr = ready_remove (ready, index); |
6205 | return 0; |
6206 | } |
6207 | } |
6208 | } |
6209 | |
6210 | /* This function is called when we have successfully scheduled a |
6211 | block. It uses the schedule stored in the scheduled_insns vector |
6212 | to rearrange the RTL. PREV_HEAD is used as the anchor to which we |
6213 | append the scheduled insns; TAIL is the insn after the scheduled |
6214 | block. TARGET_BB is the argument passed to schedule_block. */ |
6215 | |
6216 | static void |
6217 | commit_schedule (rtx_insn *prev_head, rtx_insn *tail, basic_block *target_bb) |
6218 | { |
6219 | unsigned int i; |
6220 | rtx_insn *insn; |
6221 | |
6222 | last_scheduled_insn = prev_head; |
6223 | for (i = 0; |
6224 |        scheduled_insns.iterate (i, &insn); |
6225 | i++) |
6226 | { |
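     |       /* If we crossed a control-flow insn or the current block's |
     | 	 boundary, advance TARGET_BB and continue placing insns after |
     | 	 the new block's note.  */ |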
6227 | if (control_flow_insn_p (last_scheduled_insn) |
6228 | || current_sched_info->advance_target_bb (*target_bb, insn)) |
6229 | { |
6230 | *target_bb = current_sched_info->advance_target_bb (*target_bb, 0); |
6231 | |
6232 | if (sched_verbose) |
6233 | { |
6234 | rtx_insn *x; |
6235 | |
6236 | x = next_real_insn (last_scheduled_insn); |
6237 | gcc_assert (x); |
6238 | dump_new_block_header (1, *target_bb, x, tail); |
6239 | } |
6240 | |
6241 | last_scheduled_insn = bb_note (*target_bb); |
6242 | } |
6243 | |
6244 | if (current_sched_info->begin_move_insn) |
6245 | (*current_sched_info->begin_move_insn) (insn, last_scheduled_insn); |
6246 |       move_insn (insn, last_scheduled_insn, |
6247 | 		 current_sched_info->next_tail); |
6248 | if (!DEBUG_INSN_P (insn)) |
6249 | reemit_notes (insn); |
6250 | last_scheduled_insn = insn; |
6251 | } |
6252 | |
6253 |   scheduled_insns.truncate (0); |
6254 | } |
6255 | |
6256 | /* Examine all insns on the ready list and queue those which can't be |
6257 | issued in this cycle. TEMP_STATE is temporary scheduler state we |
6258 | can use as scratch space. If FIRST_CYCLE_INSN_P is true, no insns |
6259 | have been issued for the current cycle, which means it is valid to |
6260 | issue an asm statement. |
6261 | |
6262 | If SHADOWS_ONLY_P is true, we eliminate all real insns and only |
6263 | leave those for which SHADOW_P is true. If MODULO_EPILOGUE is true, |
6264 | we only leave insns which have an INSN_EXACT_TICK. */ |
6265 | |
6266 | static void |
6267 | prune_ready_list (state_t temp_state, bool first_cycle_insn_p, |
6268 | bool shadows_only_p, bool modulo_epilogue_p) |
6269 | { |
6270 | int i, pass; |
6271 | bool sched_group_found = false; |
6272 | int min_cost_group = 0; |
6273 | |
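     |   /* Fusion scheduling keeps the ready list in its own fusion-priority |
     |      order; pruning here would disturb it.  */ |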
6274 | if (sched_fusion) |
6275 | return; |
6276 | |
6277 | for (i = 0; i < ready.n_ready; i++) |
6278 | { |
6279 |       rtx_insn *insn = ready_element (&ready, i); |
6280 | if (SCHED_GROUP_P (insn)) |
6281 | { |
6282 | sched_group_found = true; |
6283 | break; |
6284 | } |
6285 | } |
6286 | |
6287 | /* Make two passes if there's a SCHED_GROUP_P insn; make sure to handle |
6288 | such an insn first and note its cost. If at least one SCHED_GROUP_P insn |
6289 | gets queued, then all other insns get queued for one cycle later. */ |
6290 | for (pass = sched_group_found ? 0 : 1; pass < 2; ) |
6291 | { |
6292 | int n = ready.n_ready; |
6293 | for (i = 0; i < n; i++) |
6294 | { |
6295 | 	  rtx_insn *insn = ready_element (&ready, i); |
6296 | 	  int cost = 0; |
6297 | 	  const char *reason = "resource conflict"; |
6298 | |
6299 | if (DEBUG_INSN_P (insn)) |
6300 | continue; |
6301 | |
6302 | if (sched_group_found && !SCHED_GROUP_P (insn) |
6303 | && ((pass == 0) || (min_cost_group >= 1))) |
6304 | { |
6305 | if (pass == 0) |
6306 | continue; |
6307 | cost = min_cost_group; |
6308 | 	      reason = "not in sched group"; |
6309 | } |
6310 | else if (modulo_epilogue_p |
6311 | && INSN_EXACT_TICK (insn) == INVALID_TICK) |
6312 | { |
6313 | cost = max_insn_queue_index; |
6314 | 	      reason = "not an epilogue insn"; |
6315 | } |
6316 | else if (shadows_only_p && !SHADOW_P (insn)) |
6317 | { |
6318 | cost = 1; |
6319 | 	      reason = "not a shadow"; |
6320 | } |
6321 | else if (recog_memoized (insn) < 0) |
6322 | { |
6323 | if (!first_cycle_insn_p |
6324 | && (GET_CODE (PATTERN (insn)) == ASM_INPUT |
6325 | || asm_noperands (PATTERN (insn)) >= 0)) |
6326 | cost = 1; |
6327 | 	      reason = "asm"; |
6328 | } |
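     | 	  /* When scheduling for register pressure, never queue an insn on a |
     | 	     resource conflict; the model variant only pushes the tick of an |
     | 	     insn the DFA cannot issue yet to the next cycle.  */ |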
6329 | else if (sched_pressure != SCHED_PRESSURE_NONE) |
6330 | { |
6331 | if (sched_pressure == SCHED_PRESSURE_MODEL |
6332 | && INSN_TICK (insn) <= clock_var) |
6333 | { |
6334 | 		  memcpy (temp_state, curr_state, dfa_state_size); |
6335 | if (state_transition (temp_state, insn) >= 0) |
6336 | INSN_TICK (insn) = clock_var + 1; |
6337 | } |
6338 | cost = 0; |
6339 | } |
6340 | else |
6341 | { |
6342 | int delay_cost = 0; |
6343 | |
6344 | if (delay_htab) |
6345 | { |
6346 | struct delay_pair *delay_entry; |
6347 | delay_entry |
6348 | 		    = delay_htab->find_with_hash (insn, |
6349 | 						  htab_hash_pointer (insn)); |
6350 | while (delay_entry && delay_cost == 0) |
6351 | { |
6352 | 		      delay_cost = estimate_shadow_tick (delay_entry); |
6353 | if (delay_cost > max_insn_queue_index) |
6354 | delay_cost = max_insn_queue_index; |
6355 | delay_entry = delay_entry->next_same_i1; |
6356 | } |
6357 | } |
6358 | |
6359 | 	      memcpy (temp_state, curr_state, dfa_state_size); |
6360 | cost = state_transition (temp_state, insn); |
6361 | if (cost < 0) |
6362 | cost = 0; |
6363 | else if (cost == 0) |
6364 | cost = 1; |
6365 | if (cost < delay_cost) |
6366 | { |
6367 | cost = delay_cost; |
6368 | 		  reason = "shadow tick"; |
6369 | } |
6370 | } |
6371 | if (cost >= 1) |
6372 | { |
6373 | if (SCHED_GROUP_P (insn) && cost > min_cost_group) |
6374 | min_cost_group = cost; |
6375 | 	      ready_remove (&ready, i); |
6376 | /* Normally we'd want to queue INSN for COST cycles. However, |
6377 | if SCHED_GROUP_P is set, then we must ensure that nothing |
6378 | else comes between INSN and its predecessor. If there is |
6379 | some other insn ready to fire on the next cycle, then that |
6380 | invariant would be broken. |
6381 | |
6382 | So when SCHED_GROUP_P is set, just queue this insn for a |
6383 | single cycle. */ |
6384 | queue_insn (insn, SCHED_GROUP_P (insn) ? 1 : cost, reason); |
6385 | if (i + 1 < n) |
6386 | break; |
6387 | } |
6388 | } |
6389 | if (i == n) |
6390 | pass++; |
6391 | } |
6392 | } |
6393 | |
6394 | /* Called when we detect that the schedule is impossible. We examine the |
6395 | backtrack queue to find the earliest insn that caused this condition. */ |
6396 | |
6397 | static struct haifa_saved_data * |
6398 | verify_shadows (void) |
6399 | { |
6400 | struct haifa_saved_data *save, *earliest_fail = NULL; |
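     |   /* Walk all saved backtrack points; each records delay pairs whose |
     |      second insn (I2) must issue at least PAIR_DELAY cycles after the |
     |      first (I1).  Record the earliest point where that is violated.  */ |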
6401 | for (save = backtrack_queue; save; save = save->next) |
6402 | { |
6403 | int t; |
6404 | struct delay_pair *pair = save->delay_pair; |
6405 | rtx_insn *i1 = pair->i1; |
6406 | |
6407 | for (; pair; pair = pair->next_same_i1) |
6408 | { |
6409 | rtx_insn *i2 = pair->i2; |
6410 | |
6411 | if (QUEUE_INDEX (i2) == QUEUE_SCHEDULED) |
6412 | continue; |
6413 | |
6414 | 	  t = INSN_TICK (i1) + pair_delay (pair); |
6415 | if (t < clock_var) |
6416 | { |
6417 | if (sched_verbose >= 2) |
6418 | 		fprintf (sched_dump, |
6419 | 			 ";;\t\tfailed delay requirements for %d/%d (%d->%d)" |
6420 | 			 ", not ready\n", |
6421 | 			 INSN_UID (pair->i1), INSN_UID (pair->i2), |
6422 | INSN_TICK (pair->i1), INSN_EXACT_TICK (pair->i2)); |
6423 | earliest_fail = save; |
6424 | break; |
6425 | } |
6426 | if (QUEUE_INDEX (i2) >= 0) |
6427 | { |
6428 | int queued_for = INSN_TICK (i2); |
6429 | |
6430 | if (t < queued_for) |
6431 | { |
6432 | if (sched_verbose >= 2) |
6433 | 		    fprintf (sched_dump, |
6434 | 			     ";;\t\tfailed delay requirements for %d/%d" |
6435 | 			     " (%d->%d), queued too late\n", |
6436 | 			     INSN_UID (pair->i1), INSN_UID (pair->i2), |
6437 | INSN_TICK (pair->i1), INSN_EXACT_TICK (pair->i2)); |
6438 | earliest_fail = save; |
6439 | break; |
6440 | } |
6441 | } |
6442 | } |
6443 | } |
6444 | |
6445 | return earliest_fail; |
6446 | } |
6447 | |
6448 | /* Print instructions together with useful scheduling information between |
6449 | HEAD and TAIL (inclusive). */ |
6450 | static void |
6451 | dump_insn_stream (rtx_insn *head, rtx_insn *tail) |
6452 | { |
6453 |   fprintf (sched_dump, ";;\t| insn | prio |\n"); |
6454 | |
6455 |   rtx_insn *next_tail = NEXT_INSN (tail); |
6456 | for (rtx_insn *insn = head; insn != next_tail; insn = NEXT_INSN (insn)) |
6457 | { |
6458 | int priority = NOTE_P (insn) ? 0 : INSN_PRIORITY (insn); |
6459 | const char *pattern = (NOTE_P (insn) |
6460 | ? "note" |
6461 | : str_pattern_slim (PATTERN (insn))); |
6462 | |
6463 |       fprintf (sched_dump, ";;\t| %4d | %4d | %-30s ", |
6464 | INSN_UID (insn), priority, pattern); |
6465 | |
6466 | if (sched_verbose >= 4) |
6467 | { |
6468 | if (NOTE_P (insn) || LABEL_P (insn) || recog_memoized (insn) < 0) |
6469 | 	    fprintf (sched_dump, "nothing"); |
6470 | else |
6471 | print_reservation (sched_dump, insn); |
6472 | } |
6473 |       fprintf (sched_dump, "\n"); |
6474 | } |
6475 | } |
6476 | |
6477 | /* Use forward list scheduling to rearrange insns of block pointed to by |
6478 | TARGET_BB, possibly bringing insns from subsequent blocks in the same |
6479 | region. */ |
6480 | |
6481 | bool |
6482 | schedule_block (basic_block *target_bb, state_t init_state) |
6483 | { |
6484 | int i; |
6485 | bool success = modulo_ii == 0; |
6486 | struct sched_block_state ls; |
6487 | state_t temp_state = NULL; /* It is used for multipass scheduling. */ |
6488 | int sort_p, advance, start_clock_var; |
6489 | |
6490 | /* Head/tail info for this block. */ |
6491 | rtx_insn *prev_head = current_sched_info->prev_head; |
6492 | rtx_insn *next_tail = current_sched_info->next_tail; |
6493 |   rtx_insn *head = NEXT_INSN (prev_head); |
6494 |   rtx_insn *tail = PREV_INSN (next_tail); |
6495 | |
6496 | if ((current_sched_info->flags & DONT_BREAK_DEPENDENCIES) == 0 |
6497 | && sched_pressure != SCHED_PRESSURE_MODEL && !sched_fusion) |
6498 | find_modifiable_mems (head, tail); |
6499 | |
6500 | /* We used to have code to avoid getting parameters moved from hard |
6501 | argument registers into pseudos. |
6502 | |
6503 | However, it was removed when it proved to be of marginal benefit |
6504 | and caused problems because schedule_block and compute_forward_dependences |
6505 | had different notions of what the "head" insn was. */ |
6506 | |
6507 | gcc_assert (head != tail || INSN_P (head)); |
6508 | |
6509 | haifa_recovery_bb_recently_added_p = false; |
6510 | |
6511 | backtrack_queue = NULL; |
6512 | |
6513 | /* Debug info. */ |
6514 | if (sched_verbose) |
6515 | { |
6516 | dump_new_block_header (0, *target_bb, head, tail); |
6517 | |
6518 | if (sched_verbose >= 2) |
6519 | { |
6520 | dump_insn_stream (head, tail); |
6521 | 	  memset (&rank_for_schedule_stats, 0, |
6522 | 		  sizeof (rank_for_schedule_stats)); |
6523 | } |
6524 | } |
6525 | |
6526 | if (init_state == NULL) |
6527 | state_reset (curr_state); |
6528 | else |
6529 |     memcpy (curr_state, init_state, dfa_state_size); |
6530 | |
6531 | /* Clear the ready list. */ |
6532 | ready.first = ready.veclen - 1; |
6533 | ready.n_ready = 0; |
6534 | ready.n_debug = 0; |
6535 | |
6536 | /* It is used for first cycle multipass scheduling. */ |
6537 | temp_state = alloca (dfa_state_size); |
6538 | |
6539 | if (targetm.sched.init) |
6540 | targetm.sched.init (sched_dump, sched_verbose, ready.veclen); |
6541 | |
6542 | /* We start inserting insns after PREV_HEAD. */ |
6543 | last_scheduled_insn = prev_head; |
6544 | last_nondebug_scheduled_insn = NULL; |
6545 | nonscheduled_insns_begin = NULL; |
6546 | |
6547 | gcc_assert ((NOTE_P (last_scheduled_insn) |
6548 | || DEBUG_INSN_P (last_scheduled_insn)) |
6549 | && BLOCK_FOR_INSN (last_scheduled_insn) == *target_bb); |
6550 | |
6551 | /* Initialize INSN_QUEUE. Q_SIZE is the total number of insns in the |
6552 | queue. */ |
6553 | q_ptr = 0; |
6554 | q_size = 0; |
6555 | |
6556 | insn_queue = XALLOCAVEC (rtx_insn_list *, max_insn_queue_index + 1); |
6557 |   memset (insn_queue, 0, (max_insn_queue_index + 1) * sizeof (rtx)); |
6558 | |
6559 | /* Start just before the beginning of time. */ |
6560 | clock_var = -1; |
6561 | |
6562 |   /* We need queue and ready lists and clock_var to be initialized |
6563 |      in try_ready () (which is called through init_ready_list ()).  */ |
6564 | (*current_sched_info->init_ready_list) (); |
6565 | |
6566 | if (sched_pressure) |
6567 |     sched_pressure_start_bb (*target_bb); |
6568 | |
6569 | /* The algorithm is O(n^2) in the number of ready insns at any given |
6570 | time in the worst case. Before reload we are more likely to have |
6571 | big lists so truncate them to a reasonable size. */ |
6572 | if (!reload_completed |
6573 | && ready.n_ready - ready.n_debug > param_max_sched_ready_insns) |
6574 | { |
6575 |       ready_sort_debug (&ready); |
6576 |       ready_sort_real (&ready); |
6577 | |
6578 | /* Find first free-standing insn past param_max_sched_ready_insns. |
6579 | If there are debug insns, we know they're first. */ |
6580 | for (i = param_max_sched_ready_insns + ready.n_debug; i < ready.n_ready; |
6581 | i++) |
6582 | if (!SCHED_GROUP_P (ready_element (&ready, i))) |
6583 | break; |
6584 | |
6585 | if (sched_verbose >= 2) |
6586 | { |
6587 | 	  fprintf (sched_dump, |
6588 | 		   ";;\t\tReady list on entry: %d insns: ", ready.n_ready); |
6589 | 	  debug_ready_list (&ready); |
6590 | 	  fprintf (sched_dump, |
6591 | 		   ";;\t\t before reload => truncated to %d insns\n", i); |
6592 | } |
6593 | |
6594 |       /* Delay all insns past it for 1 cycle.  If the debug counter is |
6595 | 	 activated, make an exception for the insn right after |
6596 | 	 nonscheduled_insns_begin.  */ |
6597 | { |
6598 | rtx_insn *skip_insn; |
6599 | |
6600 | 	if (dbg_cnt (sched_insn) == false) |
6601 | skip_insn = first_nonscheduled_insn (); |
6602 | else |
6603 | skip_insn = NULL; |
6604 | |
6605 | while (i < ready.n_ready) |
6606 | { |
6607 | rtx_insn *insn; |
6608 | |
6609 | 	    insn = ready_remove (&ready, i); |
6610 | |
6611 | if (insn != skip_insn) |
6612 | 	      queue_insn (insn, 1, "list truncated"); |
6613 | } |
6614 | if (skip_insn) |
6615 | 	  ready_add (&ready, skip_insn, true); |
6616 | } |
6617 | } |
6618 | |
6619 | /* Now we can restore basic block notes and maintain precise cfg. */ |
6620 | restore_bb_notes (*target_bb); |
6621 | |
6622 | last_clock_var = -1; |
6623 | |
6624 | advance = 0; |
6625 | |
6626 | gcc_assert (scheduled_insns.length () == 0); |
6627 | sort_p = true; |
6628 | must_backtrack = false; |
6629 | modulo_insns_scheduled = 0; |
6630 | |
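     |   /* Per-block scheduling state: whether we are at the first insn of a |
     |      cycle, whether only shadow insns may now issue, and whether we are |
     |      draining a modulo-schedule epilogue.  */ |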
6631 | ls.modulo_epilogue = false; |
6632 | ls.first_cycle_insn_p = true; |
6633 | |
6634 | /* Loop until all the insns in BB are scheduled. */ |
6635 | while ((*current_sched_info->schedule_more_p) ()) |
6636 | { |
6637 | perform_replacements_new_cycle (); |
6638 | do |
6639 | { |
6640 | start_clock_var = clock_var; |
6641 | |
6642 | clock_var++; |
6643 | |
6644 | advance_one_cycle (); |
6645 | |
6646 | /* Add to the ready list all pending insns that can be issued now. |
6647 | If there are no ready insns, increment clock until one |
6648 | is ready and add all pending insns at that point to the ready |
6649 | list. */ |
6650 | 	  queue_to_ready (&ready); |
6651 | |
6652 | gcc_assert (ready.n_ready); |
6653 | |
6654 | if (sched_verbose >= 2) |
6655 | { |
6656 | 	      fprintf (sched_dump, ";;\t\tReady list after queue_to_ready:"); |
6657 | 	      debug_ready_list (&ready); |
6658 | } |
6659 | advance -= clock_var - start_clock_var; |
6660 | } |
6661 | while (advance > 0); |
6662 | |
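     |       /* Modulo scheduling (SMS): once all the kernel's insns are |
     | 	 scheduled, switch to draining the epilogue; give up if too many |
     | 	 stages elapse without converging.  */ |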
6663 | if (ls.modulo_epilogue) |
6664 | { |
6665 | int stage = clock_var / modulo_ii; |
6666 | if (stage > modulo_last_stage * 2 + 2) |
6667 | { |
6668 | if (sched_verbose >= 2) |
6669 | 		fprintf (sched_dump, |
6670 | 			 ";;\t\tmodulo scheduled succeeded at II %d\n", |
6671 | 			 modulo_ii); |
6672 | success = true; |
6673 | goto end_schedule; |
6674 | } |
6675 | } |
6676 | else if (modulo_ii > 0) |
6677 | { |
6678 | int stage = clock_var / modulo_ii; |
6679 | if (stage > modulo_max_stages) |
6680 | { |
6681 | if (sched_verbose >= 2) |
6682 | 		fprintf (sched_dump, |
6683 | 			 ";;\t\tfailing schedule due to excessive stages\n"); |
6684 | goto end_schedule; |
6685 | } |
6686 | if (modulo_n_insns == modulo_insns_scheduled |
6687 | && stage > modulo_last_stage) |
6688 | { |
6689 | if (sched_verbose >= 2) |
6690 | 		fprintf (sched_dump, |
6691 | 			 ";;\t\tfound kernel after %d stages, II %d\n", |
6692 | 			 stage, modulo_ii); |
6693 | ls.modulo_epilogue = true; |
6694 | } |
6695 | } |
6696 | |
6697 |       prune_ready_list (temp_state, true, false, ls.modulo_epilogue); |
6698 | if (ready.n_ready == 0) |
6699 | continue; |
6700 | if (must_backtrack) |
6701 | goto do_backtrack; |
6702 | |
6703 | ls.shadows_only_p = false; |
6704 | cycle_issued_insns = 0; |
6705 | ls.can_issue_more = issue_rate; |
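     |       /* Issue loop: pick ready insns one at a time until the cycle is |
     | 	 exhausted, the DFA blocks, or the ready list runs dry.  */ |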
6706 | for (;;) |
6707 | { |
6708 | rtx_insn *insn; |
6709 | int cost; |
6710 | bool asm_p; |
6711 | |
6712 | if (sort_p && ready.n_ready > 0) |
6713 | { |
6714 | /* Sort the ready list based on priority. This must be |
6715 | done every iteration through the loop, as schedule_insn |
6716 | may have readied additional insns that will not be |
6717 | sorted correctly. */ |
6718 | 	      ready_sort (&ready); |
6719 | |
6720 | if (sched_verbose >= 2) |
6721 | { |
6722 | 		  fprintf (sched_dump, |
6723 | 			   ";;\t\tReady list after ready_sort: "); |
6724 | 		  debug_ready_list (&ready); |
6725 | } |
6726 | } |
6727 | |
6728 | 	  /* We don't want md sched reorder to even see debug insns, so put |
6729 | 	     them out right away.  */ |
6730 | if (ready.n_ready && DEBUG_INSN_P (ready_element (&ready, 0)) |
6731 | && (*current_sched_info->schedule_more_p) ()) |
6732 | { |
6733 | while (ready.n_ready && DEBUG_INSN_P (ready_element (&ready, 0))) |
6734 | { |
6735 | 		  rtx_insn *insn = ready_remove_first (&ready); |
6736 | gcc_assert (DEBUG_INSN_P (insn)); |
6737 | (*current_sched_info->begin_schedule_ready) (insn); |
6738 | 		  scheduled_insns.safe_push (insn); |
6739 | last_scheduled_insn = insn; |
6740 | advance = schedule_insn (insn); |
6741 | gcc_assert (advance == 0); |
6742 | if (ready.n_ready > 0) |
6743 | 		    ready_sort (&ready); |
6744 | } |
6745 | } |
6746 | |
6747 | if (ls.first_cycle_insn_p && !ready.n_ready) |
6748 | break; |
6749 | |
6750 | resume_after_backtrack: |
6751 | /* Allow the target to reorder the list, typically for |
6752 | better instruction bundling. */ |
6753 | if (sort_p |
6754 | && (ready.n_ready == 0 |
6755 | || !SCHED_GROUP_P (ready_element (&ready, 0)))) |
6756 | { |
6757 | if (ls.first_cycle_insn_p && targetm.sched.reorder) |
6758 | ls.can_issue_more |
6759 | = targetm.sched.reorder (sched_dump, sched_verbose, |
6760 | 				  ready_lastpos (&ready), |
6761 | &ready.n_ready, clock_var); |
6762 | else if (!ls.first_cycle_insn_p && targetm.sched.reorder2) |
6763 | ls.can_issue_more |
6764 | = targetm.sched.reorder2 (sched_dump, sched_verbose, |
6765 | ready.n_ready |
6766 | 				   ? ready_lastpos (&ready) : NULL, |
6767 | &ready.n_ready, clock_var); |
6768 | } |
6769 | |
6770 | restart_choose_ready: |
6771 | if (sched_verbose >= 2) |
6772 | { |
6773 | 	      fprintf (sched_dump, ";;\tReady list (t = %3d): ", |
6774 | clock_var); |
6775 | 	      debug_ready_list (&ready); |
6776 | if (sched_pressure == SCHED_PRESSURE_WEIGHTED) |
6777 | print_curr_reg_pressure (); |
6778 | } |
6779 | |
6780 | if (ready.n_ready == 0 |
6781 | && ls.can_issue_more |
6782 | && reload_completed) |
6783 | { |
6784 | /* Allow scheduling insns directly from the queue in case |
6785 | there's nothing better to do (ready list is empty) but |
6786 | there are still vacant dispatch slots in the current cycle. */ |
6787 | if (sched_verbose >= 6) |
6788 | 		fprintf (sched_dump, ";;\t\tSecond chance\n"); |
6789 | 	      memcpy (temp_state, curr_state, dfa_state_size); |
6790 | 	      if (early_queue_to_ready (temp_state, &ready)) |
6791 | 		ready_sort (&ready); |
6792 | } |
6793 | |
6794 | if (ready.n_ready == 0 |
6795 | || !ls.can_issue_more |
6796 | || state_dead_lock_p (curr_state) |
6797 | || !(*current_sched_info->schedule_more_p) ()) |
6798 | break; |
6799 | |
6800 | /* Select and remove the insn from the ready list. */ |
6801 | if (sort_p) |
6802 | { |
6803 | int res; |
6804 | |
6805 | insn = NULL; |
6806 | 	      res = choose_ready (&ready, ls.first_cycle_insn_p, &insn); |
6807 | |
6808 | if (res < 0) |
6809 | /* Finish cycle. */ |
6810 | break; |
6811 | if (res > 0) |
6812 | goto restart_choose_ready; |
6813 | |
6814 | gcc_assert (insn != NULL_RTX); |
6815 | } |
6816 | else |
6817 | 	    insn = ready_remove_first (&ready); |
6818 | |
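     | 	  /* With register-pressure scheduling, choose_ready can return an |
     | 	     insn whose tick lies in the future; put it back and stall.  */ |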
6819 | if (sched_pressure != SCHED_PRESSURE_NONE |
6820 | && INSN_TICK (insn) > clock_var) |
6821 | { |
6822 | 	      ready_add (&ready, insn, true); |
6823 | advance = 1; |
6824 | break; |
6825 | } |
6826 | |
6827 | if (targetm.sched.dfa_new_cycle |
6828 | && targetm.sched.dfa_new_cycle (sched_dump, sched_verbose, |
6829 | insn, last_clock_var, |
6830 | clock_var, &sort_p)) |
6831 | /* SORT_P is used by the target to override sorting |
6832 | of the ready list. This is needed when the target |
6833 | has modified its internal structures expecting that |
6834 | the insn will be issued next. As we need the insn |
6835 | to have the highest priority (so it will be returned by |
6836 | the ready_remove_first call above), we invoke |
6837 | ready_add (&ready, insn, true). |
6838 | But, still, there is one issue: INSN can be later |
6839 | discarded by scheduler's front end through |
6840 | current_sched_info->can_schedule_ready_p, hence, won't |
6841 | be issued next. */ |
6842 | { |
6843 | 	      ready_add (&ready, insn, true); |
6844 | break; |
6845 | } |
6846 | |
6847 | sort_p = true; |
6848 | |
6849 | if (current_sched_info->can_schedule_ready_p |
6850 | && ! (*current_sched_info->can_schedule_ready_p) (insn)) |
6851 | /* We normally get here only if we don't want to move |
6852 | insn from the split block. */ |
6853 | { |
6854 | TODO_SPEC (insn) = DEP_POSTPONED; |
6855 | goto restart_choose_ready; |
6856 | } |
6857 | |
6858 | if (delay_htab) |
6859 | { |
6860 | /* If this insn is the first part of a delay-slot pair, record a |
6861 | backtrack point. */ |
6862 | struct delay_pair *delay_entry; |
6863 | delay_entry |
6864 | 		= delay_htab->find_with_hash (insn, htab_hash_pointer (insn)); |
6865 | if (delay_entry) |
6866 | { |
6867 | 		  save_backtrack_point (delay_entry, ls); |
6868 | if (sched_verbose >= 2) |
6869 | 		    fprintf (sched_dump, ";;\t\tsaving backtrack point\n"); |
6870 | } |
6871 | } |
6872 | |
6873 | /* DECISION is made. */ |
6874 | |
6875 | if (modulo_ii > 0 && INSN_UID (insn) < modulo_iter0_max_uid) |
6876 | { |
6877 | modulo_insns_scheduled++; |
6878 | modulo_last_stage = clock_var / modulo_ii; |
6879 | } |
6880 | if (TODO_SPEC (insn) & SPECULATIVE) |
6881 | generate_recovery_code (insn); |
6882 | |
6883 | if (targetm.sched.dispatch (NULL, IS_DISPATCH_ON)) |
6884 | targetm.sched.dispatch_do (insn, ADD_TO_DISPATCH_WINDOW); |
6885 | |
6886 | /* Update counters, etc in the scheduler's front end. */ |
6887 | (*current_sched_info->begin_schedule_ready) (insn); |
6888 | 	  scheduled_insns.safe_push (insn); |
6889 | gcc_assert (NONDEBUG_INSN_P (insn)); |
6890 | last_nondebug_scheduled_insn = last_scheduled_insn = insn; |
6891 | |
6892 | if (recog_memoized (insn) >= 0) |
6893 | { |
6894 | 	      memcpy (temp_state, curr_state, dfa_state_size); |
6895 | cost = state_transition (curr_state, insn); |
6896 | if (sched_pressure != SCHED_PRESSURE_WEIGHTED && !sched_fusion) |
6897 | gcc_assert (cost < 0); |
6898 | 	      if (memcmp (temp_state, curr_state, dfa_state_size) != 0) |
6899 | cycle_issued_insns++; |
6900 | asm_p = false; |
6901 | } |
6902 | else |
6903 | asm_p = (GET_CODE (PATTERN (insn)) == ASM_INPUT |
6904 | || asm_noperands (PATTERN (insn)) >= 0); |
6905 | |
6906 | if (targetm.sched.variable_issue) |
6907 | ls.can_issue_more = |
6908 | targetm.sched.variable_issue (sched_dump, sched_verbose, |
6909 | insn, ls.can_issue_more); |
6910 | /* A naked CLOBBER or USE generates no instruction, so do |
6911 | not count them against the issue rate. */ |
6912 | else if (GET_CODE (PATTERN (insn)) != USE |
6913 | && GET_CODE (PATTERN (insn)) != CLOBBER) |
6914 | ls.can_issue_more--; |
6915 | advance = schedule_insn (insn); |
6916 | |
6917 | if (SHADOW_P (insn)) |
6918 | ls.shadows_only_p = true; |
6919 | |
6920 | /* After issuing an asm insn we should start a new cycle. */ |
6921 | if (advance == 0 && asm_p) |
6922 | advance = 1; |
6923 | |
6924 | if (must_backtrack) |
6925 | break; |
6926 | |
6927 | if (advance != 0) |
6928 | break; |
6929 | |
6930 | ls.first_cycle_insn_p = false; |
6931 | if (ready.n_ready > 0) |
6932 | 	    prune_ready_list (temp_state, false, ls.shadows_only_p, |
6933 | 			      ls.modulo_epilogue); |
6934 | } |
6935 | |
6936 | do_backtrack: |
6937 | if (!must_backtrack) |
6938 | for (i = 0; i < ready.n_ready; i++) |
6939 | { |
6940 | 	    rtx_insn *insn = ready_element (&ready, i); |
6941 | if (INSN_EXACT_TICK (insn) == clock_var) |
6942 | { |
6943 | must_backtrack = true; |
6944 | clock_var++; |
6945 | break; |
6946 | } |
6947 | } |
6948 | if (must_backtrack && modulo_ii > 0) |
6949 | { |
6950 | if (modulo_backtracks_left == 0) |
6951 | goto end_schedule; |
6952 | modulo_backtracks_left--; |
6953 | } |
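     |       /* Unwind: find the earliest failed delay pair, unschedule back to |
     | 	 its first insn, restore the saved state, and requeue the |
     | 	 offending insn with a one-cycle delay.  */ |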
6954 | while (must_backtrack) |
6955 | { |
6956 | struct haifa_saved_data *failed; |
6957 | rtx_insn *failed_insn; |
6958 | |
6959 | must_backtrack = false; |
6960 | failed = verify_shadows (); |
6961 | gcc_assert (failed); |
6962 | |
6963 | failed_insn = failed->delay_pair->i1; |
6964 | /* Clear these queues. */ |
6965 | perform_replacements_new_cycle (); |
6966 | 	  toggle_cancelled_flags (false); |
6967 | 	  unschedule_insns_until (failed_insn); |
6968 | while (failed != backtrack_queue) |
6969 | 	    free_topmost_backtrack_point (true); |
6970 | 	  restore_last_backtrack_point (&ls); |
6971 | if (sched_verbose >= 2) |
6972 | 	    fprintf (sched_dump, ";;\t\trewind to cycle %d\n", clock_var); |
6973 | /* Delay by at least a cycle. This could cause additional |
6974 | backtracking. */ |
6975 | 	  queue_insn (failed_insn, 1, "backtracked"); |
6976 | advance = 0; |
6977 | if (must_backtrack) |
6978 | continue; |
6979 | if (ready.n_ready > 0) |
6980 | goto resume_after_backtrack; |
6981 | else |
6982 | { |
6983 | if (clock_var == 0 && ls.first_cycle_insn_p) |
6984 | goto end_schedule; |
6985 | advance = 1; |
6986 | break; |
6987 | } |
6988 | } |
6989 | ls.first_cycle_insn_p = true; |
6990 | } |
6991 | if (ls.modulo_epilogue) |
6992 | success = true; |
6993 | end_schedule: |
6994 | if (!ls.first_cycle_insn_p || advance) |
6995 | advance_one_cycle (); |
6996 | perform_replacements_new_cycle (); |
6997 | if (modulo_ii > 0) |
6998 | { |
6999 | /* Once again, debug insn suckiness: they can be on the ready list |
7000 | even if they have unresolved dependencies. To make our view |
7001 | of the world consistent, remove such "ready" insns. */ |
7002 | restart_debug_insn_loop: |
7003 | for (i = ready.n_ready - 1; i >= 0; i--) |
7004 | { |
7005 | rtx_insn *x; |
7006 | |
7007 | 	  x = ready_element (&ready, i); |
7008 | if (DEPS_LIST_FIRST (INSN_HARD_BACK_DEPS (x)) != NULL |
7009 | || DEPS_LIST_FIRST (INSN_SPEC_BACK_DEPS (x)) != NULL) |
7010 | { |
7011 | 	      ready_remove (&ready, i); |
7012 | goto restart_debug_insn_loop; |
7013 | } |
7014 | } |
7015 | for (i = ready.n_ready - 1; i >= 0; i--) |
7016 | { |
7017 | rtx_insn *x; |
7018 | |
7019 | 	  x = ready_element (&ready, i); |
7020 | 	  resolve_dependencies (x); |
7021 | } |
7022 | for (i = 0; i <= max_insn_queue_index; i++) |
7023 | { |
7024 | rtx_insn_list *link; |
7025 | while ((link = insn_queue[i]) != NULL) |
7026 | { |
7027 | rtx_insn *x = link->insn (); |
7028 | insn_queue[i] = link->next (); |
7029 | QUEUE_INDEX (x) = QUEUE_NOWHERE; |
7030 | free_INSN_LIST_node (link); |
7031 | 	      resolve_dependencies (x); |
7032 | } |
7033 | } |
7034 | } |
7035 | |
7036 | if (!success) |
7037 | undo_all_replacements (); |
7038 | |
7039 | /* Debug info. */ |
7040 | if (sched_verbose) |
7041 | { |
7042 |       fprintf (sched_dump, ";;\tReady list (final): "); |
7043 |       debug_ready_list (&ready); |
7044 | } |
7045 | |
7046 | if (modulo_ii == 0 && current_sched_info->queue_must_finish_empty) |
7047 | /* Sanity check -- queue must be empty now. Meaningless if region has |
7048 | multiple bbs. */ |
7049 | gcc_assert (!q_size && !ready.n_ready && !ready.n_debug); |
7050 | else if (modulo_ii == 0) |
7051 | { |
7052 | /* We must maintain QUEUE_INDEX between blocks in region. */ |
7053 | for (i = ready.n_ready - 1; i >= 0; i--) |
7054 | { |
7055 | rtx_insn *x; |
7056 | |
7057 | 	  x = ready_element (&ready, i); |
7058 | QUEUE_INDEX (x) = QUEUE_NOWHERE; |
7059 | TODO_SPEC (x) = HARD_DEP; |
7060 | } |
7061 | |
7062 | if (q_size) |
7063 | for (i = 0; i <= max_insn_queue_index; i++) |
7064 | { |
7065 | rtx_insn_list *link; |
7066 | for (link = insn_queue[i]; link; link = link->next ()) |
7067 | { |
7068 | rtx_insn *x; |
7069 | |
7070 | x = link->insn (); |
7071 | QUEUE_INDEX (x) = QUEUE_NOWHERE; |
7072 | TODO_SPEC (x) = HARD_DEP; |
7073 | } |
7074 | free_INSN_LIST_list (&insn_queue[i]); |
7075 | } |
7076 | } |
7077 | |
7078 | if (sched_pressure == SCHED_PRESSURE_MODEL) |
7079 | model_end_schedule (); |
7080 | |
7081 | if (success) |
7082 | { |
7083 | commit_schedule (prev_head, tail, target_bb); |
7084 | if (sched_verbose) |
7085 | 	fprintf (sched_dump, ";; total time = %d\n", clock_var); |
7086 | } |
7087 | else |
7088 | last_scheduled_insn = tail; |
7089 | |
7090 |   scheduled_insns.truncate (0); |
7091 | |
7092 | if (!current_sched_info->queue_must_finish_empty |
7093 | || haifa_recovery_bb_recently_added_p) |
7094 | { |
7095 | /* INSN_TICK (minimum clock tick at which the insn becomes |
7096 | ready) may be not correct for the insn in the subsequent |
7097 | blocks of the region. We should use a correct value of |
7098 | `clock_var' or modify INSN_TICK. It is better to keep |
7099 | clock_var value equal to 0 at the start of a basic block. |
7100 | Therefore we modify INSN_TICK here. */ |
7101 |       fix_inter_tick (NEXT_INSN (prev_head), last_scheduled_insn); |
7102 | } |
7103 | |
7104 | if (targetm.sched.finish) |
7105 | { |
7106 | targetm.sched.finish (sched_dump, sched_verbose); |
7107 | /* Target might have added some instructions to the scheduled block |
7108 | in its md_finish () hook. These new insns don't have any data |
7109 | initialized and to identify them we extend h_i_d so that they'll |
7110 | get zero luids. */ |
7111 | sched_extend_luids (); |
7112 | } |
7113 | |
7114 | /* Update head/tail boundaries. */ |
7115 |   head = NEXT_INSN (prev_head); |
7116 | tail = last_scheduled_insn; |
7117 | |
7118 | if (sched_verbose) |
7119 | { |
7120 |       fprintf (sched_dump, ";; new head = %d\n;; new tail = %d\n", |
7121 | 	       INSN_UID (head), INSN_UID (tail)); |
7122 | |
7123 | if (sched_verbose >= 2) |
7124 | { |
7125 | dump_insn_stream (head, tail); |
7126 | 	  print_rank_for_schedule_stats (";; TOTAL ", &rank_for_schedule_stats, |
7127 | NULL); |
7128 | } |
7129 | |
7130 |       fprintf (sched_dump, "\n"); |
7131 | } |
7132 | |
7133 | head = restore_other_notes (head, NULL); |
7134 | |
7135 | current_sched_info->head = head; |
7136 | current_sched_info->tail = tail; |
7137 | |
7138 | free_backtrack_queue (); |
7139 | |
7140 | return success; |
7141 | } |
7142 | |
7143 | /* Set_priorities: compute priority of each insn in the block. */ |
7144 | |
7145 | int |
7146 | set_priorities (rtx_insn *head, rtx_insn *tail) |
7147 | { |
7148 | rtx_insn *insn; |
7149 | int n_insn; |
7150 | int sched_max_insns_priority = |
7151 | current_sched_info->sched_max_insns_priority; |
7152 | rtx_insn *prev_head; |
7153 | |
7154 | if (head == tail && ! INSN_P (head)) |
7155 | gcc_unreachable (); |
7156 | |
7157 | n_insn = 0; |
7158 | |
7159 |   prev_head = PREV_INSN (head); |
7160 | for (insn = tail; insn != prev_head; insn = PREV_INSN (insn)) |
7161 | { |
7162 | if (!INSN_P (insn)) |
7163 | continue; |
7164 | |
7165 | n_insn++; |
7166 | (void) priority (insn); |
7167 | |
7168 | gcc_assert (INSN_PRIORITY_KNOWN (insn)); |
7169 | |
7170 | sched_max_insns_priority = MAX (sched_max_insns_priority, |
7171 | INSN_PRIORITY (insn)); |
7172 | } |
7173 | |
7174 | current_sched_info->sched_max_insns_priority = sched_max_insns_priority; |
7175 | |
7176 | return n_insn; |
7177 | } |
7178 | |
7179 | /* Set sched_dump and sched_verbose for the desired debugging output. */ |
7180 | void |
7181 | setup_sched_dump (void) |
7182 | { |
7183 | sched_verbose = sched_verbose_param; |
7184 | sched_dump = dump_file; |
7185 | if (!dump_file) |
7186 | sched_verbose = 0; |
7187 | } |
7188 | |
7189 | /* Allocate data for register pressure sensitive scheduling. */ |
7190 | static void |
7191 | alloc_global_sched_pressure_data (void) |
7192 | { |
7193 | if (sched_pressure != SCHED_PRESSURE_NONE) |
7194 | { |
7195 | int i, max_regno = max_reg_num (); |
7196 | |
7197 | if (sched_dump != NULL) |
7198 | /* We need info about pseudos for rtl dumps about pseudo |
7199 | classes and costs. */ |
7200 | regstat_init_n_sets_and_refs (); |
7201 | ira_set_pseudo_classes (true, sched_verbose ? sched_dump : NULL); |
7202 | sched_regno_pressure_class |
7203 | = (enum reg_class *) xmalloc (max_regno * sizeof (enum reg_class)); |
7204 | for (i = 0; i < max_regno; i++) |
7205 | sched_regno_pressure_class[i] |
7206 | = (i < FIRST_PSEUDO_REGISTER |
7207 | ? ira_pressure_class_translate[REGNO_REG_CLASS (i)] |
7208 | : ira_pressure_class_translate[reg_allocno_class (i)]); |
7209 | curr_reg_live = BITMAP_ALLOC (NULL); |
7210 | if (sched_pressure == SCHED_PRESSURE_WEIGHTED) |
7211 | { |
7212 | saved_reg_live = BITMAP_ALLOC (NULL); |
7213 | region_ref_regs = BITMAP_ALLOC (NULL); |
7214 | } |
7215 | if (sched_pressure == SCHED_PRESSURE_MODEL) |
7216 | tmp_bitmap = BITMAP_ALLOC (NULL); |
7217 | |
7218 | /* Calculate number of CALL_SAVED_REGS and FIXED_REGS in register classes |
7219 | that we calculate register pressure for. */ |
7220 | for (int c = 0; c < ira_pressure_classes_num; ++c) |
7221 | { |
7222 | enum reg_class cl = ira_pressure_classes[c]; |
7223 | |
7224 | call_saved_regs_num[cl] = 0; |
7225 | fixed_regs_num[cl] = 0; |
7226 | |
7227 | for (int i = 0; i < ira_class_hard_regs_num[cl]; ++i) |
7228 | { |
7229 | unsigned int regno = ira_class_hard_regs[cl][i]; |
7230 | if (fixed_regs[regno]) |
7231 | ++fixed_regs_num[cl]; |
7232 | else if (!crtl->abi->clobbers_full_reg_p (regno)) |
7233 | ++call_saved_regs_num[cl]; |
7234 | } |
7235 | } |
7236 | } |
7237 | } |
7238 | |
7239 | /* Free data for register pressure sensitive scheduling. Also called |
7240 | from schedule_region when stopping sched-pressure early. */ |
7241 | void |
7242 | free_global_sched_pressure_data (void) |
7243 | { |
7244 | if (sched_pressure != SCHED_PRESSURE_NONE) |
7245 | { |
7246 | if (regstat_n_sets_and_refs != NULL) |
7247 | regstat_free_n_sets_and_refs (); |
7248 | if (sched_pressure == SCHED_PRESSURE_WEIGHTED) |
7249 | { |
7250 | BITMAP_FREE (region_ref_regs); |
7251 | BITMAP_FREE (saved_reg_live); |
7252 | } |
7253 | if (sched_pressure == SCHED_PRESSURE_MODEL) |
7254 | BITMAP_FREE (tmp_bitmap); |
7255 | BITMAP_FREE (curr_reg_live); |
7256 |       free (sched_regno_pressure_class); |
7257 | } |
7258 | } |
7259 | |
7260 | /* Initialize some global state for the scheduler. This function works |
7261 | with the common data shared between all the schedulers. It is called |
7262 | from the scheduler specific initialization routine. */ |
7263 | |
7264 | void |
7265 | sched_init (void) |
7266 | { |
7267 | if (targetm.sched.dispatch (NULL, IS_DISPATCH_ON)) |
7268 | targetm.sched.dispatch_do (NULL, DISPATCH_INIT); |
7269 | |
7270 | if (live_range_shrinkage_p) |
7271 | sched_pressure = SCHED_PRESSURE_WEIGHTED; |
7272 | else if (flag_sched_pressure |
7273 | && !reload_completed |
7274 | && common_sched_info->sched_pass_id == SCHED_RGN_PASS) |
7275 | sched_pressure = ((enum sched_pressure_algorithm) |
7276 | param_sched_pressure_algorithm); |
7277 | else |
7278 | sched_pressure = SCHED_PRESSURE_NONE; |
7279 | |
7280 | if (sched_pressure != SCHED_PRESSURE_NONE) |
7281 | ira_setup_eliminable_regset (); |
7282 | |
7283 | /* Initialize SPEC_INFO. */ |
7284 | if (targetm.sched.set_sched_flags) |
7285 | { |
7286 | spec_info = &spec_info_var; |
7287 | targetm.sched.set_sched_flags (spec_info); |
7288 | |
7289 | if (spec_info->mask != 0) |
7290 | { |
7291 | spec_info->data_weakness_cutoff |
7292 | = (param_sched_spec_prob_cutoff * MAX_DEP_WEAK) / 100; |
7293 | spec_info->control_weakness_cutoff |
7294 | = (param_sched_spec_prob_cutoff * REG_BR_PROB_BASE) / 100; |
7295 | } |
7296 | else |
7297 | /* So we won't read anything accidentally. */ |
7298 | spec_info = NULL; |
7299 | |
7300 | } |
7301 | else |
7302 | /* So we won't read anything accidentally. */ |
7303 | spec_info = 0; |
7304 | |
7305 | /* Initialize issue_rate. */ |
7306 | if (targetm.sched.issue_rate) |
7307 | issue_rate = targetm.sched.issue_rate (); |
7308 | else |
7309 | issue_rate = 1; |
7310 | |
7311 | if (targetm.sched.first_cycle_multipass_dfa_lookahead |
7312 | /* Don't use max_issue with reg_pressure scheduling. Multipass |
7313 | scheduling and reg_pressure scheduling undo each other's decisions. */ |
7314 | && sched_pressure == SCHED_PRESSURE_NONE) |
7315 | dfa_lookahead = targetm.sched.first_cycle_multipass_dfa_lookahead (); |
7316 | else |
7317 | dfa_lookahead = 0; |
7318 | |
7319 | /* Set to "0" so that we recalculate. */ |
7320 | max_lookahead_tries = 0; |
7321 | |
7322 | if (targetm.sched.init_dfa_pre_cycle_insn) |
7323 | targetm.sched.init_dfa_pre_cycle_insn (); |
7324 | |
7325 | if (targetm.sched.init_dfa_post_cycle_insn) |
7326 | targetm.sched.init_dfa_post_cycle_insn (); |
7327 | |
7328 | dfa_start (); |
7329 | dfa_state_size = state_size (); |
7330 | |
7331 | init_alias_analysis (); |
7332 | |
7333 | if (!sched_no_dce) |
7334 | df_set_flags (DF_LR_RUN_DCE); |
7335 | df_note_add_problem (); |
7336 | |
7337 | /* More problems needed for interloop dep calculation in SMS. */ |
7338 | if (common_sched_info->sched_pass_id == SCHED_SMS_PASS) |
7339 | { |
7340 | df_rd_add_problem (); |
7341 | df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN); |
7342 | } |
7343 | |
7344 | df_analyze (); |
7345 | |
7346 | /* Do not run DCE after reload, as this can kill nops inserted |
7347 | by bundling. */ |
7348 | if (reload_completed) |
7349 | df_clear_flags (DF_LR_RUN_DCE); |
7350 | |
7351 | regstat_compute_calls_crossed (); |
7352 | |
7353 | if (targetm.sched.init_global) |
7354 | targetm.sched.init_global (sched_dump, sched_verbose, get_max_uid () + 1); |
7355 | |
7356 | alloc_global_sched_pressure_data (); |
7357 | |
7358 | curr_state = xmalloc (dfa_state_size); |
7359 | } |
7360 | |
7361 | static void haifa_init_only_bb (basic_block, basic_block); |
7362 | |
7363 | /* Initialize data structures specific to the Haifa scheduler. */ |
7364 | void |
7365 | haifa_sched_init (void) |
7366 | { |
7367 | setup_sched_dump (); |
7368 | sched_init (); |
7369 | |
7370 |   scheduled_insns.create (0); |
7371 | |
7372 | if (spec_info != NULL) |
7373 | { |
7374 | sched_deps_info->use_deps_list = 1; |
7375 | sched_deps_info->generate_spec_deps = 1; |
7376 | } |
7377 | |
7378 | /* Initialize luids, dependency caches, target and h_i_d for the |
7379 | whole function. */ |
7380 | { |
7381 | sched_init_bbs (); |
7382 | |
7383 | auto_vec<basic_block> bbs (n_basic_blocks_for_fn (cfun)); |
7384 | basic_block bb; |
7385 | FOR_EACH_BB_FN (bb, cfun) |
7386 |     bbs.quick_push (bb); |
7387 | sched_init_luids (bbs); |
7388 | sched_deps_init (true); |
7389 | sched_extend_target (); |
7390 | haifa_init_h_i_d (bbs); |
7391 | } |
7392 | |
7393 | sched_init_only_bb = haifa_init_only_bb; |
7394 | sched_split_block = sched_split_block_1; |
7395 | sched_create_empty_bb = sched_create_empty_bb_1; |
7396 | haifa_recovery_bb_ever_added_p = false; |
7397 | |
7398 | nr_begin_data = nr_begin_control = nr_be_in_data = nr_be_in_control = 0; |
7399 | before_recovery = 0; |
7400 | after_recovery = 0; |
7401 | |
7402 | modulo_ii = 0; |
7403 | } |
7404 | |
7405 | /* Finish work with the data specific to the Haifa scheduler. */ |
7406 | void |
7407 | haifa_sched_finish (void) |
7408 | { |
7409 | sched_create_empty_bb = NULL; |
7410 | sched_split_block = NULL; |
7411 | sched_init_only_bb = NULL; |
7412 | |
7413 | if (spec_info && spec_info->dump) |
7414 | { |
7415 | char c = reload_completed ? 'a' : 'b'; |
7416 | |
7417 |       fprintf (spec_info->dump, |
7418 | 	       ";; %s:\n", current_function_name ()); |
7419 | |
7420 |       fprintf (spec_info->dump, |
7421 | 	       ";; Procedure %cr-begin-data-spec motions == %d\n", |
7422 | 	       c, nr_begin_data); |
7423 |       fprintf (spec_info->dump, |
7424 | 	       ";; Procedure %cr-be-in-data-spec motions == %d\n", |
7425 | 	       c, nr_be_in_data); |
7426 |       fprintf (spec_info->dump, |
7427 | 	       ";; Procedure %cr-begin-control-spec motions == %d\n", |
7428 | 	       c, nr_begin_control); |
7429 |       fprintf (spec_info->dump, |
7430 | 	       ";; Procedure %cr-be-in-control-spec motions == %d\n", |
7431 | 	       c, nr_be_in_control); |
7432 | } |
7433 | |
7434 | scheduled_insns.release (); |
7435 | |
7436 | /* Finalize h_i_d, dependency caches, and luids for the whole |
7437 | function. Target will be finalized in md_global_finish (). */ |
7438 | sched_deps_finish (); |
7439 | sched_finish_luids (); |
7440 | current_sched_info = NULL; |
7441 | insn_queue = NULL; |
7442 | sched_finish (); |
7443 | } |
7444 | |
7445 | /* Free global data used during insn scheduling. This function works with |
7446 | the common data shared between the schedulers. */ |
7447 | |
7448 | void |
7449 | sched_finish (void) |
7450 | { |
7451 | haifa_finish_h_i_d (); |
7452 | free_global_sched_pressure_data (); |
7453 |   free (curr_state); |
7454 | |
7455 | if (targetm.sched.finish_global) |
7456 | targetm.sched.finish_global (sched_dump, sched_verbose); |
7457 | |
7458 | end_alias_analysis (); |
7459 | |
7460 | regstat_free_calls_crossed (); |
7461 | |
7462 | dfa_finish (); |
7463 | } |
7464 | |
7465 | /* Free all delay_pair structures that were recorded. */ |
7466 | void |
7467 | free_delay_pairs (void) |
7468 | { |
7469 | if (delay_htab) |
7470 | { |
7471 | delay_htab->empty (); |
7472 | delay_htab_i2->empty (); |
7473 | } |
7474 | } |
7475 | |
7476 | /* Fix INSN_TICKs of the instructions in the current block as well as |
7477 | INSN_TICKs of their dependents. |
7478 | HEAD and TAIL are the begin and the end of the current scheduled block. */ |
7479 | static void |
7480 | fix_inter_tick (rtx_insn *head, rtx_insn *tail) |
7481 | { |
7482 | /* Set of instructions with corrected INSN_TICK. */ |
7483 | auto_bitmap processed; |
7484 |   /* ??? It is doubtful if we should assume that cycle advance happens on |
7485 |      basic block boundaries.  Basically insns that are unconditionally ready |
7486 |      on the start of the block are more preferable than those which have |
7487 |      a one cycle dependency over insn from the previous block.  */ |
7488 | int next_clock = clock_var + 1; |
7489 | |
7490 | /* Iterates over scheduled instructions and fix their INSN_TICKs and |
7491 | INSN_TICKs of dependent instructions, so that INSN_TICKs are consistent |
7492 | across different blocks. */ |
7493 |   for (tail = NEXT_INSN (tail); head != tail; head = NEXT_INSN (head)) |
7494 | { |
7495 | if (INSN_P (head)) |
7496 | { |
7497 | int tick; |
7498 | sd_iterator_def sd_it; |
7499 | dep_t dep; |
7500 | |
7501 | tick = INSN_TICK (head); |
7502 | gcc_assert (tick >= MIN_TICK); |
7503 | |
7504 | /* Fix INSN_TICK of instruction from just scheduled block. */ |
7505 | if (bitmap_set_bit (processed, INSN_LUID (head))) |
7506 | { |
7507 | tick -= next_clock; |
7508 | |
7509 | if (tick < MIN_TICK) |
7510 | tick = MIN_TICK; |
7511 | |
7512 | INSN_TICK (head) = tick; |
7513 | } |
7514 | |
7515 | if (DEBUG_INSN_P (head)) |
7516 | continue; |
7517 | |
7518 | FOR_EACH_DEP (head, SD_LIST_RES_FORW, sd_it, dep) |
7519 | { |
7520 | rtx_insn *next; |
7521 | |
7522 | next = DEP_CON (dep); |
7523 | tick = INSN_TICK (next); |
7524 | |
7525 | if (tick != INVALID_TICK |
7526 | /* If NEXT has its INSN_TICK calculated, fix it. |
7527 | If not - it will be properly calculated from |
7528 | scratch later in fix_tick_ready. */ |
7529 | && bitmap_set_bit (processed, INSN_LUID (next))) |
7530 | { |
7531 | tick -= next_clock; |
7532 | |
7533 | if (tick < MIN_TICK) |
7534 | tick = MIN_TICK; |
7535 | |
7536 | if (tick > INTER_TICK (next)) |
7537 | INTER_TICK (next) = tick; |
7538 | else |
7539 | tick = INTER_TICK (next); |
7540 | |
7541 | INSN_TICK (next) = tick; |
7542 | } |
7543 | } |
7544 | } |
7545 | } |
7546 | } |
7547 | |
7548 | /* Check if NEXT is ready to be added to the ready or queue list. |
7549 | If "yes", add it to the proper list. |
7550 | Returns: |
7551 | -1 - is not ready yet, |
7552 | 0 - added to the ready list, |
7553 | 0 < N - queued for N cycles. */ |
7554 | int |
7555 | try_ready (rtx_insn *next) |
7556 | { |
7557 | ds_t old_ts, new_ts; |
7558 | |
7559 | old_ts = TODO_SPEC (next); |
7560 | |
7561 | gcc_assert (!(old_ts & ~(SPECULATIVE | HARD_DEP | DEP_CONTROL | DEP_POSTPONED)) |
7562 | && (old_ts == HARD_DEP |
7563 | || old_ts == DEP_POSTPONED |
7564 | || (old_ts & SPECULATIVE) |
7565 | || old_ts == DEP_CONTROL)); |
7566 | |
7567 |   new_ts = recompute_todo_spec (next, false); |
7568 | |
7569 | if (new_ts & (HARD_DEP | DEP_POSTPONED)) |
7570 | gcc_assert (new_ts == old_ts |
7571 | && QUEUE_INDEX (next) == QUEUE_NOWHERE); |
7572 | else if (current_sched_info->new_ready) |
7573 | new_ts = current_sched_info->new_ready (next, new_ts); |
7574 | |
7575 | /* * if !(old_ts & SPECULATIVE) (e.g. HARD_DEP or 0), then insn might |
7576 | have its original pattern or changed (speculative) one. This is due |
7577 | to changing ebb in region scheduling. |
7578 | * But if (old_ts & SPECULATIVE), then we are pretty sure that insn |
7579 |      has a speculative pattern. |
7580 | |
7581 | We can't assert (!(new_ts & HARD_DEP) || new_ts == old_ts) here because |
7582 | control-speculative NEXT could have been discarded by sched-rgn.cc |
7583 | (the same case as when discarded by can_schedule_ready_p ()). */ |
7584 | |
7585 | if ((new_ts & SPECULATIVE) |
7586 | /* If (old_ts == new_ts), then (old_ts & SPECULATIVE) and we don't |
7587 | need to change anything. */ |
7588 | && new_ts != old_ts) |
7589 | { |
7590 | int res; |
7591 | rtx new_pat; |
7592 | |
7593 | gcc_assert ((new_ts & SPECULATIVE) && !(new_ts & ~SPECULATIVE)); |
7594 | |
7595 | res = haifa_speculate_insn (next, new_ts, &new_pat); |
7596 | |
7597 | switch (res) |
7598 | { |
7599 | case -1: |
7600 | /* It would be nice to change DEP_STATUS of all dependences, |
7601 | which have ((DEP_STATUS & SPECULATIVE) == new_ts) to HARD_DEP, |
7602 | so we won't reanalyze anything. */ |
7603 | new_ts = HARD_DEP; |
7604 | break; |
7605 | |
7606 | case 0: |
7607 | 	  /* We follow the rule that every speculative insn |
7608 | 	     has a non-null ORIG_PAT.  */ |
7609 | if (!ORIG_PAT (next)) |
7610 | 	    ORIG_PAT (next) = PATTERN (next); |
7611 | break; |
7612 | |
7613 | case 1: |
7614 | if (!ORIG_PAT (next)) |
7615 | 	    /* If we are going to overwrite the original pattern of insn, |
7616 | 	       save it.  */ |
7617 | 	    ORIG_PAT (next) = PATTERN (next); |
7618 | |
7619 | res = haifa_change_pattern (next, new_pat); |
7620 | gcc_assert (res); |
7621 | break; |
7622 | |
7623 | default: |
7624 | gcc_unreachable (); |
7625 | } |
7626 | } |
7627 | |
7628 | /* We need to restore pattern only if (new_ts == 0), because otherwise it is |
7629 | either correct (new_ts & SPECULATIVE), |
7630 | or we simply don't care (new_ts & HARD_DEP). */ |
7631 | |
7632 | gcc_assert (!ORIG_PAT (next) |
7633 | || !IS_SPECULATION_BRANCHY_CHECK_P (next)); |
7634 | |
7635 | TODO_SPEC (next) = new_ts; |
7636 | |
7637 | if (new_ts & (HARD_DEP | DEP_POSTPONED)) |
7638 | { |
7639 | /* We can't assert (QUEUE_INDEX (next) == QUEUE_NOWHERE) here because |
7640 | control-speculative NEXT could have been discarded by sched-rgn.cc |
7641 | (the same case as when discarded by can_schedule_ready_p ()). */ |
7642 | /*gcc_assert (QUEUE_INDEX (next) == QUEUE_NOWHERE);*/ |
7643 | |
7644 | change_queue_index (next, QUEUE_NOWHERE); |
7645 | |
7646 | return -1; |
7647 | } |
7648 | else if (!(new_ts & BEGIN_SPEC) |
7649 | && ORIG_PAT (next) && PREDICATED_PAT (next) == NULL_RTX |
7650 | && !IS_SPECULATION_CHECK_P (next)) |
7651 |     /* We should change the pattern of every previously speculative |
7652 |        instruction - and we determine if NEXT was speculative by using |
7653 |        the ORIG_PAT field.  Except one case - speculation checks have |
7654 |        ORIG_PAT set too, so skip them.  */ |
7655 | { |
7656 | bool success = haifa_change_pattern (next, ORIG_PAT (next)); |
7657 | gcc_assert (success); |
7658 | ORIG_PAT (next) = 0; |
7659 | } |
7660 | |
7661 | if (sched_verbose >= 2) |
7662 | { |
7663 |       fprintf (sched_dump, ";;\t\tdependencies resolved: insn %s", |
7664 | (*current_sched_info->print_insn) (next, 0)); |
7665 | |
7666 | if (spec_info && spec_info->dump) |
7667 | { |
7668 | if (new_ts & BEGIN_DATA) |
7669 | 	    fprintf (spec_info->dump, "; data-spec;"); |
7670 | 	  if (new_ts & BEGIN_CONTROL) |
7671 | 	    fprintf (spec_info->dump, "; control-spec;"); |
7672 | 	  if (new_ts & BE_IN_CONTROL) |
7673 | 	    fprintf (spec_info->dump, "; in-control-spec;"); |
7674 | } |
7675 | if (TODO_SPEC (next) & DEP_CONTROL) |
7676 | 	fprintf (sched_dump, " predicated"); |
7677 |       fprintf (sched_dump, "\n"); |
7678 | } |
7679 | |
7680 |   adjust_priority (next); |
7681 | |
7682 | return fix_tick_ready (next); |
7683 | } |
7684 | |
7685 | /* Calculate INSN_TICK of NEXT and add it to either ready or queue list. */ |
7686 | static int |
7687 | fix_tick_ready (rtx_insn *next) |
7688 | { |
7689 | int tick, delay; |
7690 | |
7691 | if (!DEBUG_INSN_P (next) && !sd_lists_empty_p (next, SD_LIST_RES_BACK)) |
7692 | { |
7693 | int full_p; |
7694 | sd_iterator_def sd_it; |
7695 | dep_t dep; |
7696 | |
7697 | tick = INSN_TICK (next); |
7698 | /* if tick is not equal to INVALID_TICK, then update |
7699 | INSN_TICK of NEXT with the most recent resolved dependence |
7700 | cost. Otherwise, recalculate from scratch. */ |
7701 | full_p = (tick == INVALID_TICK); |
7702 | |
7703 | FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep) |
7704 | { |
7705 | rtx_insn *pro = DEP_PRO (dep); |
7706 | int tick1; |
7707 | |
7708 | gcc_assert (INSN_TICK (pro) >= MIN_TICK); |
7709 | |
7710 | 	  tick1 = INSN_TICK (pro) + dep_cost (dep); |
7711 | if (tick1 > tick) |
7712 | tick = tick1; |
7713 | |
7714 | if (!full_p) |
7715 | break; |
7716 | } |
7717 | } |
7718 | else |
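     |     /* Debug insns, and insns with no resolved backward dependencies, |
     |        can become ready at once.  */ |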
7719 | tick = -1; |
7720 | |
7721 | INSN_TICK (next) = tick; |
7722 | |
7723 | delay = tick - clock_var; |
7724 | if (delay <= 0 || sched_pressure != SCHED_PRESSURE_NONE || sched_fusion) |
7725 | delay = QUEUE_READY; |
7726 | |
7727 | change_queue_index (next, delay); |
7728 | |
7729 | return delay; |
7730 | } |
7731 | |
7732 | /* Move NEXT to the proper queue list with (DELAY >= 1), |
7733 | or add it to the ready list (DELAY == QUEUE_READY), |
7734 | or remove it from ready and queue lists at all (DELAY == QUEUE_NOWHERE). */ |
7735 | static void |
7736 | change_queue_index (rtx_insn *next, int delay) |
7737 | { |
7738 | int i = QUEUE_INDEX (next); |
7739 | |
7740 | gcc_assert (QUEUE_NOWHERE <= delay && delay <= max_insn_queue_index |
7741 | && delay != 0); |
7742 | gcc_assert (i != QUEUE_SCHEDULED); |
7743 | |
7744 | if ((delay > 0 && NEXT_Q_AFTER (q_ptr, delay) == i) |
7745 | || (delay < 0 && delay == i)) |
7746 | /* We have nothing to do. */ |
7747 | return; |
7748 | |
7749 | /* Remove NEXT from wherever it is now. */ |
7750 | if (i == QUEUE_READY) |
7751 |     ready_remove_insn (next); |
7752 | else if (i >= 0) |
7753 |     queue_remove (next); |
7754 | |
7755 | /* Add it to the proper place. */ |
7756 | if (delay == QUEUE_READY) |
7757 |     ready_add (readyp, next, false); |
7758 | else if (delay >= 1) |
7759 |     queue_insn (next, delay, "change queue index"); |
7760 | |
7761 | if (sched_verbose >= 2) |
7762 | { |
      fprintf (sched_dump, ";;\t\ttick updated: insn %s",
	       (*current_sched_info->print_insn) (next, 0));

      if (delay == QUEUE_READY)
	fprintf (sched_dump, " into ready\n");
      else if (delay >= 1)
	fprintf (sched_dump, " into queue with cost=%d\n", delay);
      else
	fprintf (sched_dump, " removed from ready or queue lists\n");
7772 | } |
7773 | } |
7774 | |
7775 | static int sched_ready_n_insns = -1; |
7776 | |
7777 | /* Initialize per region data structures. */ |
7778 | void |
7779 | sched_extend_ready_list (int new_sched_ready_n_insns) |
7780 | { |
7781 | int i; |
7782 | |
7783 | if (sched_ready_n_insns == -1) |
7784 | /* At the first call we need to initialize one more choice_stack |
7785 | entry. */ |
7786 | { |
7787 | i = 0; |
7788 | sched_ready_n_insns = 0; |
      scheduled_insns.reserve (new_sched_ready_n_insns);
7790 | } |
7791 | else |
7792 | i = sched_ready_n_insns + 1; |
7793 | |
7794 | ready.veclen = new_sched_ready_n_insns + issue_rate; |
7795 | ready.vec = XRESIZEVEC (rtx_insn *, ready.vec, ready.veclen); |
7796 | |
7797 | gcc_assert (new_sched_ready_n_insns >= sched_ready_n_insns); |
7798 | |
7799 | ready_try = (signed char *) xrecalloc (ready_try, new_sched_ready_n_insns, |
7800 | sched_ready_n_insns, |
7801 | sizeof (*ready_try)); |
7802 | |
7803 | /* We allocate +1 element to save initial state in the choice_stack[0] |
7804 | entry. */ |
7805 | choice_stack = XRESIZEVEC (struct choice_entry, choice_stack, |
7806 | new_sched_ready_n_insns + 1); |
7807 | |
7808 | for (; i <= new_sched_ready_n_insns; i++) |
7809 | { |
7810 | choice_stack[i].state = xmalloc (dfa_state_size); |
7811 | |
7812 | if (targetm.sched.first_cycle_multipass_init) |
7813 | targetm.sched.first_cycle_multipass_init (&(choice_stack[i] |
7814 | .target_data)); |
7815 | } |
7816 | |
7817 | sched_ready_n_insns = new_sched_ready_n_insns; |
7818 | } |
7819 | |
7820 | /* Free per region data structures. */ |
7821 | void |
7822 | sched_finish_ready_list (void) |
7823 | { |
7824 | int i; |
7825 | |
  free (ready.vec);
  ready.vec = NULL;
  ready.veclen = 0;

  free (ready_try);
  ready_try = NULL;
7832 | |
7833 | for (i = 0; i <= sched_ready_n_insns; i++) |
7834 | { |
7835 | if (targetm.sched.first_cycle_multipass_fini) |
7836 | targetm.sched.first_cycle_multipass_fini (&(choice_stack[i] |
7837 | .target_data)); |
7838 | |
      free (choice_stack[i].state);
    }
  free (choice_stack);
7842 | choice_stack = NULL; |
7843 | |
7844 | sched_ready_n_insns = -1; |
7845 | } |
7846 | |
7847 | static int |
7848 | haifa_luid_for_non_insn (rtx x) |
7849 | { |
7850 | gcc_assert (NOTE_P (x) || LABEL_P (x)); |
7851 | |
7852 | return 0; |
7853 | } |
7854 | |
7855 | /* Generates recovery code for INSN. */ |
7856 | static void |
7857 | generate_recovery_code (rtx_insn *insn) |
7858 | { |
7859 | if (TODO_SPEC (insn) & BEGIN_SPEC) |
7860 | begin_speculative_block (insn); |
7861 | |
  /* Here we have insn with no dependencies to
     instructions other than CHECK_SPEC ones.  */
7864 | |
7865 | if (TODO_SPEC (insn) & BE_IN_SPEC) |
7866 | add_to_speculative_block (insn); |
7867 | } |
7868 | |
7869 | /* Helper function. |
7870 | Tries to add speculative dependencies of type FS between instructions |
7871 | in deps_list L and TWIN. */ |
7872 | static void |
7873 | process_insn_forw_deps_be_in_spec (rtx_insn *insn, rtx_insn *twin, ds_t fs) |
7874 | { |
7875 | sd_iterator_def sd_it; |
7876 | dep_t dep; |
7877 | |
7878 | FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep) |
7879 | { |
7880 | ds_t ds; |
7881 | rtx_insn *consumer; |
7882 | |
7883 | consumer = DEP_CON (dep); |
7884 | |
7885 | ds = DEP_STATUS (dep); |
7886 | |
7887 | if (/* If we want to create speculative dep. */ |
7888 | fs |
7889 | /* And we can do that because this is a true dep. */ |
7890 | && (ds & DEP_TYPES) == DEP_TRUE) |
7891 | { |
7892 | gcc_assert (!(ds & BE_IN_SPEC)); |
7893 | |
7894 | if (/* If this dep can be overcome with 'begin speculation'. */ |
7895 | ds & BEGIN_SPEC) |
7896 | /* Then we have a choice: keep the dep 'begin speculative' |
7897 | or transform it into 'be in speculative'. */ |
7898 | { |
7899 | if (/* In try_ready we assert that if insn once became ready |
7900 | it can be removed from the ready (or queue) list only |
7901 | due to backend decision. Hence we can't let the |
7902 | probability of the speculative dep to decrease. */ |
7903 | ds_weak (ds) <= ds_weak (fs)) |
7904 | { |
7905 | ds_t new_ds; |
7906 | |
7907 | new_ds = (ds & ~BEGIN_SPEC) | fs; |
7908 | |
7909 | if (/* consumer can 'be in speculative'. */ |
7910 | sched_insn_is_legitimate_for_speculation_p (consumer, |
7911 | new_ds)) |
7912 | /* Transform it to be in speculative. */ |
7913 | ds = new_ds; |
7914 | } |
7915 | } |
7916 | else |
7917 | /* Mark the dep as 'be in speculative'. */ |
7918 | ds |= fs; |
7919 | } |
7920 | |
7921 | { |
7922 | dep_def _new_dep, *new_dep = &_new_dep; |
7923 | |
7924 | init_dep_1 (new_dep, twin, consumer, DEP_TYPE (dep), ds); |
7925 | sd_add_dep (new_dep, false); |
7926 | } |
7927 | } |
7928 | } |
7929 | |
7930 | /* Generates recovery code for BEGIN speculative INSN. */ |
7931 | static void |
7932 | begin_speculative_block (rtx_insn *insn) |
7933 | { |
7934 | if (TODO_SPEC (insn) & BEGIN_DATA) |
7935 | nr_begin_data++; |
7936 | if (TODO_SPEC (insn) & BEGIN_CONTROL) |
7937 | nr_begin_control++; |
7938 | |
7939 | create_check_block_twin (insn, false); |
7940 | |
7941 | TODO_SPEC (insn) &= ~BEGIN_SPEC; |
7942 | } |
7943 | |
7944 | static void haifa_init_insn (rtx_insn *); |
7945 | |
7946 | /* Generates recovery code for BE_IN speculative INSN. */ |
7947 | static void |
7948 | add_to_speculative_block (rtx_insn *insn) |
7949 | { |
7950 | ds_t ts; |
7951 | sd_iterator_def sd_it; |
7952 | dep_t dep; |
7953 | auto_vec<rtx_insn *, 10> twins; |
7954 | |
7955 | ts = TODO_SPEC (insn); |
7956 | gcc_assert (!(ts & ~BE_IN_SPEC)); |
7957 | |
7958 | if (ts & BE_IN_DATA) |
7959 | nr_be_in_data++; |
7960 | if (ts & BE_IN_CONTROL) |
7961 | nr_be_in_control++; |
7962 | |
7963 | TODO_SPEC (insn) &= ~BE_IN_SPEC; |
7964 | gcc_assert (!TODO_SPEC (insn)); |
7965 | |
7966 | DONE_SPEC (insn) |= ts; |
7967 | |
7968 | /* First we convert all simple checks to branchy. */ |
7969 | for (sd_it = sd_iterator_start (insn, SD_LIST_SPEC_BACK); |
       sd_iterator_cond (&sd_it, &dep);)
7971 | { |
7972 | rtx_insn *check = DEP_PRO (dep); |
7973 | |
7974 | if (IS_SPECULATION_SIMPLE_CHECK_P (check)) |
7975 | { |
7976 | create_check_block_twin (check, true); |
7977 | |
7978 | /* Restart search. */ |
7979 | sd_it = sd_iterator_start (insn, SD_LIST_SPEC_BACK); |
7980 | } |
7981 | else |
7982 | /* Continue search. */ |
	sd_iterator_next (&sd_it);
7984 | } |
7985 | |
7986 | auto_vec<rtx_insn *> priorities_roots; |
7987 | clear_priorities (insn, &priorities_roots); |
7988 | |
7989 | while (1) |
7990 | { |
7991 | rtx_insn *check, *twin; |
7992 | basic_block rec; |
7993 | |
7994 | /* Get the first backward dependency of INSN. */ |
7995 | sd_it = sd_iterator_start (insn, SD_LIST_SPEC_BACK); |
      if (!sd_iterator_cond (&sd_it, &dep))
7997 | /* INSN has no backward dependencies left. */ |
7998 | break; |
7999 | |
8000 | gcc_assert ((DEP_STATUS (dep) & BEGIN_SPEC) == 0 |
8001 | && (DEP_STATUS (dep) & BE_IN_SPEC) != 0 |
8002 | && (DEP_STATUS (dep) & DEP_TYPES) == DEP_TRUE); |
8003 | |
8004 | check = DEP_PRO (dep); |
8005 | |
8006 | gcc_assert (!IS_SPECULATION_CHECK_P (check) && !ORIG_PAT (check) |
8007 | && QUEUE_INDEX (check) == QUEUE_NOWHERE); |
8008 | |
      rec = BLOCK_FOR_INSN (check);
8010 | |
8011 | twin = emit_insn_before (copy_insn (PATTERN (insn)), BB_END (rec)); |
8012 | haifa_init_insn (twin); |
8013 | |
8014 | sd_copy_back_deps (twin, insn, true); |
8015 | |
8016 | if (sched_verbose && spec_info->dump) |
8017 | /* INSN_BB (insn) isn't determined for twin insns yet. |
8018 | So we can't use current_sched_info->print_insn. */ |
	fprintf (spec_info->dump, ";;\t\tGenerated twin insn : %d/rec%d\n",
		 INSN_UID (twin), rec->index);

      twins.safe_push (twin);
8023 | |
8024 | /* Add dependences between TWIN and all appropriate |
8025 | instructions from REC. */ |
8026 | FOR_EACH_DEP (insn, SD_LIST_SPEC_BACK, sd_it, dep) |
8027 | { |
8028 | rtx_insn *pro = DEP_PRO (dep); |
8029 | |
8030 | gcc_assert (DEP_TYPE (dep) == REG_DEP_TRUE); |
8031 | |
8032 | /* INSN might have dependencies from the instructions from |
8033 | several recovery blocks. At this iteration we process those |
8034 | producers that reside in REC. */ |
	  if (BLOCK_FOR_INSN (pro) == rec)
8036 | { |
8037 | dep_def _new_dep, *new_dep = &_new_dep; |
8038 | |
8039 | init_dep (new_dep, pro, twin, REG_DEP_TRUE); |
8040 | sd_add_dep (new_dep, false); |
8041 | } |
8042 | } |
8043 | |
      process_insn_forw_deps_be_in_spec (insn, twin, ts);
8045 | |
8046 | /* Remove all dependencies between INSN and insns in REC. */ |
      for (sd_it = sd_iterator_start (insn, SD_LIST_SPEC_BACK);
	   sd_iterator_cond (&sd_it, &dep);)
	{
	  rtx_insn *pro = DEP_PRO (dep);

	  if (BLOCK_FOR_INSN (pro) == rec)
	    sd_delete_dep (sd_it);
	  else
	    sd_iterator_next (&sd_it);
8056 | } |
8057 | } |
8058 | |
8059 | /* We couldn't have added the dependencies between INSN and TWINS earlier |
8060 | because that would make TWINS appear in the INSN_BACK_DEPS (INSN). */ |
8061 | unsigned int i; |
8062 | rtx_insn *twin; |
8063 | FOR_EACH_VEC_ELT_REVERSE (twins, i, twin) |
8064 | { |
8065 | dep_def _new_dep, *new_dep = &_new_dep; |
8066 | |
8067 | init_dep (new_dep, insn, twin, REG_DEP_OUTPUT); |
8068 | sd_add_dep (new_dep, false); |
8069 | } |
8070 | |
8071 | calc_priorities (priorities_roots); |
8072 | } |
8073 | |
/* Extend the array pointed to by P and zero-fill only the new part.  */
8075 | void * |
8076 | xrecalloc (void *p, size_t new_nmemb, size_t old_nmemb, size_t size) |
8077 | { |
8078 | gcc_assert (new_nmemb >= old_nmemb); |
8079 | p = XRESIZEVAR (void, p, new_nmemb * size); |
  memset (((char *) p) + old_nmemb * size, 0, (new_nmemb - old_nmemb) * size);
8081 | return p; |
8082 | } |
8083 | |
8084 | /* Helper function. |
8085 | Find fallthru edge from PRED. */ |
8086 | edge |
8087 | find_fallthru_edge_from (basic_block pred) |
8088 | { |
8089 | edge e; |
8090 | basic_block succ; |
8091 | |
8092 | succ = pred->next_bb; |
8093 | gcc_assert (succ->prev_bb == pred); |
8094 | |
8095 | if (EDGE_COUNT (pred->succs) <= EDGE_COUNT (succ->preds)) |
8096 | { |
      e = find_fallthru_edge (pred->succs);
8098 | |
8099 | if (e) |
8100 | { |
8101 | gcc_assert (e->dest == succ || e->dest->index == EXIT_BLOCK); |
8102 | return e; |
8103 | } |
8104 | } |
8105 | else |
8106 | { |
      e = find_fallthru_edge (succ->preds);
8108 | |
8109 | if (e) |
8110 | { |
8111 | gcc_assert (e->src == pred); |
8112 | return e; |
8113 | } |
8114 | } |
8115 | |
8116 | return NULL; |
8117 | } |
8118 | |
8119 | /* Extend per basic block data structures. */ |
8120 | static void |
8121 | sched_extend_bb (void) |
8122 | { |
8123 | /* The following is done to keep current_sched_info->next_tail non null. */ |
8124 | rtx_insn *end = BB_END (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); |
8125 | rtx_insn *insn = DEBUG_INSN_P (end) ? prev_nondebug_insn (end) : end; |
  if (NEXT_INSN (end) == 0
8127 | || (!NOTE_P (insn) |
8128 | && !LABEL_P (insn) |
8129 | /* Don't emit a NOTE if it would end up before a BARRIER. */ |
8130 | && !BARRIER_P (next_nondebug_insn (end)))) |
8131 | { |
8132 | rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end); |
8133 | /* Make note appear outside BB. */ |
      set_block_for_insn (note, NULL);
8135 | BB_END (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb) = end; |
8136 | } |
8137 | } |
8138 | |
8139 | /* Init per basic block data structures. */ |
8140 | void |
8141 | sched_init_bbs (void) |
8142 | { |
8143 | sched_extend_bb (); |
8144 | } |
8145 | |
8146 | /* Initialize BEFORE_RECOVERY variable. */ |
8147 | static void |
8148 | init_before_recovery (basic_block *before_recovery_ptr) |
8149 | { |
8150 | basic_block last; |
8151 | edge e; |
8152 | |
8153 | last = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb; |
  e = find_fallthru_edge_from (last);
8155 | |
8156 | if (e) |
8157 | { |
8158 | /* We create two basic blocks: |
8159 | 1. Single instruction block is inserted right after E->SRC |
8160 | and has jump to |
8161 | 2. Empty block right before EXIT_BLOCK. |
8162 | Between these two blocks recovery blocks will be emitted. */ |
8163 | |
8164 | basic_block single, empty; |
8165 | |
8166 | /* If the fallthrough edge to exit we've found is from the block we've |
8167 | created before, don't do anything more. */ |
8168 | if (last == after_recovery) |
8169 | return; |
8170 | |
8171 | adding_bb_to_current_region_p = false; |
8172 | |
8173 | single = sched_create_empty_bb (last); |
8174 | empty = sched_create_empty_bb (single); |
8175 | |
8176 | /* Add new blocks to the root loop. */ |
8177 | if (current_loops != NULL) |
8178 | { |
8179 | add_bb_to_loop (single, (*current_loops->larray)[0]); |
8180 | add_bb_to_loop (empty, (*current_loops->larray)[0]); |
8181 | } |
8182 | |
8183 | single->count = last->count; |
8184 | empty->count = last->count; |
8185 | BB_COPY_PARTITION (single, last); |
8186 | BB_COPY_PARTITION (empty, last); |
8187 | |
8188 | redirect_edge_succ (e, single); |
8189 | make_single_succ_edge (single, empty, 0); |
8190 | make_single_succ_edge (empty, EXIT_BLOCK_PTR_FOR_FN (cfun), |
8191 | EDGE_FALLTHRU); |
8192 | |
8193 | rtx_code_label *label = block_label (empty); |
8194 | rtx_jump_insn *x = emit_jump_insn_after (targetm.gen_jump (label), |
8195 | BB_END (single)); |
8196 | JUMP_LABEL (x) = label; |
8197 | LABEL_NUSES (label)++; |
8198 | haifa_init_insn (x); |
8199 | |
8200 | emit_barrier_after (x); |
8201 | |
8202 | sched_init_only_bb (empty, NULL); |
8203 | sched_init_only_bb (single, NULL); |
8204 | sched_extend_bb (); |
8205 | |
8206 | adding_bb_to_current_region_p = true; |
8207 | before_recovery = single; |
8208 | after_recovery = empty; |
8209 | |
8210 | if (before_recovery_ptr) |
8211 | *before_recovery_ptr = before_recovery; |
8212 | |
8213 | if (sched_verbose >= 2 && spec_info->dump) |
	fprintf (spec_info->dump,
		 ";;\t\tFixed fallthru to EXIT : %d->>%d->%d->>EXIT\n",
		 last->index, single->index, empty->index);
8217 | } |
8218 | else |
8219 | before_recovery = last; |
8220 | } |
8221 | |
8222 | /* Returns new recovery block. */ |
8223 | basic_block |
8224 | sched_create_recovery_block (basic_block *before_recovery_ptr) |
8225 | { |
8226 | rtx_insn *barrier; |
8227 | basic_block rec; |
8228 | |
8229 | haifa_recovery_bb_recently_added_p = true; |
8230 | haifa_recovery_bb_ever_added_p = true; |
8231 | |
8232 | init_before_recovery (before_recovery_ptr); |
8233 | |
8234 | barrier = get_last_bb_insn (before_recovery); |
8235 | gcc_assert (BARRIER_P (barrier)); |
8236 | |
8237 | rtx_insn *label = emit_label_after (gen_label_rtx (), barrier); |
8238 | |
8239 | rec = create_basic_block (label, label, before_recovery); |
8240 | |
8241 | /* A recovery block always ends with an unconditional jump. */ |
8242 | emit_barrier_after (BB_END (rec)); |
8243 | |
8244 | if (BB_PARTITION (before_recovery) != BB_UNPARTITIONED) |
8245 | BB_SET_PARTITION (rec, BB_COLD_PARTITION); |
8246 | |
8247 | if (sched_verbose && spec_info->dump) |
    fprintf (spec_info->dump, ";;\t\tGenerated recovery block rec%d\n",
8249 | rec->index); |
8250 | |
8251 | return rec; |
8252 | } |
8253 | |
8254 | /* Create edges: FIRST_BB -> REC; FIRST_BB -> SECOND_BB; REC -> SECOND_BB |
8255 | and emit necessary jumps. */ |
8256 | void |
8257 | sched_create_recovery_edges (basic_block first_bb, basic_block rec, |
8258 | basic_block second_bb) |
8259 | { |
8260 | int edge_flags; |
8261 | |
  /* This fixes the incoming edge.  */
8263 | /* ??? Which other flags should be specified? */ |
8264 | if (BB_PARTITION (first_bb) != BB_PARTITION (rec)) |
8265 | /* Partition type is the same, if it is "unpartitioned". */ |
8266 | edge_flags = EDGE_CROSSING; |
8267 | else |
8268 | edge_flags = 0; |
8269 | |
  edge e2 = single_succ_edge (first_bb);
8271 | edge e = make_edge (first_bb, rec, edge_flags); |
8272 | |
8273 | /* TODO: The actual probability can be determined and is computed as |
8274 | 'todo_spec' variable in create_check_block_twin and |
8275 | in sel-sched.cc `check_ds' in create_speculation_check. */ |
8276 | e->probability = profile_probability::very_unlikely (); |
8277 | rec->count = e->count (); |
8278 | e2->probability = e->probability.invert (); |
8279 | |
8280 | rtx_code_label *label = block_label (second_bb); |
8281 | rtx_jump_insn *jump = emit_jump_insn_after (targetm.gen_jump (label), |
8282 | BB_END (rec)); |
8283 | JUMP_LABEL (jump) = label; |
8284 | LABEL_NUSES (label)++; |
8285 | |
8286 | if (BB_PARTITION (second_bb) != BB_PARTITION (rec)) |
8287 | /* Partition type is the same, if it is "unpartitioned". */ |
8288 | { |
8289 | /* Rewritten from cfgrtl.cc. */ |
8290 | if (crtl->has_bb_partition && targetm_common.have_named_sections) |
8291 | { |
8292 | /* We don't need the same note for the check because |
8293 | any_condjump_p (check) == true. */ |
8294 | CROSSING_JUMP_P (jump) = 1; |
8295 | } |
8296 | edge_flags = EDGE_CROSSING; |
8297 | } |
8298 | else |
8299 | edge_flags = 0; |
8300 | |
8301 | make_single_succ_edge (rec, second_bb, edge_flags); |
8302 | if (dom_info_available_p (CDI_DOMINATORS)) |
8303 | set_immediate_dominator (CDI_DOMINATORS, rec, first_bb); |
8304 | } |
8305 | |
/* This function creates recovery code for INSN.  If MUTATE_P is nonzero,
   INSN is a simple check that should be converted to a branchy one.  */
8308 | static void |
8309 | create_check_block_twin (rtx_insn *insn, bool mutate_p) |
8310 | { |
8311 | basic_block rec; |
8312 | rtx_insn *label, *check, *twin; |
8313 | rtx check_pat; |
8314 | ds_t fs; |
8315 | sd_iterator_def sd_it; |
8316 | dep_t dep; |
8317 | dep_def _new_dep, *new_dep = &_new_dep; |
8318 | ds_t todo_spec; |
8319 | |
8320 | gcc_assert (ORIG_PAT (insn) != NULL_RTX); |
8321 | |
8322 | if (!mutate_p) |
8323 | todo_spec = TODO_SPEC (insn); |
8324 | else |
8325 | { |
8326 | gcc_assert (IS_SPECULATION_SIMPLE_CHECK_P (insn) |
8327 | && (TODO_SPEC (insn) & SPECULATIVE) == 0); |
8328 | |
8329 | todo_spec = CHECK_SPEC (insn); |
8330 | } |
8331 | |
8332 | todo_spec &= SPECULATIVE; |
8333 | |
8334 | /* Create recovery block. */ |
8335 | if (mutate_p || targetm.sched.needs_block_p (todo_spec)) |
8336 | { |
8337 | rec = sched_create_recovery_block (NULL); |
8338 | label = BB_HEAD (rec); |
8339 | } |
8340 | else |
8341 | { |
8342 | rec = EXIT_BLOCK_PTR_FOR_FN (cfun); |
8343 | label = NULL; |
8344 | } |
8345 | |
8346 | /* Emit CHECK. */ |
8347 | check_pat = targetm.sched.gen_spec_check (insn, label, todo_spec); |
8348 | |
8349 | if (rec != EXIT_BLOCK_PTR_FOR_FN (cfun)) |
8350 | { |
8351 | /* To have mem_reg alive at the beginning of second_bb, |
8352 | we emit check BEFORE insn, so insn after splitting |
8353 | insn will be at the beginning of second_bb, which will |
8354 | provide us with the correct life information. */ |
8355 | check = emit_jump_insn_before (check_pat, insn); |
8356 | JUMP_LABEL (check) = label; |
8357 | LABEL_NUSES (label)++; |
8358 | } |
8359 | else |
8360 | check = emit_insn_before (check_pat, insn); |
8361 | |
8362 | /* Extend data structures. */ |
8363 | haifa_init_insn (check); |
8364 | |
8365 | /* CHECK is being added to current region. Extend ready list. */ |
8366 | gcc_assert (sched_ready_n_insns != -1); |
  sched_extend_ready_list (sched_ready_n_insns + 1);
8368 | |
8369 | if (current_sched_info->add_remove_insn) |
8370 | current_sched_info->add_remove_insn (insn, 0); |
8371 | |
8372 | RECOVERY_BLOCK (check) = rec; |
8373 | |
8374 | if (sched_verbose && spec_info->dump) |
    fprintf (spec_info->dump, ";;\t\tGenerated check insn : %s\n",
8376 | (*current_sched_info->print_insn) (check, 0)); |
8377 | |
8378 | gcc_assert (ORIG_PAT (insn)); |
8379 | |
8380 | /* Initialize TWIN (twin is a duplicate of original instruction |
8381 | in the recovery block). */ |
8382 | if (rec != EXIT_BLOCK_PTR_FOR_FN (cfun)) |
8383 | { |
8384 | sd_iterator_def sd_it; |
8385 | dep_t dep; |
8386 | |
8387 | FOR_EACH_DEP (insn, SD_LIST_RES_BACK, sd_it, dep) |
8388 | if ((DEP_STATUS (dep) & DEP_OUTPUT) != 0) |
8389 | { |
8390 | struct _dep _dep2, *dep2 = &_dep2; |
8391 | |
8392 | init_dep (dep2, DEP_PRO (dep), check, REG_DEP_TRUE); |
8393 | |
8394 | sd_add_dep (dep2, true); |
8395 | } |
8396 | |
8397 | twin = emit_insn_after (ORIG_PAT (insn), BB_END (rec)); |
8398 | haifa_init_insn (twin); |
8399 | |
8400 | if (sched_verbose && spec_info->dump) |
8401 | /* INSN_BB (insn) isn't determined for twin insns yet. |
8402 | So we can't use current_sched_info->print_insn. */ |
	fprintf (spec_info->dump, ";;\t\tGenerated twin insn : %d/rec%d\n",
		 INSN_UID (twin), rec->index);
8405 | } |
8406 | else |
8407 | { |
8408 | ORIG_PAT (check) = ORIG_PAT (insn); |
8409 | HAS_INTERNAL_DEP (check) = 1; |
8410 | twin = check; |
8411 | /* ??? We probably should change all OUTPUT dependencies to |
8412 | (TRUE | OUTPUT). */ |
8413 | } |
8414 | |
8415 | /* Copy all resolved back dependencies of INSN to TWIN. This will |
8416 | provide correct value for INSN_TICK (TWIN). */ |
8417 | sd_copy_back_deps (twin, insn, true); |
8418 | |
8419 | if (rec != EXIT_BLOCK_PTR_FOR_FN (cfun)) |
8420 | /* In case of branchy check, fix CFG. */ |
8421 | { |
8422 | basic_block first_bb, second_bb; |
8423 | rtx_insn *jump; |
8424 | |
      first_bb = BLOCK_FOR_INSN (check);
8426 | second_bb = sched_split_block (first_bb, check); |
8427 | |
8428 | sched_create_recovery_edges (first_bb, rec, second_bb); |
8429 | |
8430 | sched_init_only_bb (second_bb, first_bb); |
8431 | sched_init_only_bb (rec, EXIT_BLOCK_PTR_FOR_FN (cfun)); |
8432 | |
8433 | jump = BB_END (rec); |
8434 | haifa_init_insn (jump); |
8435 | } |
8436 | |
8437 | /* Move backward dependences from INSN to CHECK and |
8438 | move forward dependences from INSN to TWIN. */ |
8439 | |
8440 | /* First, create dependencies between INSN's producers and CHECK & TWIN. */ |
8441 | FOR_EACH_DEP (insn, SD_LIST_BACK, sd_it, dep) |
8442 | { |
8443 | rtx_insn *pro = DEP_PRO (dep); |
8444 | ds_t ds; |
8445 | |
8446 | /* If BEGIN_DATA: [insn ~~TRUE~~> producer]: |
8447 | check --TRUE--> producer ??? or ANTI ??? |
8448 | twin --TRUE--> producer |
8449 | twin --ANTI--> check |
8450 | |
8451 | If BEGIN_CONTROL: [insn ~~ANTI~~> producer]: |
8452 | check --ANTI--> producer |
8453 | twin --ANTI--> producer |
8454 | twin --ANTI--> check |
8455 | |
8456 | If BE_IN_SPEC: [insn ~~TRUE~~> producer]: |
8457 | check ~~TRUE~~> producer |
8458 | twin ~~TRUE~~> producer |
8459 | twin --ANTI--> check */ |
8460 | |
8461 | ds = DEP_STATUS (dep); |
8462 | |
8463 | if (ds & BEGIN_SPEC) |
8464 | { |
8465 | gcc_assert (!mutate_p); |
8466 | ds &= ~BEGIN_SPEC; |
8467 | } |
8468 | |
8469 | init_dep_1 (new_dep, pro, check, DEP_TYPE (dep), ds); |
8470 | sd_add_dep (new_dep, false); |
8471 | |
8472 | if (rec != EXIT_BLOCK_PTR_FOR_FN (cfun)) |
8473 | { |
8474 | DEP_CON (new_dep) = twin; |
8475 | sd_add_dep (new_dep, false); |
8476 | } |
8477 | } |
8478 | |
8479 | /* Second, remove backward dependencies of INSN. */ |
8480 | for (sd_it = sd_iterator_start (insn, SD_LIST_SPEC_BACK); |
       sd_iterator_cond (&sd_it, &dep);)
8482 | { |
8483 | if ((DEP_STATUS (dep) & BEGIN_SPEC) |
8484 | || mutate_p) |
8485 | /* We can delete this dep because we overcome it with |
8486 | BEGIN_SPECULATION. */ |
8487 | sd_delete_dep (sd_it); |
8488 | else |
	sd_iterator_next (&sd_it);
8490 | } |
8491 | |
8492 | /* Future Speculations. Determine what BE_IN speculations will be like. */ |
8493 | fs = 0; |
8494 | |
8495 | /* Fields (DONE_SPEC (x) & BEGIN_SPEC) and CHECK_SPEC (x) are set only |
8496 | here. */ |
8497 | |
8498 | gcc_assert (!DONE_SPEC (insn)); |
8499 | |
8500 | if (!mutate_p) |
8501 | { |
8502 | ds_t ts = TODO_SPEC (insn); |
8503 | |
8504 | DONE_SPEC (insn) = ts & BEGIN_SPEC; |
8505 | CHECK_SPEC (check) = ts & BEGIN_SPEC; |
8506 | |
8507 | /* Luckiness of future speculations solely depends upon initial |
8508 | BEGIN speculation. */ |
8509 | if (ts & BEGIN_DATA) |
8510 | fs = set_dep_weak (fs, BE_IN_DATA, get_dep_weak (ts, BEGIN_DATA)); |
8511 | if (ts & BEGIN_CONTROL) |
8512 | fs = set_dep_weak (fs, BE_IN_CONTROL, |
8513 | get_dep_weak (ts, BEGIN_CONTROL)); |
8514 | } |
8515 | else |
8516 | CHECK_SPEC (check) = CHECK_SPEC (insn); |
8517 | |
8518 | /* Future speculations: call the helper. */ |
8519 | process_insn_forw_deps_be_in_spec (insn, twin, fs); |
8520 | |
8521 | if (rec != EXIT_BLOCK_PTR_FOR_FN (cfun)) |
8522 | { |
      /* Which types of dependencies should be used here is, generally,
	 a machine-dependent question...  But, for now, it is not.  */
8526 | |
8527 | if (!mutate_p) |
8528 | { |
8529 | init_dep (new_dep, insn, check, REG_DEP_TRUE); |
8530 | sd_add_dep (new_dep, false); |
8531 | |
8532 | init_dep (new_dep, insn, twin, REG_DEP_OUTPUT); |
8533 | sd_add_dep (new_dep, false); |
8534 | } |
8535 | else |
8536 | { |
8537 | if (spec_info->dump) |
	    fprintf (spec_info->dump, ";;\t\tRemoved simple check : %s\n",
8539 | (*current_sched_info->print_insn) (insn, 0)); |
8540 | |
8541 | /* Remove all dependencies of the INSN. */ |
8542 | { |
	    sd_it = sd_iterator_start (insn, (SD_LIST_FORW
					      | SD_LIST_BACK
					      | SD_LIST_RES_BACK));
	    while (sd_iterator_cond (&sd_it, &dep))
8547 | sd_delete_dep (sd_it); |
8548 | } |
8549 | |
8550 | /* If former check (INSN) already was moved to the ready (or queue) |
8551 | list, add new check (CHECK) there too. */ |
8552 | if (QUEUE_INDEX (insn) != QUEUE_NOWHERE) |
	    try_ready (check);
8554 | |
8555 | /* Remove old check from instruction stream and free its |
8556 | data. */ |
8557 | sched_remove_insn (insn); |
8558 | } |
8559 | |
8560 | init_dep (new_dep, check, twin, REG_DEP_ANTI); |
8561 | sd_add_dep (new_dep, false); |
8562 | } |
8563 | else |
8564 | { |
8565 | init_dep_1 (new_dep, insn, check, REG_DEP_TRUE, DEP_TRUE | DEP_OUTPUT); |
8566 | sd_add_dep (new_dep, false); |
8567 | } |
8568 | |
8569 | if (!mutate_p) |
8570 | /* Fix priorities. If MUTATE_P is nonzero, this is not necessary, |
8571 | because it'll be done later in add_to_speculative_block. */ |
8572 | { |
8573 | auto_vec<rtx_insn *> priorities_roots; |
8574 | |
8575 | clear_priorities (twin, &priorities_roots); |
8576 | calc_priorities (priorities_roots); |
8577 | } |
8578 | } |
8579 | |
8580 | /* Removes dependency between instructions in the recovery block REC |
8581 | and usual region instructions. It keeps inner dependences so it |
8582 | won't be necessary to recompute them. */ |
8583 | static void |
8584 | fix_recovery_deps (basic_block rec) |
8585 | { |
8586 | rtx_insn *note, *insn, *jump; |
8587 | auto_vec<rtx_insn *, 10> ready_list; |
8588 | auto_bitmap in_ready; |
8589 | |
8590 | /* NOTE - a basic block note. */ |
8591 | note = NEXT_INSN (BB_HEAD (rec)); |
8592 | gcc_assert (NOTE_INSN_BASIC_BLOCK_P (note)); |
8593 | insn = BB_END (rec); |
8594 | gcc_assert (JUMP_P (insn)); |
8595 | insn = PREV_INSN (insn); |
8596 | |
8597 | do |
8598 | { |
8599 | sd_iterator_def sd_it; |
8600 | dep_t dep; |
8601 | |
      for (sd_it = sd_iterator_start (insn, SD_LIST_FORW);
	   sd_iterator_cond (&sd_it, &dep);)
	{
	  rtx_insn *consumer = DEP_CON (dep);

	  if (BLOCK_FOR_INSN (consumer) != rec)
	    {
	      sd_delete_dep (sd_it);

	      if (bitmap_set_bit (in_ready, INSN_LUID (consumer)))
		ready_list.safe_push (consumer);
	    }
	  else
	    {
	      gcc_assert ((DEP_STATUS (dep) & DEP_TYPES) == DEP_TRUE);

	      sd_iterator_next (&sd_it);
	    }
	}
8621 | |
8622 | insn = PREV_INSN (insn); |
8623 | } |
8624 | while (insn != note); |
8625 | |
8626 | /* Try to add instructions to the ready or queue list. */ |
8627 | unsigned int i; |
8628 | rtx_insn *temp; |
8629 | FOR_EACH_VEC_ELT_REVERSE (ready_list, i, temp) |
    try_ready (temp);
8631 | |
8632 | /* Fixing jump's dependences. */ |
8633 | insn = BB_HEAD (rec); |
8634 | jump = BB_END (rec); |
8635 | |
8636 | gcc_assert (LABEL_P (insn)); |
8637 | insn = NEXT_INSN (insn); |
8638 | |
8639 | gcc_assert (NOTE_INSN_BASIC_BLOCK_P (insn)); |
8640 | add_jump_dependencies (insn, jump); |
8641 | } |
8642 | |
8643 | /* Change pattern of INSN to NEW_PAT. Invalidate cached haifa |
8644 | instruction data. */ |
8645 | static bool |
8646 | haifa_change_pattern (rtx_insn *insn, rtx new_pat) |
8647 | { |
8648 | int t; |
8649 | |
8650 | t = validate_change (insn, &PATTERN (insn), new_pat, 0); |
8651 | if (!t) |
8652 | return false; |
8653 | |
8654 | update_insn_after_change (insn); |
8655 | return true; |
8656 | } |
8657 | |
8658 | /* -1 - can't speculate, |
8659 | 0 - for speculation with REQUEST mode it is OK to use |
8660 | current instruction pattern, |
8661 | 1 - need to change pattern for *NEW_PAT to be speculative. */ |
8662 | int |
8663 | sched_speculate_insn (rtx_insn *insn, ds_t request, rtx *new_pat) |
8664 | { |
8665 | gcc_assert (current_sched_info->flags & DO_SPECULATION |
8666 | && (request & SPECULATIVE) |
8667 | && sched_insn_is_legitimate_for_speculation_p (insn, request)); |
8668 | |
8669 | if ((request & spec_info->mask) != request) |
8670 | return -1; |
8671 | |
8672 | if (request & BE_IN_SPEC |
8673 | && !(request & BEGIN_SPEC)) |
8674 | return 0; |
8675 | |
8676 | return targetm.sched.speculate_insn (insn, request, new_pat); |
8677 | } |
8678 | |
8679 | static int |
8680 | haifa_speculate_insn (rtx_insn *insn, ds_t request, rtx *new_pat) |
8681 | { |
8682 | gcc_assert (sched_deps_info->generate_spec_deps |
8683 | && !IS_SPECULATION_CHECK_P (insn)); |
8684 | |
8685 | if (HAS_INTERNAL_DEP (insn) |
8686 | || SCHED_GROUP_P (insn)) |
8687 | return -1; |
8688 | |
8689 | return sched_speculate_insn (insn, request, new_pat); |
8690 | } |
8691 | |
/* Print some information about block BB, which starts with HEAD and
   ends with TAIL, before scheduling it.
   I is zero if the scheduler is about to start with a fresh ebb.  */
static void
dump_new_block_header (int i, basic_block bb, rtx_insn *head, rtx_insn *tail)
8697 | { |
8698 | if (!i) |
    fprintf (sched_dump,
	     ";; ======================================================\n");
  else
    fprintf (sched_dump,
	     ";; =====================ADVANCING TO=====================\n");
  fprintf (sched_dump,
	   ";; -- basic block %d from %d to %d -- %s reload\n",
	   bb->index, INSN_UID (head), INSN_UID (tail),
	   (reload_completed ? "after" : "before"));
  fprintf (sched_dump,
	   ";; ======================================================\n");
  fprintf (sched_dump, "\n");
8711 | } |
8712 | |
/* Unlink basic block notes and labels and save them, so they
   can be easily restored.  We unlink basic block notes in EBBs to
   provide back-compatibility with the previous code, as target backends
   assume that there will be only instructions between
   current_sched_info->{head and tail}.  We restore these notes as soon
   as we can.
   FIRST (LAST) is the first (last) basic block in the ebb.
   NB: In the usual case (FIRST == LAST) nothing is really done.  */
8721 | void |
8722 | unlink_bb_notes (basic_block first, basic_block last) |
8723 | { |
8724 | /* We DON'T unlink basic block notes of the first block in the ebb. */ |
8725 | if (first == last) |
8726 | return; |
8727 | |
8728 | bb_header = XNEWVEC (rtx_insn *, last_basic_block_for_fn (cfun)); |
8729 | |
8730 | /* Make a sentinel. */ |
8731 | if (last->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun)) |
8732 | bb_header[last->next_bb->index] = 0; |
8733 | |
8734 | first = first->next_bb; |
8735 | do |
8736 | { |
8737 | rtx_insn *prev, *label, *note, *next; |
8738 | |
8739 | label = BB_HEAD (last); |
8740 | if (LABEL_P (label)) |
	note = NEXT_INSN (label);
8742 | else |
8743 | note = label; |
8744 | gcc_assert (NOTE_INSN_BASIC_BLOCK_P (note)); |
8745 | |
      prev = PREV_INSN (label);
      next = NEXT_INSN (note);
8748 | gcc_assert (prev && next); |
8749 | |
8750 | SET_NEXT_INSN (prev) = next; |
8751 | SET_PREV_INSN (next) = prev; |
8752 | |
8753 | bb_header[last->index] = label; |
8754 | |
8755 | if (last == first) |
8756 | break; |
8757 | |
8758 | last = last->prev_bb; |
8759 | } |
8760 | while (1); |
8761 | } |
8762 | |
8763 | /* Restore basic block notes. |
8764 | FIRST is the first basic block in the ebb. */ |
8765 | static void |
8766 | restore_bb_notes (basic_block first) |
8767 | { |
8768 | if (!bb_header) |
8769 | return; |
8770 | |
8771 | /* We DON'T unlink basic block notes of the first block in the ebb. */ |
8772 | first = first->next_bb; |
  /* Remember: FIRST is actually the second basic block in the ebb.  */
8774 | |
8775 | while (first != EXIT_BLOCK_PTR_FOR_FN (cfun) |
8776 | && bb_header[first->index]) |
8777 | { |
8778 | rtx_insn *prev, *label, *note, *next; |
8779 | |
8780 | label = bb_header[first->index]; |
      prev = PREV_INSN (label);
      next = NEXT_INSN (prev);

      if (LABEL_P (label))
	note = NEXT_INSN (label);
8786 | else |
8787 | note = label; |
8788 | gcc_assert (NOTE_INSN_BASIC_BLOCK_P (note)); |
8789 | |
8790 | bb_header[first->index] = 0; |
8791 | |
8792 | SET_NEXT_INSN (prev) = label; |
8793 | SET_NEXT_INSN (note) = next; |
8794 | SET_PREV_INSN (next) = note; |
8795 | |
8796 | first = first->next_bb; |
8797 | } |
8798 | |
  free (bb_header);
8800 | bb_header = 0; |
8801 | } |
8802 | |
8803 | /* Helper function. |
8804 | Fix CFG after both in- and inter-block movement of |
8805 | control_flow_insn_p JUMP. */ |
8806 | static void |
8807 | fix_jump_move (rtx_insn *jump) |
8808 | { |
8809 | basic_block bb, jump_bb, jump_bb_next; |
8810 | |
  bb = BLOCK_FOR_INSN (PREV_INSN (jump));
  jump_bb = BLOCK_FOR_INSN (jump);
8813 | jump_bb_next = jump_bb->next_bb; |
8814 | |
8815 | gcc_assert (common_sched_info->sched_pass_id == SCHED_EBB_PASS |
8816 | || IS_SPECULATION_BRANCHY_CHECK_P (jump)); |
8817 | |
8818 | if (!NOTE_INSN_BASIC_BLOCK_P (BB_END (jump_bb_next))) |
    /* If jump_bb_next is not empty.  */
8820 | BB_END (jump_bb) = BB_END (jump_bb_next); |
8821 | |
  if (BB_END (bb) != PREV_INSN (jump))
    /* Then there are instructions after jump that should be placed
       to jump_bb_next.  */
8825 | BB_END (jump_bb_next) = BB_END (bb); |
8826 | else |
8827 | /* Otherwise jump_bb_next is empty. */ |
8828 | BB_END (jump_bb_next) = NEXT_INSN (BB_HEAD (jump_bb_next)); |
8829 | |
8830 | /* To make assertion in move_insn happy. */ |
  BB_END (bb) = PREV_INSN (jump);
8832 | |
8833 | update_bb_for_insn (jump_bb_next); |
8834 | } |
8835 | |
8836 | /* Fix CFG after interblock movement of control_flow_insn_p JUMP. */ |
8837 | static void |
8838 | move_block_after_check (rtx_insn *jump) |
8839 | { |
8840 | basic_block bb, jump_bb, jump_bb_next; |
8841 | vec<edge, va_gc> *t; |
8842 | |
  bb = BLOCK_FOR_INSN (PREV_INSN (jump));
  jump_bb = BLOCK_FOR_INSN (jump);
8845 | jump_bb_next = jump_bb->next_bb; |
8846 | |
8847 | update_bb_for_insn (jump_bb); |
8848 | |
8849 | gcc_assert (IS_SPECULATION_CHECK_P (jump) |
8850 | || IS_SPECULATION_CHECK_P (BB_END (jump_bb_next))); |
8851 | |
8852 | unlink_block (jump_bb_next); |
8853 | link_block (jump_bb_next, bb); |
8854 | |
8855 | t = bb->succs; |
8856 | bb->succs = 0; |
8857 | move_succs (&(jump_bb->succs), bb); |
8858 | move_succs (&(jump_bb_next->succs), jump_bb); |
8859 | move_succs (&t, jump_bb_next); |
8860 | |
8861 | df_mark_solutions_dirty (); |
8862 | |
8863 | common_sched_info->fix_recovery_cfg |
8864 | (bb->index, jump_bb->index, jump_bb_next->index); |
8865 | } |
8866 | |
8867 | /* Helper function for move_block_after_check. |
   This function attaches the edge vector pointed to by SUCCSP to
   block TO.  */
8870 | static void |
8871 | move_succs (vec<edge, va_gc> **succsp, basic_block to) |
8872 | { |
8873 | edge e; |
8874 | edge_iterator ei; |
8875 | |
8876 | gcc_assert (to->succs == 0); |
8877 | |
8878 | to->succs = *succsp; |
8879 | |
8880 | FOR_EACH_EDGE (e, ei, to->succs) |
8881 | e->src = to; |
8882 | |
8883 | *succsp = 0; |
8884 | } |
8885 | |
/* Remove INSN from the instruction stream.
   INSN should not have any dependencies.  */
8888 | static void |
8889 | sched_remove_insn (rtx_insn *insn) |
8890 | { |
8891 | sd_finish_insn (insn); |
8892 | |
  change_queue_index (insn, QUEUE_NOWHERE);
8894 | current_sched_info->add_remove_insn (insn, 1); |
8895 | delete_insn (insn); |
8896 | } |
8897 | |
/* Clear priorities of all instructions that are forward dependent on INSN.
   Store in the vector pointed to by ROOTS_PTR the insns on which priority ()
   should be invoked to initialize all cleared priorities.  */
8901 | static void |
8902 | clear_priorities (rtx_insn *insn, rtx_vec_t *roots_ptr) |
8903 | { |
8904 | sd_iterator_def sd_it; |
8905 | dep_t dep; |
8906 | bool insn_is_root_p = true; |
8907 | |
8908 | gcc_assert (QUEUE_INDEX (insn) != QUEUE_SCHEDULED); |
8909 | |
8910 | FOR_EACH_DEP (insn, SD_LIST_BACK, sd_it, dep) |
8911 | { |
8912 | rtx_insn *pro = DEP_PRO (dep); |
8913 | |
8914 | if (INSN_PRIORITY_STATUS (pro) >= 0 |
8915 | && QUEUE_INDEX (insn) != QUEUE_SCHEDULED) |
8916 | { |
8917 | /* If DEP doesn't contribute to priority then INSN itself should |
8918 | be added to priority roots. */ |
8919 | if (contributes_to_priority_p (dep)) |
8920 | insn_is_root_p = false; |
8921 | |
8922 | INSN_PRIORITY_STATUS (pro) = -1; |
	  clear_priorities (pro, roots_ptr);
8924 | } |
8925 | } |
8926 | |
8927 | if (insn_is_root_p) |
    roots_ptr->safe_push (insn);
8929 | } |
8930 | |
/* Recompute priorities of instructions whose priorities might have been
   changed.  ROOTS is a vector of instructions whose priority computation
   will trigger initialization of all cleared priorities.  */
8934 | static void |
8935 | calc_priorities (const rtx_vec_t &roots) |
8936 | { |
8937 | int i; |
8938 | rtx_insn *insn; |
8939 | |
8940 | FOR_EACH_VEC_ELT (roots, i, insn) |
8941 | priority (insn); |
8942 | } |
8943 | |
8944 | |
/* Add dependences between JUMP and other instructions in the recovery
   block.  INSN is the first insn in the recovery block.  */
8947 | static void |
8948 | add_jump_dependencies (rtx_insn *insn, rtx_insn *jump) |
8949 | { |
8950 | do |
8951 | { |
8952 | insn = NEXT_INSN (insn); |
8953 | if (insn == jump) |
8954 | break; |
8955 | |
8956 | if (dep_list_size (insn, SD_LIST_FORW) == 0) |
8957 | { |
8958 | dep_def _new_dep, *new_dep = &_new_dep; |
8959 | |
8960 | init_dep (new_dep, insn, jump, REG_DEP_ANTI); |
8961 | sd_add_dep (new_dep, false); |
8962 | } |
8963 | } |
8964 | while (1); |
8965 | |
8966 | gcc_assert (!sd_lists_empty_p (jump, SD_LIST_BACK)); |
8967 | } |
8968 | |
8969 | /* Extend data structures for logical insn UID. */ |
8970 | void |
8971 | sched_extend_luids (void) |
8972 | { |
8973 | int new_luids_max_uid = get_max_uid () + 1; |
8974 | |
  sched_luids.safe_grow_cleared (new_luids_max_uid, true);
8976 | } |
8977 | |
8978 | /* Initialize LUID for INSN. */ |
8979 | void |
8980 | sched_init_insn_luid (rtx_insn *insn) |
8981 | { |
8982 | int i = INSN_P (insn) ? 1 : common_sched_info->luid_for_non_insn (insn); |
8983 | int luid; |
8984 | |
8985 | if (i >= 0) |
8986 | { |
8987 | luid = sched_max_luid; |
8988 | sched_max_luid += i; |
8989 | } |
8990 | else |
8991 | luid = -1; |
8992 | |
8993 | SET_INSN_LUID (insn, luid); |
8994 | } |
8995 | |
8996 | /* Initialize luids for BBS. |
8997 | The hook common_sched_info->luid_for_non_insn () is used to determine |
8998 | if notes, labels, etc. need luids. */ |
8999 | void |
9000 | sched_init_luids (const bb_vec_t &bbs) |
9001 | { |
9002 | int i; |
9003 | basic_block bb; |
9004 | |
9005 | sched_extend_luids (); |
9006 | FOR_EACH_VEC_ELT (bbs, i, bb) |
9007 | { |
9008 | rtx_insn *insn; |
9009 | |
9010 | FOR_BB_INSNS (bb, insn) |
9011 | sched_init_insn_luid (insn); |
9012 | } |
9013 | } |
9014 | |
9015 | /* Free LUIDs. */ |
9016 | void |
9017 | sched_finish_luids (void) |
9018 | { |
9019 | sched_luids.release (); |
9020 | sched_max_luid = 1; |
9021 | } |
9022 | |
9023 | /* Return logical uid of INSN. Helpful while debugging. */ |
9024 | int |
9025 | insn_luid (rtx_insn *insn) |
9026 | { |
9027 | return INSN_LUID (insn); |
9028 | } |
9029 | |
9030 | /* Extend per insn data in the target. */ |
9031 | void |
9032 | sched_extend_target (void) |
9033 | { |
9034 | if (targetm.sched.h_i_d_extended) |
9035 | targetm.sched.h_i_d_extended (); |
9036 | } |
9037 | |
/* Extend global scheduler structures (those that live across calls to
   schedule_block) to include information about just emitted INSN.  */
9040 | static void |
9041 | extend_h_i_d (void) |
9042 | { |
9043 | int reserve = (get_max_uid () + 1 - h_i_d.length ()); |
9044 | if (reserve > 0 |
      && ! h_i_d.space (reserve))
    {
      h_i_d.safe_grow_cleared (3 * get_max_uid () / 2, true);
9048 | sched_extend_target (); |
9049 | } |
9050 | } |
9051 | |
/* Initialize the h_i_d entry of INSN with default values.
   Values that are not explicitly initialized here hold zero.  */
9054 | static void |
9055 | init_h_i_d (rtx_insn *insn) |
9056 | { |
9057 | if (INSN_LUID (insn) > 0) |
9058 | { |
9059 | INSN_COST (insn) = -1; |
9060 | QUEUE_INDEX (insn) = QUEUE_NOWHERE; |
9061 | INSN_TICK (insn) = INVALID_TICK; |
9062 | INSN_EXACT_TICK (insn) = INVALID_TICK; |
9063 | INTER_TICK (insn) = INVALID_TICK; |
9064 | TODO_SPEC (insn) = HARD_DEP; |
9065 | INSN_AUTOPREF_MULTIPASS_DATA (insn)[0].status |
9066 | = AUTOPREF_MULTIPASS_DATA_UNINITIALIZED; |
9067 | INSN_AUTOPREF_MULTIPASS_DATA (insn)[1].status |
9068 | = AUTOPREF_MULTIPASS_DATA_UNINITIALIZED; |
9069 | } |
9070 | } |
9071 | |
9072 | /* Initialize haifa_insn_data for BBS. */ |
9073 | void |
9074 | haifa_init_h_i_d (const bb_vec_t &bbs) |
9075 | { |
9076 | int i; |
9077 | basic_block bb; |
9078 | |
9079 | extend_h_i_d (); |
9080 | FOR_EACH_VEC_ELT (bbs, i, bb) |
9081 | { |
9082 | rtx_insn *insn; |
9083 | |
9084 | FOR_BB_INSNS (bb, insn) |
9085 | init_h_i_d (insn); |
9086 | } |
9087 | } |
9088 | |
9089 | /* Finalize haifa_insn_data. */ |
9090 | void |
9091 | haifa_finish_h_i_d (void) |
9092 | { |
9093 | int i; |
9094 | haifa_insn_data_t data; |
9095 | reg_use_data *use, *next_use; |
9096 | reg_set_data *set, *next_set; |
9097 | |
9098 | FOR_EACH_VEC_ELT (h_i_d, i, data) |
9099 | { |
      free (data->max_reg_pressure);
      free (data->reg_pressure);
      for (use = data->reg_use_list; use != NULL; use = next_use)
	{
	  next_use = use->next_insn_use;
	  free (use);
	}
      for (set = data->reg_set_list; set != NULL; set = next_set)
	{
	  next_set = set->next_insn_set;
	  free (set);
	}
    }
9114 | h_i_d.release (); |
9115 | } |
9116 | |
9117 | /* Init data for the new insn INSN. */ |
9118 | static void |
9119 | haifa_init_insn (rtx_insn *insn) |
9120 | { |
9121 | gcc_assert (insn != NULL); |
9122 | |
9123 | sched_extend_luids (); |
9124 | sched_init_insn_luid (insn); |
9125 | sched_extend_target (); |
9126 | sched_deps_init (false); |
9127 | extend_h_i_d (); |
9128 | init_h_i_d (insn); |
9129 | |
9130 | if (adding_bb_to_current_region_p) |
9131 | { |
9132 | sd_init_insn (insn); |
9133 | |
9134 | /* Extend dependency caches by one element. */ |
9135 | extend_dependency_caches (1, false); |
9136 | } |
9137 | if (sched_pressure != SCHED_PRESSURE_NONE) |
9138 | init_insn_reg_pressure_info (insn); |
9139 | } |
9140 | |
9141 | /* Init data for the new basic block BB which comes after AFTER. */ |
9142 | static void |
9143 | haifa_init_only_bb (basic_block bb, basic_block after) |
9144 | { |
9145 | gcc_assert (bb != NULL); |
9146 | |
9147 | sched_init_bbs (); |
9148 | |
9149 | if (common_sched_info->add_block) |
9150 | /* This changes only data structures of the front-end. */ |
9151 | common_sched_info->add_block (bb, after); |
9152 | } |
9153 | |
9154 | /* A generic version of sched_split_block (). */ |
9155 | basic_block |
9156 | sched_split_block_1 (basic_block first_bb, rtx after) |
9157 | { |
9158 | edge e; |
9159 | |
9160 | e = split_block (first_bb, after); |
9161 | gcc_assert (e->src == first_bb); |
9162 | |
9163 | /* sched_split_block emits note if *check == BB_END. Probably it |
9164 | is better to rip that note off. */ |
9165 | |
9166 | return e->dest; |
9167 | } |
9168 | |
9169 | /* A generic version of sched_create_empty_bb (). */ |
9170 | basic_block |
9171 | sched_create_empty_bb_1 (basic_block after) |
9172 | { |
9173 | return create_empty_bb (after); |
9174 | } |
9175 | |
9176 | /* Insert PAT as an INSN into the schedule and update the necessary data |
9177 | structures to account for it. */ |
9178 | rtx_insn * |
9179 | sched_emit_insn (rtx pat) |
9180 | { |
9181 | rtx_insn *insn = emit_insn_before (pat, first_nonscheduled_insn ()); |
9182 | haifa_init_insn (insn); |
9183 | |
9184 | if (current_sched_info->add_remove_insn) |
9185 | current_sched_info->add_remove_insn (insn, 0); |
9186 | |
9187 | (*current_sched_info->begin_schedule_ready) (insn); |
  scheduled_insns.safe_push (insn);
9189 | |
9190 | last_scheduled_insn = insn; |
9191 | return insn; |
9192 | } |
9193 | |
9194 | /* This function returns a candidate satisfying dispatch constraints from |
9195 | the ready list. */ |
9196 | |
9197 | static rtx_insn * |
9198 | ready_remove_first_dispatch (struct ready_list *ready) |
9199 | { |
9200 | int i; |
  rtx_insn *insn = ready_element (ready, 0);
9202 | |
9203 | if (ready->n_ready == 1 |
9204 | || !INSN_P (insn) |
9205 | || INSN_CODE (insn) < 0 |
9206 | || !active_insn_p (insn) |
9207 | || targetm.sched.dispatch (insn, FITS_DISPATCH_WINDOW)) |
9208 | return ready_remove_first (ready); |
9209 | |
9210 | for (i = 1; i < ready->n_ready; i++) |
9211 | { |
      insn = ready_element (ready, i);
9213 | |
9214 | if (!INSN_P (insn) |
9215 | || INSN_CODE (insn) < 0 |
9216 | || !active_insn_p (insn)) |
9217 | continue; |
9218 | |
9219 | if (targetm.sched.dispatch (insn, FITS_DISPATCH_WINDOW)) |
9220 | { |
	  /* Return the i-th element of ready.  */
	  insn = ready_remove (ready, i);
9223 | return insn; |
9224 | } |
9225 | } |
9226 | |
9227 | if (targetm.sched.dispatch (NULL, DISPATCH_VIOLATION)) |
9228 | return ready_remove_first (ready); |
9229 | |
9230 | for (i = 1; i < ready->n_ready; i++) |
9231 | { |
      insn = ready_element (ready, i);
9233 | |
9234 | if (!INSN_P (insn) |
9235 | || INSN_CODE (insn) < 0 |
9236 | || !active_insn_p (insn)) |
9237 | continue; |
9238 | |
      /* Return the i-th element of ready.  */
      if (targetm.sched.dispatch (insn, IS_CMP))
	return ready_remove (ready, i);
9242 | } |
9243 | |
9244 | return ready_remove_first (ready); |
9245 | } |
9246 | |
/* Get the number of ready insns in the ready list.  */
9248 | |
9249 | int |
9250 | number_in_ready (void) |
9251 | { |
9252 | return ready.n_ready; |
9253 | } |
9254 | |
/* Get the I-th ready insn in the ready list.  */
9256 | |
9257 | rtx_insn * |
9258 | get_ready_element (int i) |
9259 | { |
  return ready_element (&ready, i);
9261 | } |
9262 | |
9263 | #endif /* INSN_SCHEDULING */ |
9264 | |