/* Partial redundancy elimination / Hoisting for RTL.
   Copyright (C) 1997-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* TODO
   - reordering of memory allocation and freeing to be more space efficient
   - calc rough register pressure information and use the info to drive all
     kinds of code motion (including code hoisting) in a unified way.
*/
25 | |
26 | /* References searched while implementing this. |
27 | |
28 | Compilers Principles, Techniques and Tools |
29 | Aho, Sethi, Ullman |
30 | Addison-Wesley, 1988 |
31 | |
32 | Global Optimization by Suppression of Partial Redundancies |
33 | E. Morel, C. Renvoise |
34 | communications of the acm, Vol. 22, Num. 2, Feb. 1979 |
35 | |
36 | A Portable Machine-Independent Global Optimizer - Design and Measurements |
37 | Frederick Chow |
38 | Stanford Ph.D. thesis, Dec. 1983 |
39 | |
40 | A Fast Algorithm for Code Movement Optimization |
41 | D.M. Dhamdhere |
42 | SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988 |
43 | |
44 | A Solution to a Problem with Morel and Renvoise's |
45 | Global Optimization by Suppression of Partial Redundancies |
46 | K-H Drechsler, M.P. Stadel |
47 | ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988 |
48 | |
49 | Practical Adaptation of the Global Optimization |
50 | Algorithm of Morel and Renvoise |
51 | D.M. Dhamdhere |
52 | ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991 |
53 | |
54 | Efficiently Computing Static Single Assignment Form and the Control |
55 | Dependence Graph |
56 | R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck |
57 | ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991 |
58 | |
59 | Lazy Code Motion |
60 | J. Knoop, O. Ruthing, B. Steffen |
61 | ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI |
62 | |
63 | What's In a Region? Or Computing Control Dependence Regions in Near-Linear |
64 | Time for Reducible Flow Control |
65 | Thomas Ball |
66 | ACM Letters on Programming Languages and Systems, |
67 | Vol. 2, Num. 1-4, Mar-Dec 1993 |
68 | |
69 | An Efficient Representation for Sparse Sets |
70 | Preston Briggs, Linda Torczon |
71 | ACM Letters on Programming Languages and Systems, |
72 | Vol. 2, Num. 1-4, Mar-Dec 1993 |
73 | |
74 | A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion |
75 | K-H Drechsler, M.P. Stadel |
76 | ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993 |
77 | |
78 | Partial Dead Code Elimination |
79 | J. Knoop, O. Ruthing, B. Steffen |
80 | ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994 |
81 | |
82 | Effective Partial Redundancy Elimination |
83 | P. Briggs, K.D. Cooper |
84 | ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994 |
85 | |
86 | The Program Structure Tree: Computing Control Regions in Linear Time |
87 | R. Johnson, D. Pearson, K. Pingali |
88 | ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994 |
89 | |
90 | Optimal Code Motion: Theory and Practice |
91 | J. Knoop, O. Ruthing, B. Steffen |
92 | ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994 |
93 | |
94 | The power of assignment motion |
95 | J. Knoop, O. Ruthing, B. Steffen |
96 | ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI |
97 | |
98 | Global code motion / global value numbering |
99 | C. Click |
100 | ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI |
101 | |
102 | Value Driven Redundancy Elimination |
103 | L.T. Simpson |
104 | Rice University Ph.D. thesis, Apr. 1996 |
105 | |
106 | Value Numbering |
107 | L.T. Simpson |
108 | Massively Scalar Compiler Project, Rice University, Sep. 1996 |
109 | |
110 | High Performance Compilers for Parallel Computing |
111 | Michael Wolfe |
112 | Addison-Wesley, 1996 |
113 | |
114 | Advanced Compiler Design and Implementation |
115 | Steven Muchnick |
116 | Morgan Kaufmann, 1997 |
117 | |
118 | Building an Optimizing Compiler |
119 | Robert Morgan |
120 | Digital Press, 1998 |
121 | |
122 | People wishing to speed up the code here should read: |
123 | Elimination Algorithms for Data Flow Analysis |
124 | B.G. Ryder, M.C. Paull |
125 | ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986 |
126 | |
127 | How to Analyze Large Programs Efficiently and Informatively |
128 | D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck |
129 | ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI |
130 | |
131 | People wishing to do something different can find various possibilities |
132 | in the above papers and elsewhere. |
133 | */ |
134 | |
135 | #include "config.h" |
136 | #include "system.h" |
137 | #include "coretypes.h" |
138 | #include "backend.h" |
139 | #include "target.h" |
140 | #include "rtl.h" |
141 | #include "tree.h" |
142 | #include "predict.h" |
143 | #include "df.h" |
144 | #include "memmodel.h" |
145 | #include "tm_p.h" |
146 | #include "insn-config.h" |
147 | #include "print-rtl.h" |
148 | #include "regs.h" |
149 | #include "ira.h" |
150 | #include "recog.h" |
151 | #include "diagnostic-core.h" |
152 | #include "cfgrtl.h" |
153 | #include "cfganal.h" |
154 | #include "lcm.h" |
155 | #include "cfgcleanup.h" |
156 | #include "expr.h" |
157 | #include "intl.h" |
158 | #include "tree-pass.h" |
159 | #include "dbgcnt.h" |
160 | #include "gcse.h" |
161 | #include "gcse-common.h" |
162 | #include "function-abi.h" |
163 | |
164 | /* We support GCSE via Partial Redundancy Elimination. PRE optimizations |
165 | are a superset of those done by classic GCSE. |
166 | |
167 | Two passes of copy/constant propagation are done around PRE or hoisting |
168 | because the first one enables more GCSE and the second one helps to clean |
169 | up the copies that PRE and HOIST create. This is needed more for PRE than |
170 | for HOIST because code hoisting will try to use an existing register |
171 | containing the common subexpression rather than create a new one. This is |
172 | harder to do for PRE because of the code motion (which HOIST doesn't do). |
173 | |
174 | Expressions we are interested in GCSE-ing are of the form |
175 | (set (pseudo-reg) (expression)). |
176 | Function want_to_gcse_p says what these are. |
177 | |
178 | In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing. |
179 | This allows PRE to hoist expressions that are expressed in multiple insns, |
180 | such as complex address calculations (e.g. for PIC code, or loads with a |
181 | high part and a low part). |
182 | |
183 | PRE handles moving invariant expressions out of loops (by treating them as |
184 | partially redundant). |
185 | |
186 | ********************** |
187 | |
188 | We used to support multiple passes but there are diminishing returns in |
189 | doing so. The first pass usually makes 90% of the changes that are doable. |
190 | A second pass can make a few more changes made possible by the first pass. |
191 | Experiments show any further passes don't make enough changes to justify |
192 | the expense. |
193 | |
194 | A study of spec92 using an unlimited number of passes: |
195 | [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83, |
196 | [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2, |
197 | [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1 |
198 | |
199 | It was found doing copy propagation between each pass enables further |
200 | substitutions. |
201 | |
202 | This study was done before expressions in REG_EQUAL notes were added as |
203 | candidate expressions for optimization, and before the GIMPLE optimizers |
204 | were added. Probably, multiple passes is even less efficient now than |
205 | at the time when the study was conducted. |
206 | |
207 | PRE is quite expensive in complicated functions because the DFA can take |
208 | a while to converge. Hence we only perform one pass. |
209 | |
210 | ********************** |
211 | |
212 | The steps for PRE are: |
213 | |
214 | 1) Build the hash table of expressions we wish to GCSE (expr_hash_table). |
215 | |
216 | 2) Perform the data flow analysis for PRE. |
217 | |
218 | 3) Delete the redundant instructions |
219 | |
220 | 4) Insert the required copies [if any] that make the partially |
221 | redundant instructions fully redundant. |
222 | |
223 | 5) For other reaching expressions, insert an instruction to copy the value |
224 | to a newly created pseudo that will reach the redundant instruction. |
225 | |
226 | The deletion is done first so that when we do insertions we |
227 | know which pseudo reg to use. |
228 | |
229 | Various papers have argued that PRE DFA is expensive (O(n^2)) and others |
230 | argue it is not. The number of iterations for the algorithm to converge |
231 | is typically 2-4 so I don't view it as that expensive (relatively speaking). |
232 | |
233 | PRE GCSE depends heavily on the second CPROP pass to clean up the copies |
234 | we create. To make an expression reach the place where it's redundant, |
235 | the result of the expression is copied to a new register, and the redundant |
236 | expression is deleted by replacing it with this new register. Classic GCSE |
237 | doesn't have this problem as much as it computes the reaching defs of |
238 | each register in each block and thus can try to use an existing |
239 | register. */ |
240 | |
241 | /* GCSE global vars. */ |
242 | |
243 | struct target_gcse default_target_gcse; |
244 | #if SWITCHABLE_TARGET |
245 | struct target_gcse *this_target_gcse = &default_target_gcse; |
246 | #endif |
247 | |
248 | /* Set to non-zero if CSE should run after all GCSE optimizations are done. */ |
249 | int flag_rerun_cse_after_global_opts; |
250 | |
251 | /* An obstack for our working variables. */ |
252 | static struct obstack gcse_obstack; |
253 | |
254 | /* Hash table of expressions. */ |
255 | |
256 | struct gcse_expr |
257 | { |
258 | /* The expression. */ |
259 | rtx expr; |
260 | /* Index in the available expression bitmaps. */ |
261 | int bitmap_index; |
262 | /* Next entry with the same hash. */ |
263 | struct gcse_expr *next_same_hash; |
264 | /* List of anticipatable occurrences in basic blocks in the function. |
265 | An "anticipatable occurrence" is one that is the first occurrence in the |
266 | basic block, the operands are not modified in the basic block prior |
267 | to the occurrence and the output is not used between the start of |
268 | the block and the occurrence. */ |
269 | struct gcse_occr *antic_occr; |
270 | /* List of available occurrence in basic blocks in the function. |
271 | An "available occurrence" is one that is the last occurrence in the |
272 | basic block and the operands are not modified by following statements in |
273 | the basic block [including this insn]. */ |
274 | struct gcse_occr *avail_occr; |
275 | /* Non-null if the computation is PRE redundant. |
276 | The value is the newly created pseudo-reg to record a copy of the |
277 | expression in all the places that reach the redundant copy. */ |
278 | rtx reaching_reg; |
279 | /* Maximum distance in instructions this expression can travel. |
280 | We avoid moving simple expressions for more than a few instructions |
281 | to keep register pressure under control. |
282 | A value of "0" removes restrictions on how far the expression can |
283 | travel. */ |
284 | HOST_WIDE_INT max_distance; |
285 | }; |
286 | |
287 | /* Occurrence of an expression. |
288 | There is one per basic block. If a pattern appears more than once the |
289 | last appearance is used [or first for anticipatable expressions]. */ |
290 | |
291 | struct gcse_occr |
292 | { |
293 | /* Next occurrence of this expression. */ |
294 | struct gcse_occr *next; |
295 | /* The insn that computes the expression. */ |
296 | rtx_insn *insn; |
297 | /* Nonzero if this [anticipatable] occurrence has been deleted. */ |
298 | char deleted_p; |
299 | /* Nonzero if this [available] occurrence has been copied to |
300 | reaching_reg. */ |
301 | /* ??? This is mutually exclusive with deleted_p, so they could share |
302 | the same byte. */ |
303 | char copied_p; |
304 | }; |
305 | |
306 | typedef struct gcse_occr *occr_t; |
307 | |
/* Expression hash tables.
   Each hash table is an array of buckets.
   ??? It is known that if it were an array of entries, structure elements
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
   not clear whether in the final analysis a sufficient amount of memory would
   be saved as the size of the available expression bitmaps would be larger
   [one could build a mapping table without holes afterwards though].
   Someday I'll perform the computation and figure it out.  */

struct gcse_hash_table_d
{
  /* The table itself.
     This is an array of `expr_hash_table_size' elements.  */
  struct gcse_expr **table;

  /* Size of the hash table, in elements.  */
  unsigned int size;

  /* Number of hash table elements.  */
  unsigned int n_elems;
};

/* Expression hash table.  */
static struct gcse_hash_table_d expr_hash_table;
332 | |
333 | /* This is a list of expressions which are MEMs and will be used by load |
334 | or store motion. |
335 | Load motion tracks MEMs which aren't killed by anything except itself, |
336 | i.e. loads and stores to a single location. |
337 | We can then allow movement of these MEM refs with a little special |
338 | allowance. (all stores copy the same value to the reaching reg used |
339 | for the loads). This means all values used to store into memory must have |
340 | no side effects so we can re-issue the setter value. */ |
341 | |
342 | struct ls_expr |
343 | { |
344 | struct gcse_expr * expr; /* Gcse expression reference for LM. */ |
345 | rtx pattern; /* Pattern of this mem. */ |
346 | rtx pattern_regs; /* List of registers mentioned by the mem. */ |
347 | vec<rtx_insn *> stores; /* INSN list of stores seen. */ |
348 | struct ls_expr * next; /* Next in the list. */ |
349 | int invalid; /* Invalid for some reason. */ |
350 | int index; /* If it maps to a bitmap index. */ |
351 | unsigned int hash_index; /* Index when in a hash table. */ |
352 | rtx reaching_reg; /* Register to use when re-writing. */ |
353 | }; |
354 | |
355 | /* Head of the list of load/store memory refs. */ |
356 | static struct ls_expr * pre_ldst_mems = NULL; |
357 | |
358 | struct pre_ldst_expr_hasher : nofree_ptr_hash <ls_expr> |
359 | { |
360 | typedef value_type compare_type; |
361 | static inline hashval_t hash (const ls_expr *); |
362 | static inline bool equal (const ls_expr *, const ls_expr *); |
363 | }; |
364 | |
365 | /* Hashtable helpers. */ |
366 | inline hashval_t |
367 | pre_ldst_expr_hasher::hash (const ls_expr *x) |
368 | { |
369 | int do_not_record_p = 0; |
370 | return |
371 | hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false); |
372 | } |
373 | |
374 | static bool expr_equiv_p (const_rtx, const_rtx); |
375 | |
376 | inline bool |
377 | pre_ldst_expr_hasher::equal (const ls_expr *ptr1, |
378 | const ls_expr *ptr2) |
379 | { |
380 | return expr_equiv_p (ptr1->pattern, ptr2->pattern); |
381 | } |
382 | |
383 | /* Hashtable for the load/store memory refs. */ |
384 | static hash_table<pre_ldst_expr_hasher> *pre_ldst_table; |
385 | |
386 | /* Bitmap containing one bit for each register in the program. |
387 | Used when performing GCSE to track which registers have been set since |
388 | the start of the basic block. */ |
389 | static regset reg_set_bitmap; |
390 | |
391 | /* Array, indexed by basic block number for a list of insns which modify |
392 | memory within that block. */ |
393 | static vec<rtx_insn *> *modify_mem_list; |
394 | static bitmap modify_mem_list_set; |
395 | |
396 | /* This array parallels modify_mem_list, except that it stores MEMs |
397 | being set and their canonicalized memory addresses. */ |
398 | static vec<modify_pair> *canon_modify_mem_list; |
399 | |
400 | /* Bitmap indexed by block numbers to record which blocks contain |
401 | function calls. */ |
402 | static bitmap blocks_with_calls; |
403 | |
404 | /* Various variables for statistics gathering. */ |
405 | |
406 | /* Memory used in a pass. |
407 | This isn't intended to be absolutely precise. Its intent is only |
408 | to keep an eye on memory usage. */ |
409 | static int bytes_used; |
410 | |
411 | /* GCSE substitutions made. */ |
412 | static int gcse_subst_count; |
413 | /* Number of copy instructions created. */ |
414 | static int gcse_create_count; |
415 | |
416 | /* Doing code hoisting. */ |
417 | static bool doing_code_hoisting_p = false; |
418 | |
419 | /* For available exprs */ |
420 | static sbitmap *ae_kill; |
421 | |
422 | /* Data stored for each basic block. */ |
423 | struct bb_data |
424 | { |
425 | /* Maximal register pressure inside basic block for given register class |
426 | (defined only for the pressure classes). */ |
427 | int max_reg_pressure[N_REG_CLASSES]; |
428 | /* Recorded register pressure of basic block before trying to hoist |
429 | an expression. Will be used to restore the register pressure |
430 | if the expression should not be hoisted. */ |
431 | int old_pressure; |
432 | /* Recorded register live_in info of basic block during code hoisting |
433 | process. BACKUP is used to record live_in info before trying to |
434 | hoist an expression, and will be used to restore LIVE_IN if the |
435 | expression should not be hoisted. */ |
436 | bitmap live_in, backup; |
437 | }; |
438 | |
439 | #define BB_DATA(bb) ((struct bb_data *) (bb)->aux) |
440 | |
441 | static basic_block curr_bb; |
442 | |
443 | /* Current register pressure for each pressure class. */ |
444 | static int curr_reg_pressure[N_REG_CLASSES]; |
445 | |
446 | |
447 | static void compute_can_copy (void); |
448 | static void *gmalloc (size_t) ATTRIBUTE_MALLOC; |
449 | static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC; |
450 | static void *gcse_alloc (unsigned long); |
451 | static void alloc_gcse_mem (void); |
452 | static void free_gcse_mem (void); |
453 | static void hash_scan_insn (rtx_insn *, struct gcse_hash_table_d *); |
454 | static void hash_scan_set (rtx, rtx_insn *, struct gcse_hash_table_d *); |
455 | static void hash_scan_clobber (rtx, rtx_insn *, struct gcse_hash_table_d *); |
456 | static void hash_scan_call (rtx, rtx_insn *, struct gcse_hash_table_d *); |
457 | static bool oprs_unchanged_p (const_rtx, const rtx_insn *, bool); |
458 | static bool oprs_anticipatable_p (const_rtx, const rtx_insn *); |
459 | static bool oprs_available_p (const_rtx, const rtx_insn *); |
460 | static void insert_expr_in_table (rtx, machine_mode, rtx_insn *, bool, bool, |
461 | HOST_WIDE_INT, struct gcse_hash_table_d *); |
462 | static unsigned int hash_expr (const_rtx, machine_mode, int *, int); |
463 | static void record_last_reg_set_info (rtx_insn *, int); |
464 | static void record_last_mem_set_info (rtx_insn *); |
465 | static void record_last_set_info (rtx, const_rtx, void *); |
466 | static void compute_hash_table (struct gcse_hash_table_d *); |
467 | static void alloc_hash_table (struct gcse_hash_table_d *); |
468 | static void free_hash_table (struct gcse_hash_table_d *); |
469 | static void compute_hash_table_work (struct gcse_hash_table_d *); |
470 | static void dump_hash_table (FILE *, const char *, struct gcse_hash_table_d *); |
471 | static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *, |
472 | struct gcse_hash_table_d *); |
473 | static void mems_conflict_for_gcse_p (rtx, const_rtx, void *); |
474 | static bool load_killed_in_block_p (const_basic_block, int, const_rtx, bool); |
475 | static void alloc_pre_mem (int, int); |
476 | static void free_pre_mem (void); |
477 | static struct edge_list *compute_pre_data (void); |
478 | static bool pre_expr_reaches_here_p (basic_block, struct gcse_expr *, |
479 | basic_block); |
480 | static void insert_insn_end_basic_block (struct gcse_expr *, basic_block); |
481 | static void pre_insert_copy_insn (struct gcse_expr *, rtx_insn *); |
482 | static void pre_insert_copies (void); |
483 | static bool pre_delete (void); |
484 | static bool pre_gcse (struct edge_list *); |
485 | static bool one_pre_gcse_pass (void); |
486 | static void add_label_notes (rtx, rtx_insn *); |
487 | static void alloc_code_hoist_mem (int, int); |
488 | static void free_code_hoist_mem (void); |
489 | static void compute_code_hoist_vbeinout (void); |
490 | static void compute_code_hoist_data (void); |
491 | static bool should_hoist_expr_to_dom (basic_block, struct gcse_expr *, |
492 | basic_block, |
493 | sbitmap, HOST_WIDE_INT, int *, |
494 | enum reg_class, |
495 | int *, bitmap, rtx_insn *); |
496 | static bool hoist_code (void); |
497 | static enum reg_class get_regno_pressure_class (int regno, int *nregs); |
498 | static enum reg_class get_pressure_class_and_nregs (rtx_insn *insn, int *nregs); |
499 | static bool one_code_hoisting_pass (void); |
500 | static rtx_insn *process_insert_insn (struct gcse_expr *); |
501 | static bool pre_edge_insert (struct edge_list *, struct gcse_expr **); |
502 | static bool pre_expr_reaches_here_p_work (basic_block, struct gcse_expr *, |
503 | basic_block, char *); |
504 | static struct ls_expr * ldst_entry (rtx); |
505 | static void free_ldst_entry (struct ls_expr *); |
506 | static void free_ld_motion_mems (void); |
507 | static void print_ldst_list (FILE *); |
508 | static struct ls_expr * find_rtx_in_ldst (rtx); |
509 | static bool simple_mem (const_rtx); |
510 | static void invalidate_any_buried_refs (rtx); |
511 | static void compute_ld_motion_mems (void); |
512 | static void trim_ld_motion_mems (void); |
513 | static void update_ld_motion_stores (struct gcse_expr *); |
514 | static void clear_modify_mem_tables (void); |
515 | static void free_modify_mem_tables (void); |
516 | |
/* Typed allocation wrappers: G* use xmalloc/xcalloc (tracked by
   bytes_used), GOB* allocate from the pass-local obstack.  */
#define GNEW(T)			((T *) gmalloc (sizeof (T)))
#define GCNEW(T)		((T *) gcalloc (1, sizeof (T)))

#define GNEWVEC(T, N)		((T *) gmalloc (sizeof (T) * (N)))
#define GCNEWVEC(T, N)		((T *) gcalloc ((N), sizeof (T)))

#define GNEWVAR(T, S)		((T *) gmalloc ((S)))
#define GCNEWVAR(T, S)		((T *) gcalloc (1, (S)))

#define GOBNEW(T)		((T *) gcse_alloc (sizeof (T)))
#define GOBNEWVAR(T, S)		((T *) gcse_alloc ((S)))

/* Misc. utilities.  */

#define can_copy \
  (this_target_gcse->x_can_copy)
#define can_copy_init_p \
  (this_target_gcse->x_can_copy_init_p)
535 | |
536 | /* Compute which modes support reg/reg copy operations. */ |
537 | |
538 | static void |
539 | compute_can_copy (void) |
540 | { |
541 | int i; |
542 | #ifndef AVOID_CCMODE_COPIES |
543 | rtx reg; |
544 | rtx_insn *insn; |
545 | #endif |
546 | memset (can_copy, c: 0, n: NUM_MACHINE_MODES); |
547 | |
548 | start_sequence (); |
549 | for (i = 0; i < NUM_MACHINE_MODES; i++) |
550 | if (GET_MODE_CLASS (i) == MODE_CC) |
551 | { |
552 | #ifdef AVOID_CCMODE_COPIES |
553 | can_copy[i] = 0; |
554 | #else |
555 | reg = gen_rtx_REG ((machine_mode) i, LAST_VIRTUAL_REGISTER + 1); |
556 | insn = emit_insn (gen_rtx_SET (reg, reg)); |
557 | if (recog (PATTERN (insn), insn, NULL) >= 0) |
558 | can_copy[i] = 1; |
559 | #endif |
560 | } |
561 | else |
562 | can_copy[i] = 1; |
563 | |
564 | end_sequence (); |
565 | } |
566 | |
567 | /* Returns whether the mode supports reg/reg copy operations. */ |
568 | |
569 | bool |
570 | can_copy_p (machine_mode mode) |
571 | { |
572 | if (! can_copy_init_p) |
573 | { |
574 | compute_can_copy (); |
575 | can_copy_init_p = true; |
576 | } |
577 | |
578 | return can_copy[mode] != 0; |
579 | } |
580 | |
581 | /* Cover function to xmalloc to record bytes allocated. */ |
582 | |
583 | static void * |
584 | gmalloc (size_t size) |
585 | { |
586 | bytes_used += size; |
587 | return xmalloc (size); |
588 | } |
589 | |
590 | /* Cover function to xcalloc to record bytes allocated. */ |
591 | |
592 | static void * |
593 | gcalloc (size_t nelem, size_t elsize) |
594 | { |
595 | bytes_used += nelem * elsize; |
596 | return xcalloc (nelem, elsize); |
597 | } |
598 | |
599 | /* Cover function to obstack_alloc. */ |
600 | |
601 | static void * |
602 | gcse_alloc (unsigned long size) |
603 | { |
604 | bytes_used += size; |
605 | return obstack_alloc (&gcse_obstack, size); |
606 | } |
607 | |
608 | /* Allocate memory for the reg/memory set tracking tables. |
609 | This is called at the start of each pass. */ |
610 | |
611 | static void |
612 | alloc_gcse_mem (void) |
613 | { |
614 | /* Allocate vars to track sets of regs. */ |
615 | reg_set_bitmap = ALLOC_REG_SET (NULL); |
616 | |
617 | /* Allocate array to keep a list of insns which modify memory in each |
618 | basic block. The two typedefs are needed to work around the |
619 | pre-processor limitation with template types in macro arguments. */ |
620 | typedef vec<rtx_insn *> vec_rtx_heap; |
621 | typedef vec<modify_pair> vec_modify_pair_heap; |
622 | modify_mem_list = GCNEWVEC (vec_rtx_heap, last_basic_block_for_fn (cfun)); |
623 | canon_modify_mem_list = GCNEWVEC (vec_modify_pair_heap, |
624 | last_basic_block_for_fn (cfun)); |
625 | modify_mem_list_set = BITMAP_ALLOC (NULL); |
626 | blocks_with_calls = BITMAP_ALLOC (NULL); |
627 | } |
628 | |
629 | /* Free memory allocated by alloc_gcse_mem. */ |
630 | |
631 | static void |
632 | free_gcse_mem (void) |
633 | { |
634 | FREE_REG_SET (reg_set_bitmap); |
635 | |
636 | free_modify_mem_tables (); |
637 | BITMAP_FREE (modify_mem_list_set); |
638 | BITMAP_FREE (blocks_with_calls); |
639 | } |
640 | |
641 | /* Compute the local properties of each recorded expression. |
642 | |
643 | Local properties are those that are defined by the block, irrespective of |
644 | other blocks. |
645 | |
646 | An expression is transparent in a block if its operands are not modified |
647 | in the block. |
648 | |
649 | An expression is computed (locally available) in a block if it is computed |
650 | at least once and expression would contain the same value if the |
651 | computation was moved to the end of the block. |
652 | |
653 | An expression is locally anticipatable in a block if it is computed at |
654 | least once and expression would contain the same value if the computation |
655 | was moved to the beginning of the block. |
656 | |
657 | We call this routine for pre and code hoisting. They all compute |
658 | basically the same information and thus can easily share this code. |
659 | |
660 | TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local |
661 | properties. If NULL, then it is not necessary to compute or record that |
662 | particular property. |
663 | |
664 | TABLE controls which hash table to look at. */ |
665 | |
666 | static void |
667 | compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc, |
668 | struct gcse_hash_table_d *table) |
669 | { |
670 | unsigned int i; |
671 | |
672 | /* Initialize any bitmaps that were passed in. */ |
673 | if (transp) |
674 | { |
675 | bitmap_vector_ones (transp, last_basic_block_for_fn (cfun)); |
676 | } |
677 | |
678 | if (comp) |
679 | bitmap_vector_clear (comp, last_basic_block_for_fn (cfun)); |
680 | if (antloc) |
681 | bitmap_vector_clear (antloc, last_basic_block_for_fn (cfun)); |
682 | |
683 | for (i = 0; i < table->size; i++) |
684 | { |
685 | struct gcse_expr *expr; |
686 | |
687 | for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash) |
688 | { |
689 | int indx = expr->bitmap_index; |
690 | struct gcse_occr *occr; |
691 | |
692 | /* The expression is transparent in this block if it is not killed. |
693 | We start by assuming all are transparent [none are killed], and |
694 | then reset the bits for those that are. */ |
695 | if (transp) |
696 | compute_transp (expr->expr, indx, transp, |
697 | blocks_with_calls, |
698 | modify_mem_list_set, |
699 | canon_modify_mem_list); |
700 | |
701 | /* The occurrences recorded in antic_occr are exactly those that |
702 | we want to set to nonzero in ANTLOC. */ |
703 | if (antloc) |
704 | for (occr = expr->antic_occr; occr != NULL; occr = occr->next) |
705 | { |
706 | bitmap_set_bit (map: antloc[BLOCK_FOR_INSN (insn: occr->insn)->index], bitno: indx); |
707 | |
708 | /* While we're scanning the table, this is a good place to |
709 | initialize this. */ |
710 | occr->deleted_p = 0; |
711 | } |
712 | |
713 | /* The occurrences recorded in avail_occr are exactly those that |
714 | we want to set to nonzero in COMP. */ |
715 | if (comp) |
716 | for (occr = expr->avail_occr; occr != NULL; occr = occr->next) |
717 | { |
718 | bitmap_set_bit (map: comp[BLOCK_FOR_INSN (insn: occr->insn)->index], bitno: indx); |
719 | |
720 | /* While we're scanning the table, this is a good place to |
721 | initialize this. */ |
722 | occr->copied_p = 0; |
723 | } |
724 | |
725 | /* While we're scanning the table, this is a good place to |
726 | initialize this. */ |
727 | expr->reaching_reg = 0; |
728 | } |
729 | } |
730 | } |
731 | |
732 | /* Hash table support. */ |
733 | |
734 | struct reg_avail_info |
735 | { |
736 | basic_block last_bb; |
737 | int first_set; |
738 | int last_set; |
739 | }; |
740 | |
741 | static struct reg_avail_info *reg_avail_info; |
742 | static basic_block current_bb; |
743 | |
744 | /* See whether X, the source of a set, is something we want to consider for |
745 | GCSE. */ |
746 | |
747 | static bool |
748 | want_to_gcse_p (rtx x, machine_mode mode, HOST_WIDE_INT *max_distance_ptr) |
749 | { |
750 | #ifdef STACK_REGS |
751 | /* On register stack architectures, don't GCSE constants from the |
752 | constant pool, as the benefits are often swamped by the overhead |
753 | of shuffling the register stack between basic blocks. */ |
754 | if (IS_STACK_MODE (GET_MODE (x))) |
755 | x = avoid_constant_pool_reference (x); |
756 | #endif |
757 | |
758 | /* GCSE'ing constants: |
759 | |
760 | We do not specifically distinguish between constant and non-constant |
761 | expressions in PRE and Hoist. We use set_src_cost below to limit |
762 | the maximum distance simple expressions can travel. |
763 | |
764 | Nevertheless, constants are much easier to GCSE, and, hence, |
765 | it is easy to overdo the optimizations. Usually, excessive PRE and |
766 | Hoisting of constant leads to increased register pressure. |
767 | |
768 | RA can deal with this by rematerialing some of the constants. |
769 | Therefore, it is important that the back-end generates sets of constants |
770 | in a way that allows reload rematerialize them under high register |
771 | pressure, i.e., a pseudo register with REG_EQUAL to constant |
772 | is set only once. Failing to do so will result in IRA/reload |
773 | spilling such constants under high register pressure instead of |
774 | rematerializing them. */ |
775 | |
776 | switch (GET_CODE (x)) |
777 | { |
778 | case REG: |
779 | case SUBREG: |
780 | case CALL: |
781 | return false; |
782 | |
783 | CASE_CONST_ANY: |
784 | if (!doing_code_hoisting_p) |
785 | /* Do not PRE constants. */ |
786 | return false; |
787 | |
788 | /* FALLTHRU */ |
789 | |
790 | default: |
791 | if (doing_code_hoisting_p) |
792 | /* PRE doesn't implement max_distance restriction. */ |
793 | { |
794 | int cost; |
795 | HOST_WIDE_INT max_distance; |
796 | |
797 | gcc_assert (!optimize_function_for_speed_p (cfun) |
798 | && optimize_function_for_size_p (cfun)); |
799 | cost = set_src_cost (x, mode, speed_p: 0); |
800 | |
801 | if (cost < COSTS_N_INSNS (param_gcse_unrestricted_cost)) |
802 | { |
803 | max_distance |
804 | = ((HOST_WIDE_INT)param_gcse_cost_distance_ratio * cost) / 10; |
805 | if (max_distance == 0) |
806 | return false; |
807 | |
808 | gcc_assert (max_distance > 0); |
809 | } |
810 | else |
811 | max_distance = 0; |
812 | |
813 | if (max_distance_ptr) |
814 | *max_distance_ptr = max_distance; |
815 | } |
816 | |
817 | return can_assign_to_reg_without_clobbers_p (x, mode); |
818 | } |
819 | } |
820 | |
/* Scratch insn used (and cached across calls) by
   can_assign_to_reg_without_clobbers_p.  Marked GTY so the cached RTL
   survives garbage collection.  */

static GTY(()) rtx_insn *test_insn;
824 | |
825 | /* Return true if we can assign X to a pseudo register of mode MODE |
826 | such that the resulting insn does not result in clobbering a hard |
827 | register as a side-effect. |
828 | |
829 | Additionally, if the target requires it, check that the resulting insn |
830 | can be copied. If it cannot, this means that X is special and probably |
831 | has hidden side-effects we don't want to mess with. |
832 | |
833 | This function is typically used by code motion passes, to verify |
834 | that it is safe to insert an insn without worrying about clobbering |
835 | maybe live hard regs. */ |
836 | |
bool
can_assign_to_reg_without_clobbers_p (rtx x, machine_mode mode)
{
  int num_clobbers = 0;
  int icode;
  bool can_assign = false;

  /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
  if (general_operand (x, mode))
    return true;
  else if (GET_MODE (x) == VOIDmode)
    return false;

  /* Otherwise, check if we can make a valid insn from it.  First initialize
     our test insn if we haven't already.  */
  if (test_insn == 0)
    {
      /* Use register number FIRST_PSEUDO_REGISTER * 2 as the destination
	 so the dummy SET cannot alias any real operand; the insn is kept
	 detached from any insn chain.  */
      test_insn
	= make_insn_raw (gen_rtx_SET (gen_rtx_REG (word_mode,
						   FIRST_PSEUDO_REGISTER * 2),
				      const0_rtx));
      SET_NEXT_INSN (test_insn) = SET_PREV_INSN (test_insn) = 0;
      INSN_LOCATION (insn: test_insn) = UNKNOWN_LOCATION;
    }

  /* Now make an insn like the one we would make when GCSE'ing and see if
     valid.  The mode must be set before recognition so the insn matches
     the pattern X would actually be stored with.  */
  PUT_MODE (SET_DEST (PATTERN (test_insn)), mode);
  SET_SRC (PATTERN (test_insn)) = x;

  icode = recog (PATTERN (insn: test_insn), test_insn, &num_clobbers);

  /* If the test insn is valid and doesn't need clobbers, and the target also
     has no objections, we're good.  */
  if (icode >= 0
      && (num_clobbers == 0 || !added_clobbers_hard_reg_p (icode))
      && ! (targetm.cannot_copy_insn_p
	    && targetm.cannot_copy_insn_p (test_insn)))
    can_assign = true;

  /* Make sure test_insn doesn't have any pointers into GC space.  */
  SET_SRC (PATTERN (test_insn)) = NULL_RTX;

  return can_assign;
}
882 | |
883 | /* Return true if the operands of expression X are unchanged from the |
884 | start of INSN's basic block up to but not including INSN |
885 | (if AVAIL_P == false), or from INSN to the end of INSN's basic block |
886 | (if AVAIL_P == true). */ |
887 | |
static bool
oprs_unchanged_p (const_rtx x, const rtx_insn *insn, bool avail_p)
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  /* A null rtx has no operands that could change.  */
  if (x == 0)
    return true;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      {
	struct reg_avail_info *info = &reg_avail_info[REGNO (x)];

	/* Cached first/last-set info is only valid for the current block;
	   if it refers to another block, the register was not set here.  */
	if (info->last_bb != current_bb)
	  return true;
	if (avail_p)
	  /* Available: the last set must precede INSN.  */
	  return info->last_set < DF_INSN_LUID (insn);
	else
	  /* Anticipatable: the first set must not precede INSN.  */
	  return info->first_set >= DF_INSN_LUID (insn);
      }

    case MEM:
      /* Without load motion, any MEM is conservatively treated as killed.
	 Otherwise check stores in the block, then recurse on the address.  */
      if (! flag_gcse_lm
	  || load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
				     x, avail_p))
	return false;
      else
	return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PRE_MODIFY:
    case POST_MODIFY:
      /* Auto-modify addresses change their own operand.  */
      return false;

    case PC:
    case CONST:
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      /* Constants and labels never change.  */
      return true;

    default:
      break;
    }

  /* Generic rtx: every 'e' (expression) and 'E' (vector) operand must be
     unchanged.  */
  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call needed at this
	     level, change it into iteration.  This function is called enough
	     to be worth it.  */
	  if (i == 0)
	    return oprs_unchanged_p (XEXP (x, i), insn, avail_p);

	  else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
	    return false;
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
	    return false;
    }

  return true;
}
963 | |
964 | /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p. */ |
965 | |
struct mem_conflict_info
{
  /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
     see if a memory store conflicts with this memory load.  */
  const_rtx mem;

  /* True if mems_conflict_for_gcse_p finds a conflict between two memory
     references.  Once set it is never cleared within one query.  */
  bool conflict;
};
976 | |
977 | /* DEST is the output of an instruction. If it is a memory reference and |
978 | possibly conflicts with the load found in DATA, then communicate this |
979 | information back through DATA. */ |
980 | |
981 | static void |
982 | mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, |
983 | void *data) |
984 | { |
985 | struct mem_conflict_info *mci = (struct mem_conflict_info *) data; |
986 | |
987 | while (GET_CODE (dest) == SUBREG |
988 | || GET_CODE (dest) == ZERO_EXTRACT |
989 | || GET_CODE (dest) == STRICT_LOW_PART) |
990 | dest = XEXP (dest, 0); |
991 | |
992 | /* If DEST is not a MEM, then it will not conflict with the load. Note |
993 | that function calls are assumed to clobber memory, but are handled |
994 | elsewhere. */ |
995 | if (! MEM_P (dest)) |
996 | return; |
997 | |
998 | /* If we are setting a MEM in our list of specially recognized MEMs, |
999 | don't mark as killed this time. */ |
1000 | if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem)) |
1001 | { |
1002 | if (!find_rtx_in_ldst (dest)) |
1003 | mci->conflict = true; |
1004 | return; |
1005 | } |
1006 | |
1007 | if (true_dependence (dest, GET_MODE (dest), mci->mem)) |
1008 | mci->conflict = true; |
1009 | } |
1010 | |
1011 | /* Return true if the expression in X (a memory reference) is killed |
1012 | in block BB before or after the insn with the LUID in UID_LIMIT. |
1013 | AVAIL_P is true for kills after UID_LIMIT, and zero for kills |
1014 | before UID_LIMIT. |
1015 | |
1016 | To check the entire block, set UID_LIMIT to max_uid + 1 and |
1017 | AVAIL_P to false. */ |
1018 | |
static bool
load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
			bool avail_p)
{
  vec<rtx_insn *> list = modify_mem_list[bb->index];
  rtx_insn *setter;
  unsigned ix;

  /* If this is a readonly then we aren't going to be changing it.  */
  if (MEM_READONLY_P (x))
    return false;

  /* Walk the memory-modifying insns recorded for this block, most recent
     first.  */
  FOR_EACH_VEC_ELT_REVERSE (list, ix, setter)
    {
      struct mem_conflict_info mci;

      /* Ignore entries in the list that do not apply.  */
      if ((avail_p
	   && DF_INSN_LUID (setter) < uid_limit)
	  || (! avail_p
	      && DF_INSN_LUID (setter) > uid_limit))
	continue;

      /* If SETTER is a call everything is clobbered.  Note that calls
	 to pure functions are never put on the list, so we need not
	 worry about them.  */
      if (CALL_P (setter))
	return true;

      /* SETTER must be an INSN of some kind that sets memory.  Call
	 note_stores to examine each hunk of memory that is modified.  */
      mci.mem = x;
      mci.conflict = false;
      note_stores (setter, mems_conflict_for_gcse_p, &mci);
      if (mci.conflict)
	return true;
    }
  return false;
}
1058 | |
1059 | /* Return true if the operands of expression X are unchanged from |
1060 | the start of INSN's basic block up to but not including INSN. */ |
1061 | |
1062 | static bool |
1063 | oprs_anticipatable_p (const_rtx x, const rtx_insn *insn) |
1064 | { |
1065 | return oprs_unchanged_p (x, insn, avail_p: false); |
1066 | } |
1067 | |
1068 | /* Return true if the operands of expression X are unchanged from |
1069 | INSN to the end of INSN's basic block. */ |
1070 | |
1071 | static bool |
1072 | oprs_available_p (const_rtx x, const rtx_insn *insn) |
1073 | { |
1074 | return oprs_unchanged_p (x, insn, avail_p: true); |
1075 | } |
1076 | |
1077 | /* Hash expression X. |
1078 | |
1079 | MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean |
1080 | indicating if a volatile operand is found or if the expression contains |
1081 | something we don't want to insert in the table. HASH_TABLE_SIZE is |
1082 | the current size of the hash table to be probed. */ |
1083 | |
1084 | static unsigned int |
1085 | hash_expr (const_rtx x, machine_mode mode, int *do_not_record_p, |
1086 | int hash_table_size) |
1087 | { |
1088 | unsigned int hash; |
1089 | |
1090 | *do_not_record_p = 0; |
1091 | |
1092 | hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false); |
1093 | return hash % hash_table_size; |
1094 | } |
1095 | |
1096 | /* Return true if exp1 is equivalent to exp2. */ |
1097 | |
static bool
expr_equiv_p (const_rtx x, const_rtx y)
{
  /* Delegate to exp_equiv_p; the 0/true arguments select the flavor of
     equivalence used throughout this pass.  */
  return exp_equiv_p (x, y, 0, true);
}
1103 | |
1104 | /* Insert expression X in INSN in the hash TABLE. |
1105 | If it is already present, record it as the last occurrence in INSN's |
1106 | basic block. |
1107 | |
1108 | MODE is the mode of the value X is being stored into. |
1109 | It is only used if X is a CONST_INT. |
1110 | |
1111 | ANTIC_P is true if X is an anticipatable expression. |
1112 | AVAIL_P is true if X is an available expression. |
1113 | |
1114 | MAX_DISTANCE is the maximum distance in instructions this expression can |
1115 | be moved. */ |
1116 | |
static void
insert_expr_in_table (rtx x, machine_mode mode, rtx_insn *insn,
		      bool antic_p, bool avail_p, HOST_WIDE_INT max_distance,
		      struct gcse_hash_table_d *table)
{
  bool found;
  int do_not_record_p;
  unsigned int hash;
  struct gcse_expr *cur_expr, *last_expr = NULL;
  struct gcse_occr *antic_occr, *avail_occr;

  hash = hash_expr (x, mode, do_not_record_p: &do_not_record_p, hash_table_size: table->size);

  /* Do not insert expression in table if it contains volatile operands,
     or if hash_expr determines the expression is something we don't want
     to or can't handle.  */
  if (do_not_record_p)
    return;

  /* Search the hash chain for an equivalent expression.  */
  cur_expr = table->table[hash];
  found = false;

  while (cur_expr && (found = expr_equiv_p (x: cur_expr->expr, y: x)) == 0)
    {
      /* If the expression isn't found, save a pointer to the end of
	 the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      /* New expression: allocate an entry and link it into the chain.  */
      cur_expr = GOBNEW (struct gcse_expr);
      bytes_used += sizeof (struct gcse_expr);
      if (table->table[hash] == NULL)
	/* This is the first pattern that hashed to this index.  */
	table->table[hash] = cur_expr;
      else
	/* Add EXPR to end of this hash chain.  */
	last_expr->next_same_hash = cur_expr;

      /* Set the fields of the expr element.  */
      cur_expr->expr = x;
      cur_expr->bitmap_index = table->n_elems++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
      gcc_assert (max_distance >= 0);
      cur_expr->max_distance = max_distance;
    }
  else
    /* Equivalent expressions must agree on how far they may move.  */
    gcc_assert (cur_expr->max_distance == max_distance);

  /* Now record the occurrence(s).  */
  if (antic_p)
    {
      antic_occr = cur_expr->antic_occr;

      /* The head of the list is only reusable if it is in INSN's block.  */
      if (antic_occr
	  && BLOCK_FOR_INSN (insn: antic_occr->insn) != BLOCK_FOR_INSN (insn))
	antic_occr = NULL;

      if (antic_occr)
	/* Found another instance of the expression in the same basic block.
	   Prefer the currently recorded one.  We want the first one in the
	   block and the block is scanned from start to end.  */
	; /* nothing to do */
      else
	{
	  /* First occurrence of this expression in this basic block.  */
	  antic_occr = GOBNEW (struct gcse_occr);
	  bytes_used += sizeof (struct gcse_occr);
	  antic_occr->insn = insn;
	  antic_occr->next = cur_expr->antic_occr;
	  antic_occr->deleted_p = 0;
	  cur_expr->antic_occr = antic_occr;
	}
    }

  if (avail_p)
    {
      avail_occr = cur_expr->avail_occr;

      if (avail_occr
	  && BLOCK_FOR_INSN (insn: avail_occr->insn) == BLOCK_FOR_INSN (insn))
	{
	  /* Found another instance of the expression in the same basic block.
	     Prefer this occurrence to the currently recorded one.  We want
	     the last one in the block and the block is scanned from start
	     to end.  */
	  avail_occr->insn = insn;
	}
      else
	{
	  /* First occurrence of this expression in this basic block.  */
	  avail_occr = GOBNEW (struct gcse_occr);
	  bytes_used += sizeof (struct gcse_occr);
	  avail_occr->insn = insn;
	  avail_occr->next = cur_expr->avail_occr;
	  avail_occr->deleted_p = 0;
	  cur_expr->avail_occr = avail_occr;
	}
    }
}
1221 | |
1222 | /* Scan SET present in INSN and add an entry to the hash TABLE. */ |
1223 | |
static void
hash_scan_set (rtx set, rtx_insn *insn, struct gcse_hash_table_d *table)
{
  rtx src = SET_SRC (set);
  rtx dest = SET_DEST (set);
  rtx note;

  /* A SET whose source is a CALL is handled by the call scanner.  */
  if (GET_CODE (src) == CALL)
    hash_scan_call (src, insn, table);

  else if (REG_P (dest))
    {
      unsigned int regno = REGNO (dest);
      HOST_WIDE_INT max_distance = 0;

      /* See if a REG_EQUAL note shows this equivalent to a simpler expression.

	 This allows us to do a single GCSE pass and still eliminate
	 redundant constants, addresses or other expressions that are
	 constructed with multiple instructions.

	 However, keep the original SRC if INSN is a simple reg-reg move.
	 In this case, there will almost always be a REG_EQUAL note on the
	 insn that sets SRC.  By recording the REG_EQUAL value here as SRC
	 for INSN, we miss copy propagation opportunities and we perform the
	 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
	 do more than one PRE GCSE pass.

	 Note that this does not impede profitable constant propagations.  We
	 "look through" reg-reg sets in lookup_avail_set.  */
      note = find_reg_equal_equiv_note (insn);
      if (note != 0
	  && REG_NOTE_KIND (note) == REG_EQUAL
	  && !REG_P (src)
	  && want_to_gcse_p (XEXP (note, 0), GET_MODE (dest), NULL))
	src = XEXP (note, 0), set = gen_rtx_SET (dest, src);

      /* Only record sets of pseudo-regs in the hash table.  */
      if (regno >= FIRST_PSEUDO_REGISTER
	  /* Don't GCSE something if we can't do a reg/reg copy.  */
	  && can_copy_p (GET_MODE (dest))
	  /* GCSE commonly inserts instruction after the insn.  We can't
	     do that easily for EH edges so disable GCSE on these for now.  */
	  /* ??? We can now easily create new EH landing pads at the
	     gimple level, for splitting edges; there's no reason we
	     can't do the same thing at the rtl level.  */
	  && !can_throw_internal (insn)
	  /* Is SET_SRC something we want to gcse?  */
	  && want_to_gcse_p (x: src, GET_MODE (dest), max_distance_ptr: &max_distance)
	  /* Don't CSE a nop.  */
	  && ! set_noop_p (set)
	  /* Don't GCSE if it has attached REG_EQUIV note.
	     At this point only function parameters should have
	     REG_EQUIV notes and if the argument slot is used somewhere
	     explicitly, it means address of parameter has been taken,
	     so we should not extend the lifetime of the pseudo.  */
	  && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
	{
	  /* An expression is not anticipatable if its operands are
	     modified before this insn or if this is not the only SET in
	     this insn.  The latter condition does not have to mean that
	     SRC itself is not anticipatable, but we just will not be
	     able to handle code motion of insns with multiple sets.  */
	  bool antic_p = (oprs_anticipatable_p (x: src, insn)
			  && !multiple_sets (insn));
	  /* An expression is not available if its operands are
	     subsequently modified, including this insn.  It's also not
	     available if this is a branch, because we can't insert
	     a set after the branch.  */
	  bool avail_p = (oprs_available_p (x: src, insn)
			  && ! JUMP_P (insn));

	  insert_expr_in_table (x: src, GET_MODE (dest), insn, antic_p, avail_p,
				max_distance, table);
	}
    }
  /* In case of store we want to consider the memory value as available in
     the REG stored in that memory.  This makes it possible to remove
     redundant loads from due to stores to the same location.  */
  else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
    {
      unsigned int regno = REGNO (src);
      HOST_WIDE_INT max_distance = 0;

      /* Only record sets of pseudo-regs in the hash table.  */
      if (regno >= FIRST_PSEUDO_REGISTER
	  /* Don't GCSE something if we can't do a reg/reg copy.  */
	  && can_copy_p (GET_MODE (src))
	  /* GCSE commonly inserts instruction after the insn.  We can't
	     do that easily for EH edges so disable GCSE on these for now.  */
	  && !can_throw_internal (insn)
	  /* Is SET_DEST something we want to gcse?  */
	  && want_to_gcse_p (x: dest, GET_MODE (dest), max_distance_ptr: &max_distance)
	  /* Don't CSE a nop.  */
	  && ! set_noop_p (set)
	  /* Don't GCSE if it has attached REG_EQUIV note.
	     At this point only function parameters should have
	     REG_EQUIV notes and if the argument slot is used somewhere
	     explicitly, it means address of parameter has been taken,
	     so we should not extend the lifetime of the pseudo.  */
	  && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
	      || ! MEM_P (XEXP (note, 0))))
	{
	  /* Stores are never anticipatable.  */
	  bool antic_p = 0;
	  /* An expression is not available if its operands are
	     subsequently modified, including this insn.  It's also not
	     available if this is a branch, because we can't insert
	     a set after the branch.  */
	  bool avail_p = oprs_available_p (x: dest, insn) && ! JUMP_P (insn);

	  /* Record the memory expression (DEST) in the hash table.  */
	  insert_expr_in_table (x: dest, GET_MODE (dest), insn,
				antic_p, avail_p, max_distance, table);
	}
    }
}
1341 | |
1342 | static void |
1343 | hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED, |
1344 | struct gcse_hash_table_d *table ATTRIBUTE_UNUSED) |
1345 | { |
1346 | /* Currently nothing to do. */ |
1347 | } |
1348 | |
1349 | static void |
1350 | hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED, |
1351 | struct gcse_hash_table_d *table ATTRIBUTE_UNUSED) |
1352 | { |
1353 | /* Currently nothing to do. */ |
1354 | } |
1355 | |
1356 | /* Process INSN and add hash table entries as appropriate. */ |
1357 | |
1358 | static void |
1359 | hash_scan_insn (rtx_insn *insn, struct gcse_hash_table_d *table) |
1360 | { |
1361 | rtx pat = PATTERN (insn); |
1362 | int i; |
1363 | |
1364 | /* Pick out the sets of INSN and for other forms of instructions record |
1365 | what's been modified. */ |
1366 | |
1367 | if (GET_CODE (pat) == SET) |
1368 | hash_scan_set (set: pat, insn, table); |
1369 | |
1370 | else if (GET_CODE (pat) == CLOBBER) |
1371 | hash_scan_clobber (x: pat, insn, table); |
1372 | |
1373 | else if (GET_CODE (pat) == CALL) |
1374 | hash_scan_call (x: pat, insn, table); |
1375 | |
1376 | else if (GET_CODE (pat) == PARALLEL) |
1377 | for (i = 0; i < XVECLEN (pat, 0); i++) |
1378 | { |
1379 | rtx x = XVECEXP (pat, 0, i); |
1380 | |
1381 | if (GET_CODE (x) == SET) |
1382 | hash_scan_set (set: x, insn, table); |
1383 | else if (GET_CODE (x) == CLOBBER) |
1384 | hash_scan_clobber (x, insn, table); |
1385 | else if (GET_CODE (x) == CALL) |
1386 | hash_scan_call (x, insn, table); |
1387 | } |
1388 | } |
1389 | |
1390 | /* Dump the hash table TABLE to file FILE under the name NAME. */ |
1391 | |
static void
dump_hash_table (FILE *file, const char *name, struct gcse_hash_table_d *table)
{
  int i;
  /* Flattened out table, so it's printed in proper order.  */
  struct gcse_expr **flat_table;
  unsigned int *hash_val;
  struct gcse_expr *expr;

  flat_table = XCNEWVEC (struct gcse_expr *, table->n_elems);
  hash_val = XNEWVEC (unsigned int, table->n_elems);

  /* Index the expressions by bitmap_index, remembering which bucket each
     one came from.  */
  for (i = 0; i < (int) table->size; i++)
    for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
      {
	flat_table[expr->bitmap_index] = expr;
	hash_val[expr->bitmap_index] = i;
      }

  fprintf (stream: file, format: "%s hash table (%d buckets, %d entries)\n",
	   name, table->size, table->n_elems);

  for (i = 0; i < (int) table->n_elems; i++)
    if (flat_table[i] != 0)
      {
	expr = flat_table[i];
	fprintf (stream: file, format: "Index %d (hash value %d; max distance "
		 HOST_WIDE_INT_PRINT_DEC ")\n  ",
		 expr->bitmap_index, hash_val[i], expr->max_distance);
	print_rtl (file, expr->expr);
	fprintf (stream: file, format: "\n");
      }

  fprintf (stream: file, format: "\n");

  free (ptr: flat_table);
  free (ptr: hash_val);
}
1430 | |
1431 | /* Record register first/last/block set information for REGNO in INSN. |
1432 | |
1433 | first_set records the first place in the block where the register |
1434 | is set and is used to compute "anticipatability". |
1435 | |
1436 | last_set records the last place in the block where the register |
1437 | is set and is used to compute "availability". |
1438 | |
1439 | last_bb records the block for which first_set and last_set are |
1440 | valid, as a quick test to invalidate them. */ |
1441 | |
1442 | static void |
1443 | record_last_reg_set_info (rtx_insn *insn, int regno) |
1444 | { |
1445 | struct reg_avail_info *info = ®_avail_info[regno]; |
1446 | int luid = DF_INSN_LUID (insn); |
1447 | |
1448 | info->last_set = luid; |
1449 | if (info->last_bb != current_bb) |
1450 | { |
1451 | info->last_bb = current_bb; |
1452 | info->first_set = luid; |
1453 | } |
1454 | } |
1455 | |
1456 | /* Record memory modification information for INSN. We do not actually care |
1457 | about the memory location(s) that are set, or even how they are set (consider |
1458 | a CALL_INSN). We merely need to record which insns modify memory. */ |
1459 | |
1460 | static void |
1461 | record_last_mem_set_info (rtx_insn *insn) |
1462 | { |
1463 | if (! flag_gcse_lm) |
1464 | return; |
1465 | |
1466 | record_last_mem_set_info_common (insn, modify_mem_list, |
1467 | canon_modify_mem_list, |
1468 | modify_mem_list_set, |
1469 | blocks_with_calls); |
1470 | } |
1471 | |
1472 | /* Called from compute_hash_table via note_stores to handle one |
1473 | SET or CLOBBER in an insn. DATA is really the instruction in which |
1474 | the SET is taking place. */ |
1475 | |
1476 | static void |
1477 | record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data) |
1478 | { |
1479 | rtx_insn *last_set_insn = (rtx_insn *) data; |
1480 | |
1481 | if (GET_CODE (dest) == SUBREG) |
1482 | dest = SUBREG_REG (dest); |
1483 | |
1484 | if (REG_P (dest)) |
1485 | record_last_reg_set_info (insn: last_set_insn, REGNO (dest)); |
1486 | else if (MEM_P (dest) |
1487 | /* Ignore pushes, they clobber nothing. */ |
1488 | && ! push_operand (dest, GET_MODE (dest))) |
1489 | record_last_mem_set_info (insn: last_set_insn); |
1490 | } |
1491 | |
1492 | /* Top level function to create an expression hash table. |
1493 | |
1494 | Expression entries are placed in the hash table if |
1495 | - they are of the form (set (pseudo-reg) src), |
1496 | - src is something we want to perform GCSE on, |
1497 | - none of the operands are subsequently modified in the block |
1498 | |
1499 | Currently src must be a pseudo-reg or a const_int. |
1500 | |
1501 | TABLE is the table computed. */ |
1502 | |
static void
compute_hash_table_work (struct gcse_hash_table_d *table)
{
  int i;

  /* re-Cache any INSN_LIST nodes we have allocated.  */
  clear_modify_mem_tables ();
  /* Some working arrays used to track first and last set in each block.  */
  reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());

  /* Mark every entry invalid; entries become valid lazily when last_bb is
     set to the block being scanned.  */
  for (i = 0; i < max_reg_num (); ++i)
    reg_avail_info[i].last_bb = NULL;

  FOR_EACH_BB_FN (current_bb, cfun)
    {
      rtx_insn *insn;
      unsigned int regno;

      /* First pass over the instructions records information used to
	 determine when registers and memory are first and last set.  */
      FOR_BB_INSNS (current_bb, insn)
	{
	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  if (CALL_P (insn))
	    {
	      hard_reg_set_iterator hrsi;

	      /* We don't track modes of hard registers, so we need
		 to be conservative and assume that partial kills
		 are full kills.  */
	      HARD_REG_SET callee_clobbers
		= insn_callee_abi (insn).full_and_partial_reg_clobbers ();
	      EXECUTE_IF_SET_IN_HARD_REG_SET (callee_clobbers, 0, regno, hrsi)
		record_last_reg_set_info (insn, regno);

	      /* Const/pure calls leave memory alone, unless they can loop
		 or throw externally.  */
	      if (! RTL_CONST_OR_PURE_CALL_P (insn)
		  || RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
		  || can_throw_external (insn))
		record_last_mem_set_info (insn);
	    }

	  note_stores (insn, record_last_set_info, insn);
	}

      /* The next pass builds the hash table.  */
      FOR_BB_INSNS (current_bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  hash_scan_insn (insn, table);
    }

  free (ptr: reg_avail_info);
  reg_avail_info = NULL;
}
1558 | |
1559 | /* Allocate space for the set/expr hash TABLE. |
1560 | It is used to determine the number of buckets to use. */ |
1561 | |
1562 | static void |
1563 | alloc_hash_table (struct gcse_hash_table_d *table) |
1564 | { |
1565 | int n; |
1566 | |
1567 | n = get_max_insn_count (); |
1568 | |
1569 | table->size = n / 4; |
1570 | if (table->size < 11) |
1571 | table->size = 11; |
1572 | |
1573 | /* Attempt to maintain efficient use of hash table. |
1574 | Making it an odd number is simplest for now. |
1575 | ??? Later take some measurements. */ |
1576 | table->size |= 1; |
1577 | n = table->size * sizeof (struct gcse_expr *); |
1578 | table->table = GNEWVAR (struct gcse_expr *, n); |
1579 | } |
1580 | |
1581 | /* Free things allocated by alloc_hash_table. */ |
1582 | |
1583 | static void |
1584 | free_hash_table (struct gcse_hash_table_d *table) |
1585 | { |
1586 | free (ptr: table->table); |
1587 | } |
1588 | |
1589 | /* Compute the expression hash table TABLE. */ |
1590 | |
1591 | static void |
1592 | compute_hash_table (struct gcse_hash_table_d *table) |
1593 | { |
1594 | /* Initialize count of number of entries in hash table. */ |
1595 | table->n_elems = 0; |
1596 | memset (s: table->table, c: 0, n: table->size * sizeof (struct gcse_expr *)); |
1597 | |
1598 | compute_hash_table_work (table); |
1599 | } |
1600 | |
1601 | /* Expression tracking support. */ |
1602 | |
1603 | /* Clear canon_modify_mem_list and modify_mem_list tables. */ |
1604 | static void |
1605 | clear_modify_mem_tables (void) |
1606 | { |
1607 | unsigned i; |
1608 | bitmap_iterator bi; |
1609 | |
1610 | EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi) |
1611 | { |
1612 | modify_mem_list[i].release (); |
1613 | canon_modify_mem_list[i].release (); |
1614 | } |
1615 | bitmap_clear (modify_mem_list_set); |
1616 | bitmap_clear (blocks_with_calls); |
1617 | } |
1618 | |
1619 | /* Release memory used by modify_mem_list_set. */ |
1620 | |
1621 | static void |
1622 | free_modify_mem_tables (void) |
1623 | { |
1624 | clear_modify_mem_tables (); |
1625 | free (ptr: modify_mem_list); |
1626 | free (ptr: canon_modify_mem_list); |
1627 | modify_mem_list = 0; |
1628 | canon_modify_mem_list = 0; |
1629 | } |
1630 | |
1631 | /* Compute PRE+LCM working variables. */ |
1632 | |
1633 | /* Local properties of expressions. */ |
1634 | |
1635 | /* Nonzero for expressions that are transparent in the block. */ |
/* Nonzero for expressions that are transparent in the block (not killed
   by any set within it).  */
static sbitmap *transp;

/* Nonzero for expressions that are computed (available) in the block.  */
static sbitmap *comp;

/* Nonzero for expressions that are locally anticipatable in the block.  */
static sbitmap *antloc;

/* Nonzero for expressions where this block is an optimal computation
   point.  */
static sbitmap *pre_optimal;

/* Nonzero for expressions which are redundant in a particular block.  */
static sbitmap *pre_redundant;

/* Nonzero for expressions which should be inserted on a specific edge.  */
static sbitmap *pre_insert_map;

/* Nonzero for expressions which should be deleted in a specific block.  */
static sbitmap *pre_delete_map;
1656 | |
1657 | /* Allocate vars used for PRE analysis. */ |
1658 | |
1659 | static void |
1660 | alloc_pre_mem (int n_blocks, int n_exprs) |
1661 | { |
1662 | transp = sbitmap_vector_alloc (n_blocks, n_exprs); |
1663 | comp = sbitmap_vector_alloc (n_blocks, n_exprs); |
1664 | antloc = sbitmap_vector_alloc (n_blocks, n_exprs); |
1665 | |
1666 | pre_optimal = NULL; |
1667 | pre_redundant = NULL; |
1668 | pre_insert_map = NULL; |
1669 | pre_delete_map = NULL; |
1670 | ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs); |
1671 | |
1672 | /* pre_insert and pre_delete are allocated later. */ |
1673 | } |
1674 | |
1675 | /* Free vars used for PRE analysis. */ |
1676 | |
1677 | static void |
1678 | free_pre_mem (void) |
1679 | { |
1680 | sbitmap_vector_free (vec: transp); |
1681 | sbitmap_vector_free (vec: comp); |
1682 | |
1683 | /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */ |
1684 | |
1685 | if (pre_optimal) |
1686 | sbitmap_vector_free (vec: pre_optimal); |
1687 | if (pre_redundant) |
1688 | sbitmap_vector_free (vec: pre_redundant); |
1689 | if (pre_insert_map) |
1690 | sbitmap_vector_free (vec: pre_insert_map); |
1691 | if (pre_delete_map) |
1692 | sbitmap_vector_free (vec: pre_delete_map); |
1693 | |
1694 | transp = comp = NULL; |
1695 | pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL; |
1696 | } |
1697 | |
1698 | /* Remove certain expressions from anticipatable and transparent |
1699 | sets of basic blocks that have incoming abnormal edge. |
1700 | For PRE remove potentially trapping expressions to avoid placing |
1701 | them on abnormal edges. For hoisting remove memory references that |
1702 | can be clobbered by calls. */ |
1703 | |
static void
prune_expressions (bool pre_p)
{
  struct gcse_expr *expr;
  unsigned int ui;
  basic_block bb;

  /* Bitmap, indexed by expression `bitmap_index', of expressions
     selected for pruning below.  */
  auto_sbitmap prune_exprs (expr_hash_table.n_elems);
  bitmap_clear (prune_exprs);
  for (ui = 0; ui < expr_hash_table.size; ui++)
    {
      for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
	{
	  /* Note potentially trapping expressions.  */
	  if (may_trap_p (expr->expr))
	    {
	      bitmap_set_bit (map: prune_exprs, bitno: expr->bitmap_index);
	      continue;
	    }

	  if (!pre_p && contains_mem_rtx_p (x: expr->expr))
	    /* Note memory references that can be clobbered by a call.
	       We do not split abnormal edges in hoisting, so would
	       a memory reference get hoisted along an abnormal edge,
	       it would be placed /before/ the call.  Therefore, only
	       constant memory references can be hoisted along abnormal
	       edges.  */
	    {
	      rtx x = expr->expr;

	      /* Common cases where we might find the MEM which may allow us
		 to avoid pruning the expression.  */
	      while (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
		x = XEXP (x, 0);

	      /* If we found the MEM, go ahead and look at it to see if it has
		 properties that allow us to avoid pruning its expression out
		 of the tables.  */
	      if (MEM_P (x))
		{
		  /* A constant-pool load cannot be clobbered by a call.  */
		  if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF
		      && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)))
		    continue;

		  if (MEM_READONLY_P (x)
		      && !MEM_VOLATILE_P (x)
		      && MEM_NOTRAP_P (x))
		    /* Constant memory reference, e.g., a PIC address.  */
		    continue;
		}

	      /* ??? Optimally, we would use interprocedural alias
		 analysis to determine if this mem is actually killed
		 by this call.  */

	      bitmap_set_bit (map: prune_exprs, bitno: expr->bitmap_index);
	    }
	}
    }

  FOR_EACH_BB_FN (bb, cfun)
    {
      edge e;
      edge_iterator ei;

      /* If the current block is the destination of an abnormal edge, we
	 kill all trapping (for PRE) and memory (for hoist) expressions
	 because we won't be able to properly place the instruction on
	 the edge.  So make them neither anticipatable nor transparent.
	 This is fairly conservative.

	 ??? For hoisting it may be necessary to check for set-and-jump
	 instructions here, not just for abnormal edges.  The general problem
	 is that when an expression cannot be placed right at the end of
	 a basic block we should account for any side-effects of a subsequent
	 jump instructions that could clobber the expression.  It would
	 be best to implement this check along the lines of
	 should_hoist_expr_to_dom where the target block is already known
	 and, hence, there's no need to conservatively prune expressions on
	 "intermediate" set-and-jump instructions.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	if ((e->flags & EDGE_ABNORMAL)
	    && (pre_p || CALL_P (BB_END (e->src))))
	  {
	    bitmap_and_compl (antloc[bb->index],
			      antloc[bb->index], prune_exprs);
	    bitmap_and_compl (transp[bb->index],
			      transp[bb->index], prune_exprs);
	    break;
	  }
    }
}
1796 | |
1797 | /* It may be necessary to insert a large number of insns on edges to |
1798 | make the existing occurrences of expressions fully redundant. This |
1799 | routine examines the set of insertions and deletions and if the ratio |
1800 | of insertions to deletions is too high for a particular expression, then |
1801 | the expression is removed from the insertion/deletion sets. |
1802 | |
1803 | N_ELEMS is the number of elements in the hash table. */ |
1804 | |
1805 | static void |
1806 | prune_insertions_deletions (int n_elems) |
1807 | { |
1808 | sbitmap_iterator sbi; |
1809 | |
1810 | /* We always use I to iterate over blocks/edges and J to iterate over |
1811 | expressions. */ |
1812 | unsigned int i, j; |
1813 | |
1814 | /* Counts for the number of times an expression needs to be inserted and |
1815 | number of times an expression can be removed as a result. */ |
1816 | int *insertions = GCNEWVEC (int, n_elems); |
1817 | int *deletions = GCNEWVEC (int, n_elems); |
1818 | |
1819 | /* Set of expressions which require too many insertions relative to |
1820 | the number of deletions achieved. We will prune these out of the |
1821 | insertion/deletion sets. */ |
1822 | auto_sbitmap prune_exprs (n_elems); |
1823 | bitmap_clear (prune_exprs); |
1824 | |
1825 | /* Iterate over the edges counting the number of times each expression |
1826 | needs to be inserted. */ |
1827 | for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++) |
1828 | { |
1829 | EXECUTE_IF_SET_IN_BITMAP (pre_insert_map[i], 0, j, sbi) |
1830 | insertions[j]++; |
1831 | } |
1832 | |
1833 | /* Similarly for deletions, but those occur in blocks rather than on |
1834 | edges. */ |
1835 | for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++) |
1836 | { |
1837 | EXECUTE_IF_SET_IN_BITMAP (pre_delete_map[i], 0, j, sbi) |
1838 | deletions[j]++; |
1839 | } |
1840 | |
1841 | /* Now that we have accurate counts, iterate over the elements in the |
1842 | hash table and see if any need too many insertions relative to the |
1843 | number of evaluations that can be removed. If so, mark them in |
1844 | PRUNE_EXPRS. */ |
1845 | for (j = 0; j < (unsigned) n_elems; j++) |
1846 | if (deletions[j] |
1847 | && (insertions[j] / deletions[j]) > param_max_gcse_insertion_ratio) |
1848 | bitmap_set_bit (map: prune_exprs, bitno: j); |
1849 | |
1850 | /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */ |
1851 | EXECUTE_IF_SET_IN_BITMAP (prune_exprs, 0, j, sbi) |
1852 | { |
1853 | for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++) |
1854 | bitmap_clear_bit (map: pre_insert_map[i], bitno: j); |
1855 | |
1856 | for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++) |
1857 | bitmap_clear_bit (map: pre_delete_map[i], bitno: j); |
1858 | } |
1859 | |
1860 | free (ptr: insertions); |
1861 | free (ptr: deletions); |
1862 | } |
1863 | |
1864 | /* Top level routine to do the dataflow analysis needed by PRE. */ |
1865 | |
static struct edge_list *
compute_pre_data (void)
{
  struct edge_list *edge_list;
  basic_block bb;

  /* Fill TRANSP, COMP and ANTLOC from the hash table, then remove
     trapping expressions from blocks reached by abnormal edges.  */
  compute_local_properties (transp, comp, antloc, table: &expr_hash_table);
  prune_expressions (pre_p: true);
  bitmap_vector_clear (ae_kill, last_basic_block_for_fn (cfun));

  /* Compute ae_kill for each basic block using:

     ~(TRANSP | COMP)
  */

  FOR_EACH_BB_FN (bb, cfun)
    {
      bitmap_ior (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
      bitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
    }

  /* Run lazy code motion; this allocates PRE_INSERT_MAP and
     PRE_DELETE_MAP and returns the edge list they are indexed by.  */
  edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
			    ae_kill, &pre_insert_map, &pre_delete_map);

  /* ANTLOC and AE_KILL are only needed by the LCM computation itself.  */
  sbitmap_vector_free (vec: antloc);
  antloc = NULL;
  sbitmap_vector_free (vec: ae_kill);
  ae_kill = NULL;

  prune_insertions_deletions (n_elems: expr_hash_table.n_elems);

  return edge_list;
}
1898 | |
1899 | /* PRE utilities */ |
1900 | |
1901 | /* Return true if an occurrence of expression EXPR in OCCR_BB would reach |
1902 | block BB. |
1903 | |
1904 | VISITED is a pointer to a working buffer for tracking which BB's have |
1905 | been visited. It is NULL for the top-level call. |
1906 | |
1907 | We treat reaching expressions that go through blocks containing the same |
1908 | reaching expression as "not reaching". E.g. if EXPR is generated in blocks |
1909 | 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block |
1910 | 2 as not reaching. The intent is to improve the probability of finding |
1911 | only one reaching expression and to reduce register lifetimes by picking |
1912 | the closest such expression. */ |
1913 | |
static bool
pre_expr_reaches_here_p_work (basic_block occr_bb, struct gcse_expr *expr,
			      basic_block bb, char *visited)
{
  edge pred;
  edge_iterator ei;

  /* Walk backwards over every predecessor of BB.  */
  FOR_EACH_EDGE (pred, ei, bb->preds)
    {
      basic_block pred_bb = pred->src;

      if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
	  /* Has this predecessor already been visited?  */
	  || visited[pred_bb->index])
	;/* Nothing to do.  */

      /* Does this predecessor generate this expression?  */
      else if (bitmap_bit_p (map: comp[pred_bb->index], bitno: expr->bitmap_index))
	{
	  /* Is this the occurrence we're looking for?
	     Note that there's only one generating occurrence per block
	     so we just need to check the block number.  */
	  if (occr_bb == pred_bb)
	    return true;

	  /* A different generating occurrence blocks this path; see the
	     function comment for why such paths count as "not reaching".  */
	  visited[pred_bb->index] = 1;
	}
      /* Ignore this predecessor if it kills the expression.  */
      else if (! bitmap_bit_p (map: transp[pred_bb->index], bitno: expr->bitmap_index))
	visited[pred_bb->index] = 1;

      /* Neither gen nor kill.  */
      else
	{
	  visited[pred_bb->index] = 1;
	  if (pre_expr_reaches_here_p_work (occr_bb, expr, bb: pred_bb, visited))
	    return true;
	}
    }

  /* All paths have been checked.  */
  return false;
}
1957 | |
1958 | /* The wrapper for pre_expr_reaches_here_work that ensures that any |
1959 | memory allocated for that function is returned. */ |
1960 | |
1961 | static bool |
1962 | pre_expr_reaches_here_p (basic_block occr_bb, struct gcse_expr *expr, |
1963 | basic_block bb) |
1964 | { |
1965 | bool rval; |
1966 | char *visited = XCNEWVEC (char, last_basic_block_for_fn (cfun)); |
1967 | |
1968 | rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited); |
1969 | |
1970 | free (ptr: visited); |
1971 | return rval; |
1972 | } |
1973 | |
1974 | /* Generate RTL to copy an EXP to REG and return it. */ |
1975 | |
rtx_insn *
prepare_copy_insn (rtx reg, rtx exp)
{
  rtx_insn *pat;

  /* Collect everything emitted below into one sequence so that a
     multi-insn expansion is returned as a single insn chain.  */
  start_sequence ();

  /* If the expression is something that's an operand, like a constant,
     just copy it to a register.  */
  if (general_operand (exp, GET_MODE (reg)))
    emit_move_insn (reg, exp);

  /* Otherwise, make a new insn to compute this expression and make sure the
     insn will be recognized (this also adds any needed CLOBBERs).  */
  else
    {
      rtx_insn *insn = emit_insn (gen_rtx_SET (reg, exp));

      /* Callers only pass expressions already known to be valid, so a
	 failure to recognize the insn is a bug.  */
      if (insn_invalid_p (insn, false))
	gcc_unreachable ();
    }

  pat = get_insns ();
  end_sequence ();

  return pat;
}
2003 | |
2004 | /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */ |
2005 | |
2006 | static rtx_insn * |
2007 | process_insert_insn (struct gcse_expr *expr) |
2008 | { |
2009 | rtx reg = expr->reaching_reg; |
2010 | /* Copy the expression to make sure we don't have any sharing issues. */ |
2011 | rtx exp = copy_rtx (expr->expr); |
2012 | |
2013 | return prepare_copy_insn (reg, exp); |
2014 | } |
2015 | |
2016 | /* Return the INSN which is added at the end of the block BB with |
2017 | same instruction pattern with PAT. */ |
2018 | |
rtx_insn *
insert_insn_end_basic_block (rtx_insn *pat, basic_block bb)
{
  rtx_insn *insn = BB_END (bb);
  rtx_insn *new_insn;
  rtx_insn *pat_end;

  gcc_assert (pat && INSN_P (pat));

  /* Find the last insn of the PAT chain; the whole chain is inserted.  */
  pat_end = pat;
  while (NEXT_INSN (insn: pat_end) != NULL_RTX)
    pat_end = NEXT_INSN (insn: pat_end);

  /* If the last insn is a jump, insert EXPR in front.  Similarly we need to
     take care of trapping instructions in presence of non-call exceptions.  */

  if (JUMP_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (!single_succ_p (bb)
	      || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
    {
      /* FIXME: What if something in jump uses value set in new insn?  */
      new_insn = emit_insn_before_noloc (pat, insn, bb);
    }

  /* Likewise if the last insn is a call, as will happen in the presence
     of exception handling.  */
  else if (CALL_P (insn)
	   && (!single_succ_p (bb)
	       || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
    {
      /* Keeping in mind targets with small register classes and parameters
	 in registers, we search backward and place the instructions before
	 the first parameter is loaded.  Do this for everyone for consistency
	 and a presumption that we'll get better code elsewhere as well.  */

      /* Since different machines initialize their parameter registers
	 in different orders, assume nothing.  Collect the set of all
	 parameter registers.  */
      insn = find_first_parameter_load (insn, BB_HEAD (bb));

      /* If we found all the parameter loads, then we want to insert
	 before the first parameter load.

	 If we did not find all the parameter loads, then we might have
	 stopped on the head of the block, which could be a CODE_LABEL.
	 If we inserted before the CODE_LABEL, then we would be putting
	 the insn in the wrong basic block.  In that case, put the insn
	 after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
      while (LABEL_P (insn)
	     || NOTE_INSN_BASIC_BLOCK_P (insn))
	insn = NEXT_INSN (insn);

      new_insn = emit_insn_before_noloc (pat, insn, bb);
    }
  else
    /* The common case: append after the last insn of the block.  */
    new_insn = emit_insn_after_noloc (pat, insn, bb);

  /* Let add_label_notes process every real insn of the emitted chain.  */
  while (1)
    {
      if (INSN_P (pat))
	add_label_notes (PATTERN (insn: pat), new_insn);
      if (pat == pat_end)
	break;
      pat = NEXT_INSN (insn: pat);
    }
  return new_insn;
}
2087 | |
2088 | /* Add EXPR to the end of basic block BB. |
2089 | |
2090 | This is used by both the PRE and code hoisting. */ |
2091 | |
2092 | static void |
2093 | insert_insn_end_basic_block (struct gcse_expr *expr, basic_block bb) |
2094 | { |
2095 | rtx reg = expr->reaching_reg; |
2096 | int regno = REGNO (reg); |
2097 | |
2098 | rtx_insn *insn = process_insert_insn (expr); |
2099 | rtx_insn *new_insn = insert_insn_end_basic_block (pat: insn, bb); |
2100 | |
2101 | gcse_create_count++; |
2102 | |
2103 | if (dump_file) |
2104 | { |
2105 | fprintf (stream: dump_file, format: "PRE/HOIST: end of bb %d, insn %d, " , |
2106 | bb->index, INSN_UID (insn: new_insn)); |
2107 | fprintf (stream: dump_file, format: "copying expression %d to reg %d\n" , |
2108 | expr->bitmap_index, regno); |
2109 | } |
2110 | } |
2111 | |
2112 | /* Insert partially redundant expressions on edges in the CFG to make |
2113 | the expressions fully redundant. */ |
2114 | |
static bool
pre_edge_insert (struct edge_list *edge_list, struct gcse_expr **index_map)
{
  int e, i, j, num_edges, set_size;
  bool did_insert = false;
  sbitmap *inserted;

  /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
     if it reaches any of the deleted expressions.  */

  /* SET_SIZE is the number of SBITMAP_ELT_TYPE words per edge bitmap;
     the loops below scan those words bit by bit.  */
  set_size = pre_insert_map[0]->size;
  num_edges = NUM_EDGES (edge_list);
  /* INSERTED[e][j] records that expression J was already inserted on
     edge E, so each expression is inserted at most once per edge.  */
  inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
  bitmap_vector_clear (inserted, num_edges);

  for (e = 0; e < num_edges; e++)
    {
      int indx;
      basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);

      for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
	{
	  /* Shift through the bits of this word; INDX + bit position
	     gives the expression index J.  */
	  SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];

	  for (j = indx;
	       insert && j < (int) expr_hash_table.n_elems;
	       j++, insert >>= 1)
	    if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
	      {
		struct gcse_expr *expr = index_map[j];
		struct gcse_occr *occr;

		/* Now look at each deleted occurrence of this expression.  */
		for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
		  {
		    if (! occr->deleted_p)
		      continue;

		    /* Insert this expression on this edge if it would
		       reach the deleted occurrence in BB.  */
		    if (!bitmap_bit_p (map: inserted[e], bitno: j))
		      {
			rtx_insn *insn;
			edge eg = INDEX_EDGE (edge_list, e);

			/* We can't insert anything on an abnormal and
			   critical edge, so we insert the insn at the end of
			   the previous block.  There are several alternatives
			   detailed in Morgans book P277 (sec 10.5) for
			   handling this situation.  This one is easiest for
			   now.  */

			if (eg->flags & EDGE_ABNORMAL)
			  insert_insn_end_basic_block (expr: index_map[j], bb);
			else
			  {
			    insn = process_insert_insn (expr: index_map[j]);
			    insert_insn_on_edge (insn, eg);
			  }

			if (dump_file)
			  {
			    fprintf (stream: dump_file, format: "PRE: edge (%d,%d), ",
				     bb->index,
				     INDEX_EDGE_SUCC_BB (edge_list, e)->index);
			    fprintf (stream: dump_file, format: "copy expression %d\n",
				     expr->bitmap_index);
			  }

			update_ld_motion_stores (expr);
			bitmap_set_bit (map: inserted[e], bitno: j);
			did_insert = true;
			gcse_create_count++;
		      }
		  }
	      }
	}
    }

  sbitmap_vector_free (vec: inserted);
  return did_insert;
}
2197 | |
2198 | /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG. |
2199 | Given "old_reg <- expr" (INSN), instead of adding after it |
2200 | reaching_reg <- old_reg |
2201 | it's better to do the following: |
2202 | reaching_reg <- expr |
2203 | old_reg <- reaching_reg |
2204 | because this way copy propagation can discover additional PRE |
2205 | opportunities. But if this fails, we try the old way. |
2206 | When "expr" is a store, i.e. |
2207 | given "MEM <- old_reg", instead of adding after it |
2208 | reaching_reg <- old_reg |
2209 | it's better to add it before as follows: |
2210 | reaching_reg <- old_reg |
2211 | MEM <- reaching_reg. */ |
2212 | |
static void
pre_insert_copy_insn (struct gcse_expr *expr, rtx_insn *insn)
{
  rtx reg = expr->reaching_reg;
  int regno = REGNO (reg);
  int indx = expr->bitmap_index;
  rtx pat = PATTERN (insn);
  rtx set, first_set;
  rtx_insn *new_insn;
  rtx old_reg;
  int i;

  /* This block matches the logic in hash_scan_insn.  */
  switch (GET_CODE (pat))
    {
    case SET:
      set = pat;
      break;

    case PARALLEL:
      /* Search through the parallel looking for the set whose
	 source was the expression that we're interested in.  */
      first_set = NULL_RTX;
      set = NULL_RTX;
      for (i = 0; i < XVECLEN (pat, 0); i++)
	{
	  rtx x = XVECEXP (pat, 0, i);
	  if (GET_CODE (x) == SET)
	    {
	      /* If the source was a REG_EQUAL or REG_EQUIV note, we
		 may not find an equivalent expression, but in this
		 case the PARALLEL will have a single set.  */
	      if (first_set == NULL_RTX)
		first_set = x;
	      if (expr_equiv_p (SET_SRC (x), y: expr->expr))
	        {
	          set = x;
	          break;
	        }
	    }
	}

      /* Fall back to the first SET when no source matched (see the
	 REG_EQUAL/REG_EQUIV comment above).  */
      gcc_assert (first_set);
      if (set == NULL_RTX)
        set = first_set;
      break;

    default:
      gcc_unreachable ();
    }

  if (REG_P (SET_DEST (set)))
    {
      old_reg = SET_DEST (set);
      /* Check if we can modify the set destination in the original insn.
	 If so, retarget INSN to compute into REACHING_REG directly and
	 copy the value back to OLD_REG afterwards (see the function
	 comment for why this form is preferred).  */
      if (validate_change (insn, &SET_DEST (set), reg, 0))
	{
	  new_insn = gen_move_insn (old_reg, reg);
	  new_insn = emit_insn_after (new_insn, insn);
	}
      else
	{
	  /* Otherwise just copy OLD_REG into REACHING_REG after INSN.  */
	  new_insn = gen_move_insn (reg, old_reg);
	  new_insn = emit_insn_after (new_insn, insn);
	}
    }
  else /* This is possible only in case of a store to memory.  */
    {
      old_reg = SET_SRC (set);
      new_insn = gen_move_insn (reg, old_reg);

      /* Check if we can modify the set source in the original insn.  */
      if (validate_change (insn, &SET_SRC (set), reg, 0))
	new_insn = emit_insn_before (new_insn, insn);
      else
	new_insn = emit_insn_after (new_insn, insn);
    }

  gcse_create_count++;

  if (dump_file)
    fprintf (stream: dump_file,
	     format: "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
	     BLOCK_FOR_INSN (insn)->index, INSN_UID (insn: new_insn), indx,
	     INSN_UID (insn), regno);
}
2299 | |
2300 | /* Copy available expressions that reach the redundant expression |
2301 | to `reaching_reg'. */ |
2302 | |
static void
pre_insert_copies (void)
{
  unsigned int i;
  bool added_copy;
  struct gcse_expr *expr;
  struct gcse_occr *occr;
  struct gcse_occr *avail;

  /* For each available expression in the table, copy the result to
     `reaching_reg' if the expression reaches a deleted one.

     ??? The current algorithm is rather brute force.
     Need to do some profiling.  */

  for (i = 0; i < expr_hash_table.size; i++)
    for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
      {
	/* If the basic block isn't reachable, PPOUT will be TRUE.  However,
	   we don't want to insert a copy here because the expression may not
	   really be redundant.  So only insert an insn if the expression was
	   deleted.  This test also avoids further processing if the
	   expression wasn't deleted anywhere.  */
	if (expr->reaching_reg == NULL)
	  continue;

	/* Set when we add a copy for that expression.  */
	added_copy = false;

	/* For each deleted occurrence ...  */
	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	  {
	    if (! occr->deleted_p)
	      continue;

	    /* ... look for an available occurrence that reaches it.  */
	    for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
	      {
		rtx_insn *insn = avail->insn;

		/* No need to handle this one if handled already.  */
		if (avail->copied_p)
		  continue;

		/* Don't handle this one if it's a redundant one.  */
		if (insn->deleted ())
		  continue;

		/* Or if the expression doesn't reach the deleted one.  */
		if (! pre_expr_reaches_here_p (occr_bb: BLOCK_FOR_INSN (insn: avail->insn),
					       expr,
					       bb: BLOCK_FOR_INSN (insn: occr->insn)))
		  continue;

		added_copy = true;

		/* Copy the result of avail to reaching_reg.  */
		pre_insert_copy_insn (expr, insn);
		avail->copied_p = 1;
	      }
	  }

	  if (added_copy)
	    update_ld_motion_stores (expr);
      }
}
2367 | |
/* Scratch state for single_set_gcse/record_set_data.  */
struct set_data
{
  /* The insn whose pattern is being scanned.  */
  rtx_insn *insn;
  /* The SET recorded so far (valid when nsets == 1).  */
  const_rtx set;
  /* Number of sets seen that are not known to be dead.  */
  int nsets;
};
2374 | |
2375 | /* Increment number of sets and record set in DATA. */ |
2376 | |
/* Increment number of sets and record set in DATA.
   Callback for note_pattern_stores: DEST is the location stored to,
   SET the SET/CLOBBER rtx, DATA a struct set_data.  */

static void
record_set_data (rtx dest, const_rtx set, void *data)
{
  struct set_data *s = (struct set_data *)data;

  /* Only genuine SETs count; CLOBBERs etc. are ignored.  */
  if (GET_CODE (set) == SET)
    {
      /* We allow insns having multiple sets, where all but one are
	 dead as single set insns.  In the common case only a single
	 set is present, so we want to avoid checking for REG_UNUSED
	 notes unless necessary.  */
      if (s->nsets == 1
	  && find_reg_note (s->insn, REG_UNUSED, SET_DEST (s->set))
	  && !side_effects_p (s->set))
	/* The previously recorded set turned out to be dead; discard it
	   so the current one can take its place.  */
	s->nsets = 0;

      if (!s->nsets)
	{
	  /* Record this set.  */
	  s->nsets += 1;
	  s->set = set;
	}
      else if (!find_reg_note (s->insn, REG_UNUSED, dest)
	       || side_effects_p (set))
	/* A second live set: the insn is not a single-set insn.  */
	s->nsets += 1;
    }
}
2404 | |
2405 | static const_rtx |
2406 | single_set_gcse (rtx_insn *insn) |
2407 | { |
2408 | struct set_data s; |
2409 | rtx pattern; |
2410 | |
2411 | gcc_assert (INSN_P (insn)); |
2412 | |
2413 | /* Optimize common case. */ |
2414 | pattern = PATTERN (insn); |
2415 | if (GET_CODE (pattern) == SET) |
2416 | return pattern; |
2417 | |
2418 | s.insn = insn; |
2419 | s.nsets = 0; |
2420 | note_pattern_stores (pattern, record_set_data, &s); |
2421 | |
2422 | /* Considered invariant insns have exactly one set. */ |
2423 | gcc_assert (s.nsets == 1); |
2424 | return s.set; |
2425 | } |
2426 | |
2427 | /* Emit move from SRC to DEST noting the equivalence with expression computed |
2428 | in INSN. */ |
2429 | |
static rtx_insn *
gcse_emit_move_after (rtx dest, rtx src, rtx_insn *insn)
{
  rtx_insn *new_rtx;
  const_rtx set = single_set_gcse (insn);
  rtx set2;
  rtx note;
  rtx eqv = NULL_RTX;

  /* This should never fail since we're creating a reg->reg copy
     we've verified to be valid.  */

  new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);

  /* Note the equivalence for local CSE pass.  Take the note from the old
     set if there was one.  Otherwise record the SET_SRC from the old set
     unless DEST is also an operand of the SET_SRC.  */
  set2 = single_set (insn: new_rtx);
  if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
    /* The emitted move is not a simple set of DEST; no note to add.  */
    return new_rtx;
  if ((note = find_reg_equal_equiv_note (insn)))
    eqv = XEXP (note, 0);
  else if (! REG_P (dest)
	   || ! reg_mentioned_p (dest, SET_SRC (set)))
    eqv = SET_SRC (set);

  if (eqv != NULL_RTX)
    set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));

  return new_rtx;
}
2461 | |
2462 | /* Delete redundant computations. |
2463 | Deletion is done by changing the insn to copy the `reaching_reg' of |
2464 | the expression into the result of the SET. It is left to later passes |
2465 | to propagate the copy or eliminate it. |
2466 | |
2467 | Return true if a change is made. */ |
2468 | |
static bool
pre_delete (void)
{
  unsigned int i;
  bool changed = false;
  struct gcse_expr *expr;
  struct gcse_occr *occr;

  for (i = 0; i < expr_hash_table.size; i++)
    for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
      {
	int indx = expr->bitmap_index;

	/* We only need to search antic_occr since we require ANTLOC != 0.  */
	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	  {
	    rtx_insn *insn = occr->insn;
	    rtx set;
	    basic_block bb = BLOCK_FOR_INSN (insn);

	    /* We only delete insns that have a single_set.
	       The dbg_cnt check allows bisecting miscompiles by limiting
	       how many deletions are performed.  */
	    if (bitmap_bit_p (map: pre_delete_map[bb->index], bitno: indx)
		&& (set = single_set (insn)) != 0
		&& dbg_cnt (index: pre_insn))
	      {
		/* Create a pseudo-reg to store the result of reaching
		   expressions into.  Get the mode for the new pseudo from
		   the mode of the original destination pseudo.  */
		if (expr->reaching_reg == NULL)
		  expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));

		/* Replace the redundant computation with a copy from the
		   register that will hold the reaching expression.  */
		gcse_emit_move_after (SET_DEST (set), src: expr->reaching_reg, insn);
		delete_insn (insn);
		occr->deleted_p = 1;
		changed = true;
		gcse_subst_count++;

		if (dump_file)
		  {
		    fprintf (stream: dump_file,
			     format: "PRE: redundant insn %d (expression %d) in ",
			     INSN_UID (insn), indx);
		    fprintf (stream: dump_file, format: "bb %d, reaching reg is %d\n",
			     bb->index, REGNO (expr->reaching_reg));
		  }
	      }
	  }
      }

  return changed;
}
2520 | |
2521 | /* Perform GCSE optimizations using PRE. |
2522 | This is called by one_pre_gcse_pass after all the dataflow analysis |
2523 | has been done. |
2524 | |
2525 | This is based on the original Morel-Renvoise paper Fred Chow's thesis, and |
2526 | lazy code motion from Knoop, Ruthing and Steffen as described in Advanced |
2527 | Compiler Design and Implementation. |
2528 | |
2529 | ??? A new pseudo reg is created to hold the reaching expression. The nice |
2530 | thing about the classical approach is that it would try to use an existing |
2531 | reg. If the register can't be adequately optimized [i.e. we introduce |
2532 | reload problems], one could add a pass here to propagate the new register |
2533 | through the block. |
2534 | |
2535 | ??? We don't handle single sets in PARALLELs because we're [currently] not |
2536 | able to copy the rest of the parallel when we insert copies to create full |
2537 | redundancies from partial redundancies. However, there's no reason why we |
2538 | can't handle PARALLELs in the cases where there are no partial |
2539 | redundancies. */ |
2540 | |
2541 | static bool |
2542 | pre_gcse (struct edge_list *edge_list) |
2543 | { |
2544 | unsigned int i; |
2545 | bool did_insert, changed; |
2546 | struct gcse_expr **index_map; |
2547 | struct gcse_expr *expr; |
2548 | |
2549 | /* Compute a mapping from expression number (`bitmap_index') to |
2550 | hash table entry. */ |
2551 | |
2552 | index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems); |
2553 | for (i = 0; i < expr_hash_table.size; i++) |
2554 | for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash) |
2555 | index_map[expr->bitmap_index] = expr; |
2556 | |
2557 | /* Delete the redundant insns first so that |
2558 | - we know what register to use for the new insns and for the other |
2559 | ones with reaching expressions |
2560 | - we know which insns are redundant when we go to create copies */ |
2561 | |
2562 | changed = pre_delete (); |
2563 | did_insert = pre_edge_insert (edge_list, index_map); |
2564 | |
2565 | /* In other places with reaching expressions, copy the expression to the |
2566 | specially allocated pseudo-reg that reaches the redundant expr. */ |
2567 | pre_insert_copies (); |
2568 | if (did_insert) |
2569 | { |
2570 | commit_edge_insertions (); |
2571 | changed = true; |
2572 | } |
2573 | |
2574 | free (ptr: index_map); |
2575 | return changed; |
2576 | } |
2577 | |
2578 | /* Top level routine to perform one PRE GCSE pass. |
2579 | |
2580 | Return true if a change was made. */ |
2581 | |
static bool
one_pre_gcse_pass (void)
{
  bool changed = false;

  /* Statistics counters, reported to the dump file at the end.  */
  gcse_subst_count = 0;
  gcse_create_count = 0;

  /* Return if there's nothing to do, or it is too expensive.  */
  if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
      || gcse_or_cprop_is_too_expensive (_("PRE disabled" )))
    return false;

  /* We need alias.  */
  init_alias_analysis ();

  bytes_used = 0;
  gcc_obstack_init (&gcse_obstack);
  alloc_gcse_mem ();

  alloc_hash_table (table: &expr_hash_table);
  add_noreturn_fake_exit_edges ();
  /* Collect load-motion candidates before the hash table is built so
     they can participate in PRE.  */
  if (flag_gcse_lm)
    compute_ld_motion_mems ();

  compute_hash_table (table: &expr_hash_table);
  /* Drop load-motion candidates that turned out to be unsuitable.  */
  if (flag_gcse_lm)
    trim_ld_motion_mems ();
  if (dump_file)
    dump_hash_table (file: dump_file, name: "Expression" , table: &expr_hash_table);

  /* Only run the dataflow problems and the transformation when there
     is at least one candidate expression.  */
  if (expr_hash_table.n_elems > 0)
    {
      struct edge_list *edge_list;
      alloc_pre_mem (last_basic_block_for_fn (cfun), n_exprs: expr_hash_table.n_elems);
      edge_list = compute_pre_data ();
      if (pre_gcse (edge_list))
	changed = true;
      free_edge_list (edge_list);
      free_pre_mem ();
    }

  if (flag_gcse_lm)
    free_ld_motion_mems ();
  remove_fake_exit_edges ();
  free_hash_table (table: &expr_hash_table);

  free_gcse_mem ();
  obstack_free (&gcse_obstack, NULL);

  /* We are finished with alias.  */
  end_alias_analysis ();

  if (dump_file)
    {
      fprintf (stream: dump_file, format: "PRE GCSE of %s, %d basic blocks, %d bytes needed, " ,
	       current_function_name (), n_basic_blocks_for_fn (cfun),
	       bytes_used);
      fprintf (stream: dump_file, format: "%d substs, %d insns created\n" ,
	       gcse_subst_count, gcse_create_count);
    }

  return changed;
}
2646 | |
2647 | /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them |
2648 | to INSN. If such notes are added to an insn which references a |
2649 | CODE_LABEL, the LABEL_NUSES count is incremented. We have to add |
2650 | that note, because the following loop optimization pass requires |
2651 | them. */ |
2652 | |
2653 | /* ??? If there was a jump optimization pass after gcse and before loop, |
2654 | then we would not need to do this here, because jump would add the |
2655 | necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */ |
2656 | |
static void
add_label_notes (rtx x, rtx_insn *insn)
{
  enum rtx_code code = GET_CODE (x);
  int i, j;
  const char *fmt;

  /* A non-local LABEL_REF is handled elsewhere; only plain label
     references get a REG_LABEL_OPERAND note here.  */
  if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
    {
      /* This code used to ignore labels that referred to dispatch tables to
	 avoid flow generating (slightly) worse code.

	 We no longer ignore such label references (see LABEL_REF handling in
	 mark_jump_label for additional information).  */

      /* There's no reason for current users to emit jump-insns with
	 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
	 notes.  */
      gcc_assert (!JUMP_P (insn));
      add_reg_note (insn, REG_LABEL_OPERAND, label_ref_label (ref: x));

      /* The note is a new reference to the label, so keep its use
	 count consistent.  */
      if (LABEL_P (label_ref_label (x)))
	LABEL_NUSES (label_ref_label (x))++;

      return;
    }

  /* Recurse into every sub-rtx ('e') and every element of rtx
     vectors ('E') of X, looking for more LABEL_REFs.  */
  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	add_label_notes (XEXP (x, i), insn);
      else if (fmt[i] == 'E')
	for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	  add_label_notes (XVECEXP (x, i, j), insn);
    }
}
2693 | |
2694 | /* Code Hoisting variables and subroutines. */ |
2695 | |
2696 | /* Very busy expressions. */ |
2697 | static sbitmap *hoist_vbein; |
2698 | static sbitmap *hoist_vbeout; |
2699 | |
2700 | /* ??? We could compute post dominators and run this algorithm in |
2701 | reverse to perform tail merging, doing so would probably be |
2702 | more effective than the tail merging code in jump.cc. |
2703 | |
2704 | It's unclear if tail merging could be run in parallel with |
2705 | code hoisting. It would be nice. */ |
2706 | |
2707 | /* Allocate vars used for code hoisting analysis. */ |
2708 | |
2709 | static void |
2710 | alloc_code_hoist_mem (int n_blocks, int n_exprs) |
2711 | { |
2712 | antloc = sbitmap_vector_alloc (n_blocks, n_exprs); |
2713 | transp = sbitmap_vector_alloc (n_blocks, n_exprs); |
2714 | comp = sbitmap_vector_alloc (n_blocks, n_exprs); |
2715 | |
2716 | hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs); |
2717 | hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs); |
2718 | } |
2719 | |
2720 | /* Free vars used for code hoisting analysis. */ |
2721 | |
2722 | static void |
2723 | free_code_hoist_mem (void) |
2724 | { |
2725 | sbitmap_vector_free (vec: antloc); |
2726 | sbitmap_vector_free (vec: transp); |
2727 | sbitmap_vector_free (vec: comp); |
2728 | |
2729 | sbitmap_vector_free (vec: hoist_vbein); |
2730 | sbitmap_vector_free (vec: hoist_vbeout); |
2731 | |
2732 | free_dominance_info (CDI_DOMINATORS); |
2733 | } |
2734 | |
2735 | /* Compute the very busy expressions at entry/exit from each block. |
2736 | |
2737 | An expression is very busy if all paths from a given point |
2738 | compute the expression. */ |
2739 | |
static void
compute_code_hoist_vbeinout (void)
{
  int changed, passes;
  basic_block bb;

  /* Start from the empty sets; the iteration below only ever grows
     toward the fixpoint.  */
  bitmap_vector_clear (hoist_vbeout, last_basic_block_for_fn (cfun));
  bitmap_vector_clear (hoist_vbein, last_basic_block_for_fn (cfun));

  passes = 0;
  changed = 1;

  /* Iterate the backward dataflow equations until nothing changes.  */
  while (changed)
    {
      changed = 0;

      /* We scan the blocks in the reverse order to speed up
	 the convergence.  */
      FOR_EACH_BB_REVERSE_FN (bb, cfun)
	{
	  /* VBEout of the block just before EXIT is left empty; the
	     intersection over successors is not meaningful there.  */
	  if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
	    {
	      bitmap_intersection_of_succs (hoist_vbeout[bb->index],
					    hoist_vbein, bb);

	      /* Include expressions in VBEout that are calculated
		 in BB and available at its end.  */
	      bitmap_ior (hoist_vbeout[bb->index],
			  hoist_vbeout[bb->index], comp[bb->index]);
	    }

	  /* VBEin = ANTLOC | (VBEout & TRANSP); record whether the set
	     grew so the outer loop knows to iterate again.  */
	  changed |= bitmap_or_and (hoist_vbein[bb->index],
				    antloc[bb->index],
				    hoist_vbeout[bb->index],
				    transp[bb->index]);
	}

      passes++;
    }

  if (dump_file)
    {
      fprintf (stream: dump_file, format: "hoisting vbeinout computation: %d passes\n" , passes);

      FOR_EACH_BB_FN (bb, cfun)
	{
	  fprintf (stream: dump_file, format: "vbein (%d): " , bb->index);
	  dump_bitmap_file (dump_file, hoist_vbein[bb->index]);
	  fprintf (stream: dump_file, format: "vbeout(%d): " , bb->index);
	  dump_bitmap_file (dump_file, hoist_vbeout[bb->index]);
	}
    }
}
2793 | |
2794 | /* Top level routine to do the dataflow analysis needed by code hoisting. */ |
2795 | |
2796 | static void |
2797 | compute_code_hoist_data (void) |
2798 | { |
2799 | compute_local_properties (transp, comp, antloc, table: &expr_hash_table); |
2800 | prune_expressions (pre_p: false); |
2801 | compute_code_hoist_vbeinout (); |
2802 | calculate_dominance_info (CDI_DOMINATORS); |
2803 | if (dump_file) |
2804 | fprintf (stream: dump_file, format: "\n" ); |
2805 | } |
2806 | |
2807 | /* Update register pressure for BB when hoisting an expression from |
2808 | instruction FROM, if live ranges of inputs are shrunk. Also |
2809 | maintain live_in information if live range of register referred |
2810 | in FROM is shrunk. |
2811 | |
2812 | Return 0 if register pressure doesn't change, otherwise return |
2813 | the number by which register pressure is decreased. |
2814 | |
2815 | NOTE: Register pressure won't be increased in this function. */ |
2816 | |
static int
update_bb_reg_pressure (basic_block bb, rtx_insn *from)
{
  rtx dreg;
  rtx_insn *insn;
  basic_block succ_bb;
  df_ref use, op_ref;
  edge succ;
  edge_iterator ei;
  int decreased_pressure = 0;
  int nregs;
  enum reg_class pressure_class;

  /* Check each register used by FROM to see whether hoisting FROM out
     of BB ends that register's live range within BB.  */
  FOR_EACH_INSN_USE (use, from)
    {
      dreg = DF_REF_REAL_REG (use);
      /* The live range of register is shrunk only if it isn't:
	 1. referred on any path from the end of this block to EXIT, or
	 2. referred by insns other than FROM in this block.  */
      FOR_EACH_EDGE (succ, ei, bb->succs)
	{
	  succ_bb = succ->dest;
	  if (succ_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
	    continue;

	  if (bitmap_bit_p (BB_DATA (succ_bb)->live_in, REGNO (dreg)))
	    break;
	}
      /* Non-NULL SUCC means the edge loop broke out early: the register
	 is live into some successor, so its range does not shrink.  */
      if (succ != NULL)
	continue;

      /* Scan the register's use chain for another use inside BB
	 (other than FROM itself); such a use also keeps it live.  */
      op_ref = DF_REG_USE_CHAIN (REGNO (dreg));
      for (; op_ref; op_ref = DF_REF_NEXT_REG (op_ref))
	{
	  if (!DF_REF_INSN_INFO (op_ref))
	    continue;

	  insn = DF_REF_INSN (op_ref);
	  if (BLOCK_FOR_INSN (insn) == bb
	      && NONDEBUG_INSN_P (insn) && insn != from)
	    break;
	}

      pressure_class = get_regno_pressure_class (REGNO (dreg), nregs: &nregs);
      /* Decrease register pressure and update live_in information for
	 this block.  */
      if (!op_ref && pressure_class != NO_REGS)
	{
	  decreased_pressure += nregs;
	  BB_DATA (bb)->max_reg_pressure[pressure_class] -= nregs;
	  bitmap_clear_bit (BB_DATA (bb)->live_in, REGNO (dreg));
	}
    }
  return decreased_pressure;
}
2872 | |
2873 | /* Determine if the expression EXPR should be hoisted to EXPR_BB up in |
   flow graph, if it can reach BB unimpaired.  Stop the search if the
2875 | expression would need to be moved more than DISTANCE instructions. |
2876 | |
2877 | DISTANCE is the number of instructions through which EXPR can be |
2878 | hoisted up in flow graph. |
2879 | |
2880 | BB_SIZE points to an array which contains the number of instructions |
2881 | for each basic block. |
2882 | |
2883 | PRESSURE_CLASS and NREGS are register class and number of hard registers |
2884 | for storing EXPR. |
2885 | |
2886 | HOISTED_BBS points to a bitmap indicating basic blocks through which |
2887 | EXPR is hoisted. |
2888 | |
2889 | FROM is the instruction from which EXPR is hoisted. |
2890 | |
   It's unclear exactly what Muchnick meant by "unimpaired".  It seems
2892 | to me that the expression must either be computed or transparent in |
2893 | *every* block in the path(s) from EXPR_BB to BB. Any other definition |
2894 | would allow the expression to be hoisted out of loops, even if |
2895 | the expression wasn't a loop invariant. |
2896 | |
2897 | Contrast this to reachability for PRE where an expression is |
2898 | considered reachable if *any* path reaches instead of *all* |
2899 | paths. */ |
2900 | |
static bool
should_hoist_expr_to_dom (basic_block expr_bb, struct gcse_expr *expr,
			  basic_block bb, sbitmap visited,
			  HOST_WIDE_INT distance,
			  int *bb_size, enum reg_class pressure_class,
			  int *nregs, bitmap hoisted_bbs, rtx_insn *from)
{
  unsigned int i;
  edge pred;
  edge_iterator ei;
  sbitmap_iterator sbi;
  bool visited_allocated_locally = false;
  int decreased_pressure = 0;

  if (flag_ira_hoist_pressure)
    {
      /* Record old information of basic block BB when it is visited
	 at the first time.  */
      if (!bitmap_bit_p (hoisted_bbs, bb->index))
	{
	  struct bb_data *data = BB_DATA (bb);
	  bitmap_copy (data->backup, data->live_in);
	  data->old_pressure = data->max_reg_pressure[pressure_class];
	}
      decreased_pressure = update_bb_reg_pressure (bb, from);
    }
  /* Terminate the search if distance, for which EXPR is allowed to move,
     is exhausted.  */
  if (distance > 0)
    {
      if (flag_ira_hoist_pressure)
	{
	  /* Prefer to hoist EXPR if register pressure is decreased.  */
	  if (decreased_pressure > *nregs)
	    distance += bb_size[bb->index];
	  /* Let EXPR be hoisted through basic block at no cost if one
	     of following conditions is satisfied:

	     1. The basic block has low register pressure.
	     2. Register pressure won't be increased after hoisting EXPR.

	     Constant expressions are handled conservatively, because
	     hoisting constant expression aggressively results in worse
	     code.  This decision is made by the observation of CSiBE
	     on ARM target, while it has no obvious effect on other
	     targets like x86, x86_64, mips and powerpc.  */
	  else if (CONST_INT_P (expr->expr)
		   || (BB_DATA (bb)->max_reg_pressure[pressure_class]
			 >= ira_class_hard_regs_num[pressure_class]
		       && decreased_pressure < *nregs))
	    distance -= bb_size[bb->index];
	}
      else
	distance -= bb_size[bb->index];

      if (distance <= 0)
	return 0;
    }
  else
    gcc_assert (distance == 0);

  /* Lazily allocate the visited-set on the outermost call; recursive
     calls share the caller's bitmap.  */
  if (visited == NULL)
    {
      visited_allocated_locally = true;
      visited = sbitmap_alloc (last_basic_block_for_fn (cfun));
      bitmap_clear (visited);
    }

  /* Walk every path backwards from BB toward EXPR_BB; every block on
     the way must be transparent for EXPR.  Reaching the entry block
     means EXPR_BB cannot be reached, so hoisting is unsafe.  */
  FOR_EACH_EDGE (pred, ei, bb->preds)
    {
      basic_block pred_bb = pred->src;

      if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	break;
      else if (pred_bb == expr_bb)
	continue;
      else if (bitmap_bit_p (map: visited, bitno: pred_bb->index))
	continue;
      else if (! bitmap_bit_p (map: transp[pred_bb->index], bitno: expr->bitmap_index))
	break;
      /* Not killed.  */
      else
	{
	  bitmap_set_bit (map: visited, bitno: pred_bb->index);
	  if (! should_hoist_expr_to_dom (expr_bb, expr, bb: pred_bb,
					  visited, distance, bb_size,
					  pressure_class, nregs,
					  hoisted_bbs, from))
	    break;
	}
    }
  if (visited_allocated_locally)
    {
      /* If EXPR can be hoisted to expr_bb, record basic blocks through
	 which EXPR is hoisted in hoisted_bbs.  */
      if (flag_ira_hoist_pressure && !pred)
	{
	  /* Record the basic block from which EXPR is hoisted.  */
	  bitmap_set_bit (map: visited, bitno: bb->index);
	  EXECUTE_IF_SET_IN_BITMAP (visited, 0, i, sbi)
	    bitmap_set_bit (hoisted_bbs, i);
	}
      sbitmap_free (map: visited);
    }

  /* PRED is NULL only if every predecessor edge was fully processed,
     i.e. no path broke out of the loop above.  */
  return (pred == NULL);
}
3008 | |
3009 | /* Find occurrence in BB. */ |
3010 | |
3011 | static struct gcse_occr * |
3012 | find_occr_in_bb (struct gcse_occr *occr, basic_block bb) |
3013 | { |
3014 | /* Find the right occurrence of this expression. */ |
3015 | while (occr && BLOCK_FOR_INSN (insn: occr->insn) != bb) |
3016 | occr = occr->next; |
3017 | |
3018 | return occr; |
3019 | } |
3020 | |
3021 | /* Actually perform code hoisting. |
3022 | |
3023 | The code hoisting pass can hoist multiple computations of the same |
3024 | expression along dominated path to a dominating basic block, like |
3025 | from b2/b3 to b1 as depicted below: |
3026 | |
3027 | b1 ------ |
3028 | /\ | |
3029 | / \ | |
3030 | bx by distance |
3031 | / \ | |
3032 | / \ | |
3033 | b2 b3 ------ |
3034 | |
3035 | Unfortunately code hoisting generally extends the live range of an |
3036 | output pseudo register, which increases register pressure and hurts |
3037 | register allocation. To address this issue, an attribute MAX_DISTANCE |
3038 | is computed and attached to each expression. The attribute is computed |
3039 | from rtx cost of the corresponding expression and it's used to control |
3040 | how long the expression can be hoisted up in flow graph. As the |
3041 | expression is hoisted up in flow graph, GCC decreases its DISTANCE |
3042 | and stops the hoist if DISTANCE reaches 0. Code hoisting can decrease |
3043 | register pressure if live ranges of inputs are shrunk. |
3044 | |
3045 | Option "-fira-hoist-pressure" implements register pressure directed |
   hoist based on the method described above.  The rationale is:
3047 | 1. Calculate register pressure for each basic block by reusing IRA |
3048 | facility. |
3049 | 2. When expression is hoisted through one basic block, GCC checks |
3050 | the change of live ranges for inputs/output. The basic block's |
3051 | register pressure will be increased because of extended live |
3052 | range of output. However, register pressure will be decreased |
3053 | if the live ranges of inputs are shrunk. |
3054 | 3. After knowing how hoisting affects register pressure, GCC prefers |
3055 | to hoist the expression if it can decrease register pressure, by |
3056 | increasing DISTANCE of the corresponding expression. |
3057 | 4. If hoisting the expression increases register pressure, GCC checks |
3058 | register pressure of the basic block and decrease DISTANCE only if |
3059 | the register pressure is high. In other words, expression will be |
3060 | hoisted through at no cost if the basic block has low register |
3061 | pressure. |
3062 | 5. Update register pressure information for basic blocks through |
3063 | which expression is hoisted. */ |
3064 | |
static bool
hoist_code (void)
{
  basic_block bb, dominated;
  unsigned int dom_tree_walk_index;
  unsigned int i, j, k;
  struct gcse_expr **index_map;
  struct gcse_expr *expr;
  int *to_bb_head;
  int *bb_size;
  bool changed = false;
  struct bb_data *data;
  /* Basic blocks that have occurrences reachable from BB.  */
  bitmap from_bbs;
  /* Basic blocks through which expr is hoisted.  */
  bitmap hoisted_bbs = NULL;
  bitmap_iterator bi;

  /* Compute a mapping from expression number (`bitmap_index') to
     hash table entry.  */

  index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
  for (i = 0; i < expr_hash_table.size; i++)
    for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
      index_map[expr->bitmap_index] = expr;

  /* Calculate sizes of basic blocks and note how far
     each instruction is from the start of its block.  We then use this
     data to restrict distance an expression can travel.  */

  to_bb_head = XCNEWVEC (int, get_max_uid ());
  bb_size = XCNEWVEC (int, last_basic_block_for_fn (cfun));

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      int to_head;

      to_head = 0;
      FOR_BB_INSNS (bb, insn)
	{
	  /* Don't count debug instructions to avoid them affecting
	     decision choices.  */
	  if (NONDEBUG_INSN_P (insn))
	    to_bb_head[INSN_UID (insn)] = to_head++;
	}

      bb_size[bb->index] = to_head;
    }

  /* The walk below starts at the entry block's single successor; the
     assert checks that assumption holds.  */
  gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) == 1
	      && (EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0)->dest
		  == ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb));

  from_bbs = BITMAP_ALLOC (NULL);
  if (flag_ira_hoist_pressure)
    hoisted_bbs = BITMAP_ALLOC (NULL);

  auto_vec<basic_block> dom_tree_walk
    = get_all_dominated_blocks (CDI_DOMINATORS,
				ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb);

  /* Walk over each basic block looking for potentially hoistable
     expressions, nothing gets hoisted from the entry block.  */
  FOR_EACH_VEC_ELT (dom_tree_walk, dom_tree_walk_index, bb)
    {
      auto_vec<basic_block> domby
	= get_dominated_to_depth (CDI_DOMINATORS, bb, param_max_hoist_depth);

      if (domby.length () == 0)
	continue;

      /* Examine each expression that is very busy at the exit of this
	 block.  These are the potentially hoistable expressions.  */
      for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++)
	{
	  if (bitmap_bit_p (map: hoist_vbeout[bb->index], bitno: i))
	    {
	      int nregs = 0;
	      enum reg_class pressure_class = NO_REGS;
	      /* Current expression.  */
	      struct gcse_expr *expr = index_map[i];
	      /* Number of occurrences of EXPR that can be hoisted to BB.  */
	      int hoistable = 0;
	      /* Occurrences reachable from BB.  */
	      vec<occr_t> occrs_to_hoist = vNULL;
	      /* We want to insert the expression into BB only once, so
		 note when we've inserted it.  */
	      int insn_inserted_p;
	      occr_t occr;

	      /* If an expression is computed in BB and is available at end of
		 BB, hoist all occurrences dominated by BB to BB.  */
	      if (bitmap_bit_p (map: comp[bb->index], bitno: i))
		{
		  occr = find_occr_in_bb (occr: expr->antic_occr, bb);

		  if (occr)
		    {
		      /* An occurrence might've been already deleted
			 while processing a dominator of BB.  */
		      if (!occr->deleted_p)
			{
			  gcc_assert (NONDEBUG_INSN_P (occr->insn));
			  hoistable++;
			}
		    }
		  else
		    hoistable++;
		}

	      /* We've found a potentially hoistable expression, now
		 we look at every block BB dominates to see if it
		 computes the expression.  */
	      FOR_EACH_VEC_ELT (domby, j, dominated)
		{
		  HOST_WIDE_INT max_distance;

		  /* Ignore self dominance.  */
		  if (bb == dominated)
		    continue;
		  /* We've found a dominated block, now see if it computes
		     the busy expression and whether or not moving that
		     expression to the "beginning" of that block is safe.  */
		  if (!bitmap_bit_p (map: antloc[dominated->index], bitno: i))
		    continue;

		  occr = find_occr_in_bb (occr: expr->antic_occr, bb: dominated);
		  gcc_assert (occr);

		  /* An occurrence might've been already deleted
		     while processing a dominator of BB.  */
		  if (occr->deleted_p)
		    continue;
		  gcc_assert (NONDEBUG_INSN_P (occr->insn));

		  max_distance = expr->max_distance;
		  if (max_distance > 0)
		    /* Adjust MAX_DISTANCE to account for the fact that
		       OCCR won't have to travel all of DOMINATED, but
		       only part of it.  */
		    max_distance += (bb_size[dominated->index]
				     - to_bb_head[INSN_UID (insn: occr->insn)]);

		  pressure_class = get_pressure_class_and_nregs (insn: occr->insn,
								 nregs: &nregs);

		  /* Note if the expression should be hoisted from the dominated
		     block to BB if it can reach DOMINATED unimpared.

		     Keep track of how many times this expression is hoistable
		     from a dominated block into BB.  */
		  if (should_hoist_expr_to_dom (expr_bb: bb, expr, bb: dominated, NULL,
						distance: max_distance, bb_size,
						pressure_class, nregs: &nregs,
						hoisted_bbs, from: occr->insn))
		    {
		      hoistable++;
		      occrs_to_hoist.safe_push (obj: occr);
		      bitmap_set_bit (from_bbs, dominated->index);
		    }
		}

	      /* If we found more than one hoistable occurrence of this
		 expression, then note it in the vector of expressions to
		 hoist.  It makes no sense to hoist things which are computed
		 in only one BB, and doing so tends to pessimize register
		 allocation.  One could increase this value to try harder
		 to avoid any possible code expansion due to register
		 allocation issues; however experiments have shown that
		 the vast majority of hoistable expressions are only movable
		 from two successors, so raising this threshold is likely
		 to nullify any benefit we get from code hoisting.  */
	      if (hoistable > 1 && dbg_cnt (index: hoist_insn))
		{
		  /* If (hoistable != vec::length), then there is
		     an occurrence of EXPR in BB itself.  Don't waste
		     time looking for LCA in this case.  */
		  if ((unsigned) hoistable == occrs_to_hoist.length ())
		    {
		      basic_block lca;

		      lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
							      from_bbs);
		      if (lca != bb)
			/* Punt, it's better to hoist these occurrences to
			   LCA.  */
			occrs_to_hoist.release ();
		    }
		}
	      else
		/* Punt, no point hoisting a single occurrence.  */
		occrs_to_hoist.release ();

	      if (flag_ira_hoist_pressure
		  && !occrs_to_hoist.is_empty ())
		{
		  /* Increase register pressure of basic blocks to which
		     expr is hoisted because of extended live range of
		     output.  */
		  data = BB_DATA (bb);
		  data->max_reg_pressure[pressure_class] += nregs;
		  EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
		    {
		      data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
		      data->max_reg_pressure[pressure_class] += nregs;
		    }
		}
	      else if (flag_ira_hoist_pressure)
		{
		  /* Restore register pressure and live_in info for basic
		     blocks recorded in hoisted_bbs when expr will not be
		     hoisted.  */
		  EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
		    {
		      data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
		      bitmap_copy (data->live_in, data->backup);
		      data->max_reg_pressure[pressure_class]
			  = data->old_pressure;
		    }
		}

	      if (flag_ira_hoist_pressure)
		bitmap_clear (hoisted_bbs);

	      insn_inserted_p = 0;

	      /* Walk through occurrences of I'th expressions we want
		 to hoist to BB and make the transformations.  */
	      FOR_EACH_VEC_ELT (occrs_to_hoist, j, occr)
		{
		  rtx_insn *insn;
		  const_rtx set;

		  gcc_assert (!occr->deleted_p);

		  insn = occr->insn;
		  set = single_set_gcse (insn);

		  /* Create a pseudo-reg to store the result of reaching
		     expressions into.  Get the mode for the new pseudo
		     from the mode of the original destination pseudo.

		     It is important to use new pseudos whenever we
		     emit a set.  This will allow reload to use
		     rematerialization for such registers.  */
		  if (!insn_inserted_p)
		    expr->reaching_reg
		      = gen_reg_rtx_and_attrs (SET_DEST (set));

		  /* Replace the redundant computation with a copy from
		     the pseudo holding the hoisted expression.  */
		  gcse_emit_move_after (SET_DEST (set), src: expr->reaching_reg,
					insn);
		  delete_insn (insn);
		  occr->deleted_p = 1;
		  changed = true;
		  gcse_subst_count++;

		  /* Materialize the hoisted expression at the end of BB
		     the first time around only.  */
		  if (!insn_inserted_p)
		    {
		      insert_insn_end_basic_block (expr, bb);
		      insn_inserted_p = 1;
		    }
		}

	      occrs_to_hoist.release ();
	      bitmap_clear (from_bbs);
	    }
	}
    }

  BITMAP_FREE (from_bbs);
  if (flag_ira_hoist_pressure)
    BITMAP_FREE (hoisted_bbs);

  free (ptr: bb_size);
  free (ptr: to_bb_head);
  free (ptr: index_map);

  return changed;
}
3345 | |
3346 | /* Return pressure class and number of needed hard registers (through |
3347 | *NREGS) of register REGNO. */ |
3348 | static enum reg_class |
3349 | get_regno_pressure_class (int regno, int *nregs) |
3350 | { |
3351 | if (regno >= FIRST_PSEUDO_REGISTER) |
3352 | { |
3353 | enum reg_class pressure_class; |
3354 | |
3355 | pressure_class = reg_allocno_class (regno); |
3356 | pressure_class = ira_pressure_class_translate[pressure_class]; |
3357 | *nregs |
3358 | = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)]; |
3359 | return pressure_class; |
3360 | } |
3361 | else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, bit: regno) |
3362 | && ! TEST_HARD_REG_BIT (set: eliminable_regset, bit: regno)) |
3363 | { |
3364 | *nregs = 1; |
3365 | return ira_pressure_class_translate[REGNO_REG_CLASS (regno)]; |
3366 | } |
3367 | else |
3368 | { |
3369 | *nregs = 0; |
3370 | return NO_REGS; |
3371 | } |
3372 | } |
3373 | |
3374 | /* Return pressure class and number of hard registers (through *NREGS) |
3375 | for destination of INSN. */ |
3376 | static enum reg_class |
3377 | get_pressure_class_and_nregs (rtx_insn *insn, int *nregs) |
3378 | { |
3379 | rtx reg; |
3380 | enum reg_class pressure_class; |
3381 | const_rtx set = single_set_gcse (insn); |
3382 | |
3383 | reg = SET_DEST (set); |
3384 | if (GET_CODE (reg) == SUBREG) |
3385 | reg = SUBREG_REG (reg); |
3386 | if (MEM_P (reg)) |
3387 | { |
3388 | *nregs = 0; |
3389 | pressure_class = NO_REGS; |
3390 | } |
3391 | else |
3392 | { |
3393 | gcc_assert (REG_P (reg)); |
3394 | pressure_class = reg_allocno_class (REGNO (reg)); |
3395 | pressure_class = ira_pressure_class_translate[pressure_class]; |
3396 | *nregs |
3397 | = ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))]; |
3398 | } |
3399 | return pressure_class; |
3400 | } |
3401 | |
3402 | /* Increase (if INCR_P) or decrease current register pressure for |
3403 | register REGNO. */ |
3404 | static void |
3405 | change_pressure (int regno, bool incr_p) |
3406 | { |
3407 | int nregs; |
3408 | enum reg_class pressure_class; |
3409 | |
3410 | pressure_class = get_regno_pressure_class (regno, nregs: &nregs); |
3411 | if (! incr_p) |
3412 | curr_reg_pressure[pressure_class] -= nregs; |
3413 | else |
3414 | { |
3415 | curr_reg_pressure[pressure_class] += nregs; |
3416 | if (BB_DATA (curr_bb)->max_reg_pressure[pressure_class] |
3417 | < curr_reg_pressure[pressure_class]) |
3418 | BB_DATA (curr_bb)->max_reg_pressure[pressure_class] |
3419 | = curr_reg_pressure[pressure_class]; |
3420 | } |
3421 | } |
3422 | |
3423 | /* Calculate register pressure for each basic block by walking insns |
3424 | from last to first. */ |
3425 | static void |
3426 | calculate_bb_reg_pressure (void) |
3427 | { |
3428 | int i; |
3429 | unsigned int j; |
3430 | rtx_insn *insn; |
3431 | basic_block bb; |
3432 | bitmap curr_regs_live; |
3433 | bitmap_iterator bi; |
3434 | |
3435 | |
3436 | ira_setup_eliminable_regset (); |
3437 | curr_regs_live = BITMAP_ALLOC (obstack: ®_obstack); |
3438 | FOR_EACH_BB_FN (bb, cfun) |
3439 | { |
3440 | curr_bb = bb; |
3441 | BB_DATA (bb)->live_in = BITMAP_ALLOC (NULL); |
3442 | BB_DATA (bb)->backup = BITMAP_ALLOC (NULL); |
3443 | bitmap_copy (BB_DATA (bb)->live_in, df_get_live_in (bb)); |
3444 | bitmap_copy (curr_regs_live, df_get_live_out (bb)); |
3445 | for (i = 0; i < ira_pressure_classes_num; i++) |
3446 | curr_reg_pressure[ira_pressure_classes[i]] = 0; |
3447 | EXECUTE_IF_SET_IN_BITMAP (curr_regs_live, 0, j, bi) |
3448 | change_pressure (regno: j, incr_p: true); |
3449 | |
3450 | FOR_BB_INSNS_REVERSE (bb, insn) |
3451 | { |
3452 | rtx dreg; |
3453 | int regno; |
3454 | df_ref def, use; |
3455 | |
3456 | if (! NONDEBUG_INSN_P (insn)) |
3457 | continue; |
3458 | |
3459 | FOR_EACH_INSN_DEF (def, insn) |
3460 | { |
3461 | dreg = DF_REF_REAL_REG (def); |
3462 | gcc_assert (REG_P (dreg)); |
3463 | regno = REGNO (dreg); |
3464 | if (!(DF_REF_FLAGS (def) |
3465 | & (DF_REF_PARTIAL | DF_REF_CONDITIONAL))) |
3466 | { |
3467 | if (bitmap_clear_bit (curr_regs_live, regno)) |
3468 | change_pressure (regno, incr_p: false); |
3469 | } |
3470 | } |
3471 | |
3472 | FOR_EACH_INSN_USE (use, insn) |
3473 | { |
3474 | dreg = DF_REF_REAL_REG (use); |
3475 | gcc_assert (REG_P (dreg)); |
3476 | regno = REGNO (dreg); |
3477 | if (bitmap_set_bit (curr_regs_live, regno)) |
3478 | change_pressure (regno, incr_p: true); |
3479 | } |
3480 | } |
3481 | } |
3482 | BITMAP_FREE (curr_regs_live); |
3483 | |
3484 | if (dump_file == NULL) |
3485 | return; |
3486 | |
3487 | fprintf (stream: dump_file, format: "\nRegister Pressure: \n" ); |
3488 | FOR_EACH_BB_FN (bb, cfun) |
3489 | { |
3490 | fprintf (stream: dump_file, format: " Basic block %d: \n" , bb->index); |
3491 | for (i = 0; (int) i < ira_pressure_classes_num; i++) |
3492 | { |
3493 | enum reg_class pressure_class; |
3494 | |
3495 | pressure_class = ira_pressure_classes[i]; |
3496 | if (BB_DATA (bb)->max_reg_pressure[pressure_class] == 0) |
3497 | continue; |
3498 | |
3499 | fprintf (stream: dump_file, format: " %s=%d\n" , reg_class_names[pressure_class], |
3500 | BB_DATA (bb)->max_reg_pressure[pressure_class]); |
3501 | } |
3502 | } |
3503 | fprintf (stream: dump_file, format: "\n" ); |
3504 | } |
3505 | |
3506 | /* Top level routine to perform one code hoisting (aka unification) pass |
3507 | |
3508 | Return true if a change was made. */ |
3509 | |
3510 | static bool |
3511 | one_code_hoisting_pass (void) |
3512 | { |
3513 | bool changed = false; |
3514 | |
3515 | gcse_subst_count = 0; |
3516 | gcse_create_count = 0; |
3517 | |
3518 | /* Return if there's nothing to do, or it is too expensive. */ |
3519 | if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1 |
3520 | || gcse_or_cprop_is_too_expensive (_("GCSE disabled" ))) |
3521 | return false; |
3522 | |
3523 | doing_code_hoisting_p = true; |
3524 | |
3525 | /* Calculate register pressure for each basic block. */ |
3526 | if (flag_ira_hoist_pressure) |
3527 | { |
3528 | regstat_init_n_sets_and_refs (); |
3529 | ira_set_pseudo_classes (false, dump_file); |
3530 | alloc_aux_for_blocks (sizeof (struct bb_data)); |
3531 | calculate_bb_reg_pressure (); |
3532 | regstat_free_n_sets_and_refs (); |
3533 | } |
3534 | |
3535 | /* We need alias. */ |
3536 | init_alias_analysis (); |
3537 | |
3538 | bytes_used = 0; |
3539 | gcc_obstack_init (&gcse_obstack); |
3540 | alloc_gcse_mem (); |
3541 | |
3542 | alloc_hash_table (table: &expr_hash_table); |
3543 | compute_hash_table (table: &expr_hash_table); |
3544 | if (dump_file) |
3545 | dump_hash_table (file: dump_file, name: "Code Hosting Expressions" , table: &expr_hash_table); |
3546 | |
3547 | if (expr_hash_table.n_elems > 0) |
3548 | { |
3549 | alloc_code_hoist_mem (last_basic_block_for_fn (cfun), |
3550 | n_exprs: expr_hash_table.n_elems); |
3551 | compute_code_hoist_data (); |
3552 | changed = hoist_code (); |
3553 | free_code_hoist_mem (); |
3554 | } |
3555 | |
3556 | if (flag_ira_hoist_pressure) |
3557 | { |
3558 | free_aux_for_blocks (); |
3559 | free_reg_info (); |
3560 | } |
3561 | free_hash_table (table: &expr_hash_table); |
3562 | free_gcse_mem (); |
3563 | obstack_free (&gcse_obstack, NULL); |
3564 | |
3565 | /* We are finished with alias. */ |
3566 | end_alias_analysis (); |
3567 | |
3568 | if (dump_file) |
3569 | { |
3570 | fprintf (stream: dump_file, format: "HOIST of %s, %d basic blocks, %d bytes needed, " , |
3571 | current_function_name (), n_basic_blocks_for_fn (cfun), |
3572 | bytes_used); |
3573 | fprintf (stream: dump_file, format: "%d substs, %d insns created\n" , |
3574 | gcse_subst_count, gcse_create_count); |
3575 | } |
3576 | |
3577 | doing_code_hoisting_p = false; |
3578 | |
3579 | return changed; |
3580 | } |
3581 | |
3582 | /* Here we provide the things required to do store motion towards the exit. |
3583 | In order for this to be effective, gcse also needed to be taught how to |
3584 | move a load when it is killed only by a store to itself. |
3585 | |
3586 | int i; |
3587 | float a[10]; |
3588 | |
3589 | void foo(float scale) |
3590 | { |
3591 | for (i=0; i<10; i++) |
3592 | a[i] *= scale; |
3593 | } |
3594 | |
   'i' is both loaded and stored to in the loop.  Normally, gcse cannot move
   the load out since it's live around the loop and is stored at the bottom
   of the loop.
3598 | |
3599 | The 'Load Motion' referred to and implemented in this file is |
3600 | an enhancement to gcse which when using edge based LCM, recognizes |
3601 | this situation and allows gcse to move the load out of the loop. |
3602 | |
3603 | Once gcse has hoisted the load, store motion can then push this |
3604 | load towards the exit, and we end up with no loads or stores of 'i' |
3605 | in the loop. */ |
3606 | |
3607 | /* This will search the ldst list for a matching expression. If it |
3608 | doesn't find one, we create one and initialize it. */ |
3609 | |
3610 | static struct ls_expr * |
3611 | ldst_entry (rtx x) |
3612 | { |
3613 | int do_not_record_p = 0; |
3614 | struct ls_expr * ptr; |
3615 | unsigned int hash; |
3616 | ls_expr **slot; |
3617 | struct ls_expr e; |
3618 | |
3619 | hash = hash_rtx (x, GET_MODE (x), &do_not_record_p, |
3620 | NULL, /*have_reg_qty=*/false); |
3621 | |
3622 | e.pattern = x; |
3623 | slot = pre_ldst_table->find_slot_with_hash (comparable: &e, hash, insert: INSERT); |
3624 | if (*slot) |
3625 | return *slot; |
3626 | |
3627 | ptr = XNEW (struct ls_expr); |
3628 | |
3629 | ptr->next = pre_ldst_mems; |
3630 | ptr->expr = NULL; |
3631 | ptr->pattern = x; |
3632 | ptr->pattern_regs = NULL_RTX; |
3633 | ptr->stores.create (nelems: 0); |
3634 | ptr->reaching_reg = NULL_RTX; |
3635 | ptr->invalid = 0; |
3636 | ptr->index = 0; |
3637 | ptr->hash_index = hash; |
3638 | pre_ldst_mems = ptr; |
3639 | *slot = ptr; |
3640 | |
3641 | return ptr; |
3642 | } |
3643 | |
3644 | /* Free up an individual ldst entry. */ |
3645 | |
3646 | static void |
3647 | free_ldst_entry (struct ls_expr * ptr) |
3648 | { |
3649 | ptr->stores.release (); |
3650 | |
3651 | free (ptr: ptr); |
3652 | } |
3653 | |
3654 | /* Free up all memory associated with the ldst list. */ |
3655 | |
3656 | static void |
3657 | free_ld_motion_mems (void) |
3658 | { |
3659 | delete pre_ldst_table; |
3660 | pre_ldst_table = NULL; |
3661 | |
3662 | while (pre_ldst_mems) |
3663 | { |
3664 | struct ls_expr * tmp = pre_ldst_mems; |
3665 | |
3666 | pre_ldst_mems = pre_ldst_mems->next; |
3667 | |
3668 | free_ldst_entry (ptr: tmp); |
3669 | } |
3670 | |
3671 | pre_ldst_mems = NULL; |
3672 | } |
3673 | |
3674 | /* Dump debugging info about the ldst list. */ |
3675 | |
3676 | static void |
3677 | print_ldst_list (FILE * file) |
3678 | { |
3679 | struct ls_expr * ptr; |
3680 | |
3681 | fprintf (stream: file, format: "LDST list: \n" ); |
3682 | |
3683 | for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next) |
3684 | { |
3685 | fprintf (stream: file, format: " Pattern (%3d): " , ptr->index); |
3686 | |
3687 | print_rtl (file, ptr->pattern); |
3688 | |
3689 | fprintf (stream: file, format: "\n Stores : " ); |
3690 | print_rtx_insn_vec (file, vec: ptr->stores); |
3691 | |
3692 | fprintf (stream: file, format: "\n\n" ); |
3693 | } |
3694 | |
3695 | fprintf (stream: file, format: "\n" ); |
3696 | } |
3697 | |
/* Return the ldst list entry matching X if one exists and has not been
   invalidated, otherwise NULL.  */
3699 | |
3700 | static struct ls_expr * |
3701 | find_rtx_in_ldst (rtx x) |
3702 | { |
3703 | struct ls_expr e; |
3704 | ls_expr **slot; |
3705 | if (!pre_ldst_table) |
3706 | return NULL; |
3707 | e.pattern = x; |
3708 | slot = pre_ldst_table->find_slot (value: &e, insert: NO_INSERT); |
3709 | if (!slot || (*slot)->invalid) |
3710 | return NULL; |
3711 | return *slot; |
3712 | } |
3713 | |
3714 | /* Load Motion for loads which only kill themselves. */ |
3715 | |
3716 | /* Return true if x, a MEM, is a simple access with no side effects. |
3717 | These are the types of loads we consider for the ld_motion list, |
3718 | otherwise we let the usual aliasing take care of it. */ |
3719 | |
3720 | static bool |
3721 | simple_mem (const_rtx x) |
3722 | { |
3723 | if (MEM_VOLATILE_P (x)) |
3724 | return false; |
3725 | |
3726 | if (GET_MODE (x) == BLKmode) |
3727 | return false; |
3728 | |
3729 | /* If we are handling exceptions, we must be careful with memory references |
3730 | that may trap. If we are not, the behavior is undefined, so we may just |
3731 | continue. */ |
3732 | if (cfun->can_throw_non_call_exceptions && may_trap_p (x)) |
3733 | return false; |
3734 | |
3735 | if (side_effects_p (x)) |
3736 | return false; |
3737 | |
3738 | /* Do not consider function arguments passed on stack. */ |
3739 | if (reg_mentioned_p (stack_pointer_rtx, x)) |
3740 | return false; |
3741 | |
3742 | if (flag_float_store && FLOAT_MODE_P (GET_MODE (x))) |
3743 | return false; |
3744 | |
3745 | return true; |
3746 | } |
3747 | |
3748 | /* Make sure there isn't a buried reference in this pattern anywhere. |
3749 | If there is, invalidate the entry for it since we're not capable |
3750 | of fixing it up just yet.. We have to be sure we know about ALL |
3751 | loads since the aliasing code will allow all entries in the |
3752 | ld_motion list to not-alias itself. If we miss a load, we will get |
3753 | the wrong value since gcse might common it and we won't know to |
3754 | fix it up. */ |
3755 | |
3756 | static void |
3757 | invalidate_any_buried_refs (rtx x) |
3758 | { |
3759 | const char * fmt; |
3760 | int i, j; |
3761 | struct ls_expr * ptr; |
3762 | |
3763 | /* Invalidate it in the list. */ |
3764 | if (MEM_P (x) && simple_mem (x)) |
3765 | { |
3766 | ptr = ldst_entry (x); |
3767 | ptr->invalid = 1; |
3768 | } |
3769 | |
3770 | /* Recursively process the insn. */ |
3771 | fmt = GET_RTX_FORMAT (GET_CODE (x)); |
3772 | |
3773 | for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) |
3774 | { |
3775 | if (fmt[i] == 'e') |
3776 | invalidate_any_buried_refs (XEXP (x, i)); |
3777 | else if (fmt[i] == 'E') |
3778 | for (j = XVECLEN (x, i) - 1; j >= 0; j--) |
3779 | invalidate_any_buried_refs (XVECEXP (x, i, j)); |
3780 | } |
3781 | } |
3782 | |
/* Find all the 'simple' MEMs which are used in LOADs and STORES.  Simple
   being defined as MEM loads and stores to symbols, with no side effects
   and no registers in the expression.  For a MEM destination, we also
   check that the insn is still valid if we replace the destination with a
   REG, as is done in update_ld_motion_stores.  If there are any uses/defs
   which don't match this criteria, they are invalidated and trimmed out
   later.  */

static void
compute_ld_motion_mems (void)
{
  struct ls_expr * ptr;
  basic_block bb;
  rtx_insn *insn;

  /* Start with an empty ldst list and a fresh hash table.  */
  pre_ldst_mems = NULL;
  pre_ldst_table = new hash_table<pre_ldst_expr_hasher> (13);

  FOR_EACH_BB_FN (bb, cfun)
    {
      FOR_BB_INSNS (bb, insn)
	{
	  if (NONDEBUG_INSN_P (insn))
	    {
	      if (GET_CODE (PATTERN (insn)) == SET)
		{
		  rtx src = SET_SRC (PATTERN (insn));
		  rtx dest = SET_DEST (PATTERN (insn));

		  /* Check for a simple load.  Only a load whose
		     destination is a plain REG stays usable.  */
		  if (MEM_P (src) && simple_mem (x: src))
		    {
		      ptr = ldst_entry (x: src);
		      if (!REG_P (dest))
			ptr->invalid = 1;
		    }
		  else
		    {
		      /* Make sure there isn't a buried load somewhere.  */
		      invalidate_any_buried_refs (x: src);
		    }

		  /* Check for a simple load through a REG_EQUAL note.  */
		  rtx note = find_reg_equal_equiv_note (insn), src_eq;
		  if (note
		      && REG_NOTE_KIND (note) == REG_EQUAL
		      && (src_eq = XEXP (note, 0))
		      && !(MEM_P (src_eq) && simple_mem (x: src_eq)))
		    invalidate_any_buried_refs (x: src_eq);

		  /* Check for stores. Don't worry about aliased ones, they
		     will block any movement we might do later. We only care
		     about this exact pattern since those are the only
		     circumstance that we will ignore the aliasing info.  */
		  if (MEM_P (dest) && simple_mem (x: dest))
		    {
		      ptr = ldst_entry (x: dest);
		      machine_mode src_mode = GET_MODE (src);
		      /* The store is only usable if SRC could also be
			 assigned to a register, since update_ld_motion_stores
			 rewrites it as (set reg src) (set mem reg).  */
		      if (! MEM_P (src)
			  && GET_CODE (src) != ASM_OPERANDS
			  /* Check for REG manually since want_to_gcse_p
			     returns 0 for all REGs.  */
			  && can_assign_to_reg_without_clobbers_p (x: src,
								   mode: src_mode))
			ptr->stores.safe_push (obj: insn);
		      else
			ptr->invalid = 1;
		    }
		}
	      else
		{
		  /* Invalidate all MEMs in the pattern and...  */
		  invalidate_any_buried_refs (x: PATTERN (insn));

		  /* ...in REG_EQUAL notes for PARALLELs with single SET.  */
		  rtx note = find_reg_equal_equiv_note (insn), src_eq;
		  if (note
		      && REG_NOTE_KIND (note) == REG_EQUAL
		      && (src_eq = XEXP (note, 0)))
		    invalidate_any_buried_refs (x: src_eq);
		}
	    }
	}
    }
}
3868 | |
/* Remove any references that have been either invalidated or are not in the
   expression list for pre gcse.  */

static void
trim_ld_motion_mems (void)
{
  /* LAST points at the link that reaches PTR, so unlinking PTR is a
     single store through LAST.  */
  struct ls_expr * * last = & pre_ldst_mems;
  struct ls_expr * ptr = pre_ldst_mems;

  while (ptr != NULL)
    {
      struct gcse_expr * expr;

      /* Delete if entry has been made invalid.  */
      if (! ptr->invalid)
	{
	  /* Delete if we cannot find this mem in the expression list.  */
	  unsigned int hash = ptr->hash_index % expr_hash_table.size;

	  /* Scan the hash bucket for a GCSE expression equivalent to
	     PTR's pattern.  */
	  for (expr = expr_hash_table.table[hash];
	       expr != NULL;
	       expr = expr->next_same_hash)
	    if (expr_equiv_p (x: expr->expr, y: ptr->pattern))
	      break;
	}
      else
	expr = (struct gcse_expr *) 0;

      if (expr)
	{
	  /* Set the expression field if we are keeping it.  */
	  ptr->expr = expr;
	  last = & ptr->next;
	  ptr = ptr->next;
	}
      else
	{
	  /* Unlink PTR from the list, drop it from the hash table, and
	     release its memory; LAST stays where it is.  */
	  *last = ptr->next;
	  pre_ldst_table->remove_elt_with_hash (comparable: ptr, hash: ptr->hash_index);
	  free_ldst_entry (ptr);
	  ptr = * last;
	}
    }

  /* Show the world what we've found.  */
  if (dump_file && pre_ldst_mems != NULL)
    print_ldst_list (file: dump_file);
}
3917 | |
/* This routine will take an expression which we are replacing with
   a reaching register, and update any stores that are needed if
   that expression is in the ld_motion list.  Stores are updated by
   copying their SRC to the reaching register, and then storing
   the reaching register into the store location.  This keeps the
   correct value in the reaching register for the loads.  */

static void
update_ld_motion_stores (struct gcse_expr * expr)
{
  struct ls_expr * mem_ptr;

  if ((mem_ptr = find_rtx_in_ldst (x: expr->expr)))
    {
      /* We can try to find just the REACHED stores, but it shouldn't
	 matter to set the reaching reg everywhere...  some might be
	 dead and should be eliminated later.  */

      /* We replace (set mem expr) with (set reg expr) (set mem reg)
	 where reg is the reaching reg used in the load.  We checked in
	 compute_ld_motion_mems that we can replace (set mem expr) with
	 (set reg expr) in that insn.  */
      rtx_insn *insn;
      unsigned int i;
      FOR_EACH_VEC_ELT_REVERSE (mem_ptr->stores, i, insn)
	{
	  rtx pat = PATTERN (insn);
	  rtx src = SET_SRC (pat);
	  rtx reg = expr->reaching_reg;

	  /* If we've already copied it, continue.  */
	  if (expr->reaching_reg == src)
	    continue;

	  if (dump_file)
	    {
	      fprintf (stream: dump_file, format: "PRE:  store updated with reaching reg ");
	      print_rtl (dump_file, reg);
	      fprintf (stream: dump_file, format: ":\n	");
	      print_inline_rtx (dump_file, insn, 8);
	      fprintf (stream: dump_file, format: "\n");
	    }

	  /* Emit a copy of SRC into the reaching register just before
	     the store, then make the store read from that register.  */
	  rtx_insn *copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
	  emit_insn_before (copy, insn);
	  SET_SRC (pat) = reg;
	  df_insn_rescan (insn);

	  /* un-recognize this pattern since it's probably different now.  */
	  INSN_CODE (insn) = -1;
	  gcse_create_count++;
	}
    }
}
3972 | |
3973 | /* Return true if the graph is too expensive to optimize. PASS is the |
3974 | optimization about to be performed. */ |
3975 | |
3976 | bool |
3977 | gcse_or_cprop_is_too_expensive (const char *pass) |
3978 | { |
3979 | unsigned HOST_WIDE_INT memory_request |
3980 | = ((unsigned HOST_WIDE_INT)n_basic_blocks_for_fn (cfun) |
3981 | * SBITMAP_SET_SIZE (max_reg_num ()) * sizeof (SBITMAP_ELT_TYPE)); |
3982 | |
3983 | /* Trying to perform global optimizations on flow graphs which have |
3984 | a high connectivity will take a long time and is unlikely to be |
3985 | particularly useful. |
3986 | |
3987 | In normal circumstances a cfg should have about twice as many |
3988 | edges as blocks. But we do not want to punish small functions |
3989 | which have a couple switch statements. Rather than simply |
3990 | threshold the number of blocks, uses something with a more |
3991 | graceful degradation. */ |
3992 | if (n_edges_for_fn (cfun) > 20000 + n_basic_blocks_for_fn (cfun) * 4) |
3993 | { |
3994 | warning (OPT_Wdisabled_optimization, |
3995 | "%s: %d basic blocks and %d edges/basic block" , |
3996 | pass, n_basic_blocks_for_fn (cfun), |
3997 | n_edges_for_fn (cfun) / n_basic_blocks_for_fn (cfun)); |
3998 | |
3999 | return true; |
4000 | } |
4001 | |
4002 | /* If allocating memory for the dataflow bitmaps would take up too much |
4003 | storage it's better just to disable the optimization. */ |
4004 | if (memory_request / 1024 > (unsigned HOST_WIDE_INT)param_max_gcse_memory) |
4005 | { |
4006 | warning (OPT_Wdisabled_optimization, |
4007 | "%s: %d basic blocks and %d registers; " |
4008 | "increase %<--param max-gcse-memory%> above %wu" , |
4009 | pass, n_basic_blocks_for_fn (cfun), max_reg_num (), |
4010 | memory_request / 1024); |
4011 | |
4012 | return true; |
4013 | } |
4014 | |
4015 | return false; |
4016 | } |
4017 | |
4018 | static unsigned int |
4019 | execute_rtl_pre (void) |
4020 | { |
4021 | int changed; |
4022 | delete_unreachable_blocks (); |
4023 | df_analyze (); |
4024 | changed = one_pre_gcse_pass (); |
4025 | flag_rerun_cse_after_global_opts |= changed; |
4026 | if (changed) |
4027 | cleanup_cfg (0); |
4028 | return 0; |
4029 | } |
4030 | |
4031 | static unsigned int |
4032 | execute_rtl_hoist (void) |
4033 | { |
4034 | int changed; |
4035 | delete_unreachable_blocks (); |
4036 | df_analyze (); |
4037 | changed = one_code_hoisting_pass (); |
4038 | flag_rerun_cse_after_global_opts |= changed; |
4039 | if (changed) |
4040 | cleanup_cfg (0); |
4041 | return 0; |
4042 | } |
4043 | |
4044 | namespace { |
4045 | |
4046 | const pass_data pass_data_rtl_pre = |
4047 | { |
4048 | .type: RTL_PASS, /* type */ |
4049 | .name: "rtl pre" , /* name */ |
4050 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
4051 | .tv_id: TV_PRE, /* tv_id */ |
4052 | PROP_cfglayout, /* properties_required */ |
4053 | .properties_provided: 0, /* properties_provided */ |
4054 | .properties_destroyed: 0, /* properties_destroyed */ |
4055 | .todo_flags_start: 0, /* todo_flags_start */ |
4056 | TODO_df_finish, /* todo_flags_finish */ |
4057 | }; |
4058 | |
4059 | class pass_rtl_pre : public rtl_opt_pass |
4060 | { |
4061 | public: |
4062 | pass_rtl_pre (gcc::context *ctxt) |
4063 | : rtl_opt_pass (pass_data_rtl_pre, ctxt) |
4064 | {} |
4065 | |
4066 | /* opt_pass methods: */ |
4067 | bool gate (function *) final override; |
4068 | unsigned int execute (function *) final override |
4069 | { |
4070 | return execute_rtl_pre (); |
4071 | } |
4072 | |
4073 | }; // class pass_rtl_pre |
4074 | |
4075 | /* We do not construct an accurate cfg in functions which call |
4076 | setjmp, so none of these passes runs if the function calls |
4077 | setjmp. |
4078 | FIXME: Should just handle setjmp via REG_SETJMP notes. */ |
4079 | |
4080 | bool |
4081 | pass_rtl_pre::gate (function *fun) |
4082 | { |
4083 | return optimize > 0 && flag_gcse |
4084 | && !fun->calls_setjmp |
4085 | && optimize_function_for_speed_p (fun) |
4086 | && dbg_cnt (index: pre); |
4087 | } |
4088 | |
4089 | } // anon namespace |
4090 | |
4091 | rtl_opt_pass * |
4092 | make_pass_rtl_pre (gcc::context *ctxt) |
4093 | { |
4094 | return new pass_rtl_pre (ctxt); |
4095 | } |
4096 | |
4097 | namespace { |
4098 | |
4099 | const pass_data pass_data_rtl_hoist = |
4100 | { |
4101 | .type: RTL_PASS, /* type */ |
4102 | .name: "hoist" , /* name */ |
4103 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
4104 | .tv_id: TV_HOIST, /* tv_id */ |
4105 | PROP_cfglayout, /* properties_required */ |
4106 | .properties_provided: 0, /* properties_provided */ |
4107 | .properties_destroyed: 0, /* properties_destroyed */ |
4108 | .todo_flags_start: 0, /* todo_flags_start */ |
4109 | TODO_df_finish, /* todo_flags_finish */ |
4110 | }; |
4111 | |
4112 | class pass_rtl_hoist : public rtl_opt_pass |
4113 | { |
4114 | public: |
4115 | pass_rtl_hoist (gcc::context *ctxt) |
4116 | : rtl_opt_pass (pass_data_rtl_hoist, ctxt) |
4117 | {} |
4118 | |
4119 | /* opt_pass methods: */ |
4120 | bool gate (function *) final override; |
4121 | unsigned int execute (function *) final override |
4122 | { |
4123 | return execute_rtl_hoist (); |
4124 | } |
4125 | |
4126 | }; // class pass_rtl_hoist |
4127 | |
4128 | bool |
4129 | pass_rtl_hoist::gate (function *) |
4130 | { |
4131 | return optimize > 0 && flag_gcse |
4132 | && !cfun->calls_setjmp |
4133 | /* It does not make sense to run code hoisting unless we are optimizing |
4134 | for code size -- it rarely makes programs faster, and can make then |
4135 | bigger if we did PRE (when optimizing for space, we don't run PRE). */ |
4136 | && optimize_function_for_size_p (cfun) |
4137 | && dbg_cnt (index: hoist); |
4138 | } |
4139 | |
4140 | } // anon namespace |
4141 | |
4142 | rtl_opt_pass * |
4143 | make_pass_rtl_hoist (gcc::context *ctxt) |
4144 | { |
4145 | return new pass_rtl_hoist (ctxt); |
4146 | } |
4147 | |
4148 | /* Reset all state within gcse.cc so that we can rerun the compiler |
4149 | within the same process. For use by toplev::finalize. */ |
4150 | |
4151 | void |
4152 | gcse_cc_finalize (void) |
4153 | { |
4154 | test_insn = NULL; |
4155 | } |
4156 | |
4157 | #include "gt-gcse.h" |
4158 | |