1 | /* Analysis used by inlining decision heuristics. |
2 | Copyright (C) 2003-2023 Free Software Foundation, Inc. |
3 | Contributed by Jan Hubicka |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free |
9 | Software Foundation; either version 3, or (at your option) any later |
10 | version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
15 | for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | #include "config.h" |
22 | #include "system.h" |
23 | #include "coretypes.h" |
24 | #include "backend.h" |
25 | #include "tree.h" |
26 | #include "gimple.h" |
27 | #include "alloc-pool.h" |
28 | #include "tree-pass.h" |
29 | #include "ssa.h" |
30 | #include "tree-streamer.h" |
31 | #include "cgraph.h" |
32 | #include "diagnostic.h" |
33 | #include "fold-const.h" |
34 | #include "print-tree.h" |
35 | #include "tree-inline.h" |
36 | #include "gimple-pretty-print.h" |
37 | #include "cfganal.h" |
38 | #include "gimple-iterator.h" |
39 | #include "tree-cfg.h" |
40 | #include "tree-ssa-loop-niter.h" |
41 | #include "tree-ssa-loop.h" |
42 | #include "symbol-summary.h" |
43 | #include "ipa-prop.h" |
44 | #include "ipa-fnsummary.h" |
45 | #include "ipa-inline.h" |
46 | #include "cfgloop.h" |
47 | #include "tree-scalar-evolution.h" |
48 | #include "ipa-utils.h" |
49 | #include "cfgexpand.h" |
50 | #include "gimplify.h" |
51 | #include "attribs.h" |
52 | |
/* Cached node/edge growths.  Indexed by call graph edge; each entry caches
   the estimated time/size/hints effect of inlining that edge (filled in by
   do_estimate_edge_time).  NULL when caching is disabled, e.g. during early
   inlining.  */
fast_call_summary<edge_growth_cache_entry *, va_heap> *edge_growth_cache = NULL;
55 | |
56 | /* The context cache remembers estimated time/size and hints for given |
57 | ipa_call_context of a call. */ |
58 | class node_context_cache_entry |
59 | { |
60 | public: |
61 | ipa_cached_call_context ctx; |
62 | sreal time, nonspec_time; |
63 | int size; |
64 | ipa_hints hints; |
65 | |
66 | node_context_cache_entry () |
67 | : ctx () |
68 | { |
69 | } |
70 | ~node_context_cache_entry () |
71 | { |
72 | ctx.release (); |
73 | } |
74 | }; |
75 | |
76 | /* At the moment we implement primitive single entry LRU cache. */ |
77 | class node_context_summary |
78 | { |
79 | public: |
80 | node_context_cache_entry entry; |
81 | |
82 | node_context_summary () |
83 | : entry () |
84 | { |
85 | } |
86 | ~node_context_summary () |
87 | { |
88 | } |
89 | }; |
90 | |
/* Summary holding the context cache.  NULL unless growth caches are
   initialized (see initialize_growth_caches).  */
static fast_function_summary <node_context_summary *, va_heap>
	*node_context_cache = NULL;
/* Statistics about the context cache effectivity; dumped and reset by
   free_growth_caches.  */
static long node_context_cache_hit, node_context_cache_miss,
	    node_context_cache_clear;
97 | |
98 | /* Give initial reasons why inlining would fail on EDGE. This gets either |
99 | nullified or usually overwritten by more precise reasons later. */ |
100 | |
101 | void |
102 | initialize_inline_failed (struct cgraph_edge *e) |
103 | { |
104 | struct cgraph_node *callee = e->callee; |
105 | |
106 | if (e->inline_failed && e->inline_failed != CIF_BODY_NOT_AVAILABLE |
107 | && cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR) |
108 | ; |
109 | else if (e->indirect_unknown_callee) |
110 | e->inline_failed = CIF_INDIRECT_UNKNOWN_CALL; |
111 | else if (!callee->definition) |
112 | e->inline_failed = CIF_BODY_NOT_AVAILABLE; |
113 | else if (callee->redefined_extern_inline) |
114 | e->inline_failed = CIF_REDEFINED_EXTERN_INLINE; |
115 | else |
116 | e->inline_failed = CIF_FUNCTION_NOT_CONSIDERED; |
117 | gcc_checking_assert (!e->call_stmt_cannot_inline_p |
118 | || cgraph_inline_failed_type (e->inline_failed) |
119 | == CIF_FINAL_ERROR); |
120 | } |
121 | |
122 | /* Allocate edge growth caches. */ |
123 | |
124 | void |
125 | initialize_growth_caches () |
126 | { |
127 | edge_growth_cache |
128 | = new fast_call_summary<edge_growth_cache_entry *, va_heap> (symtab); |
129 | node_context_cache |
130 | = new fast_function_summary<node_context_summary *, va_heap> (symtab); |
131 | edge_growth_cache->disable_duplication_hook (); |
132 | node_context_cache->disable_insertion_hook (); |
133 | node_context_cache->disable_duplication_hook (); |
134 | } |
135 | |
136 | /* Free growth caches. */ |
137 | |
138 | void |
139 | free_growth_caches (void) |
140 | { |
141 | delete edge_growth_cache; |
142 | delete node_context_cache; |
143 | edge_growth_cache = NULL; |
144 | node_context_cache = NULL; |
145 | if (dump_file) |
146 | fprintf (stream: dump_file, format: "node context cache: %li hits, %li misses," |
147 | " %li initializations\n" , |
148 | node_context_cache_hit, node_context_cache_miss, |
149 | node_context_cache_clear); |
150 | node_context_cache_hit = 0; |
151 | node_context_cache_miss = 0; |
152 | node_context_cache_clear = 0; |
153 | } |
154 | |
155 | /* Return hints derived from EDGE. */ |
156 | |
157 | int |
158 | simple_edge_hints (struct cgraph_edge *edge) |
159 | { |
160 | int hints = 0; |
161 | struct cgraph_node *to = (edge->caller->inlined_to |
162 | ? edge->caller->inlined_to : edge->caller); |
163 | struct cgraph_node *callee = edge->callee->ultimate_alias_target (); |
164 | int to_scc_no = ipa_fn_summaries->get (node: to)->scc_no; |
165 | int callee_scc_no = ipa_fn_summaries->get (node: callee)->scc_no; |
166 | |
167 | if (to_scc_no && to_scc_no == callee_scc_no && !edge->recursive_p ()) |
168 | hints |= INLINE_HINT_same_scc; |
169 | |
170 | if (cross_module_call_p (edge)) |
171 | hints |= INLINE_HINT_cross_module; |
172 | |
173 | return hints; |
174 | } |
175 | |
176 | /* Estimate the time cost for the caller when inlining EDGE. |
177 | Only to be called via estimate_edge_time, that handles the |
178 | caching mechanism. |
179 | |
180 | When caching, also update the cache entry. Compute both time and |
181 | size, since we always need both metrics eventually. */ |
182 | |
183 | sreal |
184 | do_estimate_edge_time (struct cgraph_edge *edge, sreal *ret_nonspec_time) |
185 | { |
186 | sreal time, nonspec_time; |
187 | int size; |
188 | ipa_hints hints; |
189 | struct cgraph_node *callee; |
190 | clause_t clause, nonspec_clause; |
191 | ipa_auto_call_arg_values avals; |
192 | class ipa_call_summary *es = ipa_call_summaries->get (edge); |
193 | int min_size = -1; |
194 | |
195 | callee = edge->callee->ultimate_alias_target (); |
196 | |
197 | gcc_checking_assert (edge->inline_failed); |
198 | evaluate_properties_for_edge (e: edge, inline_p: true, clause_ptr: &clause, nonspec_clause_ptr: &nonspec_clause, |
199 | avals: &avals, compute_contexts: true); |
200 | ipa_call_context ctx (callee, clause, nonspec_clause, es->param, &avals); |
201 | if (node_context_cache != NULL) |
202 | { |
203 | node_context_summary *e = node_context_cache->get_create (node: callee); |
204 | if (e->entry.ctx.equal_to (ctx)) |
205 | { |
206 | node_context_cache_hit++; |
207 | size = e->entry.size; |
208 | time = e->entry.time; |
209 | nonspec_time = e->entry.nonspec_time; |
210 | hints = e->entry.hints; |
211 | if (flag_checking |
212 | && !opt_for_fn (callee->decl, flag_profile_partial_training) |
213 | && !callee->count.ipa_p ()) |
214 | { |
215 | ipa_call_estimates chk_estimates; |
216 | ctx.estimate_size_and_time (estimates: &chk_estimates); |
217 | gcc_assert (chk_estimates.size == size |
218 | && chk_estimates.time == time |
219 | && chk_estimates.nonspecialized_time == nonspec_time |
220 | && chk_estimates.hints == hints); |
221 | } |
222 | } |
223 | else |
224 | { |
225 | if (e->entry.ctx.exists_p ()) |
226 | node_context_cache_miss++; |
227 | else |
228 | node_context_cache_clear++; |
229 | e->entry.ctx.release (); |
230 | ipa_call_estimates estimates; |
231 | ctx.estimate_size_and_time (estimates: &estimates); |
232 | size = estimates.size; |
233 | e->entry.size = size; |
234 | time = estimates.time; |
235 | e->entry.time = time; |
236 | nonspec_time = estimates.nonspecialized_time; |
237 | e->entry.nonspec_time = nonspec_time; |
238 | hints = estimates.hints; |
239 | e->entry.hints = hints; |
240 | e->entry.ctx.duplicate_from (ctx); |
241 | } |
242 | } |
243 | else |
244 | { |
245 | ipa_call_estimates estimates; |
246 | ctx.estimate_size_and_time (estimates: &estimates); |
247 | size = estimates.size; |
248 | time = estimates.time; |
249 | nonspec_time = estimates.nonspecialized_time; |
250 | hints = estimates.hints; |
251 | } |
252 | |
253 | /* When we have profile feedback or function attribute, we can quite safely |
254 | identify hot edges and for those we disable size limits. Don't do that |
255 | when probability that caller will call the callee is low however, since it |
256 | may hurt optimization of the caller's hot path. */ |
257 | if ((edge->count.ipa ().initialized_p () && edge->maybe_hot_p () |
258 | && (edge->count.ipa () * 2 |
259 | > (edge->caller->inlined_to |
260 | ? edge->caller->inlined_to->count.ipa () |
261 | : edge->caller->count.ipa ()))) |
262 | || (lookup_attribute (attr_name: "hot" , DECL_ATTRIBUTES (edge->caller->decl)) |
263 | != NULL |
264 | && lookup_attribute (attr_name: "hot" , DECL_ATTRIBUTES (edge->callee->decl)) |
265 | != NULL)) |
266 | hints |= INLINE_HINT_known_hot; |
267 | |
268 | gcc_checking_assert (size >= 0); |
269 | gcc_checking_assert (time >= 0); |
270 | |
271 | /* When caching, update the cache entry. */ |
272 | if (edge_growth_cache != NULL) |
273 | { |
274 | if (min_size >= 0) |
275 | ipa_fn_summaries->get (node: edge->callee->function_symbol ())->min_size |
276 | = min_size; |
277 | edge_growth_cache_entry *entry |
278 | = edge_growth_cache->get_create (edge); |
279 | entry->time = time; |
280 | entry->nonspec_time = nonspec_time; |
281 | |
282 | entry->size = size + (size >= 0); |
283 | hints |= simple_edge_hints (edge); |
284 | entry->hints = hints + 1; |
285 | } |
286 | if (ret_nonspec_time) |
287 | *ret_nonspec_time = nonspec_time; |
288 | return time; |
289 | } |
290 | |
291 | /* Reset cache for NODE. |
292 | This must be done each time NODE body is modified. */ |
293 | void |
294 | reset_node_cache (struct cgraph_node *node) |
295 | { |
296 | if (node_context_cache) |
297 | node_context_cache->remove (node); |
298 | } |
299 | |
300 | /* Remove EDGE from caches once it was inlined. */ |
301 | void |
302 | ipa_remove_from_growth_caches (struct cgraph_edge *edge) |
303 | { |
304 | if (node_context_cache) |
305 | node_context_cache->remove (node: edge->callee); |
306 | if (edge_growth_cache) |
307 | edge_growth_cache->remove (edge); |
308 | } |
309 | |
310 | /* Return estimated callee growth after inlining EDGE. |
311 | Only to be called via estimate_edge_size. */ |
312 | |
313 | int |
314 | do_estimate_edge_size (struct cgraph_edge *edge) |
315 | { |
316 | int size; |
317 | struct cgraph_node *callee; |
318 | clause_t clause, nonspec_clause; |
319 | |
320 | /* When we do caching, use do_estimate_edge_time to populate the entry. */ |
321 | |
322 | if (edge_growth_cache != NULL) |
323 | { |
324 | do_estimate_edge_time (edge); |
325 | size = edge_growth_cache->get (edge)->size; |
326 | gcc_checking_assert (size); |
327 | return size - (size > 0); |
328 | } |
329 | |
330 | callee = edge->callee->ultimate_alias_target (); |
331 | |
332 | /* Early inliner runs without caching, go ahead and do the dirty work. */ |
333 | gcc_checking_assert (edge->inline_failed); |
334 | ipa_auto_call_arg_values avals; |
335 | evaluate_properties_for_edge (e: edge, inline_p: true, clause_ptr: &clause, nonspec_clause_ptr: &nonspec_clause, |
336 | avals: &avals, compute_contexts: true); |
337 | ipa_call_context ctx (callee, clause, nonspec_clause, vNULL, &avals); |
338 | ipa_call_estimates estimates; |
339 | ctx.estimate_size_and_time (estimates: &estimates, est_times: false, est_hints: false); |
340 | return estimates.size; |
341 | } |
342 | |
343 | |
344 | /* Estimate the growth of the caller when inlining EDGE. |
345 | Only to be called via estimate_edge_size. */ |
346 | |
347 | ipa_hints |
348 | do_estimate_edge_hints (struct cgraph_edge *edge) |
349 | { |
350 | struct cgraph_node *callee; |
351 | clause_t clause, nonspec_clause; |
352 | |
353 | /* When we do caching, use do_estimate_edge_time to populate the entry. */ |
354 | |
355 | if (edge_growth_cache != NULL) |
356 | { |
357 | do_estimate_edge_time (edge); |
358 | ipa_hints hints = edge_growth_cache->get (edge)->hints; |
359 | gcc_checking_assert (hints); |
360 | return hints - 1; |
361 | } |
362 | |
363 | callee = edge->callee->ultimate_alias_target (); |
364 | |
365 | /* Early inliner runs without caching, go ahead and do the dirty work. */ |
366 | gcc_checking_assert (edge->inline_failed); |
367 | ipa_auto_call_arg_values avals; |
368 | evaluate_properties_for_edge (e: edge, inline_p: true, clause_ptr: &clause, nonspec_clause_ptr: &nonspec_clause, |
369 | avals: &avals, compute_contexts: true); |
370 | ipa_call_context ctx (callee, clause, nonspec_clause, vNULL, &avals); |
371 | ipa_call_estimates estimates; |
372 | ctx.estimate_size_and_time (estimates: &estimates, est_times: false, est_hints: true); |
373 | ipa_hints hints = estimates.hints | simple_edge_hints (edge); |
374 | return hints; |
375 | } |
376 | |
377 | /* Estimate the size of NODE after inlining EDGE which should be an |
378 | edge to either NODE or a call inlined into NODE. */ |
379 | |
380 | int |
381 | estimate_size_after_inlining (struct cgraph_node *node, |
382 | struct cgraph_edge *edge) |
383 | { |
384 | class ipa_call_summary *es = ipa_call_summaries->get (edge); |
385 | ipa_size_summary *s = ipa_size_summaries->get (node); |
386 | if (!es->predicate || *es->predicate != false) |
387 | { |
388 | int size = s->size + estimate_edge_growth (edge); |
389 | gcc_assert (size >= 0); |
390 | return size; |
391 | } |
392 | return s->size; |
393 | } |
394 | |

/* Data gathered by do_estimate_growth_1 while walking all callers of a
   node being considered for inlining everywhere.  */
struct growth_data
{
  struct cgraph_node *node;	/* Node whose growth is estimated.  */
  bool self_recursive;		/* Set when a recursive caller is seen.  */
  bool uninlinable;		/* Set when some caller cannot be inlined.  */
  int growth;			/* Accumulated growth over all callers.  */
  int cap;			/* Abort the walk when growth exceeds this.  */
};
404 | |
405 | |
406 | /* Worker for do_estimate_growth. Collect growth for all callers. */ |
407 | |
408 | static bool |
409 | do_estimate_growth_1 (struct cgraph_node *node, void *data) |
410 | { |
411 | struct cgraph_edge *e; |
412 | struct growth_data *d = (struct growth_data *) data; |
413 | |
414 | for (e = node->callers; e; e = e->next_caller) |
415 | { |
416 | gcc_checking_assert (e->inline_failed); |
417 | |
418 | if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR |
419 | || !opt_for_fn (e->caller->decl, optimize)) |
420 | { |
421 | d->uninlinable = true; |
422 | if (d->cap < INT_MAX) |
423 | return true; |
424 | continue; |
425 | } |
426 | |
427 | if (e->recursive_p ()) |
428 | { |
429 | d->self_recursive = true; |
430 | if (d->cap < INT_MAX) |
431 | return true; |
432 | continue; |
433 | } |
434 | d->growth += estimate_edge_growth (edge: e); |
435 | if (d->growth > d->cap) |
436 | return true; |
437 | } |
438 | return false; |
439 | } |
440 | |
441 | /* Return estimated savings for eliminating offline copy of NODE by inlining |
442 | it everywhere. */ |
443 | |
444 | static int |
445 | offline_size (struct cgraph_node *node, ipa_size_summary *info) |
446 | { |
447 | if (!DECL_EXTERNAL (node->decl)) |
448 | { |
449 | if (node->will_be_removed_from_program_if_no_direct_calls_p ()) |
450 | return info->size; |
451 | /* COMDAT functions are very often not shared across multiple units |
452 | since they come from various template instantiations. |
453 | Take this into account. */ |
454 | else if (DECL_COMDAT (node->decl) |
455 | && node->can_remove_if_no_direct_calls_p ()) |
456 | { |
457 | int prob = opt_for_fn (node->decl, param_comdat_sharing_probability); |
458 | return (info->size * (100 - prob) + 50) / 100; |
459 | } |
460 | } |
461 | return 0; |
462 | } |
463 | |
464 | /* Estimate the growth caused by inlining NODE into all callers. */ |
465 | |
466 | int |
467 | estimate_growth (struct cgraph_node *node) |
468 | { |
469 | struct growth_data d = { .node: node, .self_recursive: false, .uninlinable: false, .growth: 0, INT_MAX }; |
470 | ipa_size_summary *info = ipa_size_summaries->get (node); |
471 | |
472 | if (node->call_for_symbol_and_aliases (callback: do_estimate_growth_1, data: &d, include_overwritable: true)) |
473 | return 1; |
474 | |
475 | /* For self recursive functions the growth estimation really should be |
476 | infinity. We don't want to return very large values because the growth |
477 | plays various roles in badness computation fractions. Be sure to not |
478 | return zero or negative growths. */ |
479 | if (d.self_recursive) |
480 | d.growth = d.growth < info->size ? info->size : d.growth; |
481 | else if (!d.uninlinable) |
482 | d.growth -= offline_size (node, info); |
483 | |
484 | return d.growth; |
485 | } |
486 | |
487 | /* Verify if there are fewer than MAX_CALLERS. */ |
488 | |
489 | static bool |
490 | check_callers (cgraph_node *node, int *growth, int *n, int offline, |
491 | int min_size, struct cgraph_edge *known_edge) |
492 | { |
493 | ipa_ref *ref; |
494 | |
495 | if (!node->can_remove_if_no_direct_calls_and_refs_p ()) |
496 | return true; |
497 | |
498 | for (cgraph_edge *e = node->callers; e; e = e->next_caller) |
499 | { |
500 | edge_growth_cache_entry *entry; |
501 | |
502 | if (e == known_edge) |
503 | continue; |
504 | if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR) |
505 | return true; |
506 | if (edge_growth_cache != NULL |
507 | && (entry = edge_growth_cache->get (edge: e)) != NULL |
508 | && entry->size != 0) |
509 | *growth += entry->size - (entry->size > 0); |
510 | else |
511 | { |
512 | class ipa_call_summary *es = ipa_call_summaries->get (edge: e); |
513 | if (!es) |
514 | return true; |
515 | *growth += min_size - es->call_stmt_size; |
516 | if (--(*n) < 0) |
517 | return false; |
518 | } |
519 | if (*growth > offline) |
520 | return true; |
521 | } |
522 | |
523 | if (*n > 0) |
524 | FOR_EACH_ALIAS (node, ref) |
525 | if (check_callers (node: dyn_cast <cgraph_node *> (p: ref->referring), growth, n, |
526 | offline, min_size, known_edge)) |
527 | return true; |
528 | |
529 | return false; |
530 | } |
531 | |
532 | |
533 | /* Decide if growth of NODE is positive. This is cheaper than calculating |
534 | actual growth. If edge growth of KNOWN_EDGE is known |
535 | it is passed by EDGE_GROWTH. */ |
536 | |
537 | bool |
538 | growth_positive_p (struct cgraph_node *node, |
539 | struct cgraph_edge * known_edge, int edge_growth) |
540 | { |
541 | struct cgraph_edge *e; |
542 | |
543 | ipa_size_summary *s = ipa_size_summaries->get (node); |
544 | |
545 | /* First quickly check if NODE is removable at all. */ |
546 | int offline = offline_size (node, info: s); |
547 | if (offline <= 0 && known_edge && edge_growth > 0) |
548 | return true; |
549 | |
550 | int min_size = ipa_fn_summaries->get (node)->min_size; |
551 | int n = 10; |
552 | |
553 | int min_growth = known_edge ? edge_growth : 0; |
554 | for (e = node->callers; e; e = e->next_caller) |
555 | { |
556 | edge_growth_cache_entry *entry; |
557 | |
558 | if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR) |
559 | return true; |
560 | if (e == known_edge) |
561 | continue; |
562 | if (edge_growth_cache != NULL |
563 | && (entry = edge_growth_cache->get (edge: e)) != NULL |
564 | && entry->size != 0) |
565 | min_growth += entry->size - (entry->size > 0); |
566 | else |
567 | { |
568 | class ipa_call_summary *es = ipa_call_summaries->get (edge: e); |
569 | if (!es) |
570 | return true; |
571 | min_growth += min_size - es->call_stmt_size; |
572 | if (--n <= 0) |
573 | break; |
574 | } |
575 | if (min_growth > offline) |
576 | return true; |
577 | } |
578 | |
579 | ipa_ref *ref; |
580 | if (n > 0) |
581 | FOR_EACH_ALIAS (node, ref) |
582 | if (check_callers (node: dyn_cast <cgraph_node *> (p: ref->referring), |
583 | growth: &min_growth, n: &n, offline, min_size, known_edge)) |
584 | return true; |
585 | |
586 | struct growth_data d = { .node: node, .self_recursive: false, .uninlinable: false, .growth: 0, .cap: offline }; |
587 | if (node->call_for_symbol_and_aliases (callback: do_estimate_growth_1, data: &d, include_overwritable: true)) |
588 | return true; |
589 | if (d.self_recursive || d.uninlinable) |
590 | return true; |
591 | return (d.growth > offline); |
592 | } |
593 | |