/* Callgraph transformations to handle inlining
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* The inline decisions are stored in the callgraph as an "inline plan" and
   applied later.

   To mark a given call as inlined, use the inline_call function.  The
   function marks the edge as inlined and, if necessary, produces a virtual
   clone in the callgraph representing the new copy of the callee's
   function body.

   The inline plan is applied to a given function body by inline_transform.  */
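
/* A rough sketch of the two phases, for orientation only (the actual driver
   code lives in the inlining heuristics, e.g. ipa-inline.cc); EDGE and NODE
   stand for whatever callgraph edge/node the heuristics picked:

     inline_call (edge, true, NULL, NULL, true, NULL);  // record the decision
     ...
     inline_transform (node);  // later, rewrite NODE's body per the plan  */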

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "function.h"
#include "tree.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "ipa-inline.h"
#include "tree-inline.h"
#include "function.h"
#include "cfg.h"
#include "basic-block.h"
#include "ipa-utils.h"
#include "ipa-modref-tree.h"
#include "ipa-modref.h"
#include "symtab-thunks.h"
#include "symtab-clones.h"

int ncalls_inlined;
int nfunctions_inlined;

/* Scale counts of NODE edges by NUM/DEN.  */

static void
update_noncloned_counts (struct cgraph_node *node,
                         profile_count num, profile_count den)
{
  struct cgraph_edge *e;

  profile_count::adjust_for_ipa_scaling (&num, &den);

  for (e = node->callees; e; e = e->next_callee)
    {
      if (!e->inline_failed)
        update_noncloned_counts (e->callee, num, den);
      e->count = e->count.apply_scale (num, den);
    }
  for (e = node->indirect_calls; e; e = e->next_callee)
    e->count = e->count.apply_scale (num, den);
  node->count = node->count.apply_scale (num, den);
}
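
/* Illustrative example (made-up numbers): when the offline copy of a callee
   with count 1000 is re-used for a call site executed 250 times (see
   clone_inlined_nodes below), the callee's body and the edges of everything
   already inlined into it are rescaled by 250/1000 so that the profile
   matches that call site.  */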

/* We removed or are going to remove the last call to NODE.
   Return true if we can and want to proactively remove NODE now.
   This is important to do, since we want the inliner to know when the
   offline copy of a function was removed.  */

static bool
can_remove_node_now_p_1 (struct cgraph_node *node, struct cgraph_edge *e)
{
  ipa_ref *ref;

  FOR_EACH_ALIAS (node, ref)
    {
      cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
      if ((alias->callers && alias->callers != e)
          || !can_remove_node_now_p_1 (alias, e))
        return false;
    }
  /* FIXME: When the address of a DECL_EXTERNAL function is taken, we can
     still remove its offline copy, but we would need to keep an unanalyzed
     node in the callgraph so references can point to it.

     Also for a comdat group we can ignore references inside the group, as we
     want to prove the group as a whole to be dead.  */
  return (!node->address_taken
          && node->can_remove_if_no_direct_calls_and_refs_p ()
          /* Inlining might enable more devirtualization, so we want to remove
             those only after all devirtualizable virtual calls are processed.
             Lacking "may" edges in the callgraph, we just preserve them post
             inlining.  */
          && (!DECL_VIRTUAL_P (node->decl)
              || !opt_for_fn (node->decl, flag_devirtualize))
          /* During early inlining some unanalyzed cgraph nodes might be in
             the callgraph and they might refer to the function in question.  */
          && !cgraph_new_nodes.exists ());
}

/* We are going to eliminate the last direct call to NODE (or an alias of it)
   via edge E.  Verify that NODE can be removed from the unit and, if it is
   contained in a comdat group, that the whole comdat group is removable.  */

static bool
can_remove_node_now_p (struct cgraph_node *node, struct cgraph_edge *e)
{
  struct cgraph_node *next;
  if (!can_remove_node_now_p_1 (node, e))
    return false;

  /* When we see the same comdat group, we need to be sure that all
     items can be removed.  */
  if (!node->same_comdat_group || !node->externally_visible)
    return true;
  for (next = dyn_cast<cgraph_node *> (node->same_comdat_group);
       next != node; next = dyn_cast<cgraph_node *> (next->same_comdat_group))
    {
      if (next->alias)
        continue;
      if ((next->callers && next->callers != e)
          || !can_remove_node_now_p_1 (next, e))
        return false;
    }
  return true;
}

/* Return true if NODE is a master clone with non-inline clones.  */

static bool
master_clone_with_noninline_clones_p (struct cgraph_node *node)
{
  if (node->clone_of)
    return false;

  for (struct cgraph_node *n = node->clones; n; n = n->next_sibling_clone)
    if (n->decl != node->decl)
      return true;

  return false;
}

/* E is expected to be an edge being inlined.  Clone the destination node of
   the edge and redirect it to the new clone.
   DUPLICATE is used for bookkeeping on whether we are actually creating new
   clones or re-using the node originally representing the out-of-line
   function call.
   By default the offline copy is removed when it appears dead after inlining.
   UPDATE_ORIGINAL prevents this transformation.
   If OVERALL_SIZE is non-NULL, the size is updated to reflect the
   transformation.  */

void
clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
                     bool update_original, int *overall_size)
{
  struct cgraph_node *inlining_into;
  struct cgraph_edge *next;

  if (e->caller->inlined_to)
    inlining_into = e->caller->inlined_to;
  else
    inlining_into = e->caller;

  if (duplicate)
    {
      /* We may eliminate the need for the out-of-line copy to be output.
         In that case just go ahead and re-use it.  This is not just a
         memory optimization.  Making the offline copy of the function
         disappear from the program will improve future decisions on
         inlining.  */
      if (!e->callee->callers->next_caller
          /* Recursive inlining never wants the master clone to
             be overwritten.  */
          && update_original
          && can_remove_node_now_p (e->callee, e)
          /* We cannot overwrite a master clone with non-inline clones
             until after these clones are materialized.  */
          && !master_clone_with_noninline_clones_p (e->callee))
        {
          /* TODO: When the callee is in a comdat group, we could remove all
             of it, including all inline clones inlined into it.  That would
             however need small function inlining to register an edge removal
             hook to maintain the priority queue.

             For now we keep the other functions in the group in the program
             until cgraph_remove_unreachable_functions gets rid of them.  */
          gcc_assert (!e->callee->inlined_to);
          e->callee->remove_from_same_comdat_group ();
          if (e->callee->definition
              && inline_account_function_p (e->callee))
            {
              gcc_assert (!e->callee->alias);
              if (overall_size)
                *overall_size -= ipa_size_summaries->get (e->callee)->size;
              nfunctions_inlined++;
            }
          duplicate = false;
          e->callee->externally_visible = false;
          update_noncloned_counts (e->callee, e->count, e->callee->count);

          dump_callgraph_transformation (e->callee, inlining_into,
                                         "inlining to");
        }
      else
        {
          struct cgraph_node *n;

          n = e->callee->create_clone (e->callee->decl,
                                       e->count,
                                       update_original, vNULL, true,
                                       inlining_into,
                                       NULL);
          n->used_as_abstract_origin = e->callee->used_as_abstract_origin;
          e->redirect_callee (n);
        }
    }
  else
    e->callee->remove_from_same_comdat_group ();

  e->callee->inlined_to = inlining_into;
  if (e->callee->ipa_transforms_to_apply.length ())
    {
      e->callee->ipa_transforms_to_apply.release ();
      e->callee->ipa_transforms_to_apply = vNULL;
    }

  /* Recursively clone all bodies.  */
  for (e = e->callee->callees; e; e = next)
    {
      next = e->next_callee;
      if (!e->inline_failed)
        clone_inlined_nodes (e, duplicate, update_original, overall_size);
    }
}
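
/* Note on the typical invocation: inline_call below always passes DUPLICATE
   as true; the function itself then clears DUPLICATE once it decides it can
   simply re-use the offline copy instead of materializing a new clone, so
   the recursive walk over already-inlined bodies re-uses them as well.  */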

/* Check all speculations in N and if any seem useless, resolve them.  When
   the first edge is resolved, pop all edges from NEW_EDGES and insert them
   into EDGE_SET.  Then remove each resolved edge from EDGE_SET, if it is
   there.  */

static bool
check_speculations_1 (cgraph_node *n, vec<cgraph_edge *> *new_edges,
                      hash_set <cgraph_edge *> *edge_set)
{
  bool speculation_removed = false;
  cgraph_edge *next;

  for (cgraph_edge *e = n->callees; e; e = next)
    {
      next = e->next_callee;
      if (e->speculative && !speculation_useful_p (e, true))
        {
          while (new_edges && !new_edges->is_empty ())
            edge_set->add (new_edges->pop ());
          edge_set->remove (e);

          cgraph_edge::resolve_speculation (e, NULL);
          speculation_removed = true;
        }
      else if (!e->inline_failed)
        speculation_removed |= check_speculations_1 (e->callee, new_edges,
                                                     edge_set);
    }
  return speculation_removed;
}

/* Push E to NEW_EDGES.  Called from the hash_set traverse method, which
   unfortunately means this function has to have external linkage, otherwise
   the code will not compile with gcc 4.8.  */

bool
push_all_edges_in_set_to_vec (cgraph_edge * const &e,
                              vec<cgraph_edge *> *new_edges)
{
  new_edges->safe_push (e);
  return true;
}

/* Check all speculations in N and if any seem useless, resolve them and
   remove them from NEW_EDGES.  */

static bool
check_speculations (cgraph_node *n, vec<cgraph_edge *> *new_edges)
{
  hash_set <cgraph_edge *> edge_set;
  bool res = check_speculations_1 (n, new_edges, &edge_set);
  if (!edge_set.is_empty ())
    edge_set.traverse <vec<cgraph_edge *> *,
                       push_all_edges_in_set_to_vec> (new_edges);
  return res;
}

/* Mark all call graph edges coming out of NODE and all nodes that have been
   inlined to it as in_polymorphic_cdtor.  */

static void
mark_all_inlined_calls_cdtor (cgraph_node *node)
{
  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
    {
      cs->in_polymorphic_cdtor = true;
      if (!cs->inline_failed)
        mark_all_inlined_calls_cdtor (cs->callee);
    }
  for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
    cs->in_polymorphic_cdtor = true;
}


/* Mark edge E as inlined and update the callgraph accordingly.
   UPDATE_ORIGINAL specifies whether the profile of the original function
   should be updated.  If any new indirect edges are discovered in the
   process, add them to NEW_EDGES, unless it is NULL.  If
   UPDATE_OVERALL_SUMMARY is false, do not bother to recompute the overall
   size of the caller after inlining; the caller is then required to
   eventually do it via ipa_update_overall_fn_summary.
   If CALLEE_REMOVED is non-NULL, set it to true if we removed the callee
   node.

   Return true iff any new callgraph edges were discovered as a
   result of inlining.  */

bool
inline_call (struct cgraph_edge *e, bool update_original,
             vec<cgraph_edge *> *new_edges,
             int *overall_size, bool update_overall_summary,
             bool *callee_removed)
{
  int old_size = 0, new_size = 0;
  struct cgraph_node *to = NULL;
  struct cgraph_edge *curr = e;
  bool comdat_local = e->callee->comdat_local_p ();
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  bool new_edges_found = false;

  int estimated_growth = 0;
  if (! update_overall_summary)
    estimated_growth = estimate_edge_growth (e);
  /* This is used only for the assert below.  */
#if 0
  bool predicated = inline_edge_summary (e)->predicate != NULL;
#endif

  /* Don't inline inlined edges.  */
  gcc_assert (e->inline_failed);
  /* Don't even think of inlining an inline clone.  */
  gcc_assert (!callee->inlined_to);

  to = e->caller;
  if (to->inlined_to)
    to = to->inlined_to;
  if (to->thunk)
    {
      struct cgraph_node *target = to->callees->callee;
      thunk_expansion = true;

      /* Remove all annotations, but keep thunk info.  */
      thunk_info info = *thunk_info::get (to);
      symtab->call_cgraph_removal_hooks (to);
      *thunk_info::get_create (to) = info;
      if (in_lto_p)
        to->get_untransformed_body ();
      expand_thunk (to, false, true);
      /* When the thunk is instrumented we may have multiple callees.  */
      for (e = to->callees; e && e->callee != target; e = e->next_callee)
        ;
      symtab->call_cgraph_insertion_hooks (to);
      thunk_expansion = false;
      gcc_assert (e);
    }


  e->inline_failed = CIF_OK;
  DECL_POSSIBLY_INLINED (callee->decl) = true;

  if (DECL_FUNCTION_PERSONALITY (callee->decl))
    DECL_FUNCTION_PERSONALITY (to->decl)
      = DECL_FUNCTION_PERSONALITY (callee->decl);

  bool reload_optimization_node = false;
  if (!opt_for_fn (callee->decl, flag_strict_aliasing)
      && opt_for_fn (to->decl, flag_strict_aliasing))
    {
      struct gcc_options opts = global_options;
      struct gcc_options opts_set = global_options_set;

      cl_optimization_restore (&opts, &opts_set, opts_for_fn (to->decl));
      opts.x_flag_strict_aliasing = false;
      if (dump_file)
        fprintf (dump_file, "Dropping flag_strict_aliasing on %s\n",
                 to->dump_name ());
      DECL_FUNCTION_SPECIFIC_OPTIMIZATION (to->decl)
        = build_optimization_node (&opts, &opts_set);
      reload_optimization_node = true;
    }

  ipa_fn_summary *caller_info = ipa_fn_summaries->get (to);
  ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
  if (!caller_info->fp_expressions && callee_info->fp_expressions)
    {
      caller_info->fp_expressions = true;
      if (opt_for_fn (callee->decl, flag_rounding_math)
          != opt_for_fn (to->decl, flag_rounding_math)
          || opt_for_fn (callee->decl, flag_trapping_math)
             != opt_for_fn (to->decl, flag_trapping_math)
          || opt_for_fn (callee->decl, flag_unsafe_math_optimizations)
             != opt_for_fn (to->decl, flag_unsafe_math_optimizations)
          || opt_for_fn (callee->decl, flag_finite_math_only)
             != opt_for_fn (to->decl, flag_finite_math_only)
          || opt_for_fn (callee->decl, flag_signaling_nans)
             != opt_for_fn (to->decl, flag_signaling_nans)
          || opt_for_fn (callee->decl, flag_cx_limited_range)
             != opt_for_fn (to->decl, flag_cx_limited_range)
          || opt_for_fn (callee->decl, flag_signed_zeros)
             != opt_for_fn (to->decl, flag_signed_zeros)
          || opt_for_fn (callee->decl, flag_associative_math)
             != opt_for_fn (to->decl, flag_associative_math)
          || opt_for_fn (callee->decl, flag_reciprocal_math)
             != opt_for_fn (to->decl, flag_reciprocal_math)
          || opt_for_fn (callee->decl, flag_fp_int_builtin_inexact)
             != opt_for_fn (to->decl, flag_fp_int_builtin_inexact)
          || opt_for_fn (callee->decl, flag_errno_math)
             != opt_for_fn (to->decl, flag_errno_math))
        {
          struct gcc_options opts = global_options;
          struct gcc_options opts_set = global_options_set;

          cl_optimization_restore (&opts, &opts_set, opts_for_fn (to->decl));
          opts.x_flag_rounding_math
            = opt_for_fn (callee->decl, flag_rounding_math);
          opts.x_flag_trapping_math
            = opt_for_fn (callee->decl, flag_trapping_math);
          opts.x_flag_unsafe_math_optimizations
            = opt_for_fn (callee->decl, flag_unsafe_math_optimizations);
          opts.x_flag_finite_math_only
            = opt_for_fn (callee->decl, flag_finite_math_only);
          opts.x_flag_signaling_nans
            = opt_for_fn (callee->decl, flag_signaling_nans);
          opts.x_flag_cx_limited_range
            = opt_for_fn (callee->decl, flag_cx_limited_range);
          opts.x_flag_signed_zeros
            = opt_for_fn (callee->decl, flag_signed_zeros);
          opts.x_flag_associative_math
            = opt_for_fn (callee->decl, flag_associative_math);
          opts.x_flag_reciprocal_math
            = opt_for_fn (callee->decl, flag_reciprocal_math);
          opts.x_flag_fp_int_builtin_inexact
            = opt_for_fn (callee->decl, flag_fp_int_builtin_inexact);
          opts.x_flag_errno_math
            = opt_for_fn (callee->decl, flag_errno_math);
          if (dump_file)
            fprintf (dump_file, "Copying FP flags from %s to %s\n",
                     callee->dump_name (), to->dump_name ());
          DECL_FUNCTION_SPECIFIC_OPTIMIZATION (to->decl)
            = build_optimization_node (&opts, &opts_set);
          reload_optimization_node = true;
        }
    }

  /* Reload global optimization flags.  */
  if (reload_optimization_node && DECL_STRUCT_FUNCTION (to->decl) == cfun)
    set_cfun (cfun, true);

  /* If aliases are involved, redirect the edge to the actual destination and
     possibly remove the aliases.  */
  if (e->callee != callee)
    {
      struct cgraph_node *alias = e->callee, *next_alias;
      e->redirect_callee (callee);
      while (alias && alias != callee)
        {
          if (!alias->callers
              && can_remove_node_now_p (alias,
                                        !e->next_caller
                                        && !e->prev_caller ? e : NULL))
            {
              next_alias = alias->get_alias_target ();
              alias->remove ();
              if (callee_removed)
                *callee_removed = true;
              alias = next_alias;
            }
          else
            break;
        }
    }

  clone_inlined_nodes (e, true, update_original, overall_size);

  gcc_assert (curr->callee->inlined_to == to);

  old_size = ipa_size_summaries->get (to)->size;
  ipa_merge_modref_summary_after_inlining (e);
  ipa_merge_fn_summary_after_inlining (e);
  if (e->in_polymorphic_cdtor)
    mark_all_inlined_calls_cdtor (e->callee);
  if (opt_for_fn (e->caller->decl, optimize))
    new_edges_found = ipa_propagate_indirect_call_infos (curr, new_edges);
  bool removed_p = check_speculations (e->callee, new_edges);
  if (update_overall_summary)
    ipa_update_overall_fn_summary (to, new_edges_found || removed_p);
  else
    /* Update the self size by the estimate so overall function growth limits
       work for further inlining into this function.  Before inlining
       the function we inlined to again, we expect the caller to update
       the overall summary.  */
    ipa_size_summaries->get (to)->size += estimated_growth;
  new_size = ipa_size_summaries->get (to)->size;

  if (callee->calls_comdat_local)
    to->calls_comdat_local = true;
  else if (to->calls_comdat_local && comdat_local)
    to->calls_comdat_local = to->check_calls_comdat_local_p ();

  /* FIXME: This assert suffers from roundoff errors, disable it for GCC 5
     and revisit it after conversion to sreals in GCC 6.
     See PR 65654.  */
#if 0
  /* Verify that the estimated growth matches the real growth.  Allow an
     off-by-one error due to ipa_fn_summary::size_scale roundoff errors.  */
  gcc_assert (!update_overall_summary || !overall_size || new_edges_found
              || abs (estimated_growth - (new_size - old_size)) <= 1
              || speculation_removed
              /* FIXME: a hack.  Edges with a false predicate are accounted
                 wrongly; we should remove them from the callgraph.  */
              || predicated);
#endif

  /* Account the change of overall unit size; external functions will be
     removed and are thus not accounted.  */
  if (overall_size && inline_account_function_p (to))
    *overall_size += new_size - old_size;
  ncalls_inlined++;

  /* This must happen after ipa_merge_fn_summary_after_inlining, which relies
     on the jump functions of the callee not being updated.  */
  return new_edges_found;
}
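
/* Usage note (a sketch, not a prescription; E1, E2 and CALLER are
   hypothetical): a caller inlining several edges into the same function can
   skip the per-edge summary recomputation and do it once at the end, per the
   contract described above:

     inline_call (e1, true, NULL, &overall_size, false, NULL);
     inline_call (e2, true, NULL, &overall_size, false, NULL);
     ipa_update_overall_fn_summary (caller, true);  */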

/* For each node that was made the holder of a function body by
   save_inline_function_body, this summary contains a pointer to the previous
   holder of the body.  */

function_summary <tree *> *ipa_saved_clone_sources;

/* Copy the function body of NODE and redirect all inline clones to it.
   This is done before the inline plan is applied to NODE when there are
   still some inline clones of it.

   This is necessary because inline decisions are not really transitive
   and the other inline clones may have different bodies.  */

static struct cgraph_node *
save_inline_function_body (struct cgraph_node *node)
{
  struct cgraph_node *first_clone, *n;

  if (dump_file)
    fprintf (dump_file, "\nSaving body of %s for later reuse\n",
             node->dump_name ());

  gcc_assert (node == cgraph_node::get (node->decl));

  /* first_clone will be turned into a real function.  */
  first_clone = node->clones;

  /* Arrange for the first clone to not be a thunk, as those do not have
     bodies.  */
  if (first_clone->thunk)
    {
      while (first_clone->thunk)
        first_clone = first_clone->next_sibling_clone;
      first_clone->prev_sibling_clone->next_sibling_clone
        = first_clone->next_sibling_clone;
      if (first_clone->next_sibling_clone)
        first_clone->next_sibling_clone->prev_sibling_clone
          = first_clone->prev_sibling_clone;
      first_clone->next_sibling_clone = node->clones;
      first_clone->prev_sibling_clone = NULL;
      node->clones->prev_sibling_clone = first_clone;
      node->clones = first_clone;
    }
  first_clone->decl = copy_node (node->decl);
  first_clone->decl->decl_with_vis.symtab_node = first_clone;
  gcc_assert (first_clone == cgraph_node::get (first_clone->decl));

  /* Now reshape the clone tree, so that all other clones descend from
     first_clone.  */
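
  /* Illustration with a hypothetical clone tree: if NODE has inline clones
     C1 (= FIRST_CLONE), C2 and C3, the reshaping turns

         NODE                        C1
          +- C1 - C2 - C3    into     +- C2 - C3

     i.e. C2 and C3 (together with their own subtrees) become clones of
     FIRST_CLONE and are redirected to its new decl further below.  */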
  if (first_clone->next_sibling_clone)
    {
      for (n = first_clone->next_sibling_clone; n->next_sibling_clone;
           n = n->next_sibling_clone)
        n->clone_of = first_clone;
      n->clone_of = first_clone;
      n->next_sibling_clone = first_clone->clones;
      if (first_clone->clones)
        first_clone->clones->prev_sibling_clone = n;
      first_clone->clones = first_clone->next_sibling_clone;
      first_clone->next_sibling_clone->prev_sibling_clone = NULL;
      first_clone->next_sibling_clone = NULL;
      gcc_assert (!first_clone->prev_sibling_clone);
    }

  tree prev_body_holder = node->decl;
  if (!ipa_saved_clone_sources)
    {
      ipa_saved_clone_sources = new function_summary <tree *> (symtab);
      ipa_saved_clone_sources->disable_insertion_hook ();
    }
  else
    {
      tree *p = ipa_saved_clone_sources->get (node);
      if (p)
        {
          prev_body_holder = *p;
          gcc_assert (prev_body_holder);
        }
    }
  *ipa_saved_clone_sources->get_create (first_clone) = prev_body_holder;
  first_clone->former_clone_of
    = node->former_clone_of ? node->former_clone_of : node->decl;
  first_clone->clone_of = NULL;

  /* Now the node in question has no clones.  */
  node->clones = NULL;

  /* Inline clones share a decl with the function they are cloned
     from.  Walk the whole clone tree and redirect them all to the
     new decl.  */
  if (first_clone->clones)
    for (n = first_clone->clones; n != first_clone;)
      {
        gcc_assert (n->decl == node->decl);
        n->decl = first_clone->decl;
        if (n->clones)
          n = n->clones;
        else if (n->next_sibling_clone)
          n = n->next_sibling_clone;
        else
          {
            while (n != first_clone && !n->next_sibling_clone)
              n = n->clone_of;
            if (n != first_clone)
              n = n->next_sibling_clone;
          }
      }

  /* Copy the function tree of NODE to the new holder decl.  */
  tree_function_versioning (node->decl, first_clone->decl,
                            NULL, NULL, true, NULL, NULL);

  /* The function will be short-lived and removed after we inline all the
     clones, but make it internal so we won't confuse ourselves.  */
  DECL_EXTERNAL (first_clone->decl) = 0;
  TREE_PUBLIC (first_clone->decl) = 0;
  DECL_COMDAT (first_clone->decl) = 0;
  first_clone->ipa_transforms_to_apply.release ();

  /* When doing recursive inlining, the clone may become unnecessary.
     This is possible, for example, when the recursive function is proved to
     be non-throwing and the recursion happens only in the EH landing pad.
     We cannot remove the clone until we are done with saving the body.
     Remove it now.  */
  if (!first_clone->callers)
    {
      first_clone->remove_symbol_and_inline_clones ();
      first_clone = NULL;
    }
  else if (flag_checking)
    first_clone->verify ();

  return first_clone;
}

/* Return true when the function body of NODE still needs to be kept around
   for later re-use.  */
static bool
preserve_function_body_p (struct cgraph_node *node)
{
  gcc_assert (symtab->global_info_ready);
  gcc_assert (!node->alias && !node->thunk);

  /* Look if there is any non-thunk clone around.  */
  for (node = node->clones; node; node = node->next_sibling_clone)
    if (!node->thunk)
      return true;
  return false;
}

/* tree-inline cannot recurse; materialize all function bodies we will need
   during inlining.  This includes inlined functions, but also called
   functions with param manipulation, because IPA param manipulation attaches
   debug statements to PARM_DECLs of the called clone.  Materialize them if
   needed.

   FIXME: This is somewhat broken by design because it does not play well
   with partitioning.  */

static void
maybe_materialize_called_clones (cgraph_node *node)
{
  for (cgraph_edge *e = node->callees; e; e = e->next_callee)
    {
      clone_info *info;

      if (!e->inline_failed)
        maybe_materialize_called_clones (e->callee);

      cgraph_node *callee = cgraph_node::get (e->callee->decl);
      if (callee->clone_of
          && (info = clone_info::get (callee)) && info->param_adjustments)
        callee->get_untransformed_body ();
    }
}

/* Apply inline plan to function.  */

unsigned int
inline_transform (struct cgraph_node *node)
{
  unsigned int todo = 0;
  struct cgraph_edge *e, *next;
  bool has_inline = false;

  /* FIXME: Currently the pass manager is adding inline transform more than
     once to some clones.  This needs revisiting after WPA cleanups.  */
  if (cfun->after_inlining)
    return 0;

  cgraph_node *next_clone;
  for (cgraph_node *n = node->clones; n; n = next_clone)
    {
      next_clone = n->next_sibling_clone;
      if (n->decl != node->decl)
        n->materialize_clone ();
    }
  node->clear_stmts_in_references ();

  /* We might need the body of this function so that we can expand
     it inline somewhere else.  */
  if (preserve_function_body_p (node))
    save_inline_function_body (node);

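  /* If NODE's count no longer matches the entry-block count recorded in the
     body (the IPA passes may have rescaled the node's profile), bring the
     body's profile in line by scaling every basic-block count by
     NODE->count / entry count.  */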
  profile_count num = node->count;
  profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
  bool scale = num.initialized_p () && !(num == den);
  if (scale)
    {
      profile_count::adjust_for_ipa_scaling (&num, &den);
      if (dump_file)
        {
          fprintf (dump_file, "Applying count scale ");
          num.dump (dump_file);
          fprintf (dump_file, "/");
          den.dump (dump_file);
          fprintf (dump_file, "\n");
        }

      basic_block bb;
      cfun->cfg->count_max = profile_count::uninitialized ();
      FOR_ALL_BB_FN (bb, cfun)
        {
          bb->count = bb->count.apply_scale (num, den);
          cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
        }
      ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = node->count;
    }

  maybe_materialize_called_clones (node);
  for (e = node->callees; e; e = next)
    {
      if (!e->inline_failed)
        has_inline = true;
      next = e->next_callee;
      cgraph_edge::redirect_call_stmt_to_callee (e);
    }
  node->remove_all_references ();

  timevar_push (TV_INTEGRATION);
  if (node->callees && (opt_for_fn (node->decl, optimize) || has_inline))
    {
      todo = optimize_inline_calls (current_function_decl);
    }
  timevar_pop (TV_INTEGRATION);

  cfun->always_inline_functions_inlined = true;
  cfun->after_inlining = true;
  todo |= execute_fixup_cfg ();

  if (!(todo & TODO_update_ssa_any))
    /* Redirecting edges might lead to a need for vops to be recomputed.  */
    todo |= TODO_update_ssa_only_virtuals;

  return todo;
}