/* Interprocedural constant propagation
   Copyright (C) 2005-2025 Free Software Foundation, Inc.

   Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
   <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Interprocedural constant propagation (IPA-CP).

   The goal of this transformation is to

   1) discover functions which are always invoked with some arguments with the
      same known constant values and modify the functions so that the
      subsequent optimizations can take advantage of the knowledge, and

   2) partial specialization - create specialized versions of functions
      transformed in this way if some parameters are known constants only in
      certain contexts but the estimated tradeoff between speedup and code
      size cost is deemed good (see the example below).
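
      For instance (an illustrative sketch, not actual output of the pass),
      given

	static int mult (int a, int b) { return a * b; }
	int f (int x) { return mult (x, 8); }
	int g (int y) { return mult (y, y); }

      the second parameter of mult is always 8 when called from f but unknown
      when called from g, so IPA-CP can create a clone of mult specialized
      for B equal to 8 and redirect only the call in f to it.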

   The algorithm also propagates types and attempts to perform type based
   devirtualization.  Types are propagated much like constants.

   The algorithm basically consists of three stages.  In the first, functions
   are analyzed one at a time and jump functions are constructed for all known
   call-sites.  In the second phase, the pass propagates information from the
   jump functions across the call to reveal what values are available at what
   call sites, performs estimations of effects of known values on functions and
   their callees, and finally decides what specialized extra versions should be
   created.  In the third, the special versions materialize and appropriate
   calls are redirected.

   The algorithm used is to a certain extent based on "Interprocedural Constant
   Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
   Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
   Cooper, Mary W. Hall, and Ken Kennedy.


   First stage - intraprocedural analysis
   =======================================

   This phase computes jump_function and modification flags.

   A jump function for a call-site represents the values passed as actual
   arguments of a given call-site.  In principle, there are three types of
   values (an example follows the list):

   Pass through - the caller's formal parameter is passed as an actual
		  argument, plus an operation on it can be performed.
   Constant - a constant is passed as an actual argument.
   Unknown - neither of the above.
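
   For example (an illustrative case), in

     void bar (int a, int b, int c);
     void foo (int x) { bar (x + 1, 7, unknown_fn ()); }

   the call to bar would get a pass-through jump function with a PLUS_EXPR
   operation for the first argument, a constant jump function for the second
   one and an unknown jump function for the third, assuming nothing useful is
   known about the value returned by the hypothetical function unknown_fn.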

   All jump function types are described in detail in ipa-prop.h, together with
   the data structures that represent them and methods of accessing them.

   ipcp_generate_summary() is the main function of the first stage.

   Second stage - interprocedural analysis
   ========================================

   This stage is itself divided into two phases.  In the first, we propagate
   known values over the call graph, in the second, we make cloning decisions.
   It uses a different algorithm than the one in Callahan's original paper.

   First, we traverse the functions topologically from callers to callees and,
   for each strongly connected component (SCC), we propagate constants
   according to previously computed jump functions.  We also record what known
   values depend on other known values and estimate local effects.  Finally, we
   propagate cumulative information about these effects from dependent values
   to those on which they depend.

   Second, we again traverse the call graph in the same topological order and
   make clones for functions which we know are called with the same values in
   all contexts and decide about extra specialized clones of functions just for
   some contexts - these decisions are based on both local estimates and
   cumulative estimates propagated from callees.

   ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
   second stage.

   Third phase - materialization of clones, call statement updates.
   =================================================================

   This stage is currently performed by call graph code (mainly in cgraphunit.cc
   and tree-inline.cc) according to instructions inserted to the call graph by
   the second stage.  */

#define INCLUDE_ALGORITHM
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "predict.h"
#include "sreal.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-cp.h"
#include "ipa-prop.h"
#include "tree-pretty-print.h"
#include "tree-inline.h"
#include "ipa-fnsummary.h"
#include "ipa-utils.h"
#include "tree-ssa-ccp.h"
#include "stringpool.h"
#include "attribs.h"
#include "dbgcnt.h"
#include "symtab-clones.h"
#include "gimple-range.h"


/* Allocation pools for values and their sources in ipa-cp.  */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA-CP aggregate lattices");

/* The current estimated overall size of the program and its original
   value.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;

/* Return the param lattices structure corresponding to the Ith formal
   parameter of the function described by INFO.  */
static inline class ipcp_param_lattices *
ipa_get_parm_lattices (class ipa_node_params *info, int i)
{
  gcc_assert (i >= 0 && i < ipa_get_param_count (info));
  gcc_checking_assert (!info->ipcp_orig_node);
  return &(info->lattices[i]);
}

/* Return the lattice corresponding to the scalar value of the Ith formal
   parameter of the function described by INFO.  */
static inline ipcp_lattice<tree> *
ipa_get_scalar_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->itself;
}

/* Return the lattice corresponding to the polymorphic call context of the
   Ith formal parameter of the function described by INFO.  */
static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}

/* Return whether LAT is a lattice with a single constant and without an
   undefined value.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::is_single_const ()
{
  if (bottom || contains_variable || values_count != 1)
    return false;
  else
    return true;
}

/* Return true iff X and Y should be considered equal values by IPA-CP.  */

bool
values_equal_for_ipcp_p (tree x, tree y)
{
  gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);

  if (x == y)
    return true;

  if (TREE_CODE (x) == ADDR_EXPR
      && TREE_CODE (y) == ADDR_EXPR
      && (TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
	  || (TREE_CODE (TREE_OPERAND (x, 0)) == VAR_DECL
	      && DECL_IN_CONSTANT_POOL (TREE_OPERAND (x, 0))))
      && (TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL
	  || (TREE_CODE (TREE_OPERAND (y, 0)) == VAR_DECL
	      && DECL_IN_CONSTANT_POOL (TREE_OPERAND (y, 0)))))
    return TREE_OPERAND (x, 0) == TREE_OPERAND (y, 0)
	   || operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
			       DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
  else
    return operand_equal_p (x, y, 0);
}
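
/* As an illustration of the constant-pool case above, the addresses of two
   distinct artificial decls that are both initialized to the same string
   literal are considered equal by IPA-CP, because only that identical
   content can ever be read through either of them.  */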

/* Print V which is extracted from a value in a lattice to F.  This overloaded
   function is used to print tree constants.  */

static void
print_ipcp_constant_value (FILE * f, tree v)
{
  ipa_print_constant_value (f, v);
}

/* Print V which is extracted from a value in a lattice to F.  This overloaded
   function is used to print constant polymorphic call contexts.  */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump (f, false);
}

/* Print a lattice LAT to F.  */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
	fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      if (dump_benefits && prev)
	fprintf (f, " ");
      else if (!dump_benefits && prev)
	fprintf (f, ", ");
      else
	prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
	{
	  ipcp_value_source<valtype> *s;

	  if (val->self_recursion_generated_p ())
	    fprintf (f, " [self_gen(%i), from:",
		     val->self_recursion_generated_level);
	  else
	    fprintf (f, " [scc: %i, from:", val->scc_no);
	  for (s = val->sources; s; s = s->next)
	    fprintf (f, " %i(%f)", s->cs->caller->get_uid (),
		     s->cs->sreal_frequency ().to_double ());
	  fprintf (f, "]");
	}

      if (dump_benefits)
	fprintf (f, " [loc_time: %g, loc_size: %i, "
		 "prop_time: %g, prop_size: %i]\n",
		 val->local_time_benefit.to_double (), val->local_size_cost,
		 val->prop_time_benefit.to_double (), val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}

/* Print VALUE to F in a form which in usual cases does not take thousands of
   characters.
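
   For example, the 128-bit sign-extended value -256 is printed under this
   scheme as "0xf..f00" rather than as a long run of 'f' digits followed by
   "00".  */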

static void
ipcp_print_widest_int (FILE *f, const widest_int &value)
{
  if (value == -1)
    fprintf (f, "-1");
  else if (wi::arshift (value, 128) == -1)
    {
      char buf[35], *p = buf + 2;
      widest_int v = wi::zext (value, 128);
      size_t len;
      print_hex (v, buf);
      len = strlen (p);
      if (len == 32)
	{
	  fprintf (f, "0xf..f");
	  while (*p == 'f')
	    ++p;
	}
      else
	fprintf (f, "0xf..f%0*d", (int) (32 - len), 0);
      fputs (p, f);
    }
  else
    print_hex (value, f);
}

/* Print the bits lattice to F.  */

void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, "         Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, "         Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, "         Bits: value = ");
      ipcp_print_widest_int (f, get_value ());
      fprintf (f, ", mask = ");
      ipcp_print_widest_int (f, get_mask ());
      fprintf (f, "\n");
    }
}

/* Print value range lattice to F.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  m_vr.dump (f);
}

/* Print all ipcp_lattices of all functions to F.  */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info;

      info = ipa_node_params_sum->get (node);
      /* Skip unoptimized functions and constprop clones since we don't make
	 lattices for them.  */
      if (!info || info->ipcp_orig_node)
	continue;
      fprintf (f, "  Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
	{
	  struct ipcp_agg_lattice *aglat;
	  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  fprintf (f, "    param [%d]: ", i);
	  plats->itself.print (f, dump_sources, dump_benefits);
	  fprintf (f, "         ctxs: ");
	  plats->ctxlat.print (f, dump_sources, dump_benefits);
	  plats->bits_lattice.print (f);
	  fprintf (f, "         ");
	  plats->m_value_range.print (f);
	  fprintf (f, "\n");
	  if (plats->virt_call)
	    fprintf (f, "        virt_call flag set\n");

	  if (plats->aggs_bottom)
	    {
	      fprintf (f, "        AGGS BOTTOM\n");
	      continue;
	    }
	  if (plats->aggs_contain_variable)
	    fprintf (f, "        AGGS VARIABLE\n");
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    {
	      fprintf (f, "        %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
		       plats->aggs_by_ref ? "ref " : "", aglat->offset);
	      aglat->print (f, dump_sources, dump_benefits);
	    }
	}
    }
}

/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with NODE.  */

static void
determine_versionability (struct cgraph_node *node,
			  class ipa_node_params *info)
{
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
	   || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
	 vector copies of them, so IPA-cp and SIMD clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
	 copies of them, so IPA-cp and target clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; it breaks and for C++
     decloned constructors, inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
	 callers are inside of a comdat group.  */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     an offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
	 edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
	{
	  if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
	    reason = "external function which calls va_arg_pack";
	  if (DECL_FUNCTION_CODE (edge->callee->decl)
	      == BUILT_IN_VA_ARG_PACK_LEN)
	    reason = "external function which calls va_arg_pack_len";
	}

  if (reason && dump_file && !node->alias && !node->thunk)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
	     node->dump_name (), reason);

  info->versionable = (reason == NULL);
}

/* Return true if it is at all technically possible to create clones of
   NODE.  */

static bool
ipcp_versionable_function_p (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  return info && info->versionable;
}

/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
{
  /* If requested (see below), self-recursive call counts are summed into this
     field.  */
  profile_count rec_count_sum;
  /* The sum of all ipa counts of all the other (non-recursive) calls.  */
  profile_count count_sum;
  /* Sum of all frequencies for all calls.  */
  sreal freq_sum;
  /* Number of calls and calls considered interesting respectively.  */
  int n_calls, n_interesting_calls;
  /* If ITSELF is set up, also count the number of non-self-recursive
     calls.  */
  int n_nonrec_calls;
  /* If non-NULL, this is the node itself and calls from it should have their
     counts included in rec_count_sum and not count_sum.  */
  cgraph_node *itself;
  /* True if there is a caller that has no IPA profile.  */
  bool called_without_ipa_profile;
};

/* Initialize fields of STATS to zeroes and optionally set it up so that
   self-recursive edges from ITSELF are counted separately.  */

static inline void
init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
{
  stats->rec_count_sum = profile_count::zero ();
  stats->count_sum = profile_count::zero ();
  stats->n_calls = 0;
  stats->n_interesting_calls = 0;
  stats->n_nonrec_calls = 0;
  stats->freq_sum = 0;
  stats->itself = itself;
  stats->called_without_ipa_profile = false;
}

/* We want to propagate across edges that may be executed, however
   we do not want to check maybe_hot, since the call itself may be cold
   while the callee contains some heavy loop which makes propagation still
   relevant.

   In particular, even an edge called only once may lead to significant
   improvement.  */

static bool
cs_interesting_for_ipcp_p (cgraph_edge *e)
{
  /* If profile says the edge is executed, we want to optimize.  */
  if (e->count.ipa ().nonzero_p ())
    return true;
  /* If the local (possibly guessed or adjusted 0) profile claims the edge is
     not executed, do not propagate.
     Do not trust AFDO since a branch needs to be executed multiple
     times to count while we want to propagate even a call called
     once during the train run if the callee is important.  */
  if (e->count.initialized_p () && !e->count.nonzero_p ()
      && e->count.quality () != AFDO)
    return false;
  /* If we have zero IPA profile, still consider edge for cloning
     in case we do partial training.  */
  if (e->count.ipa ().initialized_p ()
      && !opt_for_fn (e->callee->decl, flag_profile_partial_training))
    return false;
  return true;
}
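
/* For instance, an edge whose count is a reliably measured zero (and not
   just an AFDO approximation) is not interesting above, whereas an edge with
   no profile information at all still is, because the callee may contain an
   important hot loop.  */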

/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.  */

static bool
gather_caller_stats (struct cgraph_node *node, void *data)
{
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      {
	ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
	if (info && info->node_dead)
	  continue;

	if (cs->count.ipa ().initialized_p ())
	  {
	    if (stats->itself && stats->itself == cs->caller)
	      stats->rec_count_sum += cs->count.ipa ();
	    else
	      stats->count_sum += cs->count.ipa ();
	  }
	else
	  stats->called_without_ipa_profile = true;
	stats->freq_sum += cs->sreal_frequency ();
	stats->n_calls++;
	if (stats->itself && stats->itself != cs->caller)
	  stats->n_nonrec_calls++;

	/* If the profile is known to be zero, we do not want to clone for
	   performance.  However, if the call is cold, the called function
	   may still contain important hot loops.  */
	if (cs_interesting_for_ipcp_p (cs))
	  stats->n_interesting_calls++;
      }
  return false;
}

/* Return true if this NODE is a viable candidate for cloning.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "-fipa-cp-clone disabled.\n",
		 node->dump_name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "optimizing it for size.\n",
		 node->dump_name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);

  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
		 node->dump_name ());
      return true;
    }
  if (!stats.n_interesting_calls)
    {
      if (dump_file)
	fprintf (dump_file, "Not considering %s for cloning; "
		 "no calls considered interesting by profile.\n",
		 node->dump_name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
	     node->dump_name ());
  return true;
}

template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val.  */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
};

/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order.  */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
    constants ()
  {}
};

/* Skip edges from and to nodes without IPA-CP enabled.
   Ignore unavailable symbols.  */

static bool
ignore_edge_p (cgraph_edge *e)
{
  enum availability avail;
  cgraph_node *ultimate_target
    = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);

  return (avail <= AVAIL_INTERPOSABLE
	  || !opt_for_fn (ultimate_target->decl, optimize)
	  || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
}

/* Allocate the arrays in TOPO and topologically sort the nodes into order.  */

static void
build_toporder_info (class ipa_topo_info *topo)
{
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true,
					ignore_edge_p);
}

/* Free information about strongly connected components and the arrays in
   TOPO.  */

static void
free_toporder_info (class ipa_topo_info *topo)
{
  ipa_free_postorder_info ();
  free (topo->order);
  free (topo->stack);
}

/* Add NODE to the stack in TOPO, unless it is already there.  */

static inline void
push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  if (info->node_enqueued)
    return;
  info->node_enqueued = 1;
  topo->stack[topo->stack_top++] = node;
}

/* Pop a node from the stack in TOPO and return it or return NULL if the stack
   is empty.  */

static struct cgraph_node *
pop_node_from_stack (class ipa_topo_info *topo)
{
  if (topo->stack_top)
    {
      struct cgraph_node *node;
      topo->stack_top--;
      node = topo->stack[topo->stack_top];
      ipa_node_params_sum->get (node)->node_enqueued = 0;
      return node;
    }
  else
    return NULL;
}

/* Set lattice LAT to bottom and return true if it previously was not set as
   such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_to_bottom ()
{
  bool ret = !bottom;
  bottom = true;
  return ret;
}

/* Mark lattice as containing an unknown value and return true if it previously
   was not marked as such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_contains_variable ()
{
  bool ret = !contains_variable;
  contains_variable = true;
  return ret;
}

/* Set all aggregate lattices in PLATS to bottom and return true if they were
   not previously set as such.  */

static inline bool
set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_bottom;
  plats->aggs_bottom = true;
  return ret;
}

/* Mark all aggregate lattices in PLATS as containing an unknown value and
   return true if they were not previously marked as such.  */

static inline bool
set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_contain_variable;
  plats->aggs_contain_variable = true;
  return ret;
}

/* Meet the current value of the lattice with the value range of lattice
   OTHER.  */

bool
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
{
  return meet_with_1 (other.m_vr);
}

/* Meet the current value of the lattice with the range described by
   P_VR.  */

bool
ipcp_vr_lattice::meet_with (const vrange &p_vr)
{
  return meet_with_1 (p_vr);
}

/* Meet the current value of the lattice with the range described by
   OTHER_VR.  Return TRUE if anything changed.  */

bool
ipcp_vr_lattice::meet_with_1 (const vrange &other_vr)
{
  if (bottom_p ())
    return false;

  if (other_vr.varying_p ())
    return set_to_bottom ();

  bool res;
  if (flag_checking)
    {
      value_range save (m_vr);
      res = m_vr.union_ (other_vr);
      gcc_assert (res == (m_vr != save));
    }
  else
    res = m_vr.union_ (other_vr);
  return res;
}

/* Return true if value range information in the lattice is yet unknown.  */

bool
ipcp_vr_lattice::top_p () const
{
  return m_vr.undefined_p ();
}

/* Return true if value range information in the lattice is known to be
   unusable.  */

bool
ipcp_vr_lattice::bottom_p () const
{
  return m_vr.varying_p ();
}

/* Set value range information in the lattice to bottom.  Return true if it
   previously was in a different state.  */

bool
ipcp_vr_lattice::set_to_bottom ()
{
  if (m_vr.varying_p ())
    return false;

  /* Setting an unsupported type here forces the temporary to default
     to unsupported_range, which can handle VARYING/DEFINED ranges,
     but nothing else (union, intersect, etc).  This allows us to set
     bottoms on any ranges, and is safe as all users of the lattice
     check for bottom first.  */
  m_vr.set_type (void_type_node);
  m_vr.set_varying (void_type_node);

  return true;
}

/* Set the lattice value to bottom, if it isn't already the case.  */

bool
ipcp_bits_lattice::set_to_bottom ()
{
  if (bottom_p ())
    return false;
  m_lattice_val = IPA_BITS_VARYING;
  m_value = 0;
  m_mask = -1;
  return true;
}

/* Set to constant if it isn't already.  Only meant to be called
   when switching state from TOP.  */

bool
ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
{
  gcc_assert (top_p ());
  m_lattice_val = IPA_BITS_CONSTANT;
  m_value = wi::bit_and (wi::bit_not (mask), value);
  m_mask = mask;
  return true;
}

/* Return true if any of the known bits are non-zero.  */

bool
ipcp_bits_lattice::known_nonzero_p () const
{
  if (!constant_p ())
    return false;
  return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
}

/* Convert operand to value, mask form.  */

void
ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
{
  wide_int get_nonzero_bits (const_tree);

  if (TREE_CODE (operand) == INTEGER_CST)
    {
      *valuep = wi::to_widest (operand);
      *maskp = 0;
    }
  else
    {
      *valuep = 0;
      *maskp = -1;
    }
}
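
/* In the <value, mask> encoding used above (the same one as in tree-ssa-ccp),
   a set mask bit means the corresponding bit is unknown, while a clear mask
   bit means it is known and equal to the corresponding bit of value.  For
   example, INTEGER_CST 6 becomes <6, 0> and a completely unknown operand
   becomes <0, -1>.  */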

/* Meet operation, similar to ccp_lattice_meet: we xor the values, and
   wherever this->m_value and VALUE differ at the same bit position, we drop
   that bit to varying.  Return true if the mask has changed.
   This function assumes that the lattice value is in CONSTANT state.  If
   DROP_ALL_ONES, mask out any known bits with value one afterwards.  */

bool
ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
				unsigned precision, bool drop_all_ones)
{
  gcc_assert (constant_p ());

  widest_int old_mask = m_mask;
  m_mask = (m_mask | mask) | (m_value ^ value);
  if (drop_all_ones)
    m_mask |= m_value;

  widest_int cap_mask = wi::shifted_mask <widest_int> (0, precision, true);
  m_mask |= cap_mask;
  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  m_value &= ~m_mask;
  return m_mask != old_mask;
}
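
/* For instance, meeting the known constants 4 (0b100) and 6 (0b110) in the
   function above yields <value 4, mask 2>: bit 1 differs between the two and
   becomes unknown, while bits 0 and 2 remain known (within PRECISION).  */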

/* Meet the bits lattice with operand
   described by <VALUE, MASK, SGN, PRECISION>.  */

bool
ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
			      unsigned precision)
{
  if (bottom_p ())
    return false;

  if (top_p ())
    {
      if (wi::sext (mask, precision) == -1)
	return set_to_bottom ();
      return set_to_constant (value, mask);
    }

  return meet_with_1 (value, mask, precision, false);
}

/* Meet the bits lattice with the result of bit_value_binop (other, operand)
   if CODE is a binary operation, or of bit_value_unop (other) if CODE is a
   unary operation.  When CODE is NOP_EXPR, no adjustment is required.  If
   DROP_ALL_ONES, mask out any known bits with value one afterwards.  */

bool
ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
			      signop sgn, enum tree_code code, tree operand,
			      bool drop_all_ones)
{
  if (other.bottom_p ())
    return set_to_bottom ();

  if (bottom_p () || other.top_p ())
    return false;

  widest_int adjusted_value, adjusted_mask;

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      tree type = TREE_TYPE (operand);
      widest_int o_value, o_mask;
      get_value_and_mask (operand, &o_value, &o_mask);

      bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
		       sgn, precision, other.get_value (), other.get_mask (),
		       TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);

      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
    }

  else if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      bit_value_unop (code, sgn, precision, &adjusted_value,
		      &adjusted_mask, sgn, precision, other.get_value (),
		      other.get_mask ());

      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
    }

  else
    return set_to_bottom ();

  if (top_p ())
    {
      if (drop_all_ones)
	{
	  adjusted_mask |= adjusted_value;
	  adjusted_value &= ~adjusted_mask;
	}
      widest_int cap_mask = wi::shifted_mask <widest_int> (0, precision, true);
      adjusted_mask |= cap_mask;
      if (wi::sext (adjusted_mask, precision) == -1)
	return set_to_bottom ();
      return set_to_constant (adjusted_value, adjusted_mask);
    }
  else
    return meet_with_1 (adjusted_value, adjusted_mask, precision,
			drop_all_ones);
}

/* Dump the contents of the list to FILE.  */

void
ipa_argagg_value_list::dump (FILE *f)
{
  bool comma = false;
  for (const ipa_argagg_value &av : m_elts)
    {
      fprintf (f, "%s %i[%u]=", comma ? "," : "",
	       av.index, av.unit_offset);
      print_generic_expr (f, av.value);
      if (av.by_ref)
	fprintf (f, "(by_ref)");
      if (av.killed)
	fprintf (f, "(killed)");
      comma = true;
    }
  fprintf (f, "\n");
}

/* Dump the contents of the list to stderr.  */

void
ipa_argagg_value_list::debug ()
{
  dump (stderr);
}

/* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
   NULL if there is no such constant.  */

const ipa_argagg_value *
ipa_argagg_value_list::get_elt (int index, unsigned unit_offset) const
{
  ipa_argagg_value key;
  key.index = index;
  key.unit_offset = unit_offset;
  const ipa_argagg_value *res
    = std::lower_bound (m_elts.begin (), m_elts.end (), key,
			[] (const ipa_argagg_value &elt,
			    const ipa_argagg_value &val)
			{
			  if (elt.index < val.index)
			    return true;
			  if (elt.index > val.index)
			    return false;
			  if (elt.unit_offset < val.unit_offset)
			    return true;
			  return false;
			});

  if (res == m_elts.end ()
      || res->index != index
      || res->unit_offset != unit_offset)
    res = nullptr;

  /* TODO: perhaps remove the check (that the underlying array is indeed
     sorted) if it turns out it can be too slow? */
  if (!flag_checking)
    return res;

  const ipa_argagg_value *slow_res = NULL;
  int prev_index = -1;
  unsigned prev_unit_offset = 0;
  for (const ipa_argagg_value &av : m_elts)
    {
      gcc_assert (prev_index < 0
		  || prev_index < av.index
		  || prev_unit_offset < av.unit_offset);
      prev_index = av.index;
      prev_unit_offset = av.unit_offset;
      if (av.index == index
	  && av.unit_offset == unit_offset)
	slow_res = &av;
    }
  gcc_assert (res == slow_res);

  return res;
}

/* Return the first item describing a constant stored for parameter with INDEX,
   regardless of offset or reference, or NULL if there is no such constant.  */

const ipa_argagg_value *
ipa_argagg_value_list::get_elt_for_index (int index) const
{
  const ipa_argagg_value *res
    = std::lower_bound (m_elts.begin (), m_elts.end (), index,
			[] (const ipa_argagg_value &elt, unsigned idx)
			{
			  return elt.index < idx;
			});
  if (res == m_elts.end ()
      || res->index != index)
    res = nullptr;
  return res;
}

/* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
   performing any check of whether value is passed by reference, or NULL_TREE
   if there is no such constant.  */

tree
ipa_argagg_value_list::get_value (int index, unsigned unit_offset) const
{
  const ipa_argagg_value *av = get_elt (index, unit_offset);
  return av ? av->value : NULL_TREE;
}

/* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
   passed by reference or not according to BY_REF, or NULL_TREE if there is
   no such constant.  */

tree
ipa_argagg_value_list::get_value (int index, unsigned unit_offset,
				  bool by_ref) const
{
  const ipa_argagg_value *av = get_elt (index, unit_offset);
  if (av && av->by_ref == by_ref)
    return av->value;
  return NULL_TREE;
}

/* Return true if all elements present in OTHER are also present in this
   list.  */

bool
ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list &other) const
{
  unsigned j = 0;
  for (unsigned i = 0; i < other.m_elts.size (); i++)
    {
      unsigned other_index = other.m_elts[i].index;
      unsigned other_offset = other.m_elts[i].unit_offset;

      while (j < m_elts.size ()
	     && (m_elts[j].index < other_index
		 || (m_elts[j].index == other_index
		     && m_elts[j].unit_offset < other_offset)))
	j++;

      if (j >= m_elts.size ()
	  || m_elts[j].index != other_index
	  || m_elts[j].unit_offset != other_offset
	  || m_elts[j].by_ref != other.m_elts[i].by_ref
	  || !m_elts[j].value
	  || !values_equal_for_ipcp_p (m_elts[j].value, other.m_elts[i].value))
	return false;
    }
  return true;
}
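
/* To illustrate, in the index[unit_offset]=value notation of the dump method
   above, the list { 0[0]=1, 1[4]=2 } is a superset of { 1[4]=2 } but not of
   { 1[8]=2 }, assuming matching by_ref flags.  */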

/* Push all items in this list that describe parameter SRC_INDEX into RES as
   ones describing DEST_INDEX while subtracting UNIT_DELTA from their unit
   offsets but skip those which would end up with a negative offset.  */

void
ipa_argagg_value_list::push_adjusted_values (unsigned src_index,
					     unsigned dest_index,
					     unsigned unit_delta,
					     vec<ipa_argagg_value> *res) const
{
  const ipa_argagg_value *av = get_elt_for_index (src_index);
  if (!av)
    return;
  unsigned prev_unit_offset = 0;
  bool first = true;
  for (; av < m_elts.end (); ++av)
    {
      if (av->index > src_index)
	return;
      if (av->index == src_index
	  && (av->unit_offset >= unit_delta)
	  && av->value)
	{
	  ipa_argagg_value new_av;
	  gcc_checking_assert (av->value);
	  new_av.value = av->value;
	  new_av.unit_offset = av->unit_offset - unit_delta;
	  new_av.index = dest_index;
	  new_av.by_ref = av->by_ref;
	  gcc_assert (!av->killed);
	  new_av.killed = false;

	  /* Quick check that the offsets we push are indeed increasing.  */
	  gcc_assert (first
		      || new_av.unit_offset > prev_unit_offset);
	  prev_unit_offset = new_av.unit_offset;
	  first = false;

	  res->safe_push (new_av);
	}
    }
}

/* Push to RES information about single lattices describing aggregate values in
   PLATS as those describing parameter DEST_INDEX and the original offset minus
   UNIT_DELTA.  Return true if any item has been pushed to RES.  */

static bool
push_agg_values_from_plats (ipcp_param_lattices *plats, int dest_index,
			    unsigned unit_delta,
			    vec<ipa_argagg_value> *res)
{
  if (plats->aggs_contain_variable)
    return false;

  bool pushed_sth = false;
  bool first = true;
  unsigned prev_unit_offset = 0;
  for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
    if (aglat->is_single_const ()
	&& (aglat->offset / BITS_PER_UNIT - unit_delta) >= 0)
      {
	ipa_argagg_value iav;
	iav.value = aglat->values->value;
	iav.unit_offset = aglat->offset / BITS_PER_UNIT - unit_delta;
	iav.index = dest_index;
	iav.by_ref = plats->aggs_by_ref;
	iav.killed = false;

	gcc_assert (first
		    || iav.unit_offset > prev_unit_offset);
	prev_unit_offset = iav.unit_offset;
	first = false;

	pushed_sth = true;
	res->safe_push (iav);
      }
  return pushed_sth;
}

/* Turn all values in ELTS that are not present in OTHER into NULL_TREEs.
   Return the number of remaining valid entries.  */

static unsigned
intersect_argaggs_with (vec<ipa_argagg_value> &elts,
			const vec<ipa_argagg_value> &other)
{
  unsigned valid_entries = 0;
  unsigned j = 0;
  for (unsigned i = 0; i < elts.length (); i++)
    {
      if (!elts[i].value)
	continue;

      unsigned this_index = elts[i].index;
      unsigned this_offset = elts[i].unit_offset;

      while (j < other.length ()
	     && (other[j].index < this_index
		 || (other[j].index == this_index
		     && other[j].unit_offset < this_offset)))
	j++;

      if (j >= other.length ())
	{
	  elts[i].value = NULL_TREE;
	  continue;
	}

      if (other[j].index == this_index
	  && other[j].unit_offset == this_offset
	  && other[j].by_ref == elts[i].by_ref
	  && other[j].value
	  && values_equal_for_ipcp_p (other[j].value, elts[i].value))
	valid_entries++;
      else
	elts[i].value = NULL_TREE;
    }
  return valid_entries;
}

/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far.  */

static inline bool
set_all_contains_variable (class ipcp_param_lattices *plats)
{
  bool ret;
  ret = plats->itself.set_contains_variable ();
  ret |= plats->ctxlat.set_contains_variable ();
  ret |= set_agg_lats_contain_variable (plats);
  ret |= plats->bits_lattice.set_to_bottom ();
  ret |= plats->m_value_range.set_to_bottom ();
  return ret;
}

/* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
   points to by the number of callers to NODE.  */

static bool
count_callers (cgraph_node *node, void *data)
{
  int *caller_count = (int *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    /* Local thunks can be handled transparently, but if the thunk cannot
       be optimized out, count it as a real use.  */
    if (!cs->caller->thunk || !cs->caller->local)
      ++*caller_count;
  return false;
}

/* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
   the one caller of some other node.  Set the caller's corresponding flag.  */

static bool
set_single_call_flag (cgraph_node *node, void *)
{
  cgraph_edge *cs = node->callers;
  /* Local thunks can be handled transparently, skip them.  */
  while (cs && cs->caller->thunk && cs->caller->local)
    cs = cs->next_caller;
  if (cs)
    if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
      {
	info->node_calling_single_call = true;
	return true;
      }
  return false;
}

/* Initialize ipcp_lattices.  */

static void
initialize_node_lattices (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  struct cgraph_edge *ie;
  bool disable = false, variable = false;
  int i;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!ipa_get_param_count (info))
    disable = true;
  else if (node->local)
    {
      int caller_count = 0;
      node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
						true);
      gcc_checking_assert (caller_count > 0);
      if (caller_count == 1)
	node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
						  NULL, true);
    }
  else
    {
      /* When cloning is allowed, we can assume that externally visible
	 functions are not called.  We will compensate this by cloning
	 later.  */
      if (ipcp_versionable_function_p (node)
	  && ipcp_cloning_candidate_p (node))
	variable = true;
      else
	disable = true;
    }

  if (dump_file && (dump_flags & TDF_DETAILS)
      && !node->alias && !node->thunk)
    {
      fprintf (dump_file, "Initializing lattices of %s\n",
	       node->dump_name ());
      if (disable || variable)
	fprintf (dump_file, "  Marking all lattices as %s\n",
		 disable ? "BOTTOM" : "VARIABLE");
    }

  auto_vec<bool, 16> surviving_params;
  bool pre_modified = false;

  clone_info *cinfo = clone_info::get (node);

  if (!disable && cinfo && cinfo->param_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
	 parameters of the prevailing decl as the m_always_copy_start.
	 Handling any other value would complicate the code below, so for the
	 time being let's only assert it is so.  */
      gcc_assert ((cinfo->param_adjustments->m_always_copy_start
		   == ipa_get_param_count (info))
		  || cinfo->param_adjustments->m_always_copy_start < 0);

      pre_modified = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);

      if (dump_file && (dump_flags & TDF_DETAILS)
	  && !node->alias && !node->thunk)
	{
	  bool first = true;
	  for (int j = 0; j < ipa_get_param_count (info); j++)
	    {
	      if (j < (int) surviving_params.length ()
		  && surviving_params[j])
		continue;
	      if (first)
		{
		  fprintf (dump_file,
			   "  The following parameters are dead on arrival:");
		  first = false;
		}
	      fprintf (dump_file, " %u", j);
	    }
	  if (!first)
	    fprintf (dump_file, "\n");
	}
    }

  for (i = 0; i < ipa_get_param_count (info); i++)
    {
      ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      tree type = ipa_get_type (info, i);
      if (disable
	  || !type
	  || (pre_modified && (surviving_params.length () <= (unsigned) i
			       || !surviving_params[i])))
	{
	  plats->itself.set_to_bottom ();
	  plats->ctxlat.set_to_bottom ();
	  set_agg_lats_to_bottom (plats);
	  plats->bits_lattice.set_to_bottom ();
	  plats->m_value_range.init (type);
	  plats->m_value_range.set_to_bottom ();
	}
      else
	{
	  plats->m_value_range.init (type);
	  if (variable)
	    set_all_contains_variable (plats);
	}
    }

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    if (ie->indirect_info->polymorphic
	&& ie->indirect_info->param_index >= 0)
      {
	gcc_checking_assert (ie->indirect_info->param_index >= 0);
	ipa_get_parm_lattices (info,
			       ie->indirect_info->param_index)->virt_call = 1;
      }
}

/* Return VALUE if it is NULL_TREE or if it can be directly safely IPA-CP
   propagated to a parameter of type PARAM_TYPE, or return a fold-converted
   VALUE to PARAM_TYPE if that is possible.  Return NULL_TREE otherwise.  */

static tree
ipacp_value_safe_for_type (tree param_type, tree value)
{
  if (!value)
    return NULL_TREE;
  tree val_type = TREE_TYPE (value);
  if (param_type == val_type
      || useless_type_conversion_p (param_type, val_type))
    return value;
  if (fold_convertible_p (param_type, value))
    return fold_convert (param_type, value);
  else
    return NULL_TREE;
}
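
/* For example (purely illustrative), an INTEGER_CST of type int is returned
   fold-converted when PARAM_TYPE is long, whereas a value whose type cannot
   be safely converted to PARAM_TYPE yields NULL_TREE and is therefore not
   propagated.  */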

/* Return the result of a (possibly arithmetic) operation determined by OPCODE
   on the constant value INPUT.  OPERAND is the second operand and is required
   for binary operations.  RES_TYPE, required when OPCODE is not NOP_EXPR, is
   the type in which any operation is to be performed.  Return NULL_TREE if the
   result cannot be determined or cannot be considered an interprocedural
   invariant.  */

static tree
ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
			 tree res_type)
{
  tree res;

  if (opcode == NOP_EXPR)
    return input;
  if (!is_gimple_ip_invariant (input))
    return NULL_TREE;

  if (opcode == ASSERT_EXPR)
    {
      if (values_equal_for_ipcp_p (input, operand))
	return input;
      else
	return NULL_TREE;
    }

  if (TREE_CODE_CLASS (opcode) == tcc_unary)
    res = fold_unary (opcode, res_type, input);
  else
    res = fold_binary (opcode, res_type, input, operand);

  if (res && !is_gimple_ip_invariant (res))
    return NULL_TREE;

  return res;
}
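
/* For instance (an illustrative case), with OPCODE PLUS_EXPR, INPUT 5,
   OPERAND 1 and an integer RES_TYPE, the function above folds the addition
   and returns the interprocedural invariant 6.  */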

/* Return the result of an ancestor jump function JFUNC on the constant value
   INPUT.  Return NULL_TREE if that cannot be determined.  */

static tree
ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
  gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
  if (TREE_CODE (input) == ADDR_EXPR)
    {
      gcc_checking_assert (is_gimple_ip_invariant_address (input));
      poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
      if (known_eq (off, 0))
	return input;
      poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
      return build1 (ADDR_EXPR, TREE_TYPE (input),
		     fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
				  build_int_cst (ptr_type_node, byte_offset)));
    }
  else if (ipa_get_jf_ancestor_keep_null (jfunc)
	   && zerop (input))
    return input;
  else
    return NULL_TREE;
}
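
/* As an illustration, for INPUT &s and an ancestor offset of 32 bits, the
   function above builds the equivalent of &MEM[(&s) + 4B], i.e. the address
   of the part of s that starts four bytes into it.  */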

/* Determine whether JFUNC evaluates to a single known constant value and if
   so, return it.  Otherwise return NULL.  INFO describes the caller node or
   the one it is inlined to, so that pass-through jump functions can be
   evaluated.  PARM_TYPE is the type of the parameter to which the result is
   passed.  */

tree
ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
		      tree parm_type)
{
  if (jfunc->type == IPA_JF_CONST)
    return ipa_get_jf_constant (jfunc);
  else if (jfunc->type == IPA_JF_PASS_THROUGH
	   || jfunc->type == IPA_JF_ANCESTOR)
    {
      tree input;
      int idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
	idx = ipa_get_jf_ancestor_formal_id (jfunc);

      if (info->ipcp_orig_node)
	input = info->known_csts[idx];
      else
	{
	  ipcp_lattice<tree> *lat;

	  if (info->lattices.is_empty ()
	      || idx >= ipa_get_param_count (info))
	    return NULL_TREE;
	  lat = ipa_get_scalar_lat (info, idx);
	  if (!lat->is_single_const ())
	    return NULL_TREE;
	  input = lat->values->value;
	}

      if (!input)
	return NULL_TREE;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  if (!parm_type)
	    return NULL_TREE;
	  enum tree_code opcode = ipa_get_jf_pass_through_operation (jfunc);
	  tree op2 = ipa_get_jf_pass_through_operand (jfunc);
	  tree op_type
	    = (opcode == NOP_EXPR) ? NULL_TREE
	      : ipa_get_jf_pass_through_op_type (jfunc);
	  tree cstval = ipa_get_jf_arith_result (opcode, input, op2, op_type);
	  return ipacp_value_safe_for_type (parm_type, cstval);
	}
      else
	return ipa_get_jf_ancestor_result (jfunc, input);
    }
  else
    return NULL_TREE;
}

/* Determine whether JFUNC evaluates to single known polymorphic context, given
   that INFO describes the caller node or the one it is inlined to, CS is the
   call graph edge corresponding to JFUNC and CSIDX index of the described
   parameter.  */

ipa_polymorphic_call_context
ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
			ipa_jump_func *jfunc)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  ipa_polymorphic_call_context ctx;
  ipa_polymorphic_call_context *edge_ctx
    = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;

  if (edge_ctx && !edge_ctx->useless_p ())
    ctx = *edge_ctx;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_polymorphic_call_context srcctx;
      int srcidx;
      bool type_preserved = true;
      if (jfunc->type == IPA_JF_PASS_THROUGH)
	{
	  if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
	    return ctx;
	  type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
	  srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
	}
      else
	{
	  type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
	  srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
	}
      if (info->ipcp_orig_node)
	{
	  if (info->known_contexts.exists ())
	    srcctx = info->known_contexts[srcidx];
	}
      else
	{
	  if (info->lattices.is_empty ()
	      || srcidx >= ipa_get_param_count (info))
	    return ctx;
	  ipcp_lattice<ipa_polymorphic_call_context> *lat;
	  lat = ipa_get_poly_ctx_lat (info, srcidx);
	  if (!lat->is_single_const ())
	    return ctx;
	  srcctx = lat->values->value;
	}
      if (srcctx.useless_p ())
	return ctx;
      if (jfunc->type == IPA_JF_ANCESTOR)
	srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
      if (!type_preserved)
	srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
      srcctx.combine_with (ctx);
      return srcctx;
    }

  return ctx;
}

/* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
   DST_TYPE on value range in SRC_VR and store it to DST_VR.  Return true if
   the result is a range that is not VARYING nor UNDEFINED.  */

bool
ipa_vr_operation_and_type_effects (vrange &dst_vr,
				   const vrange &src_vr,
				   enum tree_code operation,
				   tree dst_type, tree src_type)
{
  if (!ipa_vr_supported_type_p (dst_type)
      || !ipa_vr_supported_type_p (src_type))
    return false;

  range_op_handler handler (operation);
  if (!handler)
    return false;

  value_range varying (dst_type);
  varying.set_varying (dst_type);

  return (handler.operand_check_p (dst_type, src_type, dst_type)
	  && handler.fold_range (dst_vr, dst_type, src_vr, varying)
	  && !dst_vr.varying_p ()
	  && !dst_vr.undefined_p ());
}

/* Same as above, but the SRC_VR argument is an IPA_VR which must
   first be extracted into a vrange.  */

bool
ipa_vr_operation_and_type_effects (vrange &dst_vr,
				   const ipa_vr &src_vr,
				   enum tree_code operation,
				   tree dst_type, tree src_type)
{
  value_range tmp;
  src_vr.get_vrange (tmp);
  return ipa_vr_operation_and_type_effects (dst_vr, tmp, operation,
					    dst_type, src_type);
}
1737
1738/* Given a PASS_THROUGH jump function JFUNC that takes as its source SRC_VR of
1739 SRC_TYPE and the result needs to be DST_TYPE, if any value range information
1740 can be deduced at all, intersect VR with it. CONTEXT_NODE is the call graph
1741 node representing the function for which optimization flags should be
1742 evaluated. */
1743
1744static void
1745ipa_vr_intersect_with_arith_jfunc (vrange &vr,
1746 ipa_jump_func *jfunc,
1747 cgraph_node *context_node,
1748 const value_range &src_vr,
1749 tree src_type,
1750 tree dst_type)
1751{
1752 if (src_vr.undefined_p () || src_vr.varying_p ())
1753 return;
1754
1755 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
1756 if (TREE_CODE_CLASS (operation) == tcc_unary)
1757 {
1758 value_range op_res;
1759 const value_range *inter_vr;
1760 if (operation != NOP_EXPR)
1761 {
1762 tree operation_type = ipa_get_jf_pass_through_op_type (jfunc);
1763 op_res.set_varying (operation_type);
1764 if (!ipa_vr_operation_and_type_effects (dst_vr&: op_res, src_vr, operation,
1765 dst_type: operation_type, src_type))
1766 return;
1767 if (src_type == dst_type)
1768 {
1769 vr.intersect (op_res);
1770 return;
1771 }
1772 inter_vr = &op_res;
1773 src_type = operation_type;
1774 }
1775 else
1776 inter_vr = &src_vr;
1777
1778 value_range tmp_res (dst_type);
1779 if (ipa_vr_operation_and_type_effects (dst_vr&: tmp_res, src_vr: *inter_vr, operation: NOP_EXPR,
1780 dst_type, src_type))
1781 vr.intersect (tmp_res);
1782 return;
1783 }
1784
1785 tree operand = ipa_get_jf_pass_through_operand (jfunc);
1786 range_op_handler handler (operation);
1787 if (!handler)
1788 return;
1789 value_range op_vr (TREE_TYPE (operand));
1790 ipa_get_range_from_ip_invariant (r&: op_vr, val: operand, node: context_node);
1791
1792 tree operation_type = ipa_get_jf_pass_through_op_type (jfunc);
1793 value_range op_res (operation_type);
1794 if (!ipa_vr_supported_type_p (type: operation_type)
1795 || !handler.operand_check_p (operation_type, src_type, op_vr.type ())
1796 || !handler.fold_range (r&: op_res, type: operation_type, lh: src_vr, rh: op_vr))
1797 return;
1798
1799 value_range tmp_res (dst_type);
1800 if (ipa_vr_operation_and_type_effects (dst_vr&: tmp_res, src_vr: op_res, operation: NOP_EXPR, dst_type,
1801 src_type: operation_type))
1802 vr.intersect (tmp_res);
1803}
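
/* A small example of the above: for a call such as

     void g (int);
     void f (int x)
     {
       g (x + 4);
     }

   the argument is described by a pass-through jump function with operation
   PLUS_EXPR and constant operand 4. If the range of x is known to be
   [0, 10], the code above folds the operation and intersects VR with
   [4, 14], modulo any conversion from the operation type to DST_TYPE. */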
1804
/* Determine the range of JFUNC given that INFO describes the caller node or
   the one it is inlined to, CS is the call graph edge corresponding to JFUNC
   and PARM_TYPE is the type of the parameter. */
1808
1809void
1810ipa_value_range_from_jfunc (vrange &vr,
1811 ipa_node_params *info, cgraph_edge *cs,
1812 ipa_jump_func *jfunc, tree parm_type)
1813{
1814 vr.set_varying (parm_type);
1815
1816 if (jfunc->m_vr && jfunc->m_vr->known_p ())
1817 ipa_vr_operation_and_type_effects (dst_vr&: vr,
1818 src_vr: *jfunc->m_vr,
1819 operation: NOP_EXPR, dst_type: parm_type,
1820 src_type: jfunc->m_vr->type ());
1821 if (vr.singleton_p ())
1822 return;
1823
1824 if (jfunc->type == IPA_JF_PASS_THROUGH)
1825 {
1826 ipcp_transformation *sum
1827 = ipcp_get_transformation_summary (node: cs->caller->inlined_to
1828 ? cs->caller->inlined_to
1829 : cs->caller);
1830 if (!sum || !sum->m_vr)
1831 return;
1832
1833 int idx = ipa_get_jf_pass_through_formal_id (jfunc);
1834
1835 if (!(*sum->m_vr)[idx].known_p ())
1836 return;
1837 tree src_type = ipa_get_type (info, i: idx);
1838 value_range srcvr;
1839 (*sum->m_vr)[idx].get_vrange (srcvr);
1840
1841 ipa_vr_intersect_with_arith_jfunc (vr, jfunc, context_node: cs->caller, src_vr: srcvr, src_type,
1842 dst_type: parm_type);
1843 }
1844}
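
/* For example, if the jump function records that at this particular call
   site the argument always fits into [0, 100] and the caller's own
   transformation summary says the source parameter is known to be within
   [10, 50], the intersection above leaves the callee parameter with
   [10, 50]. */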
1845
/* Determine whether ITEM, a jump function for an aggregate part, evaluates
   to a single known constant value and if so, return it. Otherwise return
   NULL. NODE and INFO describe the caller node or the one it is inlined
   to, and its related info. */
1850
1851tree
1852ipa_agg_value_from_jfunc (ipa_node_params *info, cgraph_node *node,
1853 const ipa_agg_jf_item *item)
1854{
1855 tree value = NULL_TREE;
1856 int src_idx;
1857
1858 if (item->offset < 0
1859 || item->jftype == IPA_JF_UNKNOWN
1860 || item->offset >= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)
1861 return NULL_TREE;
1862
1863 if (item->jftype == IPA_JF_CONST)
1864 return item->value.constant;
1865
1866 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
1867 || item->jftype == IPA_JF_LOAD_AGG);
1868
1869 src_idx = item->value.pass_through.formal_id;
1870
1871 if (info->ipcp_orig_node)
1872 {
1873 if (item->jftype == IPA_JF_PASS_THROUGH)
1874 value = info->known_csts[src_idx];
1875 else if (ipcp_transformation *ts = ipcp_get_transformation_summary (node))
1876 {
1877 ipa_argagg_value_list avl (ts);
1878 value = avl.get_value (index: src_idx,
1879 unit_offset: item->value.load_agg.offset / BITS_PER_UNIT,
1880 by_ref: item->value.load_agg.by_ref);
1881 }
1882 }
1883 else if (!info->lattices.is_empty ())
1884 {
1885 class ipcp_param_lattices *src_plats
1886 = ipa_get_parm_lattices (info, i: src_idx);
1887
1888 if (item->jftype == IPA_JF_PASS_THROUGH)
1889 {
1890 struct ipcp_lattice<tree> *lat = &src_plats->itself;
1891
1892 if (!lat->is_single_const ())
1893 return NULL_TREE;
1894
1895 value = lat->values->value;
1896 }
1897 else if (src_plats->aggs
1898 && !src_plats->aggs_bottom
1899 && !src_plats->aggs_contain_variable
1900 && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
1901 {
1902 struct ipcp_agg_lattice *aglat;
1903
1904 for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
1905 {
1906 if (aglat->offset > item->value.load_agg.offset)
1907 break;
1908
1909 if (aglat->offset == item->value.load_agg.offset)
1910 {
1911 if (aglat->is_single_const ())
1912 value = aglat->values->value;
1913 break;
1914 }
1915 }
1916 }
1917 }
1918
1919 if (!value)
1920 return NULL_TREE;
1921
1922 if (item->jftype == IPA_JF_LOAD_AGG)
1923 {
1924 tree load_type = item->value.load_agg.type;
1925 tree value_type = TREE_TYPE (value);
1926
1927 /* Ensure value type is compatible with load type. */
1928 if (!useless_type_conversion_p (load_type, value_type))
1929 return NULL_TREE;
1930 }
1931
1932 tree cstval = ipa_get_jf_arith_result (opcode: item->value.pass_through.operation,
1933 input: value,
1934 operand: item->value.pass_through.operand,
1935 res_type: item->value.pass_through.op_type);
1936 return ipacp_value_safe_for_type (param_type: item->type, value: cstval);
1937}
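
/* A hypothetical example of the scheme above: for

     struct S { int i; };
     extern void callee (struct S *);
     void f (void)
     {
       struct S s;
       s.i = 7;
       callee (&s);
     }

   analysis records an IPA_JF_CONST item with value 7 at the bit offset of
   field i, and this function simply returns that constant. For
   IPA_JF_PASS_THROUGH and IPA_JF_LOAD_AGG items, the source constant has to
   be looked up in known_csts, the transformation summary or the lattices
   first, and then run through the recorded arithmetic operation. */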
1938
/* Process all items in AGG_JFUNC relative to caller (or the node the
   original caller is inlined to) NODE, which is described by INFO, and push
   the results to RES as describing values passed in parameter DST_INDEX. */
1942
1943void
1944ipa_push_agg_values_from_jfunc (ipa_node_params *info, cgraph_node *node,
1945 ipa_agg_jump_function *agg_jfunc,
1946 unsigned dst_index,
1947 vec<ipa_argagg_value> *res)
1948{
1949 unsigned prev_unit_offset = 0;
1950 bool first = true;
1951
1952 for (const ipa_agg_jf_item &item : agg_jfunc->items)
1953 {
1954 tree value = ipa_agg_value_from_jfunc (info, node, item: &item);
1955 if (!value)
1956 continue;
1957
1958 ipa_argagg_value iav;
1959 iav.value = value;
1960 iav.unit_offset = item.offset / BITS_PER_UNIT;
1961 iav.index = dst_index;
1962 iav.by_ref = agg_jfunc->by_ref;
1963 iav.killed = 0;
1964
1965 gcc_assert (first
1966 || iav.unit_offset > prev_unit_offset);
1967 prev_unit_offset = iav.unit_offset;
1968 first = false;
1969
1970 res->safe_push (obj: iav);
1971 }
1972}
1973
/* If checking is enabled, verify that no lattice is in the TOP state, i.e.
   simultaneously not bottom, not containing a variable component and
   without any known values. */
1977
1978DEBUG_FUNCTION void
1979ipcp_verify_propagated_values (void)
1980{
1981 struct cgraph_node *node;
1982
1983 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
1984 {
1985 ipa_node_params *info = ipa_node_params_sum->get (node);
1986 if (!opt_for_fn (node->decl, flag_ipa_cp)
1987 || !opt_for_fn (node->decl, optimize))
1988 continue;
1989 int i, count = ipa_get_param_count (info);
1990
1991 for (i = 0; i < count; i++)
1992 {
1993 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
1994
1995 if (!lat->bottom
1996 && !lat->contains_variable
1997 && lat->values_count == 0)
1998 {
1999 if (dump_file)
2000 {
2001 symtab->dump (f: dump_file);
2002 fprintf (stream: dump_file, format: "\nIPA lattices after constant "
2003 "propagation, before gcc_unreachable:\n");
2004 print_all_lattices (f: dump_file, dump_sources: true, dump_benefits: false);
2005 }
2006
2007 gcc_unreachable ();
2008 }
2009 }
2010 }
2011}
2012
2013/* Return true iff X and Y should be considered equal contexts by IPA-CP. */
2014
2015static bool
2016values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
2017 ipa_polymorphic_call_context y)
2018{
2019 return x.equal_to (x: y);
2020}
2021
2022
/* Add a new value source to the value represented by THIS, marking that a
   value comes from edge CS and (if the underlying jump function is a
   pass-through or an ancestor one) from a caller value SRC_VAL of a caller
   parameter described by SRC_INDEX. OFFSET is negative if the source was
   the scalar value of the parameter itself, otherwise it is the offset
   within an aggregate. */
2028
2029template <typename valtype>
2030void
2031ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
2032 int src_idx, HOST_WIDE_INT offset)
2033{
2034 ipcp_value_source<valtype> *src;
2035
2036 src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
2037 src->offset = offset;
2038 src->cs = cs;
2039 src->val = src_val;
2040 src->index = src_idx;
2041
2042 src->next = sources;
2043 sources = src;
2044}
2045
/* Allocate a new ipcp_value holding a tree constant, initialize its value
   to CST, its self-recursion generation level to SAME_LAT_GEN_LEVEL and
   clear all other fields. */
2048
2049static ipcp_value<tree> *
2050allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
2051{
2052 ipcp_value<tree> *val;
2053
2054 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
2055 val->value = cst;
2056 val->self_recursion_generated_level = same_lat_gen_level;
2057 return val;
2058}
2059
/* Allocate a new ipcp_value holding a polymorphic context, initialize its
   value to CTX, its self-recursion generation level to SAME_LAT_GEN_LEVEL
   and clear all other fields. */
2062
2063static ipcp_value<ipa_polymorphic_call_context> *
2064allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
2065 unsigned same_lat_gen_level)
2066{
2067 ipcp_value<ipa_polymorphic_call_context> *val;
2068
2069 val = new (ipcp_poly_ctx_values_pool.allocate ())
2070 ipcp_value<ipa_polymorphic_call_context>();
2071 val->value = ctx;
2072 val->self_recursion_generated_level = same_lat_gen_level;
2073 return val;
2074}
2075
/* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it.
   CS, SRC_VAL, SRC_INDEX and OFFSET are meant for add_source and have the
   same meaning. OFFSET -1 means the source is scalar and not a part of an
   aggregate. If non-NULL, VAL_P records the address of the existing or
   newly added ipcp_value.

   If the value is generated for a self-recursive call as a result of an
   arithmetic pass-through jump-function acting on a value in the same
   lattice, SAME_LAT_GEN_LEVEL must be the length of such a chain, otherwise
   it must be zero. If it is non-zero, the PARAM_IPA_CP_VALUE_LIST_SIZE
   limit is ignored. */
2086
2087template <typename valtype>
2088bool
2089ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
2090 ipcp_value<valtype> *src_val,
2091 int src_idx, HOST_WIDE_INT offset,
2092 ipcp_value<valtype> **val_p,
2093 unsigned same_lat_gen_level)
2094{
2095 ipcp_value<valtype> *val, *last_val = NULL;
2096
2097 if (val_p)
2098 *val_p = NULL;
2099
2100 if (bottom)
2101 return false;
2102
2103 for (val = values; val; last_val = val, val = val->next)
2104 if (values_equal_for_ipcp_p (val->value, newval))
2105 {
2106 if (val_p)
2107 *val_p = val;
2108
2109 if (val->self_recursion_generated_level < same_lat_gen_level)
2110 val->self_recursion_generated_level = same_lat_gen_level;
2111
2112 if (ipa_edge_within_scc (cs))
2113 {
2114 ipcp_value_source<valtype> *s;
2115 for (s = val->sources; s; s = s->next)
2116 if (s->cs == cs && s->val == src_val)
2117 break;
2118 if (s)
2119 return false;
2120 }
2121
2122 val->add_source (cs, src_val, src_idx, offset);
2123 return false;
2124 }
2125
2126 if (!same_lat_gen_level && values_count >= opt_for_fn (cs->callee->decl,
2127 param_ipa_cp_value_list_size))
2128 {
2129 /* We can only free sources, not the values themselves, because sources
2130 of other values in this SCC might point to them. */
2131 for (val = values; val; val = val->next)
2132 {
2133 while (val->sources)
2134 {
2135 ipcp_value_source<valtype> *src = val->sources;
2136 val->sources = src->next;
2137 ipcp_sources_pool.remove (object: (ipcp_value_source<tree>*)src);
2138 }
2139 }
2140 values = NULL;
2141 return set_to_bottom ();
2142 }
2143
2144 values_count++;
2145 val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
2146 val->add_source (cs, src_val, src_idx, offset);
2147 val->next = NULL;
2148
  /* Add the new value to the end of the value list, which can reduce the
     number of iterations of the propagation stage for recursive
     functions. */
2151 if (last_val)
2152 last_val->next = val;
2153 else
2154 values = val;
2155
2156 if (val_p)
2157 *val_p = val;
2158
2159 return true;
2160}
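
/* For instance, with the default of 8 for PARAM_IPA_CP_VALUE_LIST_SIZE, a
   ninth distinct constant arriving at a lattice through the function above
   drops the whole lattice to bottom (unless it was generated for a
   self-recursive call), on the assumption that tracking a long tail of
   rarely useful values would only slow propagation down. */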
2161
/* A helper function that returns the result of the operation specified by
   OPCODE on the value of SRC_VAL. If non-NULL, OPND1_TYPE is the expected
   type for the value of SRC_VAL. If the operation is binary, OPND2 is a
   constant value acting as its second operand. OP_TYPE is the type in which
   the operation is performed. */
2167
2168static tree
2169get_val_across_arith_op (enum tree_code opcode,
2170 tree opnd1_type,
2171 tree opnd2,
2172 ipcp_value<tree> *src_val,
2173 tree op_type)
2174{
2175 tree opnd1 = src_val->value;
2176
  /* Skip source values that are incompatible with the specified type. */
2178 if (opnd1_type
2179 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2180 return NULL_TREE;
2181
2182 return ipa_get_jf_arith_result (opcode, input: opnd1, operand: opnd2, res_type: op_type);
2183}
2184
2185/* Propagate values through an arithmetic transformation described by a jump
2186 function associated with edge CS, taking values from SRC_LAT and putting
2187 them into DEST_LAT. OPND1_TYPE, if non-NULL, is the expected type for the
2188 values in SRC_LAT. OPND2 is a constant value if transformation is a binary
2189 operation. SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes
2190 lattice of a part of an aggregate, otherwise it should be -1. SRC_IDX is
2191 the index of the source parameter. OP_TYPE is the type in which the
2192 operation is performed and can be NULL when OPCODE is NOP_EXPR. RES_TYPE is
2193 the value type of result being propagated into. Return true if DEST_LAT
2194 changed. */
2195
2196static bool
2197propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2198 enum tree_code opcode,
2199 tree opnd1_type,
2200 tree opnd2,
2201 ipcp_lattice<tree> *src_lat,
2202 ipcp_lattice<tree> *dest_lat,
2203 HOST_WIDE_INT src_offset,
2204 int src_idx,
2205 tree op_type,
2206 tree res_type)
2207{
2208 ipcp_value<tree> *src_val;
2209 bool ret = false;
2210
  /* Due to circular dependencies, propagating within an SCC through
     arithmetic transformations would create an infinite number of values.
     But for a self-feeding recursive function, we can allow a limited
     number of propagation steps, which enables a simple kind of recursive
     function versioning. In all other scenarios, we just drop the lattice
     to bottom. */
2216 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2217 {
2218 int i;
2219
2220 int max_recursive_depth = opt_for_fn(cs->caller->decl,
2221 param_ipa_cp_max_recursive_depth);
2222 if (src_lat != dest_lat || max_recursive_depth < 1)
2223 return dest_lat->set_contains_variable ();
2224
      /* No benefit if the probability of recursive execution is low. */
2226 if (cs->sreal_frequency () * 100
2227 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2228 param_ipa_cp_min_recursive_probability))
2229 return dest_lat->set_contains_variable ();
2230
2231 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2232
2233 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2234 {
	  /* We do not use self-recursively generated values as propagation
	     sources; this is absolutely conservative, but avoids an
	     explosion of the lattice's value space, especially when one
	     recursive function calls another recursive one. */
2239 if (src_val->self_recursion_generated_p ())
2240 {
2241 ipcp_value_source<tree> *s;
2242
2243 /* If the lattice has already been propagated for the call site,
2244 no need to do that again. */
2245 for (s = src_val->sources; s; s = s->next)
2246 if (s->cs == cs)
2247 return dest_lat->set_contains_variable ();
2248 }
2249 else
2250 val_seeds.safe_push (obj: src_val);
2251 }
2252
2253 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2254
2255 /* Recursively generate lattice values with a limited count. */
2256 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2257 {
2258 for (int j = 1; j < max_recursive_depth; j++)
2259 {
2260 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2261 src_val, op_type);
2262 cstval = ipacp_value_safe_for_type (param_type: res_type, value: cstval);
2263 if (!cstval)
2264 break;
2265
2266 ret |= dest_lat->add_value (newval: cstval, cs, src_val, src_idx,
2267 offset: src_offset, val_p: &src_val, same_lat_gen_level: j);
2268 gcc_checking_assert (src_val);
2269 }
2270 }
2271 ret |= dest_lat->set_contains_variable ();
2272 }
2273 else
2274 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2275 {
      /* We do not use self-recursively generated values as propagation
	 sources, because otherwise it would be easy to overflow the value
	 space of a normal lattice. */
2279 if (src_val->self_recursion_generated_p ())
2280 {
2281 ret |= dest_lat->set_contains_variable ();
2282 continue;
2283 }
2284
2285 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2286 src_val, op_type);
2287 cstval = ipacp_value_safe_for_type (param_type: res_type, value: cstval);
2288 if (cstval)
2289 ret |= dest_lat->add_value (newval: cstval, cs, src_val, src_idx,
2290 offset: src_offset);
2291 else
2292 ret |= dest_lat->set_contains_variable ();
2293 }
2294
2295 return ret;
2296}
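
/* A small, made-up example of the self-recursion handling above:

     int f (int n)
     {
       if (n > 1)
	 f (n - 1);
       return n;
     }

   with a context where n is known to be 8 and, say,
   PARAM_IPA_CP_MAX_RECURSIVE_DEPTH equal to 5, the seeding loop generates
   the values 7, 6, 5 and 4 with increasing self_recursion_generated_level,
   which is what later allows a limited chain of specialized clones of f. */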
2297
2298/* Propagate values through a pass-through jump function JFUNC associated with
2299 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2300 is the index of the source parameter. PARM_TYPE is the type of the
2301 parameter to which the result is passed. */
2302
2303static bool
2304propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2305 ipcp_lattice<tree> *src_lat,
2306 ipcp_lattice<tree> *dest_lat, int src_idx,
2307 tree parm_type)
2308{
2309 gcc_checking_assert (parm_type);
2310 enum tree_code opcode = ipa_get_jf_pass_through_operation (jfunc);
2311 tree op_type = (opcode == NOP_EXPR) ? NULL_TREE
2312 : ipa_get_jf_pass_through_op_type (jfunc);
2313 return propagate_vals_across_arith_jfunc (cs, opcode, NULL_TREE,
2314 opnd2: ipa_get_jf_pass_through_operand (jfunc),
2315 src_lat, dest_lat, src_offset: -1, src_idx, op_type,
2316 res_type: parm_type);
2317}
2318
2319/* Propagate values through an ancestor jump function JFUNC associated with
2320 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2321 is the index of the source parameter. */
2322
2323static bool
2324propagate_vals_across_ancestor (struct cgraph_edge *cs,
2325 struct ipa_jump_func *jfunc,
2326 ipcp_lattice<tree> *src_lat,
2327 ipcp_lattice<tree> *dest_lat, int src_idx,
2328 tree param_type)
2329{
2330 ipcp_value<tree> *src_val;
2331 bool ret = false;
2332
2333 if (ipa_edge_within_scc (cs))
2334 return dest_lat->set_contains_variable ();
2335
2336 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2337 {
2338 tree t = ipa_get_jf_ancestor_result (jfunc, input: src_val->value);
2339 t = ipacp_value_safe_for_type (param_type, value: t);
2340 if (t)
2341 ret |= dest_lat->add_value (newval: t, cs, src_val, src_idx);
2342 else
2343 ret |= dest_lat->set_contains_variable ();
2344 }
2345
2346 return ret;
2347}
2348
2349/* Propagate scalar values across jump function JFUNC that is associated with
2350 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2351 parameter to which the result is passed. */
2352
2353static bool
2354propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2355 struct ipa_jump_func *jfunc,
2356 ipcp_lattice<tree> *dest_lat,
2357 tree param_type)
2358{
2359 if (dest_lat->bottom)
2360 return false;
2361
2362 if (jfunc->type == IPA_JF_CONST)
2363 {
2364 tree val = ipa_get_jf_constant (jfunc);
2365 val = ipacp_value_safe_for_type (param_type, value: val);
2366 if (val)
2367 return dest_lat->add_value (newval: val, cs, NULL, src_idx: 0);
2368 else
2369 return dest_lat->set_contains_variable ();
2370 }
2371 else if (jfunc->type == IPA_JF_PASS_THROUGH
2372 || jfunc->type == IPA_JF_ANCESTOR)
2373 {
2374 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
2375 ipcp_lattice<tree> *src_lat;
2376 int src_idx;
2377 bool ret;
2378
2379 if (jfunc->type == IPA_JF_PASS_THROUGH)
2380 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2381 else
2382 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2383
2384 src_lat = ipa_get_scalar_lat (info: caller_info, i: src_idx);
2385 if (src_lat->bottom)
2386 return dest_lat->set_contains_variable ();
2387
2388 /* If we would need to clone the caller and cannot, do not propagate. */
2389 if (!ipcp_versionable_function_p (node: cs->caller)
2390 && (src_lat->contains_variable
2391 || (src_lat->values_count > 1)))
2392 return dest_lat->set_contains_variable ();
2393
2394 if (jfunc->type == IPA_JF_PASS_THROUGH)
2395 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2396 dest_lat, src_idx,
2397 parm_type: param_type);
2398 else
2399 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2400 src_idx, param_type);
2401
2402 if (src_lat->contains_variable)
2403 ret |= dest_lat->set_contains_variable ();
2404
2405 return ret;
2406 }
2407
  /* TODO: We currently do not handle member method pointers in IPA-CP (we
     only use them for indirect inlining); we should propagate them too. */
2410 return dest_lat->set_contains_variable ();
2411}
2412
/* Propagate polymorphic call contexts across jump function JFUNC that is
   associated with edge CS and describes argument IDX, and put the contexts
   into DEST_LAT. */
2415
2416static bool
2417propagate_context_across_jump_function (cgraph_edge *cs,
2418 ipa_jump_func *jfunc, int idx,
2419 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2420{
2421 if (dest_lat->bottom)
2422 return false;
2423 ipa_edge_args *args = ipa_edge_args_sum->get (edge: cs);
2424 bool ret = false;
2425 bool added_sth = false;
2426 bool type_preserved = true;
2427
2428 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2429 = ipa_get_ith_polymorhic_call_context (args, i: idx);
2430
2431 if (edge_ctx_ptr)
2432 edge_ctx = *edge_ctx_ptr;
2433
2434 if (jfunc->type == IPA_JF_PASS_THROUGH
2435 || jfunc->type == IPA_JF_ANCESTOR)
2436 {
2437 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
2438 int src_idx;
2439 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2440
2441 /* TODO: Once we figure out how to propagate speculations, it will
2442 probably be a good idea to switch to speculation if type_preserved is
2443 not set instead of punting. */
2444 if (jfunc->type == IPA_JF_PASS_THROUGH)
2445 {
2446 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2447 goto prop_fail;
2448 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2449 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2450 }
2451 else
2452 {
2453 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2454 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2455 }
2456
2457 src_lat = ipa_get_poly_ctx_lat (info: caller_info, i: src_idx);
2458 /* If we would need to clone the caller and cannot, do not propagate. */
2459 if (!ipcp_versionable_function_p (node: cs->caller)
2460 && (src_lat->contains_variable
2461 || (src_lat->values_count > 1)))
2462 goto prop_fail;
2463
2464 ipcp_value<ipa_polymorphic_call_context> *src_val;
2465 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2466 {
2467 ipa_polymorphic_call_context cur = src_val->value;
2468
2469 if (!type_preserved)
2470 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2471 if (jfunc->type == IPA_JF_ANCESTOR)
2472 cur.offset_by (off: ipa_get_jf_ancestor_offset (jfunc));
2473 /* TODO: In cases we know how the context is going to be used,
2474 we can improve the result by passing proper OTR_TYPE. */
2475 cur.combine_with (edge_ctx);
2476 if (!cur.useless_p ())
2477 {
2478 if (src_lat->contains_variable
2479 && !edge_ctx.equal_to (x: cur))
2480 ret |= dest_lat->set_contains_variable ();
2481 ret |= dest_lat->add_value (newval: cur, cs, src_val, src_idx);
2482 added_sth = true;
2483 }
2484 }
2485 }
2486
2487 prop_fail:
2488 if (!added_sth)
2489 {
2490 if (!edge_ctx.useless_p ())
2491 ret |= dest_lat->add_value (newval: edge_ctx, cs);
2492 else
2493 ret |= dest_lat->set_contains_variable ();
2494 }
2495
2496 return ret;
2497}
2498
/* Propagate bits across jump function JFUNC that is associated with edge CS
   and describes argument IDX, and update DEST_LATTICE accordingly. */
2501
2502bool
2503propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2504 ipa_jump_func *jfunc,
2505 ipcp_bits_lattice *dest_lattice)
2506{
2507 if (dest_lattice->bottom_p ())
2508 return false;
2509
2510 enum availability availability;
2511 cgraph_node *callee = cs->callee->function_symbol (avail: &availability);
2512 ipa_node_params *callee_info = ipa_node_params_sum->get (node: callee);
2513 tree parm_type = ipa_get_type (info: callee_info, i: idx);
2514
  /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
     transform for these cases. Similarly, we can have bad type mismatches
     with LTO; avoid doing anything with those too. */
2518 if (!parm_type
2519 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2520 {
2521 if (dump_file && (dump_flags & TDF_DETAILS))
2522 fprintf (stream: dump_file, format: "Setting dest_lattice to bottom, because type of "
2523 "param %i of %s is NULL or unsuitable for bits propagation\n",
2524 idx, cs->callee->dump_name ());
2525
2526 return dest_lattice->set_to_bottom ();
2527 }
2528
2529 if (jfunc->type == IPA_JF_PASS_THROUGH
2530 || jfunc->type == IPA_JF_ANCESTOR)
2531 {
2532 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
2533 tree operand = NULL_TREE;
2534 tree op_type = NULL_TREE;
2535 enum tree_code code;
2536 unsigned src_idx;
2537 bool keep_null = false;
2538
2539 if (jfunc->type == IPA_JF_PASS_THROUGH)
2540 {
2541 code = ipa_get_jf_pass_through_operation (jfunc);
2542 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2543 if (code != NOP_EXPR)
2544 {
2545 operand = ipa_get_jf_pass_through_operand (jfunc);
2546 op_type = ipa_get_jf_pass_through_op_type (jfunc);
2547 }
2548 }
2549 else
2550 {
2551 code = POINTER_PLUS_EXPR;
2552 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2553 unsigned HOST_WIDE_INT offset
2554 = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2555 keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
2556 operand = build_int_cstu (size_type_node, offset);
2557 }
2558
2559 class ipcp_param_lattices *src_lats
2560 = ipa_get_parm_lattices (info: caller_info, i: src_idx);
2561
      /* Try to propagate bits even if the source lattice is bottom, as long
	 as the jump function is known. For example, consider:

	   int f (int x)
	   {
	     g (x & 0xff);
	   }

	 Assume the lattice for x is bottom. However, we can still propagate
	 the known-bits mask of x & 0xff, i.e. that only the low eight bits
	 may be set, because it gets computed during the ccp1 pass and we
	 store it in the jump function during the analysis stage. */
2571
2572 if (!src_lats->bits_lattice.bottom_p ())
2573 {
2574 if (!op_type)
2575 op_type = ipa_get_type (info: caller_info, i: src_idx);
2576
2577 unsigned precision = TYPE_PRECISION (op_type);
2578 signop sgn = TYPE_SIGN (op_type);
2579 bool drop_all_ones
2580 = keep_null && !src_lats->bits_lattice.known_nonzero_p ();
2581
2582 return dest_lattice->meet_with (other&: src_lats->bits_lattice, precision,
2583 sgn, code, operand, drop_all_ones);
2584 }
2585 }
2586
2587 value_range vr (parm_type);
2588 if (jfunc->m_vr)
2589 {
2590 jfunc->m_vr->get_vrange (vr);
2591 if (!vr.undefined_p () && !vr.varying_p ())
2592 {
2593 irange_bitmask bm = vr.get_bitmask ();
2594 widest_int mask
2595 = widest_int::from (x: bm.mask (), TYPE_SIGN (parm_type));
2596 widest_int value
2597 = widest_int::from (x: bm.value (), TYPE_SIGN (parm_type));
2598 return dest_lattice->meet_with (value, mask,
2599 TYPE_PRECISION (parm_type));
2600 }
2601 }
2602 return dest_lattice->set_to_bottom ();
2603}
2604
/* Propagate the value range across jump function JFUNC that is associated
   with edge CS, with the corresponding parameter of the callee being of
   PARAM_TYPE, and update DEST_PLATS accordingly. */
2608
2609static bool
2610propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2611 class ipcp_param_lattices *dest_plats,
2612 tree param_type)
2613{
2614 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2615
2616 if (dest_lat->bottom_p ())
2617 return false;
2618
2619 if (!param_type
2620 || !ipa_vr_supported_type_p (type: param_type))
2621 return dest_lat->set_to_bottom ();
2622
2623 value_range vr (param_type);
2624 vr.set_varying (param_type);
2625 if (jfunc->m_vr)
2626 ipa_vr_operation_and_type_effects (dst_vr&: vr, src_vr: *jfunc->m_vr, operation: NOP_EXPR,
2627 dst_type: param_type,
2628 src_type: jfunc->m_vr->type ());
2629
2630 if (jfunc->type == IPA_JF_PASS_THROUGH)
2631 {
2632 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
2633 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2634 class ipcp_param_lattices *src_lats
2635 = ipa_get_parm_lattices (info: caller_info, i: src_idx);
2636 tree operand_type = ipa_get_type (info: caller_info, i: src_idx);
2637
2638 if (src_lats->m_value_range.bottom_p ())
2639 return dest_lat->set_to_bottom ();
2640
2641 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR
2642 || !ipa_edge_within_scc (cs))
2643 ipa_vr_intersect_with_arith_jfunc (vr, jfunc, context_node: cs->caller,
2644 src_vr: src_lats->m_value_range.m_vr,
2645 src_type: operand_type, dst_type: param_type);
2646 }
2647
2648 if (!vr.undefined_p () && !vr.varying_p ())
2649 return dest_lat->meet_with (p_vr: vr);
2650 else
2651 return dest_lat->set_to_bottom ();
2652}
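
/* Note the guard above: for an edge within a strongly connected component,
   only a plain NOP_EXPR pass-through is used, since repeatedly applying an
   arithmetic operation such as n - 1 to its own result within the cycle
   could keep changing the lattice and delay convergence. */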
2653
/* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
   NEW_AGGS_BY_REF and if not, mark all aggs as bottom and return true (in
   all other cases, return false). If there are no aggregate items, set
   aggs_by_ref to NEW_AGGS_BY_REF. */
2658
2659static bool
2660set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2661 bool new_aggs_by_ref)
2662{
2663 if (dest_plats->aggs)
2664 {
2665 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2666 {
2667 set_agg_lats_to_bottom (dest_plats);
2668 return true;
2669 }
2670 }
2671 else
2672 dest_plats->aggs_by_ref = new_aggs_by_ref;
2673 return false;
2674}
2675
/* Walk the aggregate lattices in DEST_PLATS from **AGLAT on, until **AGLAT
   is an already existing lattice for the given OFFSET and VAL_SIZE, marking
   all skipped lattices as containing a variable value and checking for
   overlaps. If there is no pre-existing lattice for OFFSET and VAL_SIZE,
   create one, initialize its offset and size, set its contains_variable
   flag to PRE_EXISTING, and return true, unless there are already too many
   lattices, in which case return false. If there are overlaps, turn the
   whole DEST_PLATS to bottom and return false. If any skipped lattices
   were newly marked as containing a variable value, set *CHANGE to true.
   MAX_AGG_ITEMS is the maximum number of lattices. */
2685
2686static bool
2687merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2688 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2689 struct ipcp_agg_lattice ***aglat,
2690 bool pre_existing, bool *change, int max_agg_items)
2691{
2692 gcc_checking_assert (offset >= 0);
2693
2694 while (**aglat && (**aglat)->offset < offset)
2695 {
2696 if ((**aglat)->offset + (**aglat)->size > offset)
2697 {
2698 set_agg_lats_to_bottom (dest_plats);
2699 return false;
2700 }
2701 *change |= (**aglat)->set_contains_variable ();
2702 *aglat = &(**aglat)->next;
2703 }
2704
2705 if (**aglat && (**aglat)->offset == offset)
2706 {
2707 if ((**aglat)->size != val_size)
2708 {
2709 set_agg_lats_to_bottom (dest_plats);
2710 return false;
2711 }
2712 gcc_assert (!(**aglat)->next
2713 || (**aglat)->next->offset >= offset + val_size);
2714 return true;
2715 }
2716 else
2717 {
2718 struct ipcp_agg_lattice *new_al;
2719
2720 if (**aglat && (**aglat)->offset < offset + val_size)
2721 {
2722 set_agg_lats_to_bottom (dest_plats);
2723 return false;
2724 }
2725 if (dest_plats->aggs_count == max_agg_items)
2726 return false;
2727 dest_plats->aggs_count++;
2728 new_al = ipcp_agg_lattice_pool.allocate ();
2729
2730 new_al->offset = offset;
2731 new_al->size = val_size;
2732 new_al->contains_variable = pre_existing;
2733
2734 new_al->next = **aglat;
2735 **aglat = new_al;
2736 return true;
2737 }
2738}
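
/* For instance, suppose DEST_PLATS already tracks 32-bit lattices at bit
   offsets 0 and 64. Merging an item with offset 32 and size 32 walks past
   the offset-0 lattice (marking it as containing a variable value), inserts
   a new lattice between the two and returns true. Merging an item with
   offset 48 instead overlaps the lattice at 64, so the whole parameter's
   aggregate lattices are dropped to bottom. */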
2739
/* Mark AGLAT and all other aggregate lattices reachable from it through
   next pointers as containing an unknown value. */
2742
2743static bool
2744set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2745{
2746 bool ret = false;
2747 while (aglat)
2748 {
2749 ret |= aglat->set_contains_variable ();
2750 aglat = aglat->next;
2751 }
2752 return ret;
2753}
2754
/* Merge existing aggregate lattices in SRC_PLATS into DEST_PLATS,
   subtracting OFFSET_DELTA. CS is the call graph edge and SRC_IDX the index
   of the source parameter used for lattice value sources. Return true if
   DEST_PLATS changed in any way. */
2759
2760static bool
2761merge_aggregate_lattices (struct cgraph_edge *cs,
2762 class ipcp_param_lattices *dest_plats,
2763 class ipcp_param_lattices *src_plats,
2764 int src_idx, HOST_WIDE_INT offset_delta)
2765{
2766 bool pre_existing = dest_plats->aggs != NULL;
2767 struct ipcp_agg_lattice **dst_aglat;
2768 bool ret = false;
2769
2770 if (set_check_aggs_by_ref (dest_plats, new_aggs_by_ref: src_plats->aggs_by_ref))
2771 return true;
2772 if (src_plats->aggs_bottom)
2773 return set_agg_lats_contain_variable (dest_plats);
2774 if (src_plats->aggs_contain_variable)
2775 ret |= set_agg_lats_contain_variable (dest_plats);
2776 dst_aglat = &dest_plats->aggs;
2777
2778 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2779 param_ipa_max_agg_items);
2780 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2781 src_aglat;
2782 src_aglat = src_aglat->next)
2783 {
2784 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
2785
2786 if (new_offset < 0)
2787 continue;
2788 if (merge_agg_lats_step (dest_plats, offset: new_offset, val_size: src_aglat->size,
2789 aglat: &dst_aglat, pre_existing, change: &ret, max_agg_items))
2790 {
2791 struct ipcp_agg_lattice *new_al = *dst_aglat;
2792
2793 dst_aglat = &(*dst_aglat)->next;
2794 if (src_aglat->bottom)
2795 {
2796 ret |= new_al->set_contains_variable ();
2797 continue;
2798 }
2799 if (src_aglat->contains_variable)
2800 ret |= new_al->set_contains_variable ();
2801 for (ipcp_value<tree> *val = src_aglat->values;
2802 val;
2803 val = val->next)
2804 ret |= new_al->add_value (newval: val->value, cs, src_val: val, src_idx,
2805 offset: src_aglat->offset);
2806 }
2807 else if (dest_plats->aggs_bottom)
2808 return true;
2809 }
2810 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
2811 return ret;
2812}
2813
/* Determine whether there is anything to propagate from SRC_PLATS through a
   pass-through JFUNC and if so, whether it conforms to the rules about
   propagating values passed by reference. */
2817
2818static bool
2819agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
2820 struct ipa_jump_func *jfunc)
2821{
2822 return src_plats->aggs
2823 && (!src_plats->aggs_by_ref
2824 || ipa_get_jf_pass_through_agg_preserved (jfunc));
2825}
2826
2827/* Propagate values through ITEM, jump function for a part of an aggregate,
2828 into corresponding aggregate lattice AGLAT. CS is the call graph edge
2829 associated with the jump function. Return true if AGLAT changed in any
2830 way. */
2831
2832static bool
2833propagate_aggregate_lattice (struct cgraph_edge *cs,
2834 struct ipa_agg_jf_item *item,
2835 struct ipcp_agg_lattice *aglat)
2836{
2837 class ipa_node_params *caller_info;
2838 class ipcp_param_lattices *src_plats;
2839 struct ipcp_lattice<tree> *src_lat;
2840 HOST_WIDE_INT src_offset;
2841 int src_idx;
2842 tree load_type;
2843 bool ret;
2844
2845 if (item->jftype == IPA_JF_CONST)
2846 {
2847 tree value = item->value.constant;
2848
2849 gcc_checking_assert (is_gimple_ip_invariant (value));
2850 return aglat->add_value (newval: value, cs, NULL, src_idx: 0);
2851 }
2852
2853 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2854 || item->jftype == IPA_JF_LOAD_AGG);
2855
2856 caller_info = ipa_node_params_sum->get (node: cs->caller);
2857 src_idx = item->value.pass_through.formal_id;
2858 src_plats = ipa_get_parm_lattices (info: caller_info, i: src_idx);
2859
2860 if (item->jftype == IPA_JF_PASS_THROUGH)
2861 {
2862 load_type = NULL_TREE;
2863 src_lat = &src_plats->itself;
2864 src_offset = -1;
2865 }
2866 else
2867 {
2868 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
2869 struct ipcp_agg_lattice *src_aglat;
2870
2871 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
2872 if (src_aglat->offset >= load_offset)
2873 break;
2874
2875 load_type = item->value.load_agg.type;
2876 if (!src_aglat
2877 || src_aglat->offset > load_offset
2878 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
2879 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
2880 return aglat->set_contains_variable ();
2881
2882 src_lat = src_aglat;
2883 src_offset = load_offset;
2884 }
2885
2886 if (src_lat->bottom
2887 || (!ipcp_versionable_function_p (node: cs->caller)
2888 && !src_lat->is_single_const ()))
2889 return aglat->set_contains_variable ();
2890
2891 ret = propagate_vals_across_arith_jfunc (cs,
2892 opcode: item->value.pass_through.operation,
2893 opnd1_type: load_type,
2894 opnd2: item->value.pass_through.operand,
2895 src_lat, dest_lat: aglat,
2896 src_offset,
2897 src_idx,
2898 op_type: item->value.pass_through.op_type,
2899 res_type: item->type);
2900
2901 if (src_lat->contains_variable)
2902 ret |= aglat->set_contains_variable ();
2903
2904 return ret;
2905}
2906
/* Propagate aggregate values across jump function JFUNC that is associated
   with edge CS and put the values into DEST_PLATS. */
2909
2910static bool
2911propagate_aggs_across_jump_function (struct cgraph_edge *cs,
2912 struct ipa_jump_func *jfunc,
2913 class ipcp_param_lattices *dest_plats)
2914{
2915 bool ret = false;
2916
2917 if (dest_plats->aggs_bottom)
2918 return false;
2919
2920 if (jfunc->type == IPA_JF_PASS_THROUGH
2921 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
2922 {
2923 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
2924 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2925 class ipcp_param_lattices *src_plats;
2926
2927 src_plats = ipa_get_parm_lattices (info: caller_info, i: src_idx);
2928 if (agg_pass_through_permissible_p (src_plats, jfunc))
2929 {
2930 /* Currently we do not produce clobber aggregate jump
2931 functions, replace with merging when we do. */
2932 gcc_assert (!jfunc->agg.items);
2933 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
2934 src_idx, offset_delta: 0);
2935 return ret;
2936 }
2937 }
2938 else if (jfunc->type == IPA_JF_ANCESTOR
2939 && ipa_get_jf_ancestor_agg_preserved (jfunc))
2940 {
2941 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
2942 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2943 class ipcp_param_lattices *src_plats;
2944
2945 src_plats = ipa_get_parm_lattices (info: caller_info, i: src_idx);
2946 if (src_plats->aggs && src_plats->aggs_by_ref)
2947 {
2948 /* Currently we do not produce clobber aggregate jump
2949 functions, replace with merging when we do. */
2950 gcc_assert (!jfunc->agg.items);
2951 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
2952 offset_delta: ipa_get_jf_ancestor_offset (jfunc));
2953 }
2954 else if (!src_plats->aggs_by_ref)
2955 ret |= set_agg_lats_to_bottom (dest_plats);
2956 else
2957 ret |= set_agg_lats_contain_variable (dest_plats);
2958 return ret;
2959 }
2960
2961 if (jfunc->agg.items)
2962 {
2963 bool pre_existing = dest_plats->aggs != NULL;
2964 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
2965 struct ipa_agg_jf_item *item;
2966 int i;
2967
2968 if (set_check_aggs_by_ref (dest_plats, new_aggs_by_ref: jfunc->agg.by_ref))
2969 return true;
2970
2971 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2972 param_ipa_max_agg_items);
2973 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
2974 {
2975 HOST_WIDE_INT val_size;
2976
2977 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
2978 continue;
2979 val_size = tree_to_shwi (TYPE_SIZE (item->type));
2980
2981 if (merge_agg_lats_step (dest_plats, offset: item->offset, val_size,
2982 aglat: &aglat, pre_existing, change: &ret, max_agg_items))
2983 {
2984 ret |= propagate_aggregate_lattice (cs, item, aglat: *aglat);
2985 aglat = &(*aglat)->next;
2986 }
2987 else if (dest_plats->aggs_bottom)
2988 return true;
2989 }
2990
2991 ret |= set_chain_of_aglats_contains_variable (*aglat);
2992 }
2993 else
2994 ret |= set_agg_lats_contain_variable (dest_plats);
2995
2996 return ret;
2997}
2998
/* Return true if, on the way from CS->caller to the final (non-alias and
   non-thunk) destination, the call passes through a thunk. */
3001
3002static bool
3003call_passes_through_thunk (cgraph_edge *cs)
3004{
3005 cgraph_node *alias_or_thunk = cs->callee;
3006 while (alias_or_thunk->alias)
3007 alias_or_thunk = alias_or_thunk->get_alias_target ();
3008 return alias_or_thunk->thunk;
3009}
3010
/* Propagate constants from the caller to the callee of CS. Return true if
   any lattice of the callee changed. */
3013
3014static bool
3015propagate_constants_across_call (struct cgraph_edge *cs)
3016{
3017 class ipa_node_params *callee_info;
3018 enum availability availability;
3019 cgraph_node *callee;
3020 class ipa_edge_args *args;
3021 bool ret = false;
3022 int i, args_count, parms_count;
3023
3024 callee = cs->callee->function_symbol (avail: &availability);
3025 if (!callee->definition)
3026 return false;
3027 gcc_checking_assert (callee->has_gimple_body_p ());
3028 callee_info = ipa_node_params_sum->get (node: callee);
3029 if (!callee_info)
3030 return false;
3031
3032 args = ipa_edge_args_sum->get (edge: cs);
3033 parms_count = ipa_get_param_count (info: callee_info);
3034 if (parms_count == 0)
3035 return false;
3036 if (!args
3037 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
3038 || !opt_for_fn (cs->caller->decl, optimize))
3039 {
3040 for (i = 0; i < parms_count; i++)
3041 ret |= set_all_contains_variable (ipa_get_parm_lattices (info: callee_info,
3042 i));
3043 return ret;
3044 }
3045 args_count = ipa_get_cs_argument_count (args);
3046
3047 /* If this call goes through a thunk we must not propagate to the first (0th)
3048 parameter. However, we might need to uncover a thunk from below a series
3049 of aliases first. */
3050 if (call_passes_through_thunk (cs))
3051 {
3052 ret |= set_all_contains_variable (ipa_get_parm_lattices (info: callee_info,
3053 i: 0));
3054 i = 1;
3055 }
3056 else
3057 i = 0;
3058
3059 for (; (i < args_count) && (i < parms_count); i++)
3060 {
3061 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
3062 class ipcp_param_lattices *dest_plats;
3063 tree param_type = ipa_get_type (info: callee_info, i);
3064
3065 dest_plats = ipa_get_parm_lattices (info: callee_info, i);
3066 if (availability == AVAIL_INTERPOSABLE)
3067 ret |= set_all_contains_variable (dest_plats);
3068 else
3069 {
3070 ret |= propagate_scalar_across_jump_function (cs, jfunc: jump_func,
3071 dest_lat: &dest_plats->itself,
3072 param_type);
3073 ret |= propagate_context_across_jump_function (cs, jfunc: jump_func, idx: i,
3074 dest_lat: &dest_plats->ctxlat);
3075 ret
3076 |= propagate_bits_across_jump_function (cs, idx: i, jfunc: jump_func,
3077 dest_lattice: &dest_plats->bits_lattice);
3078 ret |= propagate_aggs_across_jump_function (cs, jfunc: jump_func,
3079 dest_plats);
3080 if (opt_for_fn (callee->decl, flag_ipa_vrp))
3081 ret |= propagate_vr_across_jump_function (cs, jfunc: jump_func,
3082 dest_plats, param_type);
3083 else
3084 ret |= dest_plats->m_value_range.set_to_bottom ();
3085 }
3086 }
3087 for (; i < parms_count; i++)
3088 ret |= set_all_contains_variable (ipa_get_parm_lattices (info: callee_info, i));
3089
3090 return ret;
3091}
3092
/* If an indirect edge IE can be turned into a direct one based on constants
   in KNOWN_CSTS, polymorphic call contexts in KNOWN_CONTEXTS and aggregate
   values in AVS, return the destination. Store into *SPECULATIVE whether
   the discovered target is only a speculative guess. */
3097
3098static tree
3099ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
3100 const vec<tree> &known_csts,
3101 const vec<ipa_polymorphic_call_context> &known_contexts,
3102 const ipa_argagg_value_list &avs,
3103 bool *speculative)
3104{
3105 int param_index = ie->indirect_info->param_index;
3106 HOST_WIDE_INT anc_offset;
3107 tree t = NULL;
3108 tree target = NULL;
3109
3110 *speculative = false;
3111
3112 if (param_index == -1)
3113 return NULL_TREE;
3114
3115 if (!ie->indirect_info->polymorphic)
3116 {
3117 tree t = NULL;
3118
3119 if (ie->indirect_info->agg_contents)
3120 {
3121 t = NULL;
3122 if ((unsigned) param_index < known_csts.length ()
3123 && known_csts[param_index])
3124 t = ipa_find_agg_cst_from_init (scalar: known_csts[param_index],
3125 offset: ie->indirect_info->offset,
3126 by_ref: ie->indirect_info->by_ref);
3127
3128 if (!t && ie->indirect_info->guaranteed_unmodified)
3129 t = avs.get_value (index: param_index,
3130 unit_offset: ie->indirect_info->offset / BITS_PER_UNIT,
3131 by_ref: ie->indirect_info->by_ref);
3132 }
3133 else if ((unsigned) param_index < known_csts.length ())
3134 t = known_csts[param_index];
3135
3136 if (t
3137 && TREE_CODE (t) == ADDR_EXPR
3138 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
3139 return TREE_OPERAND (t, 0);
3140 else
3141 return NULL_TREE;
3142 }
3143
3144 if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3145 return NULL_TREE;
3146
3147 gcc_assert (!ie->indirect_info->agg_contents);
3148 gcc_assert (!ie->indirect_info->by_ref);
3149 anc_offset = ie->indirect_info->offset;
3150
3151 t = NULL;
3152
3153 if ((unsigned) param_index < known_csts.length ()
3154 && known_csts[param_index])
3155 t = ipa_find_agg_cst_from_init (scalar: known_csts[param_index],
3156 offset: ie->indirect_info->offset, by_ref: true);
3157
  /* Try to work out the value of the virtual table pointer from
     replacements or known aggregate values. */
3160 if (!t)
3161 t = avs.get_value (index: param_index,
3162 unit_offset: ie->indirect_info->offset / BITS_PER_UNIT,
3163 by_ref: true);
3164
3165 /* If we found the virtual table pointer, lookup the target. */
3166 if (t)
3167 {
3168 tree vtable;
3169 unsigned HOST_WIDE_INT offset;
3170 if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3171 {
3172 bool can_refer;
3173 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3174 vtable, offset, can_refer: &can_refer);
3175 if (can_refer)
3176 {
3177 if (!target
3178 || fndecl_built_in_p (node: target, name1: BUILT_IN_UNREACHABLE)
3179 || !possible_polymorphic_call_target_p
3180 (e: ie, n: cgraph_node::get (decl: target)))
3181 {
3182 /* Do not speculate builtin_unreachable, it is stupid! */
3183 if (ie->indirect_info->vptr_changed)
3184 return NULL;
3185 target = ipa_impossible_devirt_target (ie, target);
3186 }
3187 *speculative = ie->indirect_info->vptr_changed;
3188 if (!*speculative)
3189 return target;
3190 }
3191 }
3192 }
3193
  /* Do we know the constant value of the pointer? */
3195 if (!t && (unsigned) param_index < known_csts.length ())
3196 t = known_csts[param_index];
3197
3198 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
3199
3200 ipa_polymorphic_call_context context;
3201 if (known_contexts.length () > (unsigned int) param_index)
3202 {
3203 context = known_contexts[param_index];
3204 context.offset_by (off: anc_offset);
3205 if (ie->indirect_info->vptr_changed)
3206 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3207 otr_type: ie->indirect_info->otr_type);
3208 if (t)
3209 {
3210 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3211 (t, ie->indirect_info->otr_type, anc_offset);
3212 if (!ctx2.useless_p ())
3213 context.combine_with (ctx2, otr_type: ie->indirect_info->otr_type);
3214 }
3215 }
3216 else if (t)
3217 {
3218 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3219 anc_offset);
3220 if (ie->indirect_info->vptr_changed)
3221 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3222 otr_type: ie->indirect_info->otr_type);
3223 }
3224 else
3225 return NULL_TREE;
3226
3227 vec <cgraph_node *>targets;
3228 bool final;
3229
3230 targets = possible_polymorphic_call_targets
3231 (ie->indirect_info->otr_type,
3232 ie->indirect_info->otr_token,
3233 context, copletep: &final);
3234 if (!final || targets.length () > 1)
3235 {
3236 struct cgraph_node *node;
3237 if (*speculative)
3238 return target;
3239 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3240 || ie->speculative || !ie->maybe_hot_p ())
3241 return NULL;
3242 node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3243 ie->indirect_info->otr_token,
3244 context);
3245 if (node)
3246 {
3247 *speculative = true;
3248 target = node->decl;
3249 }
3250 else
3251 return NULL;
3252 }
3253 else
3254 {
3255 *speculative = false;
3256 if (targets.length () == 1)
3257 target = targets[0]->decl;
3258 else
3259 target = ipa_impossible_devirt_target (ie, NULL_TREE);
3260 }
3261
3262 if (target && !possible_polymorphic_call_target_p (e: ie,
3263 n: cgraph_node::get (decl: target)))
3264 {
3265 if (*speculative)
3266 return NULL;
3267 target = ipa_impossible_devirt_target (ie, target);
3268 }
3269
3270 return target;
3271}
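
/* For example (hypothetically): if propagation has determined that the
   argument used as 'this' in a call p->foo () always points to an object
   constructed as class B, the lookups above resolve the call to B::foo,
   directly if the virtual table pointer cannot have changed, and only as a
   speculative target otherwise. */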
3272
/* If an indirect edge IE can be turned into a direct one based on data in
   AVALS, return the destination. Store into *SPECULATIVE a boolean
   determining whether the discovered target is only a speculative guess. */
3276
3277tree
3278ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3279 ipa_call_arg_values *avals,
3280 bool *speculative)
3281{
3282 ipa_argagg_value_list avl (avals);
3283 return ipa_get_indirect_edge_target_1 (ie, known_csts: avals->m_known_vals,
3284 known_contexts: avals->m_known_contexts,
3285 avs: avl, speculative);
3286}
3287
3288/* Calculate devirtualization time bonus for NODE, assuming we know information
3289 about arguments stored in AVALS. */
3290
3291static int
3292devirtualization_time_bonus (struct cgraph_node *node,
3293 ipa_auto_call_arg_values *avals)
3294{
3295 struct cgraph_edge *ie;
3296 int res = 0;
3297
3298 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3299 {
3300 struct cgraph_node *callee;
3301 class ipa_fn_summary *isummary;
3302 enum availability avail;
3303 tree target;
3304 bool speculative;
3305
3306 ipa_argagg_value_list avl (avals);
3307 target = ipa_get_indirect_edge_target_1 (ie, known_csts: avals->m_known_vals,
3308 known_contexts: avals->m_known_contexts,
3309 avs: avl, speculative: &speculative);
3310 if (!target)
3311 continue;
3312
3313 /* Only bare minimum benefit for clearly un-inlineable targets. */
3314 res += 1;
3315 callee = cgraph_node::get (decl: target);
3316 if (!callee || !callee->definition)
3317 continue;
3318 callee = callee->function_symbol (avail: &avail);
3319 if (avail < AVAIL_AVAILABLE)
3320 continue;
3321 isummary = ipa_fn_summaries->get (node: callee);
3322 if (!isummary || !isummary->inlinable)
3323 continue;
3324
3325 int size = ipa_size_summaries->get (node: callee)->size;
    /* FIXME: The values below need re-considering and perhaps also
       integrating into the cost metrics, at least in some very basic
       way. */
3328 int max_inline_insns_auto
3329 = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3330 if (size <= max_inline_insns_auto / 4)
3331 res += 31 / ((int)speculative + 1);
3332 else if (size <= max_inline_insns_auto / 2)
3333 res += 15 / ((int)speculative + 1);
3334 else if (size <= max_inline_insns_auto
3335 || DECL_DECLARED_INLINE_P (callee->decl))
3336 res += 7 / ((int)speculative + 1);
3337 }
3338
3339 return res;
3340}
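
/* To illustrate the tiers above with a hypothetical limit: if
   param_max_inline_insns_auto is 40, a resolved non-speculative target of
   size 8 is worth 1 + 31 = 32, size 18 is worth 1 + 15 = 16 and size 39 is
   worth 1 + 7 = 8, while speculative targets get roughly half of each
   bonus. */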
3341
3342/* Return time bonus incurred because of hints stored in ESTIMATES. */
3343
3344static int
3345hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3346{
3347 int result = 0;
3348 ipa_hints hints = estimates.hints;
3349 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3350 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3351
3352 sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3353
3354 if (hints & INLINE_HINT_loop_iterations)
3355 result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3356
3357 if (hints & INLINE_HINT_loop_stride)
3358 result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3359
3360 return result;
3361}
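
/* For example, with a loop hint bonus of 64, estimates flagging two loops
   with known iteration counts and one loop with a known stride yield
   64 + 2 * 64 + 1 * 64 = 256. */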
3362
3363/* If there is a reason to penalize the function described by INFO in the
3364 cloning goodness evaluation, do so. */
3365
3366static inline sreal
3367incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3368 sreal evaluation)
3369{
3370 if (info->node_within_scc && !info->node_is_self_scc)
3371 evaluation = (evaluation
3372 * (100 - opt_for_fn (node->decl,
3373 param_ipa_cp_recursion_penalty))) / 100;
3374
3375 if (info->node_calling_single_call)
3376 evaluation = (evaluation
3377 * (100 - opt_for_fn (node->decl,
3378 param_ipa_cp_single_call_penalty)))
3379 / 100;
3380
3381 return evaluation;
3382}
3383
/* Return true if cloning NODE is a good idea, given the estimated
   TIME_BENEFIT and SIZE_COST, the sum of frequencies of incoming edges to
   the potential new clone in FREQ_SUM, the sum of their profile counts in
   COUNT_SUM and whether any of them lacks IPA profile information in
   CALLED_WITHOUT_IPA_PROFILE. */
3387
3388static bool
3389good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3390 sreal freq_sum, profile_count count_sum,
3391 int size_cost, bool called_without_ipa_profile)
3392{
3393 gcc_assert (count_sum.ipa () == count_sum);
3394 if (time_benefit == 0
3395 || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3396 || node->optimize_for_size_p ()
      /* If no call was executed according to the profile and no call is
	 missing profile information, we do not want to clone. */
3399 || (!called_without_ipa_profile && !count_sum.nonzero_p ()))
3400 return false;
3401
3402 gcc_assert (size_cost > 0);
3403
3404 ipa_node_params *info = ipa_node_params_sum->get (node);
3405 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
  /* If we know the IPA execution counts, we can estimate the overall
     speedup of the program. */
3408 if (count_sum.nonzero_p ())
3409 {
3410 profile_count saved_time = count_sum * time_benefit;
3411 sreal evaluation = saved_time.to_sreal_scale (in: profile_count::one ())
3412 / size_cost;
3413 evaluation = incorporate_penalties (node, info, evaluation);
3414
3415 if (dump_file && (dump_flags & TDF_DETAILS))
3416 {
3417 fprintf (stream: dump_file, format: " good_cloning_opportunity_p (time: %g, "
3418 "size: %i, count_sum: ", time_benefit.to_double (),
3419 size_cost);
3420 count_sum.dump (f: dump_file);
3421 fprintf (stream: dump_file, format: ", overall time saved: ");
3422 saved_time.dump (f: dump_file);
3423 fprintf (stream: dump_file, format: "%s%s) -> evaluation: %.2f, threshold: %i\n",
3424 info->node_within_scc
3425 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3426 info->node_calling_single_call ? ", single_call" : "",
3427 evaluation.to_double (), eval_threshold);
3428 }
3429 gcc_checking_assert (saved_time == saved_time.ipa ());
3430 if (!maybe_hot_count_p (NULL, saved_time))
3431 {
3432 if (dump_file && (dump_flags & TDF_DETAILS))
3433 fprintf (stream: dump_file, format: " not cloning: time saved is not hot\n");
3434 }
      /* Evaluation approximately corresponds to the time saved per
	 instruction introduced. It is likely almost always going to be
	 large enough, since we already checked that the time saved is large
	 enough to be considered hot. */
3439 else if (evaluation.to_int () >= eval_threshold)
3440 return true;
      /* If all call sites have known profiles, we know we do not want to
	 clone. If there are calls with unknown profiles, try the local
	 heuristics. */
3443 if (!called_without_ipa_profile)
3444 return false;
3445 }
3446 sreal evaluation = (time_benefit * freq_sum) / size_cost;
3447 evaluation = incorporate_penalties (node, info, evaluation);
3448 evaluation *= 1000;
3449
3450 if (dump_file && (dump_flags & TDF_DETAILS))
3451 fprintf (stream: dump_file, format: " good_cloning_opportunity_p (time: %g, "
3452 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3453 "threshold: %i\n",
3454 time_benefit.to_double (), size_cost, freq_sum.to_double (),
3455 info->node_within_scc
3456 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3457 info->node_calling_single_call ? ", single_call" : "",
3458 evaluation.to_double (), eval_threshold);
3459
3460 return evaluation.to_int () >= eval_threshold;
3461}
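
/* A worked example of the heuristic path above (with made-up numbers,
   ignoring the SCC and single-call penalties): a time benefit of 10, a
   frequency sum of 2 and a size cost of 4 give (10 * 2) / 4 * 1000 = 5000,
   which comfortably exceeds the default evaluation threshold of 500,
   whereas the same benefit reaching the node only through cold calls with a
   frequency sum of 0.01 evaluates to 25 and is rejected. */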
3462
3463/* Grow vectors in AVALS and fill them with information about values of
3464 parameters that are known to be independent of the context. Only calculate
3465 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3466 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3467 parameters will be stored in it.
3468
3469 TODO: Also grow context independent value range vectors. */
3470
3471static bool
3472gather_context_independent_values (class ipa_node_params *info,
3473 ipa_auto_call_arg_values *avals,
3474 bool calculate_aggs,
3475 int *removable_params_cost)
3476{
3477 int i, count = ipa_get_param_count (info);
3478 bool ret = false;
3479
3480 avals->m_known_vals.safe_grow_cleared (len: count, exact: true);
3481 avals->m_known_contexts.safe_grow_cleared (len: count, exact: true);
3482
3483 if (removable_params_cost)
3484 *removable_params_cost = 0;
3485
3486 for (i = 0; i < count; i++)
3487 {
3488 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3489 ipcp_lattice<tree> *lat = &plats->itself;
3490
3491 if (lat->is_single_const ())
3492 {
3493 ipcp_value<tree> *val = lat->values;
3494 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3495 avals->m_known_vals[i] = val->value;
3496 if (removable_params_cost)
3497 *removable_params_cost
3498 += estimate_move_cost (TREE_TYPE (val->value), false);
3499 ret = true;
3500 }
3501 else if (removable_params_cost
3502 && !ipa_is_param_used (info, i))
3503 *removable_params_cost
3504 += ipa_get_param_move_cost (info, i);
3505
3506 if (!ipa_is_param_used (info, i))
3507 continue;
3508
3509 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3510 /* Do not account a known context as a reason for cloning on its own. We
3511 can see whether it permits devirtualization instead. */
3512 if (ctxlat->is_single_const ())
3513 avals->m_known_contexts[i] = ctxlat->values->value;
3514
3515 if (calculate_aggs)
3516 ret |= push_agg_values_from_plats (plats, dest_index: i, unit_delta: 0, res: &avals->m_known_aggs);
3517 }
3518
3519 return ret;
3520}
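/* For example, if every call site of foo (int x, int y) in the unit passes
   7 as the second argument while the first one varies, the lattice of Y
   contains a single constant and m_known_vals[1] is set to 7 here,
   independently of any particular calling context; X remains NULL_TREE and
   can only be evaluated by the per-value estimates done later.  */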
3521
3522/* Perform time and size measurement of NODE with the context given in AVALS,
3523 calculate the benefit compared to the node without specialization and store
3524 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3525 context-independent or unused removable parameters and EST_MOVE_COST, the
3526 estimated movement cost of the considered parameter. */
3527
3528static void
3529perform_estimation_of_a_value (cgraph_node *node,
3530 ipa_auto_call_arg_values *avals,
3531 int removable_params_cost, int est_move_cost,
3532 ipcp_value_base *val)
3533{
3534 sreal time_benefit;
3535 ipa_call_estimates estimates;
3536
3537 estimate_ipcp_clone_size_and_time (node, avals, estimates: &estimates);
3538
3539 /* Extern inline functions have no local cloning time benefits because they
3540 will be inlined anyway. The only reason to clone them is if it enables
3541 optimization in any of the functions they call. */
3542 if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3543 time_benefit = 0;
3544 else
3545 time_benefit = (estimates.nonspecialized_time - estimates.time)
3546 + (devirtualization_time_bonus (node, avals)
3547 + hint_time_bonus (node, estimates)
3548 + removable_params_cost + est_move_cost);
3549
3550 int size = estimates.size;
3551 gcc_checking_assert (size >= 0);
3552 /* The inliner-heuristics based estimates may think that in certain
3553 contexts some functions do not have any size at all but we want
3554 all specializations to have at least a tiny cost, not least not to
3555 divide by zero. */
3556 if (size == 0)
3557 size = 1;
3558
3559 val->local_time_benefit = time_benefit;
3560 val->local_size_cost = size;
3561}
3562
3563/* Get the overall limit of growth based on parameters read from NODE's decl.
3564 It does not really make sense to mix functions with different overall growth
3565 limits, but it is possible, and if it happens, we do not want to select one
3566 limit at random. */
3567
3568static long
3569get_max_overall_size (cgraph_node *node)
3570{
3571 long max_new_size = orig_overall_size;
3572 long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3573 if (max_new_size < large_unit)
3574 max_new_size = large_unit;
3575 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3576 max_new_size += max_new_size * unit_growth / 100 + 1;
3577 return max_new_size;
3578}
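/* An illustration of the limit computation above, assuming the default
   parameter values of ipa-cp-large-unit-insns = 16000 and
   ipa-cp-unit-growth = 10: a unit whose original overall size is 10000 is
   first raised to the large-unit limit of 16000 and may then grow by 10%,
   i.e. cloning may take it up to 16000 + 1600 + 1 = 17601.  */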
3579
3580/* Return true if NODE should be cloned just for a parameter removal, possibly
3581 dumping a reason if not. */
3582
3583static bool
3584clone_for_param_removal_p (cgraph_node *node)
3585{
3586 if (!node->can_change_signature)
3587 {
3588 if (dump_file && (dump_flags & TDF_DETAILS))
3589 fprintf (stream: dump_file, format: " Not considering cloning to remove parameters, "
3590 "function cannot change signature.\n");
3591 return false;
3592 }
3593 if (node->can_be_local_p ())
3594 {
3595 if (dump_file && (dump_flags & TDF_DETAILS))
3596 fprintf (stream: dump_file, format: " Not considering cloning to remove parameters, "
3597 "IPA-SRA can do it potentially better.\n");
3598 return false;
3599 }
3600 return true;
3601}
3602
3603/* Iterate over known values of parameters of NODE and estimate the local
3604 effects in terms of time and size they have. */
3605
3606static void
3607estimate_local_effects (struct cgraph_node *node)
3608{
3609 ipa_node_params *info = ipa_node_params_sum->get (node);
3610 int count = ipa_get_param_count (info);
3611 bool always_const;
3612 int removable_params_cost;
3613
3614 if (!count || !ipcp_versionable_function_p (node))
3615 return;
3616
3617 if (dump_file && (dump_flags & TDF_DETAILS))
3618 fprintf (stream: dump_file, format: "\nEstimating effects for %s.\n", node->dump_name ());
3619
3620 ipa_auto_call_arg_values avals;
3621 always_const = gather_context_independent_values (info, avals: &avals, calculate_aggs: true,
3622 removable_params_cost: &removable_params_cost);
3623 int devirt_bonus = devirtualization_time_bonus (node, avals: &avals);
3624 if (always_const || devirt_bonus
3625 || (removable_params_cost && clone_for_param_removal_p (node)))
3626 {
3627 struct caller_statistics stats;
3628 ipa_call_estimates estimates;
3629
3630 init_caller_stats (stats: &stats);
3631 node->call_for_symbol_thunks_and_aliases (callback: gather_caller_stats, data: &stats,
3632 include_overwritable: false);
3633 estimate_ipcp_clone_size_and_time (node, avals: &avals, estimates: &estimates);
3634 sreal time = estimates.nonspecialized_time - estimates.time;
3635 time += devirt_bonus;
3636 time += hint_time_bonus (node, estimates);
3637 time += removable_params_cost;
3638 int size = estimates.size - stats.n_calls * removable_params_cost;
3639
3640 if (dump_file)
3641 fprintf (stream: dump_file, format: " - context independent values, size: %i, "
3642 "time_benefit: %f\n", size, (time).to_double ());
3643
3644 if (size <= 0 || node->local)
3645 {
3646 info->do_clone_for_all_contexts = true;
3647
3648 if (dump_file)
3649 fprintf (stream: dump_file, format: " Decided to specialize for all "
3650 "known contexts, code not going to grow.\n");
3651 }
3652 else if (good_cloning_opportunity_p (node, time_benefit: time, freq_sum: stats.freq_sum,
3653 count_sum: stats.count_sum, size_cost: size,
3654 called_without_ipa_profile: stats.called_without_ipa_profile))
3655 {
3656 if (size + overall_size <= get_max_overall_size (node))
3657 {
3658 info->do_clone_for_all_contexts = true;
3659 overall_size += size;
3660
3661 if (dump_file)
3662 fprintf (stream: dump_file, format: " Decided to specialize for all "
3663 "known contexts, growth (to %li) deemed "
3664 "beneficial.\n", overall_size);
3665 }
3666 else if (dump_file && (dump_flags & TDF_DETAILS))
3667 fprintf (stream: dump_file, format: " Not cloning for all contexts because "
3668 "maximum unit size would be reached with %li.\n",
3669 size + overall_size);
3670 }
3671 else if (dump_file && (dump_flags & TDF_DETAILS))
3672 fprintf (stream: dump_file, format: " Not cloning for all contexts because "
3673 "!good_cloning_opportunity_p.\n");
3674
3675 }
3676
3677 for (int i = 0; i < count; i++)
3678 {
3679 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3680 ipcp_lattice<tree> *lat = &plats->itself;
3681 ipcp_value<tree> *val;
3682
3683 if (lat->bottom
3684 || !lat->values
3685 || avals.m_known_vals[i])
3686 continue;
3687
3688 for (val = lat->values; val; val = val->next)
3689 {
3690 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3691 avals.m_known_vals[i] = val->value;
3692
3693 int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3694 perform_estimation_of_a_value (node, avals: &avals, removable_params_cost,
3695 est_move_cost: emc, val);
3696
3697 if (dump_file && (dump_flags & TDF_DETAILS))
3698 {
3699 fprintf (stream: dump_file, format: " - estimates for value ");
3700 print_ipcp_constant_value (f: dump_file, v: val->value);
3701 fprintf (stream: dump_file, format: " for ");
3702 ipa_dump_param (dump_file, info, i);
3703 fprintf (stream: dump_file, format: ": time_benefit: %g, size: %i\n",
3704 val->local_time_benefit.to_double (),
3705 val->local_size_cost);
3706 }
3707 }
3708 avals.m_known_vals[i] = NULL_TREE;
3709 }
3710
3711 for (int i = 0; i < count; i++)
3712 {
3713 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3714
3715 if (!plats->virt_call)
3716 continue;
3717
3718 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3719 ipcp_value<ipa_polymorphic_call_context> *val;
3720
3721 if (ctxlat->bottom
3722 || !ctxlat->values
3723 || !avals.m_known_contexts[i].useless_p ())
3724 continue;
3725
3726 for (val = ctxlat->values; val; val = val->next)
3727 {
3728 avals.m_known_contexts[i] = val->value;
3729 perform_estimation_of_a_value (node, avals: &avals, removable_params_cost,
3730 est_move_cost: 0, val);
3731
3732 if (dump_file && (dump_flags & TDF_DETAILS))
3733 {
3734 fprintf (stream: dump_file, format: " - estimates for polymorphic context ");
3735 print_ipcp_constant_value (f: dump_file, v: val->value);
3736 fprintf (stream: dump_file, format: " for ");
3737 ipa_dump_param (dump_file, info, i);
3738 fprintf (stream: dump_file, format: ": time_benefit: %g, size: %i\n",
3739 val->local_time_benefit.to_double (),
3740 val->local_size_cost);
3741 }
3742 }
3743 avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
3744 }
3745
3746 unsigned all_ctx_len = avals.m_known_aggs.length ();
3747 auto_vec<ipa_argagg_value, 32> all_ctx;
3748 all_ctx.reserve_exact (nelems: all_ctx_len);
3749 all_ctx.splice (src: avals.m_known_aggs);
3750 avals.m_known_aggs.safe_grow_cleared (len: all_ctx_len + 1);
3751
3752 unsigned j = 0;
3753 for (int index = 0; index < count; index++)
3754 {
3755 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i: index);
3756
3757 if (plats->aggs_bottom || !plats->aggs)
3758 continue;
3759
3760 for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3761 {
3762 ipcp_value<tree> *val;
3763 if (aglat->bottom || !aglat->values
3764 /* If the following is true, the one value is already part of all
3765 context estimations. */
3766 || (!plats->aggs_contain_variable
3767 && aglat->is_single_const ()))
3768 continue;
3769
3770 unsigned unit_offset = aglat->offset / BITS_PER_UNIT;
3771 while (j < all_ctx_len
3772 && (all_ctx[j].index < index
3773 || (all_ctx[j].index == index
3774 && all_ctx[j].unit_offset < unit_offset)))
3775 {
3776 avals.m_known_aggs[j] = all_ctx[j];
3777 j++;
3778 }
3779
3780 for (unsigned k = j; k < all_ctx_len; k++)
3781 avals.m_known_aggs[k+1] = all_ctx[k];
3782
3783 for (val = aglat->values; val; val = val->next)
3784 {
3785 avals.m_known_aggs[j].value = val->value;
3786 avals.m_known_aggs[j].unit_offset = unit_offset;
3787 avals.m_known_aggs[j].index = index;
3788 avals.m_known_aggs[j].by_ref = plats->aggs_by_ref;
3789 avals.m_known_aggs[j].killed = false;
3790
3791 perform_estimation_of_a_value (node, avals: &avals,
3792 removable_params_cost, est_move_cost: 0, val);
3793
3794 if (dump_file && (dump_flags & TDF_DETAILS))
3795 {
3796 fprintf (stream: dump_file, format: " - estimates for value ");
3797 print_ipcp_constant_value (f: dump_file, v: val->value);
3798 fprintf (stream: dump_file, format: " for ");
3799 ipa_dump_param (dump_file, info, i: index);
3800 fprintf (stream: dump_file, format: "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3801 "]: time_benefit: %g, size: %i\n",
3802 plats->aggs_by_ref ? "ref " : "",
3803 aglat->offset,
3804 val->local_time_benefit.to_double (),
3805 val->local_size_cost);
3806 }
3807 }
3808 }
3809 }
3810}
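/* A note on the aggregate loop above: avals.m_known_aggs has to stay sorted
   by (index, unit_offset) for the estimation code, which is why the
   context-independent items stashed in all_ctx are merged back around the
   single slot used for the value currently being evaluated.  For instance,
   with all_ctx entries at (0, 0) and (2, 8), estimating a value at (1, 4)
   presents the sequence (0, 0), (1, 4), (2, 8) to the estimator.  */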
3811
3812
3813/* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3814 topological sort of values. */
3815
3816template <typename valtype>
3817void
3818value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
3819{
3820 ipcp_value_source<valtype> *src;
3821
3822 if (cur_val->dfs)
3823 return;
3824
3825 dfs_counter++;
3826 cur_val->dfs = dfs_counter;
3827 cur_val->low_link = dfs_counter;
3828
3829 cur_val->topo_next = stack;
3830 stack = cur_val;
3831 cur_val->on_stack = true;
3832
3833 for (src = cur_val->sources; src; src = src->next)
3834 if (src->val)
3835 {
3836 if (src->val->dfs == 0)
3837 {
3838 add_val (cur_val: src->val);
3839 if (src->val->low_link < cur_val->low_link)
3840 cur_val->low_link = src->val->low_link;
3841 }
3842 else if (src->val->on_stack
3843 && src->val->dfs < cur_val->low_link)
3844 cur_val->low_link = src->val->dfs;
3845 }
3846
3847 if (cur_val->dfs == cur_val->low_link)
3848 {
3849 ipcp_value<valtype> *v, *scc_list = NULL;
3850
3851 do
3852 {
3853 v = stack;
3854 stack = v->topo_next;
3855 v->on_stack = false;
3856 v->scc_no = cur_val->dfs;
3857
3858 v->scc_next = scc_list;
3859 scc_list = v;
3860 }
3861 while (v != cur_val);
3862
3863 cur_val->topo_next = values_topo;
3864 values_topo = cur_val;
3865 }
3866}
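/* The above is the standard Tarjan SCC algorithm (dfs and low_link numbers
   plus an explicit stack) applied to the dependency graph of values.  For
   example, if value A of one parameter is the pass-through source of value
   B and, through a recursive call, B is in turn a source of A, both receive
   the same scc_no here and their costs and benefits are later summed as a
   single unit in propagate_effects.  */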
3867
3868/* Add all values in lattices associated with NODE to the topological sort if
3869 they are not there yet. */
3870
3871static void
3872add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
3873{
3874 ipa_node_params *info = ipa_node_params_sum->get (node);
3875 int i, count = ipa_get_param_count (info);
3876
3877 for (i = 0; i < count; i++)
3878 {
3879 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3880 ipcp_lattice<tree> *lat = &plats->itself;
3881 struct ipcp_agg_lattice *aglat;
3882
3883 if (!lat->bottom)
3884 {
3885 ipcp_value<tree> *val;
3886 for (val = lat->values; val; val = val->next)
3887 topo->constants.add_val (cur_val: val);
3888 }
3889
3890 if (!plats->aggs_bottom)
3891 for (aglat = plats->aggs; aglat; aglat = aglat->next)
3892 if (!aglat->bottom)
3893 {
3894 ipcp_value<tree> *val;
3895 for (val = aglat->values; val; val = val->next)
3896 topo->constants.add_val (cur_val: val);
3897 }
3898
3899 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3900 if (!ctxlat->bottom)
3901 {
3902 ipcp_value<ipa_polymorphic_call_context> *ctxval;
3903 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
3904 topo->contexts.add_val (cur_val: ctxval);
3905 }
3906 }
3907}
3908
3909/* One pass of constant propagation along the call graph edges, from callers
3910 to callees (requires topological ordering in TOPO), iterating over strongly
3911 connected components. */
3912
3913static void
3914propagate_constants_topo (class ipa_topo_info *topo)
3915{
3916 int i;
3917
3918 for (i = topo->nnodes - 1; i >= 0; i--)
3919 {
3920 unsigned j;
3921 struct cgraph_node *v, *node = topo->order[i];
3922 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
3923
3924 /* First, iteratively propagate within the strongly connected component
3925 until all lattices stabilize. */
3926 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3927 if (v->has_gimple_body_p ())
3928 {
3929 if (opt_for_fn (v->decl, flag_ipa_cp)
3930 && opt_for_fn (v->decl, optimize))
3931 push_node_to_stack (topo, node: v);
3932 /* When V is not optimized, we cannot push it to the stack, but we
3933 still need to set the lattices of all its callees to bottom. */
3934 else
3935 {
3936 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3937 propagate_constants_across_call (cs);
3938 }
3939 }
3940
3941 v = pop_node_from_stack (topo);
3942 while (v)
3943 {
3944 struct cgraph_edge *cs;
3945 class ipa_node_params *info = NULL;
3946 bool self_scc = true;
3947
3948 for (cs = v->callees; cs; cs = cs->next_callee)
3949 if (ipa_edge_within_scc (cs))
3950 {
3951 cgraph_node *callee = cs->callee->function_symbol ();
3952
3953 if (v != callee)
3954 self_scc = false;
3955
3956 if (!info)
3957 {
3958 info = ipa_node_params_sum->get (node: v);
3959 info->node_within_scc = true;
3960 }
3961
3962 if (propagate_constants_across_call (cs))
3963 push_node_to_stack (topo, node: callee);
3964 }
3965
3966 if (info)
3967 info->node_is_self_scc = self_scc;
3968
3969 v = pop_node_from_stack (topo);
3970 }
3971
3972 /* Afterwards, propagate along edges leading out of the SCC, calculate
3973 the local effects of the discovered constants and add all valid values
3974 to their topological sort. */
3975 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3976 if (v->has_gimple_body_p ()
3977 && opt_for_fn (v->decl, flag_ipa_cp)
3978 && opt_for_fn (v->decl, optimize))
3979 {
3980 struct cgraph_edge *cs;
3981
3982 estimate_local_effects (node: v);
3983 add_all_node_vals_to_toposort (node: v, topo);
3984 for (cs = v->callees; cs; cs = cs->next_callee)
3985 if (!ipa_edge_within_scc (cs))
3986 propagate_constants_across_call (cs);
3987 }
3988 cycle_nodes.release ();
3989 }
3990}
3991
3992/* Propagate the estimated effects of individual values along the topological
3993 order from the dependent values to those they depend on. */
3994
3995template <typename valtype>
3996void
3997value_topo_info<valtype>::propagate_effects ()
3998{
3999 ipcp_value<valtype> *base;
4000 hash_set<ipcp_value<valtype> *> processed_srcvals;
4001
4002 for (base = values_topo; base; base = base->topo_next)
4003 {
4004 ipcp_value_source<valtype> *src;
4005 ipcp_value<valtype> *val;
4006 sreal time = 0;
4007 HOST_WIDE_INT size = 0;
4008
4009 for (val = base; val; val = val->scc_next)
4010 {
4011 time = time + val->local_time_benefit + val->prop_time_benefit;
4012 size = size + val->local_size_cost + val->prop_size_cost;
4013 }
4014
4015 for (val = base; val; val = val->scc_next)
4016 {
4017 processed_srcvals.empty ();
4018 for (src = val->sources; src; src = src->next)
4019 if (src->val
4020 && cs_interesting_for_ipcp_p (src->cs))
4021 {
4022 if (!processed_srcvals.add (src->val))
4023 {
4024 HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
4025 if (prop_size < INT_MAX)
4026 src->val->prop_size_cost = prop_size;
4027 else
4028 continue;
4029 }
4030
4031 int special_factor = 1;
4032 if (val->same_scc (src->val))
4033 special_factor
4034 = opt_for_fn(src->cs->caller->decl,
4035 param_ipa_cp_recursive_freq_factor);
4036 else if (val->self_recursion_generated_p ()
4037 && (src->cs->callee->function_symbol ()
4038 == src->cs->caller))
4039 {
4040 int max_recur_gen_depth
4041 = opt_for_fn(src->cs->caller->decl,
4042 param_ipa_cp_max_recursive_depth);
4043 special_factor = max_recur_gen_depth
4044 - val->self_recursion_generated_level + 1;
4045 }
4046
4047 src->val->prop_time_benefit
4048 += time * special_factor * src->cs->sreal_frequency ();
4049 }
4050
4051 if (size < INT_MAX)
4052 {
4053 val->prop_time_benefit = time;
4054 val->prop_size_cost = size;
4055 }
4056 else
4057 {
4058 val->prop_time_benefit = 0;
4059 val->prop_size_cost = 0;
4060 }
4061 }
4062 }
4063}
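/* A small example of the propagation above: let value B of callee bar be
   created from value A of caller foo via a pass-through jump function and
   let the edge foo->bar have sreal_frequency 2.  If the summed local and
   propagated benefit of B's SCC is time = 6 with size = 10, then A receives
   prop_time_benefit += 6 * 2 = 12 and prop_size_cost += 10, reflecting that
   specializing foo for A also enables specializing bar for B.
   special_factor further scales the time for recursively generated
   values.  */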
4064
4065
4066/* Propagate constants, polymorphic contexts and their effects from the
4067 summaries interprocedurally. */
4068
4069static void
4070ipcp_propagate_stage (class ipa_topo_info *topo)
4071{
4072 struct cgraph_node *node;
4073
4074 if (dump_file)
4075 fprintf (stream: dump_file, format: "\n Propagating constants:\n\n");
4076
4077 FOR_EACH_DEFINED_FUNCTION (node)
4078 {
4079 if (node->has_gimple_body_p ()
4080 && opt_for_fn (node->decl, flag_ipa_cp)
4081 && opt_for_fn (node->decl, optimize))
4082 {
4083 ipa_node_params *info = ipa_node_params_sum->get (node);
4084 determine_versionability (node, info);
4085
4086 unsigned nlattices = ipa_get_param_count (info);
4087 info->lattices.safe_grow_cleared (len: nlattices, exact: true);
4088 initialize_node_lattices (node);
4089 }
4090 ipa_size_summary *s = ipa_size_summaries->get (node);
4091 if (node->definition && !node->alias && s != NULL)
4092 overall_size += s->self_size;
4093 }
4094
4095 orig_overall_size = overall_size;
4096
4097 if (dump_file)
4098 fprintf (stream: dump_file, format: "\noverall_size: %li\n", overall_size);
4099
4100 propagate_constants_topo (topo);
4101 if (flag_checking)
4102 ipcp_verify_propagated_values ();
4103 topo->constants.propagate_effects ();
4104 topo->contexts.propagate_effects ();
4105
4106 if (dump_file)
4107 {
4108 fprintf (stream: dump_file, format: "\nIPA lattices after all propagation:\n");
4109 print_all_lattices (f: dump_file, dump_sources: (dump_flags & TDF_DETAILS), dump_benefits: true);
4110 }
4111}
4112
4113/* Discover newly direct outgoing edges from NODE which is a new clone with
4114 known KNOWN_CSTS and make them direct. */
4115
4116static void
4117ipcp_discover_new_direct_edges (struct cgraph_node *node,
4118 vec<tree> known_csts,
4119 vec<ipa_polymorphic_call_context>
4120 known_contexts,
4121 vec<ipa_argagg_value, va_gc> *aggvals)
4122{
4123 struct cgraph_edge *ie, *next_ie;
4124 bool found = false;
4125
4126 for (ie = node->indirect_calls; ie; ie = next_ie)
4127 {
4128 tree target;
4129 bool speculative;
4130
4131 next_ie = ie->next_callee;
4132 ipa_argagg_value_list avs (aggvals);
4133 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4134 avs, speculative: &speculative);
4135 if (target)
4136 {
4137 bool agg_contents = ie->indirect_info->agg_contents;
4138 bool polymorphic = ie->indirect_info->polymorphic;
4139 int param_index = ie->indirect_info->param_index;
4140 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4141 speculative);
4142 found = true;
4143
4144 if (cs && !agg_contents && !polymorphic)
4145 {
4146 ipa_node_params *info = ipa_node_params_sum->get (node);
4147 int c = ipa_get_controlled_uses (info, i: param_index);
4148 if (c != IPA_UNDESCRIBED_USE
4149 && !ipa_get_param_load_dereferenced (info, i: param_index))
4150 {
4151 struct ipa_ref *to_del;
4152
4153 c--;
4154 ipa_set_controlled_uses (info, i: param_index, val: c);
4155 if (dump_file && (dump_flags & TDF_DETAILS))
4156 fprintf (stream: dump_file, format: " controlled uses count of param "
4157 "%i bumped down to %i\n", param_index, c);
4158 if (c == 0
4159 && (to_del = node->find_reference (referred_node: cs->callee, NULL, lto_stmt_uid: 0,
4160 use_type: IPA_REF_ADDR)))
4161 {
4162 if (dump_file && (dump_flags & TDF_DETAILS))
4163 fprintf (stream: dump_file, format: " and even removing its "
4164 "cloning-created reference\n");
4165 to_del->remove_reference ();
4166 }
4167 }
4168 }
4169 }
4170 }
4171 /* Turning indirect calls into direct ones will improve the overall summary. */
4172 if (found)
4173 ipa_update_overall_fn_summary (node);
4174}
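/* For example, when a clone of f (void (*cb) (int)) is created for
   cb == &g, the indirect call cb (5) in the clone body is resolved to a
   direct call to g here, and if that was the last described use of the
   parameter, the ADDR reference to g created during cloning is removed as
   well.  */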
4175
4176class edge_clone_summary;
4177static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4178
4179/* Edge clone summary. */
4180
4181class edge_clone_summary
4182{
4183public:
4184 /* Default constructor. */
4185 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4186
4187 /* Default destructor. */
4188 ~edge_clone_summary ()
4189 {
4190 if (prev_clone)
4191 edge_clone_summaries->get (edge: prev_clone)->next_clone = next_clone;
4192 if (next_clone)
4193 edge_clone_summaries->get (edge: next_clone)->prev_clone = prev_clone;
4194 }
4195
4196 cgraph_edge *prev_clone;
4197 cgraph_edge *next_clone;
4198};
4199
4200class edge_clone_summary_t:
4201 public call_summary <edge_clone_summary *>
4202{
4203public:
4204 edge_clone_summary_t (symbol_table *symtab):
4205 call_summary <edge_clone_summary *> (symtab)
4206 {
4207 m_initialize_when_cloning = true;
4208 }
4209
4210 void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4211 edge_clone_summary *src_data,
4212 edge_clone_summary *dst_data) final override;
4213};
4214
4215/* Edge duplication hook. */
4216
4217void
4218edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4219 edge_clone_summary *src_data,
4220 edge_clone_summary *dst_data)
4221{
4222 if (src_data->next_clone)
4223 edge_clone_summaries->get (edge: src_data->next_clone)->prev_clone = dst_edge;
4224 dst_data->prev_clone = src_edge;
4225 dst_data->next_clone = src_data->next_clone;
4226 src_data->next_clone = dst_edge;
4227}
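/* For instance, if edge E1 whose summary already points to E3 is duplicated
   into E2, the hook above turns the list E1 <-> E3 into E1 <-> E2 <-> E3,
   so that get_next_cgraph_edge_clone can walk all clones of the original
   call site.  */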
4228
4229/* Return true if CS calls DEST or its clone for all contexts. When
4230 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4231 edges from/to an all-context clone. */
4232
4233static bool
4234calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4235 bool allow_recursion_to_clone)
4236{
4237 enum availability availability;
4238 cgraph_node *callee = cs->callee->function_symbol (avail: &availability);
4239
4240 if (availability <= AVAIL_INTERPOSABLE)
4241 return false;
4242 if (callee == dest)
4243 return true;
4244 if (!allow_recursion_to_clone && cs->caller == callee)
4245 return false;
4246
4247 ipa_node_params *info = ipa_node_params_sum->get (node: callee);
4248 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4249}
4250
4251/* Return true if edge CS does bring about the value described by SRC to
4252 DEST_VAL of node DEST or its clone for all contexts. */
4253
4254static bool
4255cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4256 cgraph_node *dest, ipcp_value<tree> *dest_val)
4257{
4258 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
4259
4260 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, allow_recursion_to_clone: !src->val)
4261 || caller_info->node_dead)
4262 return false;
4263
4264 if (!src->val)
4265 return true;
4266
4267 if (caller_info->ipcp_orig_node)
4268 {
4269 tree t = NULL_TREE;
4270 if (src->offset == -1)
4271 t = caller_info->known_csts[src->index];
4272 else if (ipcp_transformation *ts
4273 = ipcp_get_transformation_summary (node: cs->caller))
4274 {
4275 ipa_argagg_value_list avl (ts);
4276 t = avl.get_value (index: src->index, unit_offset: src->offset / BITS_PER_UNIT);
4277 }
4278 return (t != NULL_TREE
4279 && values_equal_for_ipcp_p (x: src->val->value, y: t));
4280 }
4281 else
4282 {
4283 if (src->val == dest_val)
4284 return true;
4285
4286 struct ipcp_agg_lattice *aglat;
4287 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info: caller_info,
4288 i: src->index);
4289 if (src->offset == -1)
4290 return (plats->itself.is_single_const ()
4291 && values_equal_for_ipcp_p (x: src->val->value,
4292 y: plats->itself.values->value));
4293 else
4294 {
4295 if (plats->aggs_bottom || plats->aggs_contain_variable)
4296 return false;
4297 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4298 if (aglat->offset == src->offset)
4299 return (aglat->is_single_const ()
4300 && values_equal_for_ipcp_p (x: src->val->value,
4301 y: aglat->values->value));
4302 }
4303 return false;
4304 }
4305}
4306
4307/* Return true if edge CS does bring about the value described by SRC to
4308 DST_VAL of node DEST or its clone for all contexts. */
4309
4310static bool
4311cgraph_edge_brings_value_p (cgraph_edge *cs,
4312 ipcp_value_source<ipa_polymorphic_call_context> *src,
4313 cgraph_node *dest,
4314 ipcp_value<ipa_polymorphic_call_context> *)
4315{
4316 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
4317
4318 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, allow_recursion_to_clone: true)
4319 || caller_info->node_dead)
4320 return false;
4321 if (!src->val)
4322 return true;
4323
4324 if (caller_info->ipcp_orig_node)
4325 return (caller_info->known_contexts.length () > (unsigned) src->index)
4326 && values_equal_for_ipcp_p (x: src->val->value,
4327 y: caller_info->known_contexts[src->index]);
4328
4329 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info: caller_info,
4330 i: src->index);
4331 return plats->ctxlat.is_single_const ()
4332 && values_equal_for_ipcp_p (x: src->val->value,
4333 y: plats->ctxlat.values->value);
4334}
4335
4336/* Get the next clone in the linked list of clones of an edge. */
4337
4338static inline struct cgraph_edge *
4339get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4340{
4341 edge_clone_summary *s = edge_clone_summaries->get (edge: cs);
4342 return s != NULL ? s->next_clone : NULL;
4343}
4344
4345/* Given VAL that is intended for DEST, iterate over all its sources and if any
4346 of them is viable and interesting for IPA-CP, return true. In that case, for
4347 those that still hold, add their edge frequency, their number and the
4348 cumulative profile counts of self-recursive and other edges into *FREQ_SUM,
4349 *CALLER_COUNT, *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively. */
4350
4351template <typename valtype>
4352static bool
4353get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4354 sreal *freq_sum, int *caller_count,
4355 profile_count *rec_count_sum,
4356 profile_count *nonrec_count_sum,
4357 bool *called_without_ipa_profile)
4358{
4359 ipcp_value_source<valtype> *src;
4360 sreal freq = 0;
4361 int count = 0;
4362 profile_count rec_cnt = profile_count::zero ();
4363 profile_count nonrec_cnt = profile_count::zero ();
4364 bool interesting = false;
4365 bool non_self_recursive = false;
4366 *called_without_ipa_profile = false;
4367
4368 for (src = val->sources; src; src = src->next)
4369 {
4370 struct cgraph_edge *cs = src->cs;
4371 while (cs)
4372 {
4373 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4374 {
4375 count++;
4376 freq += cs->sreal_frequency ();
4377 interesting |= cs_interesting_for_ipcp_p (e: cs);
4378 if (cs->caller != dest)
4379 {
4380 non_self_recursive = true;
4381 if (cs->count.ipa ().initialized_p ())
4382 nonrec_cnt += cs->count.ipa ();
4383 else
4384 *called_without_ipa_profile = true;
4385 }
4386 else if (cs->count.ipa ().initialized_p ())
4387 rec_cnt += cs->count.ipa ();
4388 else
4389 *called_without_ipa_profile = true;
4390 }
4391 cs = get_next_cgraph_edge_clone (cs);
4392 }
4393 }
4394
4395 /* If the only edges bringing a value are self-recursive ones, do not bother
4396 evaluating it. */
4397 if (!non_self_recursive)
4398 return false;
4399
4400 *freq_sum = freq;
4401 *caller_count = count;
4402 *rec_count_sum = rec_cnt;
4403 *nonrec_count_sum = nonrec_cnt;
4404
4405 return interesting;
4406}
4407
4408/* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4409 to let a non-self-recursive caller be the first element. Thus, we can
4410 simplify intersecting operations on values that arrive from all of these
4411 callers, especially when a self-recursive call is among them. Return true if
4412 this kind of adjustment is possible. */
4413
4414static bool
4415adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4416 cgraph_node *node)
4417{
4418 for (unsigned i = 0; i < callers.length (); i++)
4419 {
4420 cgraph_edge *cs = callers[i];
4421
4422 if (cs->caller != node)
4423 {
4424 if (i > 0)
4425 {
4426 callers[i] = callers[0];
4427 callers[0] = cs;
4428 }
4429 return true;
4430 }
4431 }
4432 return false;
4433}
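/* For example, if NODE is brought a value by the edges
   {NODE->NODE, outer->NODE}, the self-recursive edge initially comes first;
   the swap above reorders them to {outer->NODE, NODE->NODE}, so that
   intersections can start from a caller whose values do not themselves
   depend on the result of the intersection.  */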
4434
4435/* Return a vector of incoming edges that do bring value VAL to node DEST. It
4436 is assumed their number is known and equal to CALLER_COUNT. */
4437
4438template <typename valtype>
4439static vec<cgraph_edge *>
4440gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4441 int caller_count)
4442{
4443 ipcp_value_source<valtype> *src;
4444 vec<cgraph_edge *> ret;
4445
4446 ret.create (nelems: caller_count);
4447 for (src = val->sources; src; src = src->next)
4448 {
4449 struct cgraph_edge *cs = src->cs;
4450 while (cs)
4451 {
4452 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4453 ret.quick_push (obj: cs);
4454 cs = get_next_cgraph_edge_clone (cs);
4455 }
4456 }
4457
4458 if (caller_count > 1)
4459 adjust_callers_for_value_intersection (callers&: ret, node: dest);
4460
4461 return ret;
4462}
4463
4464/* Construct a replacement map for a known VALUE of a formal parameter PARM_NUM.
4465 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4466 should be set to true when the reference created for the constant should be
4467 a load one and not an address one because the corresponding parameter p is
4468 only used as *p. */
4469
4470static struct ipa_replace_map *
4471get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4472 bool force_load_ref)
4473{
4474 struct ipa_replace_map *replace_map;
4475
4476 replace_map = ggc_alloc<ipa_replace_map> ();
4477 if (dump_file)
4478 {
4479 fprintf (stream: dump_file, format: " replacing ");
4480 ipa_dump_param (dump_file, info, i: parm_num);
4481
4482 fprintf (stream: dump_file, format: " with const ");
4483 print_generic_expr (dump_file, value);
4484
4485 if (force_load_ref)
4486 fprintf (stream: dump_file, format: " - forcing load reference\n");
4487 else
4488 fprintf (stream: dump_file, format: "\n");
4489 }
4490 replace_map->parm_num = parm_num;
4491 replace_map->new_tree = value;
4492 replace_map->force_load_ref = force_load_ref;
4493 return replace_map;
4494}
4495
4496/* Dump new profiling counts of NODE. SPEC is true when NODE is a specialized
4497 one, otherwise it will be referred to as the original node. */
4498
4499static void
4500dump_profile_updates (cgraph_node *node, bool spec)
4501{
4502 if (spec)
4503 fprintf (stream: dump_file, format: " setting count of the specialized node %s to ",
4504 node->dump_name ());
4505 else
4506 fprintf (stream: dump_file, format: " setting count of the original node %s to ",
4507 node->dump_name ());
4508
4509 node->count.dump (f: dump_file);
4510 fprintf (stream: dump_file, format: "\n");
4511 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4512 {
4513 fprintf (stream: dump_file, format: " edge to %s has count ",
4514 cs->callee->dump_name ());
4515 cs->count.dump (f: dump_file);
4516 fprintf (stream: dump_file, format: "\n");
4517 }
4518}
4519
4520/* With a partial training run we do not want to assume that the original's
4521 count is zero whenever we redirect all executed edges to the clone. Simply
4522 drop the profile to a local one in this case. In any case, return the new
4523 value. ORIG_NODE is the original node; its count has not been updated yet. */
4524
4525profile_count
4526lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4527{
4528 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4529 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4530 && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4531 remainder = orig_node->count.guessed_local ();
4532
4533 return remainder;
4534}
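/* In other words, if a partial training run exercised only paths that all
   get redirected to the clone, REMAINDER would be an IPA count of zero;
   with -fprofile-partial-training we fall back to the original node's
   guessed local profile instead of declaring it never executed.  */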
4535
4536/* Structure to sum counts coming from nodes other than the original node and
4537 its clones. */
4538
4539struct gather_other_count_struct
4540{
4541 cgraph_node *orig;
4542 profile_count other_count;
4543};
4544
4545/* Worker callback of call_for_symbol_thunks_and_aliases summing the counts
4546 that come from non-self-recursive calls. */
4547
4548static bool
4549gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4550{
4551 gather_other_count_struct *desc = (gather_other_count_struct *) data;
4552 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4553 if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4554 desc->other_count += cs->count.ipa ();
4555 return false;
4556}
4557
4558/* Structure to help analyze if we need to boost counts of some clones of some
4559 non-recursive edges to match the new callee count. */
4560
4561struct desc_incoming_count_struct
4562{
4563 cgraph_node *orig;
4564 hash_set <cgraph_edge *> *processed_edges;
4565 profile_count count;
4566 unsigned unproc_orig_rec_edges;
4567};
4568
4569/* Go over edges calling NODE and its thunks and gather information about
4570 incoming counts so that we know if we need to make any adjustments. */
4571
4572static void
4573analyze_clone_icoming_counts (cgraph_node *node,
4574 desc_incoming_count_struct *desc)
4575{
4576 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4577 if (cs->caller->thunk)
4578 {
4579 analyze_clone_icoming_counts (node: cs->caller, desc);
4580 continue;
4581 }
4582 else
4583 {
4584 if (cs->count.initialized_p ())
4585 desc->count += cs->count.ipa ();
4586 if (!desc->processed_edges->contains (k: cs)
4587 && cs->caller->clone_of == desc->orig)
4588 desc->unproc_orig_rec_edges++;
4589 }
4590}
4591
4592/* If caller edge counts of a clone created for a self-recursive arithmetic
4593 jump function must be adjusted because they come from the "seed" clone for
4594 the first value and so have been excessively scaled back as if the call was
4595 not recursive, adjust them so that the incoming counts of NODE match its
4596 count. NODE is the node or its thunk. */
4597
4598static void
4599adjust_clone_incoming_counts (cgraph_node *node,
4600 desc_incoming_count_struct *desc)
4601{
4602 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4603 if (cs->caller->thunk)
4604 {
4605 adjust_clone_incoming_counts (node: cs->caller, desc);
4606 profile_count sum = profile_count::zero ();
4607 for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4608 if (e->count.initialized_p ())
4609 sum += e->count.ipa ();
4610 cs->count = cs->count.combine_with_ipa_count (ipa: sum);
4611 }
4612 else if (!desc->processed_edges->contains (k: cs)
4613 && cs->caller->clone_of == desc->orig
4614 && cs->count.compatible_p (other: desc->count))
4615 {
4616 cs->count += desc->count;
4617 if (dump_file)
4618 {
4619 fprintf (stream: dump_file, format: " Adjusted count of an incoming edge of "
4620 "a clone %s -> %s to ", cs->caller->dump_name (),
4621 cs->callee->dump_name ());
4622 cs->count.dump (f: dump_file);
4623 fprintf (stream: dump_file, format: "\n");
4624 }
4625 }
4626}
4627
4628/* When ORIG_NODE has been cloned for values which have been generated for a
4629 self-recursive call as a result of an arithmetic pass-through
4630 jump-functions, adjust its count together with counts of all such clones in
4631 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4632
4633 The function sums the counts of the original node and all its clones that
4634 cannot be attributed to a specific clone because they come from
4635 non-recursive edges. This sum is then evenly divided between the clones and
4636 on top of that each one gets all the counts which can be attributed directly
4637 to it. */
4638
4639static void
4640update_counts_for_self_gen_clones (cgraph_node *orig_node,
4641 const vec<cgraph_node *> &self_gen_clones)
4642{
4643 profile_count redist_sum = orig_node->count.ipa ();
4644 if (!redist_sum.nonzero_p ())
4645 return;
4646
4647 if (dump_file)
4648 fprintf (stream: dump_file, format: " Updating profile of self recursive clone "
4649 "series\n");
4650
4651 gather_other_count_struct gocs;
4652 gocs.orig = orig_node;
4653 gocs.other_count = profile_count::zero ();
4654
4655 auto_vec <profile_count, 8> other_edges_count;
4656 for (cgraph_node *n : self_gen_clones)
4657 {
4658 gocs.other_count = profile_count::zero ();
4659 n->call_for_symbol_thunks_and_aliases (callback: gather_count_of_non_rec_edges,
4660 data: &gocs, include_overwritable: false);
4661 other_edges_count.safe_push (obj: gocs.other_count);
4662 redist_sum -= gocs.other_count;
4663 }
4664
4665 hash_set<cgraph_edge *> processed_edges;
4666 unsigned i = 0;
4667 for (cgraph_node *n : self_gen_clones)
4668 {
4669 profile_count new_count
4670 = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
4671 new_count = lenient_count_portion_handling (remainder: new_count, orig_node);
4672 n->scale_profile_to (ipa_count: new_count);
4673 for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4674 processed_edges.add (k: cs);
4675
4676 i++;
4677 }
4678
4679 /* There are still going to be edges to ORIG_NODE that have one or more
4680 clones coming from another node clone in SELF_GEN_CLONES and which we
4681 scaled by the same amount, which means that the total incoming sum of
4682 counts to ORIG_NODE will be too high; scale such edges back. */
4683 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4684 {
4685 if (cs->callee->ultimate_alias_target () == orig_node)
4686 {
4687 unsigned den = 0;
4688 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (cs: e))
4689 if (e->callee->ultimate_alias_target () == orig_node
4690 && processed_edges.contains (k: e))
4691 den++;
4692 if (den > 0)
4693 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (cs: e))
4694 if (e->callee->ultimate_alias_target () == orig_node
4695 && processed_edges.contains (k: e))
4696 e->count /= den;
4697 }
4698 }
4699
4700 /* Edges from the seeds of the values generated for arithmetic jump-functions
4701 along self-recursive edges are likely to have a fairly low count, and so
4702 edges from them to nodes in the self_gen_clones do not correspond to the
4703 artificially distributed count of the nodes; the total sum of incoming
4704 edges to some clones might be too low. Detect this situation and correct
4705 it. */
4706 for (cgraph_node *n : self_gen_clones)
4707 {
4708 if (!n->count.ipa ().nonzero_p ())
4709 continue;
4710
4711 desc_incoming_count_struct desc;
4712 desc.orig = orig_node;
4713 desc.processed_edges = &processed_edges;
4714 desc.count = profile_count::zero ();
4715 desc.unproc_orig_rec_edges = 0;
4716 analyze_clone_icoming_counts (node: n, desc: &desc);
4717
4718 if (n->count.differs_from_p (other: desc.count))
4719 {
4720 if (n->count > desc.count
4721 && desc.unproc_orig_rec_edges > 0)
4722 {
4723 desc.count = n->count - desc.count;
4724 desc.count /= desc.unproc_orig_rec_edges;
4725 adjust_clone_incoming_counts (node: n, desc: &desc);
4726 }
4727 else if (dump_file)
4728 fprintf (stream: dump_file,
4729 format: " Unable to fix up incoming counts for %s.\n",
4730 n->dump_name ());
4731 }
4732 }
4733
4734 if (dump_file)
4735 for (cgraph_node *n : self_gen_clones)
4736 dump_profile_updates (node: n, spec: n != orig_node);
4737 return;
4738}
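/* A redistribution example for the function above, with hypothetical
   counts: if ORIG_NODE and two clones generated for a self-recursive
   arithmetic jump function are left with redist_sum = 900 after subtracting
   the counts of non-recursive edges, and those edges brought 50, 20 and 10
   to the individual nodes, then the three nodes are scaled to 300 + 50,
   300 + 20 and 300 + 10 respectively before the incoming-edge
   corrections.  */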
4739
4740/* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4741 their profile information to reflect this. This function should not be used
4742 for clones generated for arithmetic pass-through jump functions on a
4743 self-recursive call graph edge, that situation is handled by
4744 update_counts_for_self_gen_clones. */
4745
4746static void
4747update_profiling_info (struct cgraph_node *orig_node,
4748 struct cgraph_node *new_node)
4749{
4750 struct caller_statistics stats;
4751 profile_count new_sum;
4752 profile_count remainder, orig_node_count = orig_node->count.ipa ();
4753
4754 if (!orig_node_count.nonzero_p ())
4755 return;
4756
4757 if (dump_file)
4758 {
4759 fprintf (stream: dump_file, format: " Updating profile from original count: ");
4760 orig_node_count.dump (f: dump_file);
4761 fprintf (stream: dump_file, format: "\n");
4762 }
4763
4764 init_caller_stats (stats: &stats, itself: new_node);
4765 new_node->call_for_symbol_thunks_and_aliases (callback: gather_caller_stats, data: &stats,
4766 include_overwritable: false);
4767 new_sum = stats.count_sum;
4768
4769 bool orig_edges_processed = false;
4770 if (new_sum > orig_node_count)
4771 {
4772 /* Profile has already gone astray, keep what we have but lower it
4773 to global0 adjusted or to local if we have partial training. */
4774 if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
4775 orig_node->make_profile_local ();
4776 else
4777 orig_node->make_profile_global0 (quality: GUESSED_GLOBAL0_ADJUSTED);
4778 orig_edges_processed = true;
4779 }
4780 else if (stats.rec_count_sum.nonzero_p ())
4781 {
4782 int new_nonrec_calls = stats.n_nonrec_calls;
4783 /* There are self-recursive edges which are likely to bring in the
4784 majority of calls but which we must divide in between the original and
4785 new node. */
4786 init_caller_stats (stats: &stats, itself: orig_node);
4787 orig_node->call_for_symbol_thunks_and_aliases (callback: gather_caller_stats,
4788 data: &stats, include_overwritable: false);
4789 int orig_nonrec_calls = stats.n_nonrec_calls;
4790 profile_count orig_nonrec_call_count = stats.count_sum;
4791
4792 if (orig_node->local)
4793 {
4794 if (!orig_nonrec_call_count.nonzero_p ())
4795 {
4796 if (dump_file)
4797 fprintf (stream: dump_file, format: " The original is local and the only "
4798 "incoming edges from non-dead callers with nonzero "
4799 "counts are self-recursive, assuming it is cold.\n");
4800 /* The NEW_NODE count and counts of all its outgoing edges
4801 are still unmodified copies of ORIG_NODE's. Just clear
4802 the latter and bail out. */
4803 if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
4804 orig_node->make_profile_local ();
4805 else
4806 orig_node->make_profile_global0 (quality: GUESSED_GLOBAL0_ADJUSTED);
4807 return;
4808 }
4809 }
4810 else
4811 {
4812 /* Let's behave as if there was another caller that accounts for all
4813 the calls that were either indirect or from other compilation
4814 units. */
4815 orig_nonrec_calls++;
4816 profile_count pretend_caller_count
4817 = (orig_node_count - new_sum - orig_nonrec_call_count
4818 - stats.rec_count_sum);
4819 orig_nonrec_call_count += pretend_caller_count;
4820 }
4821
4822 /* Divide all "unexplained" counts roughly proportionally to sums of
4823 counts of non-recursive calls.
4824
4825 We put rather arbitrary limits on how many counts we claim because the
4826 sum of non-self-recursive incoming counts is only a rough guideline
4827 and there are cases (such as mcf) where using it blindly just takes
4828 too many. And if lattices are considered in the opposite order we
4829 could also take too few. */
4830 profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
4831
4832 int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
4833 profile_count new_part
4834 = MAX (MIN (unexp.apply_scale (new_sum,
4835 new_sum + orig_nonrec_call_count),
4836 unexp.apply_scale (limit_den - 1, limit_den)),
4837 unexp.apply_scale (new_nonrec_calls, limit_den));
4838 if (dump_file)
4839 {
4840 fprintf (stream: dump_file, format: " Claiming ");
4841 new_part.dump (f: dump_file);
4842 fprintf (stream: dump_file, format: " of unexplained ");
4843 unexp.dump (f: dump_file);
4844 fprintf (stream: dump_file, format: " counts because of self-recursive "
4845 "calls\n");
4846 }
4847 new_sum += new_part;
4848 remainder = lenient_count_portion_handling (remainder: orig_node_count - new_sum,
4849 orig_node);
4850 }
4851 else
4852 remainder = lenient_count_portion_handling (remainder: orig_node_count - new_sum,
4853 orig_node);
4854
4855 new_node->scale_profile_to (ipa_count: new_sum);
4856
4857 if (!orig_edges_processed)
4858 orig_node->scale_profile_to (ipa_count: remainder);
4859
4860 if (dump_file)
4861 {
4862 dump_profile_updates (node: new_node, spec: true);
4863 dump_profile_updates (node: orig_node, spec: false);
4864 }
4865}
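/* A sketch of the "unexplained" split above with made-up numbers,
   self-recursive calls present and ORIG_NODE local: let
   orig_node_count = 1000, new_sum = 100, orig_nonrec_call_count = 100 and
   one non-recursive call to each node, giving unexp = 800 and
   limit_den = 4.  The proportional share is 800 * 100 / 200 = 400, which
   lies between the lower bound 800 * 1 / 4 = 200 and the upper bound
   800 * 3 / 4 = 600, so the new node claims 400 extra and the original
   node is scaled to the remaining 500.  */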
4866
4867/* Update the respective profile of specialized NEW_NODE and the original
4868 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4869 have been redirected to the specialized version. */
4870
4871static void
4872update_specialized_profile (struct cgraph_node *new_node,
4873 struct cgraph_node *orig_node,
4874 profile_count redirected_sum)
4875{
4876 if (dump_file)
4877 {
4878 fprintf (stream: dump_file, format: " the sum of counts of redirected edges is ");
4879 redirected_sum.dump (f: dump_file);
4880 fprintf (stream: dump_file, format: "\n old ipa count of the original node is ");
4881 orig_node->count.dump (f: dump_file);
4882 fprintf (stream: dump_file, format: "\n");
4883 }
4884 if (!orig_node->count.ipa ().nonzero_p ()
4885 || !redirected_sum.nonzero_p ())
4886 return;
4887
4888 orig_node->scale_profile_to
4889 (ipa_count: lenient_count_portion_handling (remainder: orig_node->count.ipa () - redirected_sum,
4890 orig_node));
4891
4892 new_node->scale_profile_to (ipa_count: new_node->count.ipa () + redirected_sum);
4893
4894 if (dump_file)
4895 {
4896 dump_profile_updates (node: new_node, spec: true);
4897 dump_profile_updates (node: orig_node, spec: false);
4898 }
4899}
4900
4901static void adjust_references_in_caller (cgraph_edge *cs,
4902 symtab_node *symbol, int index);
4903
4904/* Simple structure to pass a symbol and index (with same meaning as parameters
4905 of adjust_references_in_caller) through a void* parameter of a
4906 call_for_symbol_thunks_and_aliases callback. */
4907struct symbol_and_index_together
4908{
4909 symtab_node *symbol;
4910 int index;
4911};
4912
4913/* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4914 adjust_references_in_caller on edges up in the call-graph, if necessary. */
4915static bool
4916adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
4917{
4918 symbol_and_index_together *pack = (symbol_and_index_together *) data;
4919 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4920 if (!cs->caller->thunk)
4921 adjust_references_in_caller (cs, symbol: pack->symbol, index: pack->index);
4922 return false;
4923}
4924
4925/* At INDEX of a function being called by CS there is an ADDR_EXPR of a
4926 variable which is only dereferenced and which is represented by SYMBOL. See
4927 if we can remove the ADDR reference in callers associated with the call. */
4928
4929static void
4930adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
4931{
4932 ipa_edge_args *args = ipa_edge_args_sum->get (edge: cs);
4933 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i: index);
4934 if (jfunc->type == IPA_JF_CONST)
4935 {
4936 ipa_ref *to_del = cs->caller->find_reference (referred_node: symbol, stmt: cs->call_stmt,
4937 lto_stmt_uid: cs->lto_stmt_uid,
4938 use_type: IPA_REF_ADDR);
4939 if (!to_del)
4940 return;
4941 to_del->remove_reference ();
4942 ipa_zap_jf_refdesc (jfunc);
4943 if (dump_file)
4944 fprintf (stream: dump_file, format: " Removed a reference from %s to %s.\n",
4945 cs->caller->dump_name (), symbol->dump_name ());
4946 return;
4947 }
4948
4949 if (jfunc->type != IPA_JF_PASS_THROUGH
4950 || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
4951 || ipa_get_jf_pass_through_refdesc_decremented (jfunc))
4952 return;
4953
4954 int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
4955 cgraph_node *caller = cs->caller;
4956 ipa_node_params *caller_info = ipa_node_params_sum->get (node: caller);
4957 /* TODO: This consistency check may be too big and not really
4958 that useful. Consider removing it. */
4959 tree cst;
4960 if (caller_info->ipcp_orig_node)
4961 cst = caller_info->known_csts[fidx];
4962 else
4963 {
4964 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info: caller_info, i: fidx);
4965 gcc_assert (lat->is_single_const ());
4966 cst = lat->values->value;
4967 }
4968 gcc_assert (TREE_CODE (cst) == ADDR_EXPR
4969 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
4970 == symbol));
4971
4972 int cuses = ipa_get_controlled_uses (info: caller_info, i: fidx);
4973 if (cuses == IPA_UNDESCRIBED_USE)
4974 return;
4975 gcc_assert (cuses > 0);
4976 cuses--;
4977 ipa_set_controlled_uses (info: caller_info, i: fidx, val: cuses);
4978 ipa_set_jf_pass_through_refdesc_decremented (jfunc, value: true);
4979 if (dump_file && (dump_flags & TDF_DETAILS))
4980 fprintf (stream: dump_file, format: " Controlled uses of parameter %i of %s dropped "
4981 "to %i.\n", fidx, caller->dump_name (), cuses);
4982 if (cuses)
4983 return;
4984
4985 if (caller_info->ipcp_orig_node)
4986 {
4987 /* Cloning machinery has created a reference here, we need to either
4988 remove it or change it to a read one. */
4989 ipa_ref *to_del = caller->find_reference (referred_node: symbol, NULL, lto_stmt_uid: 0, use_type: IPA_REF_ADDR);
4990 if (to_del)
4991 {
4992 to_del->remove_reference ();
4993 if (dump_file)
4994 fprintf (stream: dump_file, format: " Removed a reference from %s to %s.\n",
4995 cs->caller->dump_name (), symbol->dump_name ());
4996 if (ipa_get_param_load_dereferenced (info: caller_info, i: fidx))
4997 {
4998 caller->create_reference (referred_node: symbol, use_type: IPA_REF_LOAD, NULL);
4999 if (dump_file)
5000 fprintf (stream: dump_file,
5001 format: " ...and replaced it with LOAD one.\n");
5002 }
5003 }
5004 }
5005
5006 symbol_and_index_together pack;
5007 pack.symbol = symbol;
5008 pack.index = fidx;
5009 if (caller->can_change_signature)
5010 caller->call_for_symbol_thunks_and_aliases (callback: adjust_refs_in_act_callers,
5011 data: &pack, include_overwritable: true);
5012}
5013
5014
5015/* Return true if we would like to remove a parameter from NODE when cloning it
5016 with KNOWN_CSTS scalar constants. */
5017
5018static bool
5019want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5020{
5021 auto_vec<bool, 16> surviving;
5022 bool filled_vec = false;
5023 ipa_node_params *info = ipa_node_params_sum->get (node);
5024 int i, count = ipa_get_param_count (info);
5025
5026 for (i = 0; i < count; i++)
5027 {
5028 if (!known_csts[i] && ipa_is_param_used (info, i))
5029 continue;
5030
5031 if (!filled_vec)
5032 {
5033 clone_info *info = clone_info::get (node);
5034 if (!info || !info->param_adjustments)
5035 return true;
5036 info->param_adjustments->get_surviving_params (surviving_params: &surviving);
5037 filled_vec = true;
5038 }
5039 if (surviving.length () > (unsigned) i && surviving[i])
5040 return true;
5041 }
5042 return false;
5043}
5044
5045/* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5046 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5047 redirect all edges in CALLERS to it. */
5048
5049static struct cgraph_node *
5050create_specialized_node (struct cgraph_node *node,
5051 vec<tree> known_csts,
5052 vec<ipa_polymorphic_call_context> known_contexts,
5053 vec<ipa_argagg_value, va_gc> *aggvals,
5054 vec<cgraph_edge *> &callers)
5055{
5056 ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
5057 vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
5058 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
5059 struct cgraph_node *new_node;
5060 int i, count = ipa_get_param_count (info);
5061 clone_info *cinfo = clone_info::get (node);
5062 ipa_param_adjustments *old_adjustments = cinfo
5063 ? cinfo->param_adjustments : NULL;
5064 ipa_param_adjustments *new_adjustments;
5065 gcc_assert (!info->ipcp_orig_node);
5066 gcc_assert (node->can_change_signature
5067 || !old_adjustments);
5068
5069 if (old_adjustments)
5070 {
5071 /* At the moment all IPA optimizations should use the number of
5072 parameters of the prevailing decl as the m_always_copy_start.
5073 Handling any other value would complicate the code below, so for the
5074 time being let's only assert it is so. */
5075 gcc_assert (old_adjustments->m_always_copy_start == count
5076 || old_adjustments->m_always_copy_start < 0);
5077 int old_adj_count = vec_safe_length (v: old_adjustments->m_adj_params);
5078 for (i = 0; i < old_adj_count; i++)
5079 {
5080 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
5081 if (!node->can_change_signature
5082 || old_adj->op != IPA_PARAM_OP_COPY
5083 || (!known_csts[old_adj->base_index]
5084 && ipa_is_param_used (info, i: old_adj->base_index)))
5085 {
5086 ipa_adjusted_param new_adj = *old_adj;
5087
5088 new_adj.prev_clone_adjustment = true;
5089 new_adj.prev_clone_index = i;
5090 vec_safe_push (v&: new_params, obj: new_adj);
5091 }
5092 }
5093 bool skip_return = old_adjustments->m_skip_return;
5094 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5095 ipa_param_adjustments (new_params, count,
5096 skip_return));
5097 }
5098 else if (node->can_change_signature
5099 && want_remove_some_param_p (node, known_csts))
5100 {
5101 ipa_adjusted_param adj;
5102 memset (s: &adj, c: 0, n: sizeof (adj));
5103 adj.op = IPA_PARAM_OP_COPY;
5104 for (i = 0; i < count; i++)
5105 if (!known_csts[i] && ipa_is_param_used (info, i))
5106 {
5107 adj.base_index = i;
5108 adj.prev_clone_index = i;
5109 vec_safe_push (v&: new_params, obj: adj);
5110 }
5111 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5112 ipa_param_adjustments (new_params, count, false));
5113 }
5114 else
5115 new_adjustments = NULL;
5116
5117 auto_vec<cgraph_edge *, 2> self_recursive_calls;
5118 for (i = callers.length () - 1; i >= 0; i--)
5119 {
5120 cgraph_edge *cs = callers[i];
5121 if (cs->caller == node)
5122 {
5123 self_recursive_calls.safe_push (obj: cs);
5124 callers.unordered_remove (ix: i);
5125 }
5126 }
5127 replace_trees = cinfo ? vec_safe_copy (src: cinfo->tree_map) : NULL;
5128 for (i = 0; i < count; i++)
5129 {
5130 tree t = known_csts[i];
5131 if (!t)
5132 continue;
5133
5134 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
5135
5136 bool load_ref = false;
5137 symtab_node *ref_symbol;
5138 if (TREE_CODE (t) == ADDR_EXPR)
5139 {
5140 tree base = get_base_address (TREE_OPERAND (t, 0));
5141 if (TREE_CODE (base) == VAR_DECL
5142 && ipa_get_controlled_uses (info, i) == 0
5143 && ipa_get_param_load_dereferenced (info, i)
5144 && (ref_symbol = symtab_node::get (decl: base)))
5145 {
5146 load_ref = true;
5147 if (node->can_change_signature)
5148 for (cgraph_edge *caller : callers)
5149 adjust_references_in_caller (cs: caller, symbol: ref_symbol, index: i);
5150 }
5151 }
5152
5153 ipa_replace_map *replace_map = get_replacement_map (info, value: t, parm_num: i, force_load_ref: load_ref);
5154 if (replace_map)
5155 vec_safe_push (v&: replace_trees, obj: replace_map);
5156 }
5157
5158 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
5159 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5160 node->decl)));
5161 new_node = node->create_virtual_clone (redirect_callers: callers, tree_map: replace_trees,
5162 param_adjustments: new_adjustments, suffix: "constprop",
5163 num_suffix: suffix_counter);
5164 suffix_counter++;
5165
5166 bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
5167 for (unsigned j = 0; j < self_recursive_calls.length (); j++)
5168 {
5169 cgraph_edge *cs = get_next_cgraph_edge_clone (cs: self_recursive_calls[j]);
5170 /* Cloned edges can disappear during cloning as speculation can be
5171 resolved, check that we have one and that it comes from the last
5172 cloning. */
5173 if (cs && cs->caller == new_node)
5174 cs->redirect_callee_duplicating_thunks (n: new_node);
5175 /* Any future code that would make more than one clone of an outgoing
5176 edge would confuse this mechanism, so let's check that does not
5177 happen. */
5178 gcc_checking_assert (!cs
5179 || !get_next_cgraph_edge_clone (cs)
5180 || get_next_cgraph_edge_clone (cs)->caller != new_node);
5181 }
5182 if (have_self_recursive_calls)
5183 new_node->expand_all_artificial_thunks ();
5184
5185 ipa_set_node_agg_value_chain (node: new_node, aggs: aggvals);
5186 for (const ipa_argagg_value &av : aggvals)
5187 new_node->maybe_create_reference (val: av.value, NULL);
5188
5189 if (dump_file && (dump_flags & TDF_DETAILS))
5190 {
5191 fprintf (stream: dump_file, format: " the new node is %s.\n", new_node->dump_name ());
5192 if (known_contexts.exists ())
5193 {
5194 for (i = 0; i < count; i++)
5195 if (!known_contexts[i].useless_p ())
5196 {
5197 fprintf (stream: dump_file, format: " known ctx %i is ", i);
5198 known_contexts[i].dump (f: dump_file);
5199 }
5200 }
5201 if (aggvals)
5202 {
5203 fprintf (stream: dump_file, format: " Aggregate replacements:");
5204 ipa_argagg_value_list avs (aggvals);
5205 avs.dump (f: dump_file);
5206 }
5207 }
5208
5209 new_info = ipa_node_params_sum->get (node: new_node);
5210 new_info->ipcp_orig_node = node;
5211 new_node->ipcp_clone = true;
5212 new_info->known_csts = known_csts;
5213 new_info->known_contexts = known_contexts;
5214
5215 ipcp_discover_new_direct_edges (node: new_node, known_csts, known_contexts,
5216 aggvals);
5217
5218 return new_node;
5219}
5220
/* Return true if JFUNC, which describes the i-th parameter of call CS, is a
   pass-through function to itself when the cgraph_node involved is not an
   IPA-CP clone.  When SIMPLE is true, further check if JFUNC is a simple
   no-operation pass-through.  */
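/* An illustrative example of the simple case this predicate looks for (a
   made-up function, not from any testcase):

     fn (int i)
     {
       ...
       fn (i);
     }

   where the jump function describing the argument of the recursive call is
   a no-operation pass-through of parameter I.  */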

static bool
self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
			       bool simple = true)
{
  enum availability availability;
  if (cs->caller == cs->callee->function_symbol (&availability)
      && availability > AVAIL_INTERPOSABLE
      && jfunc->type == IPA_JF_PASS_THROUGH
      && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
      && ipa_get_jf_pass_through_formal_id (jfunc) == i
      && ipa_node_params_sum->get (cs->caller)
      && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
    return true;
  return false;
}

/* Return true if JFUNC, which describes a part of an aggregate represented or
   pointed to by the i-th parameter of call CS, is a pass-through function to
   itself when the cgraph_node involved is not an IPA-CP clone.  When SIMPLE
   is true, further check if JFUNC is a simple no-operation pass-through.  */
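/* Again purely for illustration (a made-up function), the aggregate analog
   of the situation checked here is

     fn (int *p)
     {
       int j = *p;
       fn (&j);
     }

   where the aggregate jump function of the recursive call loads the value
   back from the aggregate pointed to by parameter P at the same offset.  */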

static bool
self_recursive_agg_pass_through_p (const cgraph_edge *cs,
				   const ipa_agg_jf_item *jfunc,
				   int i, bool simple = true)
{
  enum availability availability;
  if (cs->caller == cs->callee->function_symbol (&availability)
      && availability > AVAIL_INTERPOSABLE
      && jfunc->jftype == IPA_JF_LOAD_AGG
      && jfunc->offset == jfunc->value.load_agg.offset
      && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
      && jfunc->value.pass_through.formal_id == i
      && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
      && ipa_node_params_sum->get (cs->caller)
      && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
    return true;
  return false;
}

/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CSTS with constants that are also known for all of the CALLERS.  */
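/* A hypothetical illustration: if NODE is being cloned only for the two
   call sites

     foo (4, 9);
     foo (x, 9);

   and KNOWN_CSTS so far contains nothing for the second parameter, this
   function notices that all edges in the subset pass 9 for it and records
   that constant in KNOWN_CSTS.  */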

static void
find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
					    vec<tree> &known_csts,
					    const vec<cgraph_edge *> &callers)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      struct cgraph_edge *cs;
      tree newval = NULL_TREE;
      int j;
      bool first = true;
      tree type = ipa_get_type (info, i);

      if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
	continue;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  struct ipa_jump_func *jump_func;
	  tree t;

	  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
	  if (!args
	      || i >= ipa_get_cs_argument_count (args)
	      || (i == 0
		  && call_passes_through_thunk (cs)))
	    {
	      newval = NULL_TREE;
	      break;
	    }
	  jump_func = ipa_get_ith_jump_func (args, i);

	  /* Besides a simple pass-through jump function, an arithmetic jump
	     function can also introduce an argument-direct-pass-through for a
	     self-feeding recursive call.  For example,

	       fn (int i)
	       {
		 fn (i & 1);
	       }

	     Given that i is 0, recursive propagation via (i & 1) also gets
	     0.  */
	  if (self_recursive_pass_through_p (cs, jump_func, i, false))
	    {
	      gcc_assert (newval);
	      enum tree_code opcode
		= ipa_get_jf_pass_through_operation (jump_func);
	      tree op_type = (opcode == NOP_EXPR) ? NULL_TREE
		: ipa_get_jf_pass_through_op_type (jump_func);
	      t = ipa_get_jf_arith_result (opcode, newval,
					   ipa_get_jf_pass_through_operand
					     (jump_func),
					   op_type);
	      t = ipacp_value_safe_for_type (type, t);
	    }
	  else
	    t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
				      jump_func, type);
	  if (!t
	      || (newval
		  && !values_equal_for_ipcp_p (t, newval))
	      || (!first && !newval))
	    {
	      newval = NULL_TREE;
	      break;
	    }
	  else
	    newval = t;
	  first = false;
	}

      if (newval)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " adding an extra known scalar value ");
	      print_ipcp_constant_value (dump_file, newval);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, "\n");
	    }

	  known_csts[i] = newval;
	}
    }
}

/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
   KNOWN_CONTEXTS with polymorphic contexts that are also known for all of
   the CALLERS.  */
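/* For instance (an invented C++ example), if every caller in the subset does

     Derived d;
     foo (&d);

   then the polymorphic context of the pointer argument is known to be
   Derived in all of them and can be recorded in KNOWN_CONTEXTS, enabling
   devirtualization of calls made through it in the clone.  */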

static void
find_more_contexts_for_caller_subset (cgraph_node *node,
				      vec<ipa_polymorphic_call_context>
				      *known_contexts,
				      const vec<cgraph_edge *> &callers)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);

  for (i = 0; i < count; i++)
    {
      cgraph_edge *cs;

      if (ipa_get_poly_ctx_lat (info, i)->bottom
	  || (known_contexts->exists ()
	      && !(*known_contexts)[i].useless_p ()))
	continue;

      ipa_polymorphic_call_context newval;
      bool first = true;
      int j;

      FOR_EACH_VEC_ELT (callers, j, cs)
	{
	  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
	  if (!args
	      || i >= ipa_get_cs_argument_count (args))
	    return;
	  ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
	  ipa_polymorphic_call_context ctx;
	  ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
					cs, i, jfunc);
	  if (first)
	    {
	      newval = ctx;
	      first = false;
	    }
	  else
	    newval.meet_with (ctx);
	  if (newval.useless_p ())
	    break;
	}

      if (!newval.useless_p ())
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " adding an extra known polymorphic "
		       "context ");
	      print_ipcp_constant_value (dump_file, newval);
	      fprintf (dump_file, " for ");
	      ipa_dump_param (dump_file, info, i);
	      fprintf (dump_file, "\n");
	    }

	  if (!known_contexts->exists ())
	    known_contexts->safe_grow_cleared (ipa_get_param_count (info),
					       true);
	  (*known_contexts)[i] = newval;
	}
    }
}

/* Push all aggregate values coming along edge CS for parameter number INDEX
   to RES.  If INTERIM is non-NULL, it contains the current interim state of
   collected aggregate values which can be used to compute values passed over
   self-recursive edges.

   This is basically one iteration of push_agg_values_from_edge over one
   parameter, which allows for simpler early returns.  */
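/* Sketching the common case on a made-up call site (for illustration only):
   given

     struct S s;
     s.a = 1;
     s.b = 2;
     fn (&s);

   this pushes the constants 1 and 2, together with their unit offsets
   within S, onto RES as the aggregate values arriving for parameter number
   INDEX of fn.  */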

static void
push_agg_values_for_index_from_edge (struct cgraph_edge *cs, int index,
				     vec<ipa_argagg_value> *res,
				     const ipa_argagg_value_list *interim)
{
  bool agg_values_from_caller = false;
  bool agg_jf_preserved = false;
  unsigned unit_delta = UINT_MAX;
  int src_idx = -1;
  ipa_jump_func *jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs),
						index);

  if (jfunc->type == IPA_JF_PASS_THROUGH
      && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
    {
      agg_values_from_caller = true;
      agg_jf_preserved = ipa_get_jf_pass_through_agg_preserved (jfunc);
      src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
      unit_delta = 0;
    }
  else if (jfunc->type == IPA_JF_ANCESTOR
	   && ipa_get_jf_ancestor_agg_preserved (jfunc))
    {
      agg_values_from_caller = true;
      agg_jf_preserved = true;
      src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
      unit_delta = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
    }

  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
  if (agg_values_from_caller)
    {
      if (caller_info->ipcp_orig_node)
	{
	  struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
	  ipcp_transformation *ts
	    = ipcp_get_transformation_summary (cs->caller);
	  ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
	  ipcp_param_lattices *orig_plats
	    = ipa_get_parm_lattices (orig_info, src_idx);
	  if (ts
	      && orig_plats->aggs
	      && (agg_jf_preserved || !orig_plats->aggs_by_ref))
	    {
	      ipa_argagg_value_list src (ts);
	      src.push_adjusted_values (src_idx, index, unit_delta, res);
	      return;
	    }
	}
      else
	{
	  ipcp_param_lattices *src_plats
	    = ipa_get_parm_lattices (caller_info, src_idx);
	  if (src_plats->aggs
	      && !src_plats->aggs_bottom
	      && (agg_jf_preserved || !src_plats->aggs_by_ref))
	    {
	      if (interim && self_recursive_pass_through_p (cs, jfunc, index))
		{
		  interim->push_adjusted_values (src_idx, index, unit_delta,
						 res);
		  return;
		}
	      if (!src_plats->aggs_contain_variable)
		{
		  push_agg_values_from_plats (src_plats, index, unit_delta,
					      res);
		  return;
		}
	    }
	}
    }

  if (!jfunc->agg.items)
    return;
  bool first = true;
  unsigned prev_unit_offset = 0;
  for (const ipa_agg_jf_item &agg_jf : *jfunc->agg.items)
    {
      tree value, srcvalue;
      /* Besides a simple pass-through aggregate jump function, an arithmetic
	 aggregate jump function can also bring the same aggregate value as
	 the parameter passed in for a self-feeding recursive call.  For
	 example,

	   fn (int *i)
	   {
	     int j = *i & 1;
	     fn (&j);
	   }

	 Given that *i is 0, recursive propagation via (*i & 1) also gets
	 0.  */
      if (interim
	  && self_recursive_agg_pass_through_p (cs, &agg_jf, index, false)
	  && (srcvalue = interim->get_value (index,
					     agg_jf.offset / BITS_PER_UNIT)))
	{
	  value = ipa_get_jf_arith_result (agg_jf.value.pass_through.operation,
					   srcvalue,
					   agg_jf.value.pass_through.operand,
					   agg_jf.value.pass_through.op_type);
	  value = ipacp_value_safe_for_type (agg_jf.type, value);
	}
      else
	value = ipa_agg_value_from_jfunc (caller_info, cs->caller,
					  &agg_jf);
      if (value)
	{
	  struct ipa_argagg_value iav;
	  iav.value = value;
	  iav.unit_offset = agg_jf.offset / BITS_PER_UNIT;
	  iav.index = index;
	  iav.by_ref = jfunc->agg.by_ref;
	  iav.killed = false;

	  gcc_assert (first
		      || iav.unit_offset > prev_unit_offset);
	  prev_unit_offset = iav.unit_offset;
	  first = false;

	  res->safe_push (iav);
	}
    }
  return;
}

/* Push all aggregate values coming along edge CS to RES.  DEST_INFO is the
   description of the ultimate callee of CS or of the node it was cloned from
   (the summary where the lattices are).  If INTERIM is non-NULL, it contains
   the current interim state of collected aggregate values which can be used
   to compute values passed over self-recursive edges (if
   OPTIMIZE_SELF_RECURSION is true) and to skip values which clearly will not
   be part of the intersection with INTERIM.  */

static void
push_agg_values_from_edge (struct cgraph_edge *cs,
			   ipa_node_params *dest_info,
			   vec<ipa_argagg_value> *res,
			   const ipa_argagg_value_list *interim,
			   bool optimize_self_recursion)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  if (!args)
    return;

  int count = MIN (ipa_get_param_count (dest_info),
		   ipa_get_cs_argument_count (args));

  unsigned interim_index = 0;
  for (int index = 0; index < count; index++)
    {
      if (interim)
	{
	  while (interim_index < interim->m_elts.size ()
		 && interim->m_elts[interim_index].value
		 && interim->m_elts[interim_index].index < index)
	    interim_index++;
	  if (interim_index >= interim->m_elts.size ()
	      || interim->m_elts[interim_index].index > index)
	    continue;
	}

      ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, index);
      if (!ipa_is_param_used (dest_info, index)
	  || plats->aggs_bottom)
	continue;
      push_agg_values_for_index_from_edge (cs, index, res,
					   optimize_self_recursion ? interim
					   : NULL);
    }
}


/* Look at edges in CALLERS and collect all known aggregate values that arrive
   from all of them.  Return nullptr if there are none.  */

static struct vec<ipa_argagg_value, va_gc> *
find_aggregate_values_for_callers_subset (struct cgraph_node *node,
					  const vec<cgraph_edge *> &callers)
{
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  if (dest_info->ipcp_orig_node)
    dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);

  /* gather_edges_for_value puts a non-recursive call into the first element
     of callers if it can.  */
  auto_vec<ipa_argagg_value, 32> interim;
  push_agg_values_from_edge (callers[0], dest_info, &interim, NULL, true);

  unsigned valid_entries = interim.length ();
  if (!valid_entries)
    return nullptr;

  unsigned caller_count = callers.length ();
  for (unsigned i = 1; i < caller_count; i++)
    {
      auto_vec<ipa_argagg_value, 32> last;
      ipa_argagg_value_list avs (&interim);
      push_agg_values_from_edge (callers[i], dest_info, &last, &avs, true);

      valid_entries = intersect_argaggs_with (interim, last);
      if (!valid_entries)
	return nullptr;
    }

  vec<ipa_argagg_value, va_gc> *res = NULL;
  vec_safe_reserve_exact (res, valid_entries);
  for (const ipa_argagg_value &av : interim)
    if (av.value)
      res->quick_push (av);
  gcc_checking_assert (res->length () == valid_entries);
  return res;
}

/* Determine whether CS also brings all scalar values that NODE is
   specialized for.  */
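/* In other words (with made-up numbers): if NODE is a clone specialized for
   its first parameter being 3, an edge whose jump function evaluates to 3
   for that parameter passes this test, whereas an edge passing 4 or an
   unknown value does not.  */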

static bool
cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
					 struct cgraph_node *node)
{
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  int count = ipa_get_param_count (dest_info);
  class ipa_node_params *caller_info;
  class ipa_edge_args *args;
  int i;

  caller_info = ipa_node_params_sum->get (cs->caller);
  args = ipa_edge_args_sum->get (cs);
  for (i = 0; i < count; i++)
    {
      struct ipa_jump_func *jump_func;
      tree val, t;

      val = dest_info->known_csts[i];
      if (!val)
	continue;

      if (i >= ipa_get_cs_argument_count (args))
	return false;
      jump_func = ipa_get_ith_jump_func (args, i);
      t = ipa_value_from_jfunc (caller_info, jump_func,
				ipa_get_type (dest_info, i));
      if (!t || !values_equal_for_ipcp_p (val, t))
	return false;
    }
  return true;
}

/* Determine whether CS also brings all aggregate values that NODE is
   specialized for.  */

static bool
cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
					  struct cgraph_node *node)
{
  ipcp_transformation *ts = ipcp_get_transformation_summary (node);
  if (!ts || vec_safe_is_empty (ts->m_agg_values))
    return true;

  const ipa_argagg_value_list existing (ts->m_agg_values);
  auto_vec<ipa_argagg_value, 32> edge_values;
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  gcc_checking_assert (dest_info->ipcp_orig_node);
  dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
  push_agg_values_from_edge (cs, dest_info, &edge_values, &existing, false);
  const ipa_argagg_value_list avl (&edge_values);
  return avl.superset_of_p (existing);
}

/* Given an original NODE and a VAL for which we have already created a
   specialized clone, look whether there are incoming edges that still lead
   into the old node but now also bring the requested value and also conform
   to all other criteria such that they can be redirected to the special
   node.  This function can therefore redirect the final edge in an SCC.  */

template <typename valtype>
static void
perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
{
  ipcp_value_source<valtype> *src;
  profile_count redirected_sum = profile_count::zero ();

  for (src = val->sources; src; src = src->next)
    {
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, node, val)
	      && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
	      && cgraph_edge_brings_all_agg_vals_for_node (cs,
							   val->spec_node))
	    {
	      if (dump_file)
		fprintf (dump_file, " - adding an extra caller %s of %s\n",
			 cs->caller->dump_name (),
			 val->spec_node->dump_name ());

	      cs->redirect_callee_duplicating_thunks (val->spec_node);
	      val->spec_node->expand_all_artificial_thunks ();
	      if (cs->count.ipa ().initialized_p ())
		redirected_sum = redirected_sum + cs->count.ipa ();
	    }
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  if (redirected_sum.nonzero_p ())
    update_specialized_profile (val->spec_node, node, redirected_sum);
}

/* Return true if KNOWN_CONTEXTS contain at least one useful context.  */

static bool
known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
{
  ipa_polymorphic_call_context *ctx;
  int i;

  FOR_EACH_VEC_ELT (known_contexts, i, ctx)
    if (!ctx->useless_p ())
      return true;
  return false;
}

/* Return a copy of KNOWN_CONTEXTS if it contains at least one useful
   context, otherwise return vNULL.  */

static vec<ipa_polymorphic_call_context>
copy_useful_known_contexts (const vec<ipa_polymorphic_call_context>
			    &known_contexts)
{
  if (known_contexts_useful_p (known_contexts))
    return known_contexts.copy ();
  else
    return vNULL;
}

/* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
   according to VAL and INDEX.  If non-empty, replace KNOWN_CONTEXTS with its
   copy too.  */

static void
copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
			    vec<tree> *known_csts,
			    vec<ipa_polymorphic_call_context> *known_contexts,
			    ipcp_value<tree> *val, int index)
{
  *known_csts = avals->m_known_vals.copy ();
  *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
  (*known_csts)[index] = val->value;
}

/* Copy known scalar values from AVALS into KNOWN_CSTS.  Similarly, copy
   contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
   INDEX.  */

static void
copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
			    vec<tree> *known_csts,
			    vec<ipa_polymorphic_call_context> *known_contexts,
			    ipcp_value<ipa_polymorphic_call_context> *val,
			    int index)
{
  *known_csts = avals->m_known_vals.copy ();
  *known_contexts = avals->m_known_contexts.copy ();
  (*known_contexts)[index] = val->value;
}

/* Return true if OFFSET indicates this was not an aggregate value or there is
   a replacement equivalent to VALUE, INDEX and OFFSET among those in the
   AGGVALS list.  */

DEBUG_FUNCTION bool
ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *aggvals,
			       int index, HOST_WIDE_INT offset, tree value)
{
  if (offset == -1)
    return true;

  const ipa_argagg_value_list avl (aggvals);
  tree v = avl.get_value (index, offset / BITS_PER_UNIT);
  return v && values_equal_for_ipcp_p (v, value);
}

/* Return true if OFFSET is minus one because the source of a polymorphic
   context cannot be an aggregate value.  */

DEBUG_FUNCTION bool
ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *,
			       int, HOST_WIDE_INT offset,
			       ipa_polymorphic_call_context)
{
  return offset == -1;
}

/* Decide whether to create a special version of NODE for value VAL of
   parameter at the given INDEX.  If OFFSET is -1, the value is for the
   parameter itself, otherwise it is stored at the given OFFSET of the
   parameter.  AVALS describes the other already known values.
   SELF_GEN_CLONES is a vector which contains clones created for
   self-recursive calls with an arithmetic pass-through jump function.  */
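/* To illustrate the OFFSET convention on invented call sites: a call fn (7)
   can lead to this function being invoked with INDEX 0, OFFSET -1 and VAL
   describing the constant 7, whereas a call fn (&s) where s.f is known to
   be 7 can lead to an invocation with the bit offset of field f in OFFSET
   and the same constant in VAL.  */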

template <typename valtype>
static bool
decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
		    ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
		    vec<cgraph_node *> *self_gen_clones)
{
  int caller_count;
  sreal freq_sum;
  profile_count count_sum, rec_count_sum;
  vec<cgraph_edge *> callers;
  bool called_without_ipa_profile;

  if (val->spec_node)
    {
      perhaps_add_new_callers (node, val);
      return false;
    }
  else if (val->local_size_cost + overall_size > get_max_overall_size (node))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " Ignoring candidate value because "
		 "maximum unit size would be reached with %li.\n",
		 val->local_size_cost + overall_size);
      return false;
    }
  else if (!get_info_about_necessary_edges (val, node, &freq_sum,
					    &caller_count, &rec_count_sum,
					    &count_sum,
					    &called_without_ipa_profile))
    return false;

  if (!dbg_cnt (ipa_cp_values))
    return false;

  if (val->self_recursion_generated_p ())
    {
      /* The edge counts in this case might not have been adjusted yet.
	 Nevertheless, even if they were, it would only be guesswork which we
	 can do now ourselves.  The recursive part of the counts can be
	 derived from the count of the original node anyway.  */
      if (node->count.ipa ().nonzero_p ())
	{
	  unsigned dem = self_gen_clones->length () + 1;
	  rec_count_sum = node->count.ipa () / dem;
	}
      else
	rec_count_sum = profile_count::zero ();
    }

  /* get_info_about_necessary_edges only sums up ipa counts.  */
  count_sum += rec_count_sum;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " - considering value ");
      print_ipcp_constant_value (dump_file, val->value);
      fprintf (dump_file, " for ");
      ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
      if (offset != -1)
	fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
      fprintf (dump_file, " (caller_count: %i)\n", caller_count);
    }

  if (!good_cloning_opportunity_p (node, val->local_time_benefit,
				   freq_sum, count_sum,
				   val->local_size_cost,
				   called_without_ipa_profile)
      && !good_cloning_opportunity_p (node, val->prop_time_benefit,
				      freq_sum, count_sum,
				      val->prop_size_cost,
				      called_without_ipa_profile))
    return false;

  if (dump_file)
    fprintf (dump_file, " Creating a specialized node of %s.\n",
	     node->dump_name ());

  vec<tree> known_csts;
  vec<ipa_polymorphic_call_context> known_contexts;

  callers = gather_edges_for_value (val, node, caller_count);
  if (offset == -1)
    copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val,
				index);
  else
    {
      known_csts = avals->m_known_vals.copy ();
      known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
    }
  find_more_scalar_values_for_callers_subset (node, known_csts, callers);
  find_more_contexts_for_caller_subset (node, &known_contexts, callers);
  vec<ipa_argagg_value, va_gc> *aggvals
    = find_aggregate_values_for_callers_subset (node, callers);
  gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
						      offset, val->value));
  val->spec_node = create_specialized_node (node, known_csts, known_contexts,
					    aggvals, callers);

  if (val->self_recursion_generated_p ())
    self_gen_clones->safe_push (val->spec_node);
  else
    update_profiling_info (node, val->spec_node);

  callers.release ();
  overall_size += val->local_size_cost;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, " overall size reached %li\n",
	     overall_size);

  /* TODO: If for some lattice there is only one other known value
     left, make a special node for it too.  */

  return true;
}

/* Like irange::contains_p(), but convert VAL to the type of R if
   necessary.  */
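/* For example (with invented values): if R is the int range [0, 10] and VAL
   is the long constant 4, VAL is converted to int first and the containment
   test returns true; for a long constant that does not fit into int at all,
   the function simply returns false.  */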

static inline bool
ipa_range_contains_p (const vrange &r, tree val)
{
  if (r.undefined_p ())
    return false;

  tree type = r.type ();
  if (!wi::fits_to_tree_p (wi::to_wide (val), type))
    return false;

  val = fold_convert (type, val);
  return r.contains_p (val);
}

/* Decide whether and what specialized clones of NODE should be created.  */

static bool
decide_whether_version_node (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);
  bool ret = false;

  if (count == 0)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
	     node->dump_name ());

  auto_vec <cgraph_node *, 9> self_gen_clones;
  ipa_auto_call_arg_values avals;
  gather_context_independent_values (info, &avals, false, NULL);

  for (i = 0; i < count; i++)
    {
      if (!ipa_is_param_used (info, i))
	continue;

      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;

      if (!lat->bottom
	  && !avals.m_known_vals[i])
	{
	  ipcp_value<tree> *val;
	  for (val = lat->values; val; val = val->next)
	    {
	      /* If some values generated for self-recursive calls with
		 arithmetic jump functions fall outside of the known
		 range for the parameter, we can skip them.  */
	      if (TREE_CODE (val->value) == INTEGER_CST
		  && !plats->m_value_range.bottom_p ()
		  && !ipa_range_contains_p (plats->m_value_range.m_vr,
					    val->value))
		{
		  /* This can happen also if a constant present in the source
		     code falls outside of the range of the parameter's type,
		     so we cannot assert.  */
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, " - skipping%s value ",
			       val->self_recursion_generated_p ()
			       ? " self_recursion_generated" : "");
		      print_ipcp_constant_value (dump_file, val->value);
		      fprintf (dump_file, " because it is outside known "
			       "value range.\n");
		    }
		  continue;
		}
	      ret |= decide_about_value (node, i, -1, val, &avals,
					 &self_gen_clones);
	    }
	}

      if (!plats->aggs_bottom)
	{
	  struct ipcp_agg_lattice *aglat;
	  ipcp_value<tree> *val;
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    if (!aglat->bottom && aglat->values
		/* If the following is false, the one value has been
		   considered for cloning for all contexts.  */
		&& (plats->aggs_contain_variable
		    || !aglat->is_single_const ()))
	      for (val = aglat->values; val; val = val->next)
		ret |= decide_about_value (node, i, aglat->offset, val,
					   &avals, &self_gen_clones);
	}

      if (!ctxlat->bottom
	  && avals.m_known_contexts[i].useless_p ())
	{
	  ipcp_value<ipa_polymorphic_call_context> *val;
	  for (val = ctxlat->values; val; val = val->next)
	    ret |= decide_about_value (node, i, -1, val, &avals,
				       &self_gen_clones);
	}
    }

  if (!self_gen_clones.is_empty ())
    {
      self_gen_clones.safe_push (node);
      update_counts_for_self_gen_clones (node, self_gen_clones);
    }

  if (info->do_clone_for_all_contexts)
    {
      if (!dbg_cnt (ipa_cp_values))
	{
	  info->do_clone_for_all_contexts = false;
	  return ret;
	}

      struct cgraph_node *clone;
      auto_vec<cgraph_edge *> callers = node->collect_callers ();

      for (int i = callers.length () - 1; i >= 0; i--)
	{
	  cgraph_edge *cs = callers[i];
	  ipa_node_params *caller_info
	    = ipa_node_params_sum->get (cs->caller);

	  if (caller_info && caller_info->node_dead)
	    callers.unordered_remove (i);
	}

      if (!adjust_callers_for_value_intersection (callers, node))
	{
	  /* If node is not called by anyone, or all its caller edges are
	     self-recursive, the node is not really in use, no need to do
	     cloning.  */
	  info->do_clone_for_all_contexts = false;
	  return ret;
	}

      if (dump_file)
	fprintf (dump_file, " - Creating a specialized node of %s "
		 "for all known contexts.\n", node->dump_name ());

      vec<tree> known_csts = avals.m_known_vals.copy ();
      vec<ipa_polymorphic_call_context> known_contexts
	= copy_useful_known_contexts (avals.m_known_contexts);
      find_more_scalar_values_for_callers_subset (node, known_csts, callers);
      find_more_contexts_for_caller_subset (node, &known_contexts, callers);
      vec<ipa_argagg_value, va_gc> *aggvals
	= find_aggregate_values_for_callers_subset (node, callers);

      if (!known_contexts_useful_p (known_contexts))
	{
	  known_contexts.release ();
	  known_contexts = vNULL;
	}
      clone = create_specialized_node (node, known_csts, known_contexts,
				       aggvals, callers);
      info->do_clone_for_all_contexts = false;
      ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
      ret = true;
    }

  return ret;
}

/* Transitively mark all callees of NODE within the same SCC as not dead.  */

static void
spread_undeadness (struct cgraph_node *node)
{
  struct cgraph_edge *cs;

  for (cs = node->callees; cs; cs = cs->next_callee)
    if (ipa_edge_within_scc (cs))
      {
	struct cgraph_node *callee;
	class ipa_node_params *info;

	callee = cs->callee->function_symbol (NULL);
	info = ipa_node_params_sum->get (callee);

	if (info && info->node_dead)
	  {
	    info->node_dead = 0;
	    spread_undeadness (callee);
	  }
      }
}

/* Return true if NODE has a caller from outside of its SCC that is not
   dead.  Worker callback for cgraph_for_node_and_aliases.  */

static bool
has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
				      void *data ATTRIBUTE_UNUSED)
{
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (cs->caller->thunk
	&& cs->caller->call_for_symbol_thunks_and_aliases
	     (has_undead_caller_from_outside_scc_p, NULL, true))
      return true;
    else if (!ipa_edge_within_scc (cs))
      {
	ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
	if (!caller_info /* Unoptimized callers are like dead ones.  */
	    || !caller_info->node_dead)
	  return true;
      }
  return false;
}


/* Identify nodes within the same SCC as NODE which are no longer needed
   because of new clones and will be removed as unreachable.  */

static void
identify_dead_nodes (struct cgraph_node *node)
{
  struct cgraph_node *v;
  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
    if (v->local)
      {
	ipa_node_params *info = ipa_node_params_sum->get (v);
	if (info
	    && !v->call_for_symbol_thunks_and_aliases
		  (has_undead_caller_from_outside_scc_p, NULL, true))
	  info->node_dead = 1;
      }

  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
    {
      ipa_node_params *info = ipa_node_params_sum->get (v);
      if (info && !info->node_dead)
	spread_undeadness (v);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
	if (ipa_node_params_sum->get (v)
	    && ipa_node_params_sum->get (v)->node_dead)
	  fprintf (dump_file, " Marking node as dead: %s.\n",
		   v->dump_name ());
    }
}

/* The decision stage.  Iterate over the topological order of call graph
   nodes TOPO and make specialized clones if deemed beneficial.  */

static void
ipcp_decision_stage (class ipa_topo_info *topo)
{
  int i;

  if (dump_file)
    fprintf (dump_file, "\nIPA decision stage:\n\n");

  for (i = topo->nnodes - 1; i >= 0; i--)
    {
      struct cgraph_node *node = topo->order[i];
      bool change = false, iterate = true;

      while (iterate)
	{
	  struct cgraph_node *v;
	  iterate = false;
	  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
	    if (v->has_gimple_body_p ()
		&& ipcp_versionable_function_p (v))
	      iterate |= decide_whether_version_node (v);

	  change |= iterate;
	}
      if (change)
	identify_dead_nodes (node);
    }
}

/* Look up all VR and bits information that we have discovered and copy it
   over to the transformation summary.  */

static void
ipcp_store_vr_results (void)
{
  cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = ipa_node_params_sum->get (node);
      bool dumped_sth = false;
      bool found_useful_result = false;
      bool do_vr = true;
      bool do_bits = true;

      if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
	{
	  if (dump_file)
	    fprintf (dump_file, "Not considering %s for VR discovery "
		     "and propagation; -fipa-vrp: disabled.\n",
		     node->dump_name ());
	  do_vr = false;
	}
      if (!info || !opt_for_fn (node->decl, flag_ipa_bit_cp))
	{
	  if (dump_file)
	    fprintf (dump_file, "Not considering %s for ipa bitwise "
		     "propagation; -fipa-bit-cp: disabled.\n",
		     node->dump_name ());
	  do_bits = false;
	}
      if (!do_bits && !do_vr)
	continue;

      if (info->ipcp_orig_node)
	info = ipa_node_params_sum->get (info->ipcp_orig_node);
      if (info->lattices.is_empty ())
	/* Newly expanded artificial thunks do not have lattices.  */
	continue;

      unsigned count = ipa_get_param_count (info);
      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  if (do_vr
	      && !plats->m_value_range.bottom_p ()
	      && !plats->m_value_range.top_p ())
	    {
	      found_useful_result = true;
	      break;
	    }
	  if (do_bits && plats->bits_lattice.constant_p ())
	    {
	      found_useful_result = true;
	      break;
	    }
	}
      if (!found_useful_result)
	continue;

      ipcp_transformation_initialize ();
      ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
      vec_safe_reserve_exact (ts->m_vr, count);

      for (unsigned i = 0; i < count; i++)
	{
	  ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  ipcp_bits_lattice *bits = NULL;

	  if (do_bits
	      && plats->bits_lattice.constant_p ()
	      && dbg_cnt (ipa_cp_bits))
	    bits = &plats->bits_lattice;

	  if (do_vr
	      && !plats->m_value_range.bottom_p ()
	      && !plats->m_value_range.top_p ()
	      && dbg_cnt (ipa_cp_vr))
	    {
	      if (bits)
		{
		  value_range tmp = plats->m_value_range.m_vr;
		  tree type = ipa_get_type (info, i);
		  irange_bitmask bm (wide_int::from (bits->get_value (),
						     TYPE_PRECISION (type),
						     TYPE_SIGN (type)),
				     wide_int::from (bits->get_mask (),
						     TYPE_PRECISION (type),
						     TYPE_SIGN (type)));
		  tmp.update_bitmask (bm);
		  // Reflecting the bitmask on the ranges can sometimes
		  // produce an UNDEFINED value if the bitmask update
		  // was previously deferred.  See PR 120048.
		  if (tmp.undefined_p ())
		    tmp.set_varying (type);
		  ipa_vr vr (tmp);
		  ts->m_vr->quick_push (vr);
		}
	      else
		{
		  ipa_vr vr (plats->m_value_range.m_vr);
		  ts->m_vr->quick_push (vr);
		}
	    }
	  else if (bits)
	    {
	      tree type = ipa_get_type (info, i);
	      value_range tmp;
	      tmp.set_varying (type);
	      irange_bitmask bm (wide_int::from (bits->get_value (),
						 TYPE_PRECISION (type),
						 TYPE_SIGN (type)),
				 wide_int::from (bits->get_mask (),
						 TYPE_PRECISION (type),
						 TYPE_SIGN (type)));
	      tmp.update_bitmask (bm);
	      // Reflecting the bitmask on the ranges can sometimes
	      // produce an UNDEFINED value if the bitmask update
	      // was previously deferred.  See PR 120048.
	      if (tmp.undefined_p ())
		tmp.set_varying (type);
	      ipa_vr vr (tmp);
	      ts->m_vr->quick_push (vr);
	    }
	  else
	    {
	      ipa_vr vr;
	      ts->m_vr->quick_push (vr);
	    }

	  if (!dump_file || !bits)
	    continue;

	  if (!dumped_sth)
	    {
	      fprintf (dump_file, "Propagated bits info for function %s:\n",
		       node->dump_name ());
	      dumped_sth = true;
	    }
	  fprintf (dump_file, " param %i: value = ", i);
	  ipcp_print_widest_int (dump_file, bits->get_value ());
	  fprintf (dump_file, ", mask = ");
	  ipcp_print_widest_int (dump_file, bits->get_mask ());
	  fprintf (dump_file, "\n");
	}
    }
}

/* The IPCP driver.  */

static unsigned int
ipcp_driver (void)
{
  class ipa_topo_info topo;

  if (edge_clone_summaries == NULL)
    edge_clone_summaries = new edge_clone_summary_t (symtab);

  ipa_check_create_node_params ();
  ipa_check_create_edge_args ();
  clone_num_suffixes = new hash_map<const char *, unsigned>;

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA structures before propagation:\n");
      if (dump_flags & TDF_DETAILS)
	ipa_print_all_params (dump_file);
      ipa_print_all_jump_functions (dump_file);
    }

  /* Topological sort.  */
  build_toporder_info (&topo);
  /* Do the interprocedural propagation.  */
  ipcp_propagate_stage (&topo);
  /* Decide what constant propagation and cloning should be performed.  */
  ipcp_decision_stage (&topo);
  /* Store results of value range and bits propagation.  */
  ipcp_store_vr_results ();

  /* Free all IPCP structures.  */
  delete clone_num_suffixes;
  free_toporder_info (&topo);
  delete edge_clone_summaries;
  edge_clone_summaries = NULL;
  ipa_free_all_structures_after_ipa_cp ();
  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation end\n");
  return 0;
}

/* Initialization and computation of IPCP data structures.  This is the
   initial intraprocedural analysis of functions, which gathers information
   to be propagated later on.  */

static void
ipcp_generate_summary (void)
{
  struct cgraph_node *node;

  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation start:\n");
  ipa_register_cgraph_hooks ();

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    ipa_analyze_node (node);
}

namespace {

const pass_data pass_data_ipa_cp =
{
  IPA_PASS, /* type */
  "cp", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_CONSTANT_PROP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};

class pass_ipa_cp : public ipa_opt_pass_d
{
public:
  pass_ipa_cp (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
		      ipcp_generate_summary, /* generate_summary */
		      NULL, /* write_summary */
		      NULL, /* read_summary */
		      ipcp_write_transformation_summaries, /*
		      write_optimization_summary */
		      ipcp_read_transformation_summaries, /*
		      read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      ipcp_transform_function, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  bool gate (function *) final override
  {
    /* FIXME: We should remove the optimize check after we ensure we never run
       IPA passes when not optimizing.  */
    return (flag_ipa_cp && optimize) || in_lto_p;
  }

  unsigned int execute (function *) final override { return ipcp_driver (); }

}; // class pass_ipa_cp

} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_cp (gcc::context *ctxt)
{
  return new pass_ipa_cp (ctxt);
}

/* Reset all state within ipa-cp.cc so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_cp_cc_finalize (void)
{
  overall_size = 0;
  orig_overall_size = 0;
  ipcp_free_transformation_sum ();
}

/* Given PARAM, which must be a parameter of function FNDECL described by
   THIS, return its index in the DECL_ARGUMENTS chain, using a pre-computed
   DECL_UID-sorted vector if available (which is pre-computed only if there
   are many parameters).  Can return -1 if PARAM is the static chain, which
   is not represented among DECL_ARGUMENTS.  */
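/* As a hypothetical illustration, for

     int baz (int a, int b);

   get_param_index (baz_decl, b_decl) returns 1, found either by a linear
   walk of DECL_ARGUMENTS or, when the function has many parameters, by a
   binary search in the DECL_UID-sorted map built by
   maybe_create_parm_idx_map below.  */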

int
ipcp_transformation::get_param_index (const_tree fndecl,
				      const_tree param) const
{
  gcc_assert (TREE_CODE (param) == PARM_DECL);
  if (m_uid_to_idx)
    {
      unsigned puid = DECL_UID (param);
      const ipa_uid_to_idx_map_elt *res
	= std::lower_bound (m_uid_to_idx->begin (), m_uid_to_idx->end (),
			    puid,
			    [] (const ipa_uid_to_idx_map_elt &elt,
				unsigned uid)
			    {
			      return elt.uid < uid;
			    });
      if (res == m_uid_to_idx->end ()
	  || res->uid != puid)
	{
	  gcc_assert (DECL_STATIC_CHAIN (fndecl));
	  return -1;
	}
      return res->index;
    }

  unsigned index = 0;
  for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
    if (p == param)
      return (int) index;

  gcc_assert (DECL_STATIC_CHAIN (fndecl));
  return -1;
}

/* Helper function to qsort a vector of ipa_uid_to_idx_map_elt elements
   according to the uid.  */

static int
compare_uids (const void *a, const void *b)
{
  const ipa_uid_to_idx_map_elt *e1 = (const ipa_uid_to_idx_map_elt *) a;
  const ipa_uid_to_idx_map_elt *e2 = (const ipa_uid_to_idx_map_elt *) b;
  if (e1->uid < e2->uid)
    return -1;
  if (e1->uid > e2->uid)
    return 1;
  gcc_unreachable ();
}

/* Assuming THIS describes FNDECL and it has sufficiently many parameters to
   justify the overhead, create a DECL_UID-sorted vector to speed up mapping
   from parameters to their indices in the DECL_ARGUMENTS chain.  */

void
ipcp_transformation::maybe_create_parm_idx_map (tree fndecl)
{
  int c = count_formal_params (fndecl);
  if (c < 32)
    return;

  m_uid_to_idx = NULL;
  vec_safe_reserve (m_uid_to_idx, c, true);
  unsigned index = 0;
  for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
    {
      ipa_uid_to_idx_map_elt elt;
      elt.uid = DECL_UID (p);
      elt.index = index;
      m_uid_to_idx->quick_push (elt);
    }
  m_uid_to_idx->qsort (compare_uids);
}