/* Interprocedural scalar replacement of aggregates
   Copyright (C) 2019-2023 Free Software Foundation, Inc.
   Contributed by Martin Jambor <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* IPA-SRA is an interprocedural pass that removes unused function return
   values (turning functions returning a value which is never used into void
   functions) and removes unused function parameters.  It can also replace an
   aggregate parameter by a set of other parameters representing part of the
   original, turning those passed by reference into new ones which pass the
   value directly.

   The pass is a true IPA one, which means that it works in three stages in
   order to be able to take advantage of LTO.  First, summaries about
   functions and each call are generated.  Function summaries (often called
   call graph node summaries) contain mainly information about which
   parameters are potential transformation candidates and which bits of
   candidates are accessed.  We differentiate between accesses done as a part
   of a call statement (which might not be necessary if the callee is also
   transformed) and others (which are mandatory).  Call summaries (often
   called call graph edge summaries) contain information about which function
   formal parameters feed into which actual call arguments so that if two
   parameters are only used in a sum which is then passed to another function
   which however does not use this parameter, all three parameters of the two
   functions can be eliminated.  Edge summaries also have flags whether the
   return value is used or if it is only returned in the caller too.  In LTO
   mode these summaries are then streamed to the object file in the
   compilation phase and streamed back in at the WPA analysis stage.

   The interprocedural analysis phase traverses the graph in topological
   order in two sweeps, one in each direction.  First, from callees to
   callers for parameter removal and splitting.  Each strongly-connected
   component is processed iteratively until the situation in it stabilizes.
   The pass from callers to callees is then carried out to remove unused
   return values in a very similar fashion.

   Because parameter manipulation has big implications for call redirection
   which is done only after all call graph nodes materialize, the
   transformation phase is not part of this pass but is carried out by the
   clone materialization and edge redirection itself, see comments in
   ipa-param-manipulation.h for more details.  */

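/* As a purely illustrative example (not taken from any actual testcase), a
   local function such as

     static int foo (struct S *p) { return p->a + p->b; }

   can, when all of its callers are known and suitable, be rewritten by
   IPA-SRA into something like

     static int foo.isra (int a, int b) { return a + b; }

   with every call site adjusted to load the two fields and pass them by
   value.  */
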
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-dfa.h"
#include "tree-sra.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "dbgcnt.h"
#include "tree-inline.h"
#include "ipa-utils.h"
#include "builtins.h"
#include "cfganal.h"
#include "tree-streamer.h"
#include "internal-fn.h"
#include "symtab-clones.h"
#include "attribs.h"
#include "ipa-prop.h"

static void ipa_sra_summarize_function (cgraph_node *);

/* Bits used to track size of an aggregate in bytes interprocedurally.  */
#define ISRA_ARG_SIZE_LIMIT_BITS 16
#define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS)
/* How many parameters can feed into a call actual argument and still be
   tracked.  */
#define IPA_SRA_MAX_PARAM_FLOW_LEN 7

/* Structure describing accesses to a specific portion of an aggregate
   parameter, as given by the offset and size.  Any smaller accesses that
   occur within a function and fall within another access form a tree.  The
   pass cannot analyze parameters with only partially overlapping accesses.  */

struct GTY(()) param_access
{
  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;

  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Values returned by get_ref_base_and_extent but converted to bytes and
     stored as unsigned ints.  */
  unsigned unit_offset;
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* Set once we are sure that the access will really end up in a potentially
     transformed function - initially not set for portions of formal
     parameters that are only used as actual function arguments passed to
     callees.  */
  unsigned certain : 1;
  /* Set if the access has reverse scalar storage order.  */
  unsigned reverse : 1;
};

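/* As a hypothetical illustration of the tree structure described above, a
   by-reference parameter of type struct { int a; int b; } that is both
   passed whole to a callee and has its fields loaded separately could yield
   (entries shown as unit_offset/unit_size):

     0/8  certain=0    whole aggregate, only passed on to a callee
       0/4  type=int   load of field a
       4/4  type=int   load of field b

   An additional read of bytes 2 through 5 would partially overlap both
   children and would therefore disqualify the parameter.  */
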
/* This structure has the same purpose as the one above and additionally it
   contains some fields that are only necessary in the summary generation
   phase.  */

struct gensum_param_access
{
  /* Values returned by get_ref_base_and_extent.  */
  HOST_WIDE_INT offset;
  HOST_WIDE_INT size;

  /* If this access has any children (in terms of the definition above), this
     points to the first one.  */
  struct gensum_param_access *first_child;
  /* In intraprocedural SRA, pointer to the next sibling in the access tree as
     described above.  */
  struct gensum_param_access *next_sibling;

  /* Type that a potential replacement should have.  This field only has
     meaning in the summary building and transformation phases, when it is
     reconstructed from the body.  Must not be touched in IPA analysis
     stage.  */
  tree type;
  /* Alias reference type to be used in MEM_REFs when adjusting caller
     arguments.  */
  tree alias_ptr_type;

  /* Cumulative count of all loads.  */
  profile_count load_count;
  /* Whether there have been writes to or reads from this exact location,
     other than as arguments to a function call that can be tracked.  */
  bool nonarg;

  /* Set if the access has reverse scalar storage order.  */
  bool reverse;
};

/* Summary describing a parameter in the IPA stages.  */

struct GTY(()) isra_param_desc
{
  /* List of access representatives to the parameter, sorted according to
     their offset.  */
  vec <param_access *, va_gc> *accesses;

  /* Unit size limit of total size of all replacements.  */
  unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS;
  /* Sum of unit sizes of all certain replacements.  */
  unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS;
  /* Minimum offset that is known to be safe to dereference because callers
     pass pointers to DECLs of at least this size or because of dereferences
     in callers.  */
  unsigned safe_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  unsigned locally_unused : 1;
  /* An aggregate that is a candidate for breaking up or complete removal.  */
  unsigned split_candidate : 1;
  /* Is this a parameter passing stuff by reference?  */
  unsigned by_ref : 1;
  /* If set, this parameter can only be a candidate for removal if the
     function is going to lose its return value.  */
  unsigned remove_only_when_retval_removed : 1;
  /* If set, this parameter can only be a candidate for splitting if the
     function is going to lose its return value.  Can only be meaningfully set
     for by_ref parameters.  */
  unsigned split_only_when_retval_removed : 1;
  /* Parameter hint set during IPA analysis when there is a caller which does
     not construct the argument just to pass it to calls.  Only meaningful for
     by_ref parameters.  */
  unsigned not_specially_constructed : 1;
  /* Only meaningful for by_ref parameters.  If set, this parameter can only
     be a split candidate if all callers pass pointers that are known to point
     to a chunk of memory large enough to contain all accesses.  */
  unsigned conditionally_dereferenceable : 1;
  /* Set when safe_size has been updated from at least one caller.  */
  unsigned safe_size_set : 1;
};

/* Structure used when generating summaries that describes a parameter.  */

struct gensum_param_desc
{
  /* Roots of param_accesses.  */
  gensum_param_access *accesses;
  /* Number of accesses in the access tree rooted in field accesses.  */
  unsigned access_count;

  /* If the below is non-zero, this is the number of uses as actual
     arguments.  */
  int call_uses;
  /* Number of times this parameter has been directly passed to a callee
     (pointer pass-throughs).  */
  unsigned ptr_pt_count;

  /* Size limit of total size of all replacements.  */
  unsigned param_size_limit;
  /* Sum of sizes of nonarg accesses.  */
  unsigned nonarg_acc_size;

  /* A parameter that is used only in call arguments and can be removed if all
     concerned actual arguments are removed.  */
  bool locally_unused;
  /* An aggregate that is a candidate for breaking up or a pointer passing
     data by reference that is a candidate for being converted to a set of
     parameters passing those data by value.  */
  bool split_candidate;
  /* Is this a parameter passing stuff by reference (either a pointer or a
     source language reference type)?  */
  bool by_ref;
  /* If this parameter passes stuff by reference, can it be safely
     dereferenced without performing further checks (for example because it is
     a REFERENCE_TYPE)?  */
  bool safe_ref;
  /* If set, this parameter can only be a candidate for removal if the
     function is going to lose its return value.  */
  bool remove_only_when_retval_removed;
  /* If set, this parameter can only be a candidate for splitting if the
     function is going to lose its return value.  Can only be meaningfully set
     for by_ref parameters.  */
  bool split_only_when_retval_removed;
  /* Only meaningful for by_ref parameters.  If set, this parameter can only
     be a split candidate if all callers pass pointers that are known to point
     to a chunk of memory large enough to contain all accesses.  */
  bool conditionally_dereferenceable;

  /* The number of this parameter as it is ordered in the function decl.  */
  int param_number;
  /* For parameters passing data by reference, this is the index used to
     compute indices into bb_dereferences.  */
  int deref_index;
};

/* Properly deallocate accesses of DESC.  TODO: Since this data structure is
   allocated in GC memory, this is not necessary and we can consider removing
   the function.  */

static void
free_param_decl_accesses (isra_param_desc *desc)
{
  unsigned len = vec_safe_length (desc->accesses);
  for (unsigned i = 0; i < len; ++i)
    ggc_free ((*desc->accesses)[i]);
  vec_free (desc->accesses);
}

/* Class used to convey information about functions from the
   intra-procedural analysis stage to inter-procedural one.  */

class GTY((for_user)) isra_func_summary
{
public:
  /* Initialize the object.  */

  isra_func_summary ()
    : m_parameters (NULL), m_candidate (false), m_returns_value (false),
      m_return_ignored (false), m_queued (false)
  {}

  /* Destroy m_parameters.  */

  ~isra_func_summary ();

  /* Mark the function as not a candidate for any IPA-SRA transformation.
     Return true if it was a candidate until now.  */

  bool zap ();

  /* Vector of parameter descriptors corresponding to the function being
     analyzed.  */
  vec<isra_param_desc, va_gc> *m_parameters;

  /* Whether the node is even a candidate for any IPA-SRA transformation at
     all.  */
  unsigned m_candidate : 1;

  /* Whether the original function returns any value.  */
  unsigned m_returns_value : 1;

  /* Set to true if all call statements do not actually use the returned
     value.  */

  unsigned m_return_ignored : 1;

  /* Whether the node is already queued in IPA SRA stack during processing of
     call graph SCCs.  */

  unsigned m_queued : 1;
};

/* Deallocate the memory pointed to by isra_func_summary.  TODO: Since this
   data structure is allocated in GC memory, this is not necessary and we can
   consider removing the destructor.  */

isra_func_summary::~isra_func_summary ()
{
  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);
}

/* Mark the function as not a candidate for any IPA-SRA transformation.
   Return true if it was a candidate until now.  */

bool
isra_func_summary::zap ()
{
  bool ret = m_candidate;
  m_candidate = false;

  /* TODO: see the destructor above.  */
  unsigned len = vec_safe_length (m_parameters);
  for (unsigned i = 0; i < len; ++i)
    free_param_decl_accesses (&(*m_parameters)[i]);
  vec_free (m_parameters);

  return ret;
}

/* Structure to describe which formal parameters feed into a particular actual
   argument.  */

struct isra_param_flow
{
  /* Number of elements in array inputs that contain valid data.  */
  char length;
  /* Indices of formal parameters that feed into the described actual
     argument.  If aggregate_pass_through or pointer_pass_through below are
     true, it must contain exactly one element which is passed through from a
     formal parameter of the given number.  Otherwise, the array contains
     indices of the caller's formal parameters which are used to calculate
     value of this actual argument.  */
  unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN];

  /* Offset within the formal parameter.  */
  unsigned unit_offset;
  /* When aggregate_pass_through is set, this is the size of the portion of
     an aggregate formal parameter that is being passed.  Otherwise, this is
     the size of the pointed-to memory that is known to be valid to
     dereference.  */
  unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS;

  /* True when the value of this actual argument is a portion of a formal
     parameter.  */
  unsigned aggregate_pass_through : 1;
  /* True when the value of this actual argument is a verbatim pass-through
     of an obtained pointer.  */
  unsigned pointer_pass_through : 1;
  /* True when it is safe to copy access candidates here from the callee,
     which would mean introducing dereferences into callers of the caller.  */
  unsigned safe_to_import_accesses : 1;
  /* True when the passed value is an address of a structure that has been
     constructed in the caller just to be passed by reference to functions
     (i.e. is never read).  */
  unsigned constructed_for_calls : 1;
};

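/* As a hypothetical example of the above, for a statement bar (a + b, p) in
   a function whose formal parameters a, b and p have numbers 0, 1 and 2, the
   edge summary would contain two isra_param_flow entries: one with length 2
   and inputs {0, 1} for the first argument, and one with length 1, inputs
   {2} and pointer_pass_through set for the second.  */
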
/* Structure used to convey information about calls from the intra-procedural
   analysis stage to inter-procedural one.  */

class isra_call_summary
{
public:
  isra_call_summary ()
    : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
      m_bit_aligned_arg (false), m_before_any_store (false)
  {}

  void init_inputs (unsigned arg_count);
  void dump (FILE *f);

  /* Information about what formal parameters of the caller are used to
     compute individual actual arguments of this call.  */
  auto_vec <isra_param_flow> m_arg_flow;

  /* Set to true if the call statement does not have a LHS.  */
  unsigned m_return_ignored : 1;

  /* Set to true if the LHS of call statement is only used to construct the
     return value of the caller.  */
  unsigned m_return_returned : 1;

  /* Set when any of the call arguments are not byte-aligned.  */
  unsigned m_bit_aligned_arg : 1;

  /* Set to true if the call happened before any (other) store to memory in
     the caller.  */
  unsigned m_before_any_store : 1;
};

/* Class to manage function summaries.  */

class GTY((user)) ipa_sra_function_summaries
  : public function_summary <isra_func_summary *>
{
public:
  ipa_sra_function_summaries (symbol_table *table, bool ggc):
    function_summary<isra_func_summary *> (table, ggc) { }

  void duplicate (cgraph_node *, cgraph_node *,
		  isra_func_summary *old_sum,
		  isra_func_summary *new_sum) final override;
  void insert (cgraph_node *, isra_func_summary *) final override;
};

/* Hook that is called by summary when a node is duplicated.  */

void
ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *,
				       isra_func_summary *old_sum,
				       isra_func_summary *new_sum)
{
  /* TODO: Somehow stop copying when ISRA is doing the cloning, it is
     useless.  */
  new_sum->m_candidate = old_sum->m_candidate;
  new_sum->m_returns_value = old_sum->m_returns_value;
  new_sum->m_return_ignored = old_sum->m_return_ignored;
  gcc_assert (!old_sum->m_queued);
  new_sum->m_queued = false;

  unsigned param_count = vec_safe_length (old_sum->m_parameters);
  if (!param_count)
    return;
  vec_safe_reserve_exact (new_sum->m_parameters, param_count);
  new_sum->m_parameters->quick_grow_cleared (param_count);
  for (unsigned i = 0; i < param_count; i++)
    {
      isra_param_desc *s = &(*old_sum->m_parameters)[i];
      isra_param_desc *d = &(*new_sum->m_parameters)[i];

      d->param_size_limit = s->param_size_limit;
      d->size_reached = s->size_reached;
      d->safe_size = s->safe_size;
      d->locally_unused = s->locally_unused;
      d->split_candidate = s->split_candidate;
      d->by_ref = s->by_ref;
      d->remove_only_when_retval_removed = s->remove_only_when_retval_removed;
      d->split_only_when_retval_removed = s->split_only_when_retval_removed;
      d->not_specially_constructed = s->not_specially_constructed;
      d->conditionally_dereferenceable = s->conditionally_dereferenceable;
      d->safe_size_set = s->safe_size_set;

      unsigned acc_count = vec_safe_length (s->accesses);
      vec_safe_reserve_exact (d->accesses, acc_count);
      for (unsigned j = 0; j < acc_count; j++)
	{
	  param_access *from = (*s->accesses)[j];
	  param_access *to = ggc_cleared_alloc<param_access> ();
	  to->type = from->type;
	  to->alias_ptr_type = from->alias_ptr_type;
	  to->unit_offset = from->unit_offset;
	  to->unit_size = from->unit_size;
	  to->certain = from->certain;
	  to->reverse = from->reverse;
	  d->accesses->quick_push (to);
	}
    }
}

/* Pointer to the pass function summary holder.  */

static GTY(()) ipa_sra_function_summaries *func_sums;

/* Hook that is called by summary when a new node appears.  */

void
ipa_sra_function_summaries::insert (cgraph_node *node, isra_func_summary *)
{
  if (opt_for_fn (node->decl, flag_ipa_sra))
    {
      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
      ipa_sra_summarize_function (node);
      pop_cfun ();
    }
  else
    func_sums->remove (node);
}

/* Class to manage call summaries.  */

class ipa_sra_call_summaries: public call_summary <isra_call_summary *>
{
public:
  ipa_sra_call_summaries (symbol_table *table):
    call_summary<isra_call_summary *> (table) { }

  /* Duplicate info when an edge is cloned.  */
  void duplicate (cgraph_edge *, cgraph_edge *,
		  isra_call_summary *old_sum,
		  isra_call_summary *new_sum) final override;
};

static ipa_sra_call_summaries *call_sums;


/* Initialize m_arg_flow of a particular instance of isra_call_summary.
   ARG_COUNT is the number of actual arguments passed.  */

void
isra_call_summary::init_inputs (unsigned arg_count)
{
  if (arg_count == 0)
    {
      gcc_checking_assert (m_arg_flow.length () == 0);
      return;
    }
  if (m_arg_flow.length () == 0)
    {
      m_arg_flow.reserve_exact (arg_count);
      m_arg_flow.quick_grow_cleared (arg_count);
    }
  else
    gcc_checking_assert (arg_count == m_arg_flow.length ());
}

/* Dump all information in call summary to F.  */

void
isra_call_summary::dump (FILE *f)
{
  if (m_return_ignored)
    fprintf (f, " return value ignored\n");
  if (m_return_returned)
    fprintf (f, " return value used only to compute caller return value\n");
  if (m_before_any_store)
    fprintf (f, " happens before any store to memory\n");
  for (unsigned i = 0; i < m_arg_flow.length (); i++)
    {
      fprintf (f, " Parameter %u:\n", i);
      isra_param_flow *ipf = &m_arg_flow[i];

      if (ipf->length)
	{
	  bool first = true;
	  fprintf (f, " Scalar param sources: ");
	  for (int j = 0; j < ipf->length; j++)
	    {
	      if (!first)
		fprintf (f, ", ");
	      else
		first = false;
	      fprintf (f, "%i", (int) ipf->inputs[j]);
	    }
	  fprintf (f, "\n");
	}
      if (ipf->aggregate_pass_through)
	fprintf (f, " Aggregate pass through from the param given above, "
		 "unit offset: %u, unit size: %u\n",
		 ipf->unit_offset, ipf->unit_size);
      else if (ipf->unit_size > 0)
	fprintf (f, " Known dereferenceable size: %u\n", ipf->unit_size);
      if (ipf->pointer_pass_through)
	fprintf (f, " Pointer pass through from the param given above, "
		 "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses);
      if (ipf->constructed_for_calls)
	fprintf (f, " Variable constructed just to be passed to "
		 "calls.\n");
    }
}

/* Duplicate edge summary when an edge is cloned.  */

void
ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
				   isra_call_summary *old_sum,
				   isra_call_summary *new_sum)
{
  unsigned arg_count = old_sum->m_arg_flow.length ();
  new_sum->init_inputs (arg_count);
  for (unsigned i = 0; i < arg_count; i++)
    new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i];

  new_sum->m_return_ignored = old_sum->m_return_ignored;
  new_sum->m_return_returned = old_sum->m_return_returned;
  new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg;
  new_sum->m_before_any_store = old_sum->m_before_any_store;
}


/* With all GTY stuff done, we can move to anonymous namespace.  */
namespace {
/* Quick mapping from a decl to its param descriptor.  */

hash_map<tree, gensum_param_desc *> *decl2desc;

/* All local DECLs that are ever loaded from and those that have their address
   assigned to a variable.  */

hash_set <tree> *loaded_decls;

/* Countdown of allowed Alias Analysis steps during summary building.  */

int aa_walking_limit;

/* This is a table in which for each basic block and parameter there is a
   distance (offset + size) in that parameter which is dereferenced and
   accessed in that BB.  */
HOST_WIDE_INT *bb_dereferences = NULL;
/* How many by-reference parameters there are in the current function.  */
int unsafe_by_ref_count;

/* Bitmap of BBs that can cause the function to "stop" progressing by
   returning, throwing externally, looping infinitely or calling a function
   which might abort etc.  */
bitmap final_bbs;

/* Obstack to allocate various small structures required only when generating
   summary for a function.  */
struct obstack gensum_obstack;

/* Return false if the function is apparently unsuitable for IPA-SRA based on
   its attributes, return true otherwise.  NODE is the cgraph node of the
   current function.  */

static bool
ipa_sra_preliminary_function_checks (cgraph_node *node)
{
  if (!node->can_change_signature)
    {
      if (dump_file)
	fprintf (dump_file, "Function cannot change signature.\n");
      return false;
    }

  if (!tree_versionable_function_p (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Function is not versionable.\n");
      return false;
    }

  if (!opt_for_fn (node->decl, optimize)
      || !opt_for_fn (node->decl, flag_ipa_sra))
    {
      if (dump_file)
	fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this "
		 "function.\n");
      return false;
    }

  if (DECL_VIRTUAL_P (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Function is a virtual method.\n");
      return false;
    }

  struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
  if (fun->stdarg)
    {
      if (dump_file)
	fprintf (dump_file, "Function uses stdarg.\n");
      return false;
    }

  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Always inline function will be inlined "
		 "anyway.\n");
      return false;
    }

  return true;
}

/* Print access tree starting at ACCESS to F.  */

static void
dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent)
{
  fprintf (f, " ");
  for (unsigned i = 0; i < indent; i++)
    fprintf (f, " ");
  fprintf (f, " * Access to offset: " HOST_WIDE_INT_PRINT_DEC,
	   access->offset);
  fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  fprintf (f, ", load_count: ");
  access->load_count.dump (f);
  fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse);
  for (gensum_param_access *ch = access->first_child;
       ch;
       ch = ch->next_sibling)
    dump_gensum_access (f, ch, indent + 2);
}


/* Print access tree starting at ACCESS to F.  */

static void
dump_isra_access (FILE *f, param_access *access)
{
  fprintf (f, " * Access to unit offset: %u", access->unit_offset);
  fprintf (f, ", unit size: %u", access->unit_size);
  fprintf (f, ", type: ");
  print_generic_expr (f, access->type);
  fprintf (f, ", alias_ptr_type: ");
  print_generic_expr (f, access->alias_ptr_type);
  if (access->certain)
    fprintf (f, ", certain");
  else
    fprintf (f, ", not certain");
  if (access->reverse)
    fprintf (f, ", reverse");
  fprintf (f, "\n");
}

/* Dump access tree starting at ACCESS to stderr.  */

DEBUG_FUNCTION void
debug_isra_access (param_access *access)
{
  dump_isra_access (stderr, access);
}

/* Dump DESC to F.  */

static void
dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc)
{
  if (desc->locally_unused)
    fprintf (f, " unused with %i call_uses%s\n", desc->call_uses,
	     desc->remove_only_when_retval_removed
	     ? " remove_only_when_retval_removed" : "");
  if (!desc->split_candidate)
    {
      fprintf (f, " not a candidate\n");
      return;
    }
  if (desc->by_ref)
    fprintf (f, " %s%s%s by_ref with %u pass throughs\n",
	     desc->safe_ref ? "safe" : "unsafe",
	     desc->conditionally_dereferenceable
	     ? " conditionally_dereferenceable" : "",
	     desc->split_only_when_retval_removed
	     ? " split_only_when_retval_removed" : "",
	     desc->ptr_pt_count);

  for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling)
    dump_gensum_access (f, acc, 2);
}

/* Dump all parameter descriptors in PARAM_DESCRIPTIONS, assuming it describes
   FNDECL, to F.  */

static void
dump_gensum_param_descriptors (FILE *f, tree fndecl,
			       vec<gensum_param_desc> *param_descriptions)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  for (unsigned i = 0;
       i < param_descriptions->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, " Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_gensum_param_descriptor (f, &(*param_descriptions)[i]);
    }
}


/* Dump DESC to F.  If HINTS is true, also dump IPA-analysis computed
   hints.  */

static void
dump_isra_param_descriptor (FILE *f, isra_param_desc *desc, bool hints)
{
  if (desc->locally_unused)
    {
      fprintf (f, " (locally) unused\n");
    }
  if (!desc->split_candidate)
    {
      fprintf (f, " not a candidate for splitting");
      if (hints && desc->by_ref && desc->safe_size_set)
	fprintf (f, ", safe_size: %u", (unsigned) desc->safe_size);
      fprintf (f, "\n");
      return;
    }
  fprintf (f, " param_size_limit: %u, size_reached: %u%s",
	   desc->param_size_limit, desc->size_reached,
	   desc->by_ref ? ", by_ref" : "");
  if (desc->remove_only_when_retval_removed)
    fprintf (f, ", remove_only_when_retval_removed");
  if (desc->split_only_when_retval_removed)
    fprintf (f, ", split_only_when_retval_removed");
  if (desc->by_ref && desc->conditionally_dereferenceable)
    fprintf (f, ", conditionally_dereferenceable");
  if (hints)
    {
      if (desc->by_ref && !desc->not_specially_constructed)
	fprintf (f, ", args_specially_constructed");
      if (desc->by_ref && desc->safe_size_set)
	fprintf (f, ", safe_size: %u", (unsigned) desc->safe_size);
    }
  fprintf (f, "\n");

  for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i)
    {
      param_access *access = (*desc->accesses)[i];
      dump_isra_access (f, access);
    }
}

/* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to F.
   If HINTS is true, also dump IPA-analysis computed hints.  */

static void
dump_isra_param_descriptors (FILE *f, tree fndecl, isra_func_summary *ifs,
			     bool hints)
{
  tree parm = DECL_ARGUMENTS (fndecl);
  if (!ifs->m_parameters)
    {
      fprintf (f, " parameter descriptors not available\n");
      return;
    }

  for (unsigned i = 0;
       i < ifs->m_parameters->length ();
       ++i, parm = DECL_CHAIN (parm))
    {
      fprintf (f, " Descriptor for parameter %i ", i);
      print_generic_expr (f, parm, TDF_UID);
      fprintf (f, "\n");
      dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i], hints);
    }
}

/* Add SRC to inputs of PARAM_FLOW, unless it would exceed storage.  If the
   function fails, return false; otherwise return true.  SRC must fit into an
   unsigned char.  Used for purposes of transitive unused parameter
   removal.  */

static bool
add_src_to_param_flow (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN)
    return false;

  param_flow->inputs[(int) param_flow->length] = src;
  param_flow->length++;
  return true;
}

/* Add SRC to the inputs of PARAM_FLOW unless it is already there and assert
   it is the only input.  Used for purposes of transitive parameter
   splitting.  */

static void
set_single_param_flow_source (isra_param_flow *param_flow, int src)
{
  gcc_checking_assert (src >= 0 && src <= UCHAR_MAX);
  if (param_flow->length == 0)
    {
      param_flow->inputs[0] = src;
      param_flow->length = 1;
    }
  else if (param_flow->length == 1)
    gcc_assert (param_flow->inputs[0] == src);
  else
    gcc_unreachable ();
}

/* Assert that there is only a single value in PARAM_FLOW's inputs and return
   it.  */

static unsigned
get_single_param_flow_source (const isra_param_flow *param_flow)
{
  gcc_assert (param_flow->length == 1);
  return param_flow->inputs[0];
}

/* Inspect all uses of NAME and simple arithmetic calculations involving NAME
   in FUN represented with NODE and return a negative number if any of them is
   used for anything other than an actual call argument, a simple return, a
   simple arithmetic operation or a debug statement.  If there are no such
   uses, return the number of actual arguments that this parameter eventually
   feeds to (or zero if there is none).  If there are any simple return uses,
   set DESC->remove_only_when_retval_removed.  For every call argument that
   NAME feeds into, mark PARM_NUM as one of its sources.  ANALYZED is a bitmap
   that tracks which SSA names we have already started investigating.  */

static int
isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name,
			      int parm_num, bitmap analyzed,
			      gensum_param_desc *desc)
{
  int res = 0;
  imm_use_iterator imm_iter;
  gimple *stmt;

  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
    {
      if (is_gimple_debug (stmt)
	  || gimple_clobber_p (stmt))
	continue;

      /* TODO: We could handle at least const builtin functions like
	 arithmetic operations below.  */
      if (is_gimple_call (stmt))
	{
	  int all_uses = 0;
	  use_operand_p use_p;
	  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	    all_uses++;

	  gcall *call = as_a <gcall *> (stmt);
	  unsigned arg_count;
	  if (gimple_call_internal_p (call)
	      || (arg_count = gimple_call_num_args (call)) == 0)
	    {
	      res = -1;
	      break;
	    }

	  cgraph_edge *cs = node->get_edge (stmt);
	  gcc_checking_assert (cs);
	  isra_call_summary *csum = call_sums->get_create (cs);
	  csum->init_inputs (arg_count);

	  int simple_uses = 0;
	  for (unsigned i = 0; i < arg_count; i++)
	    if (gimple_call_arg (call, i) == name)
	      {
		if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num))
		  {
		    simple_uses = -1;
		    break;
		  }
		simple_uses++;
	      }

	  if (simple_uses < 0
	      || all_uses != simple_uses)
	    {
	      res = -1;
	      break;
	    }
	  res += all_uses;
	}
      else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
	       && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
		   || gimple_code (stmt) == GIMPLE_PHI))
	{
	  tree lhs;
	  if (gimple_code (stmt) == GIMPLE_PHI)
	    lhs = gimple_phi_result (stmt);
	  else
	    lhs = gimple_assign_lhs (stmt);

	  if (TREE_CODE (lhs) != SSA_NAME)
	    {
	      res = -1;
	      break;
	    }
	  gcc_assert (!gimple_vdef (stmt));
	  if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)))
	    {
	      int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num,
						      analyzed, desc);
	      if (tmp < 0)
		{
		  res = tmp;
		  break;
		}
	      res += tmp;
	    }
	}
      else if (greturn *gr = dyn_cast <greturn *> (stmt))
	{
	  tree rv = gimple_return_retval (gr);
	  if (rv != name)
	    {
	      res = -1;
	      break;
	    }
	  desc->remove_only_when_retval_removed = true;
	}
      else
	{
	  res = -1;
	  break;
	}
    }
  return res;
}

/* Inspect all uses of PARM, which must be a gimple register, in FUN (which is
   also described by NODE) and simple arithmetic calculations involving PARM
   and return true if any of them is used for anything other than an actual
   call argument, a simple return, a simple arithmetic operation or a debug
   statement.  If there are no such uses, return false and store the number of
   actual arguments that this parameter eventually feeds to (or zero if there
   is none) to DESC->call_uses and set DESC->remove_only_when_retval_removed
   if there are any uses in return statements.  For every call argument that
   PARM feeds into, mark PARM_NUM as one of its sources.

   This function is similar to ptr_parm_has_nonarg_uses but its results are
   meant for unused parameter removal, as opposed to splitting of parameters
   passed by reference or converting them to passed by value.  */

static bool
isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm,
				    int parm_num, gensum_param_desc *desc)
{
  gcc_checking_assert (is_gimple_reg (parm));

  tree name = ssa_default_def (fun, parm);
  if (!name || has_zero_uses (name))
    {
      desc->call_uses = 0;
      return false;
    }

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  bitmap analyzed = BITMAP_ALLOC (NULL);
  int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num,
						analyzed, desc);
  BITMAP_FREE (analyzed);
  if (call_uses < 0)
    return true;
  desc->call_uses = call_uses;
  return false;
}

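/* To illustrate the function above on a made-up example, in

     int foo (int a) { return bar (a + 1); }

   the default definition of A feeds, through one simple arithmetic
   operation, into a single actual call argument, so the function would
   return false and set DESC->call_uses to 1, making removal of A dependent
   only on what bar does with its parameter.  */
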
/* Scan immediate uses of a default definition SSA name of a parameter PARM and
   examine whether there are any nonarg uses that are not actual arguments or
   otherwise infeasible uses.  If so, return true, otherwise return false.
   Create pass-through IPA flow records for any direct uses as call arguments
   and if returning false, store their number into DESC->ptr_pt_count.  If a
   removal of the return value would make splitting feasible again, return
   false but set DESC->split_only_when_retval_removed.  NODE and FUN must
   represent the function that is currently analyzed, PARM_NUM must be the
   index of the analyzed parameter.

   This function is similar to isra_track_scalar_param_local_uses but its
   results are meant for splitting of parameters passed by reference or turning
   them into bits passed by value, as opposed to generic unused parameter
   removal.  */

static bool
ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm,
			  int parm_num, gensum_param_desc *desc)
{
  imm_use_iterator ui;
  gimple *stmt;
  tree name = ssa_default_def (fun, parm);
  bool ret = false;
  unsigned pt_count = 0;

  if (!name || has_zero_uses (name))
    return false;

  /* Edge summaries can only handle callers with fewer than 256 parameters.  */
  if (parm_num > UCHAR_MAX)
    return true;

  FOR_EACH_IMM_USE_STMT (stmt, ui, name)
    {
      unsigned uses_ok = 0;
      use_operand_p use_p;

      if (is_gimple_debug (stmt)
	  || gimple_clobber_p (stmt))
	continue;

      if (gimple_assign_single_p (stmt))
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  if (!TREE_THIS_VOLATILE (rhs))
	    {
	      while (handled_component_p (rhs))
		rhs = TREE_OPERAND (rhs, 0);
	      if (TREE_CODE (rhs) == MEM_REF
		  && TREE_OPERAND (rhs, 0) == name
		  && integer_zerop (TREE_OPERAND (rhs, 1))
		  && types_compatible_p (TREE_TYPE (rhs),
					 TREE_TYPE (TREE_TYPE (name))))
		uses_ok++;
	    }
	}
      else if (is_gimple_call (stmt))
	{
	  gcall *call = as_a <gcall *> (stmt);
	  unsigned arg_count;
	  if (gimple_call_internal_p (call)
	      || (arg_count = gimple_call_num_args (call)) == 0)
	    {
	      ret = true;
	      break;
	    }

	  cgraph_edge *cs = node->get_edge (stmt);
	  gcc_checking_assert (cs);
	  isra_call_summary *csum = call_sums->get_create (cs);
	  csum->init_inputs (arg_count);

	  for (unsigned i = 0; i < arg_count; ++i)
	    {
	      tree arg = gimple_call_arg (stmt, i);

	      if (arg == name)
		{
		  /* TODO: Allow &MEM_REF[name + offset] here,
		     ipa_param_body_adjustments::modify_call_stmt has to be
		     adjusted too.  */
		  csum->m_arg_flow[i].pointer_pass_through = true;
		  set_single_param_flow_source (&csum->m_arg_flow[i], parm_num);
		  pt_count++;
		  uses_ok++;
		  continue;
		}

	      if (!TREE_THIS_VOLATILE (arg))
		{
		  while (handled_component_p (arg))
		    arg = TREE_OPERAND (arg, 0);
		  if (TREE_CODE (arg) == MEM_REF
		      && TREE_OPERAND (arg, 0) == name
		      && integer_zerop (TREE_OPERAND (arg, 1))
		      && types_compatible_p (TREE_TYPE (arg),
					     TREE_TYPE (TREE_TYPE (name))))
		    uses_ok++;
		}
	    }
	}
      else if (greturn *gr = dyn_cast <greturn *> (stmt))
	{
	  tree rv = gimple_return_retval (gr);
	  if (rv == name)
	    {
	      uses_ok++;
	      /* Analysis for feasibility of removal must have already reached
		 the conclusion that the flag must be set if it completed.  */
	      gcc_assert (!desc->locally_unused
			  || desc->remove_only_when_retval_removed);
	      desc->split_only_when_retval_removed = true;
	    }
	}

      /* If the number of valid uses does not match the number of
	 uses in this stmt there is an unhandled use.  */
      unsigned all_uses = 0;
      FOR_EACH_IMM_USE_ON_STMT (use_p, ui)
	all_uses++;

      gcc_checking_assert (uses_ok <= all_uses);
      if (uses_ok != all_uses)
	{
	  ret = true;
	  break;
	}
    }

  desc->ptr_pt_count = pt_count;
  return ret;
}

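/* To illustrate the function above on a made-up example, in

     int baz (int *p) { use_ptr (p); return *p; }

   both the verbatim pass of P to use_ptr and the dereference at offset zero
   are acceptable uses, so the function would return false with
   DESC->ptr_pt_count set to 1.  Storing P itself into memory or comparing it
   against another pointer would, on the other hand, make it return true.  */
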
/* Initialize vector of parameter descriptors of NODE.  Return true if there
   are any candidates for splitting or unused aggregate parameter removal (the
   function may return false if there are candidates for removal of register
   parameters).  */

static bool
create_parameter_descriptors (cgraph_node *node,
			      vec<gensum_param_desc> *param_descriptions)
{
  function *fun = DECL_STRUCT_FUNCTION (node->decl);
  bool ret = false;

  int num = 0;
  for (tree parm = DECL_ARGUMENTS (node->decl);
       parm;
       parm = DECL_CHAIN (parm), num++)
    {
      const char *msg;
      gensum_param_desc *desc = &(*param_descriptions)[num];
      /* param_descriptions vector is grown cleared in the caller.  */
      desc->param_number = num;
      decl2desc->put (parm, desc);

      if (dump_file && (dump_flags & TDF_DETAILS))
	print_generic_expr (dump_file, parm, TDF_UID);

      tree type = TREE_TYPE (parm);
      if (TREE_THIS_VOLATILE (parm))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is volatile\n");
	  continue;
	}
      if (!is_gimple_reg_type (type) && is_va_list_type (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is a va_list type\n");
	  continue;
	}
      if (TREE_ADDRESSABLE (parm))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, is addressable\n");
	  continue;
	}
      if (TREE_ADDRESSABLE (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, type cannot be split\n");
	  continue;
	}

      if (is_gimple_reg (parm)
	  && !isra_track_scalar_param_local_uses (fun, node, parm, num, desc))
	{
	  desc->locally_unused = true;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " is a scalar with only %i call uses%s\n",
		     desc->call_uses,
		     desc->remove_only_when_retval_removed
		     ? " and return uses" : "");
	}

      if (POINTER_TYPE_P (type))
	{
	  desc->by_ref = true;
	  if (TREE_CODE (type) == REFERENCE_TYPE
	      || (num == 0
		  && TREE_CODE (TREE_TYPE (node->decl)) == METHOD_TYPE))
	    desc->safe_ref = true;
	  else
	    desc->safe_ref = false;
	  type = TREE_TYPE (type);

	  if (TREE_CODE (type) == FUNCTION_TYPE
	      || TREE_CODE (type) == METHOD_TYPE)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a function\n");
	      continue;
	    }
	  if (TYPE_VOLATILE (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a volatile type\n");
	      continue;
	    }
	  if (TREE_CODE (type) == ARRAY_TYPE
	      && TYPE_NONALIASED_COMPONENT (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a nonaliased component array\n");
	      continue;
	    }
	  if (!is_gimple_reg (parm))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, a reference which is "
			 "not a gimple register (probably addressable)\n");
	      continue;
	    }
	  if (is_va_list_type (type))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference to "
			 "a va list\n");
	      continue;
	    }
	  if (ptr_parm_has_nonarg_uses (node, fun, parm, num, desc))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " not a candidate, reference has "
			 "nonarg uses\n");
	      continue;
	    }
	}
      else if (!AGGREGATE_TYPE_P (type))
	{
	  /* This is in an else branch because scalars passed by reference are
	     still candidates to be passed by value.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, not an aggregate\n");
	  continue;
	}

      if (!COMPLETE_TYPE_P (type))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, not a complete type\n");
	  continue;
	}
      if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, size not representable\n");
	  continue;
	}
      unsigned HOST_WIDE_INT type_size
	= tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT;
      if (type_size == 0
	  || type_size >= ISRA_ARG_SIZE_LIMIT)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, has zero or huge size\n");
	  continue;
	}
      if (type_internals_preclude_sra_p (type, &msg))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " not a candidate, %s\n", msg);
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " is a candidate\n");

      ret = true;
      desc->split_candidate = true;
      if (desc->by_ref && !desc->safe_ref)
	desc->deref_index = unsafe_by_ref_count++;
    }
  return ret;
}

/* Return pointer to descriptor of parameter DECL or NULL if it cannot be
   found, which happens if DECL is for a static chain.  */

static gensum_param_desc *
get_gensum_param_desc (tree decl)
{
  if (!decl2desc)
    return NULL;
  gcc_checking_assert (TREE_CODE (decl) == PARM_DECL);
  gensum_param_desc **slot = decl2desc->get (decl);
  if (!slot)
    /* This can happen for static chains which we cannot handle so far.  */
    return NULL;
  gcc_checking_assert (*slot);
  return *slot;
}


/* Remove parameter described by DESC from candidates for IPA-SRA splitting and
   write REASON to the dump file if there is one.  */

static void
disqualify_split_candidate (gensum_param_desc *desc, const char *reason)
{
  if (!desc->split_candidate)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "! Disqualifying parameter number %i - %s\n",
	     desc->param_number, reason);

  desc->split_candidate = false;
}

/* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if
   there is one.  */

static void
disqualify_split_candidate (tree decl, const char *reason)
{
  gensum_param_desc *desc = get_gensum_param_desc (decl);
  if (desc)
    disqualify_split_candidate (desc, reason);
}

/* Allocate a new access to DESC and fill it in with OFFSET and SIZE.  But
   first, check that there are not too many of them already.  If so, do not
   allocate anything and return NULL.  */

static gensum_param_access *
allocate_access (gensum_param_desc *desc,
		 HOST_WIDE_INT offset, HOST_WIDE_INT size)
{
  if (desc->access_count
      == (unsigned) param_ipa_sra_max_replacements)
    {
      disqualify_split_candidate (desc, "Too many replacement candidates");
      return NULL;
    }

  gensum_param_access *access
    = (gensum_param_access *) obstack_alloc (&gensum_obstack,
					     sizeof (gensum_param_access));
  memset (access, 0, sizeof (*access));
  access->offset = offset;
  access->size = size;
  access->load_count = profile_count::zero ();
  return access;
}

/* In what context scan_expr_access has been called, whether it deals with a
   load, a function argument, or a store.  Please note that in rare
   circumstances when it is not clear if the access is a load or store,
   ISRA_CTX_STORE is used too.  */

enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE};

/* Return an access describing memory access to the variable described by DESC
   at OFFSET with SIZE in context CTX, starting at pointer to the linked list
   at a certain tree level FIRST.  Attempt to create it and put it into the
   appropriate place in the access tree if it does not exist, but fail and
   return NULL if there are already too many accesses, if it would create a
   partially overlapping access or if an access would end up within a
   pre-existing non-call access.  */

static gensum_param_access *
get_access_1 (gensum_param_desc *desc, gensum_param_access **first,
	      HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx)
{
  gensum_param_access *access = *first, **ptr = first;

  if (!access)
    {
      /* No pre-existing access at this level, just create it.  */
      gensum_param_access *a = allocate_access (desc, offset, size);
      if (!a)
	return NULL;
      *first = a;
      return *first;
    }

  if (access->offset >= offset + size)
    {
      /* We want to squeeze it in front of the very first access, just do
	 it.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->next_sibling = access;
      *first = r;
      return r;
    }

  /* Skip all accesses that have to come before us until the next sibling is
     already too far.  */
  while (offset >= access->offset + access->size
	 && access->next_sibling
	 && access->next_sibling->offset < offset + size)
    {
      ptr = &access->next_sibling;
      access = access->next_sibling;
    }

  /* At this point we know we do not belong before access.  */
  gcc_assert (access->offset < offset + size);

  if (access->offset == offset && access->size == size)
    /* We found what we were looking for.  */
    return access;

  if (access->offset <= offset
      && access->offset + access->size >= offset + size)
    {
      /* We fit into access which is larger than us.  We need to find/create
	 something below access.  But we only allow nesting in call
	 arguments.  */
      if (access->nonarg)
	return NULL;

      return get_access_1 (desc, &access->first_child, offset, size, ctx);
    }

  if (offset <= access->offset
      && offset + size >= access->offset + access->size)
    /* We are actually bigger than access, which fully fits into us, take its
       place and make all accesses fitting into it its children.  */
    {
      /* But first, we only allow nesting in call arguments so check if that is
	 what we are trying to represent.  */
      if (ctx != ISRA_CTX_ARG)
	return NULL;

      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->first_child = access;

      while (access->next_sibling
	     && access->next_sibling->offset < offset + size)
	access = access->next_sibling;
      if (access->offset + access->size > offset + size)
	{
	  /* This must be a different access, which are sorted, so the
	     following must be true and this signals a partial overlap.  */
	  gcc_assert (access->offset > offset);
	  return NULL;
	}

      r->next_sibling = access->next_sibling;
      access->next_sibling = NULL;
      *ptr = r;
      return r;
    }

  if (offset >= access->offset + access->size)
    {
      /* We belong after access.  */
      gensum_param_access *r = allocate_access (desc, offset, size);
      if (!r)
	return NULL;
      r->next_sibling = access->next_sibling;
      access->next_sibling = r;
      return r;
    }

  if (offset < access->offset)
    {
      /* We know the following, otherwise we would have created a
	 super-access.  */
      gcc_checking_assert (offset + size < access->offset + access->size);
      return NULL;
    }

  if (offset + size > access->offset + access->size)
    {
      /* Likewise.  */
      gcc_checking_assert (offset > access->offset);
      return NULL;
    }

  gcc_unreachable ();
}

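/* A small worked illustration of get_access_1: with pre-existing sibling
   accesses at offset/size 0/4 and 8/4, a new ISRA_CTX_ARG access 0/12 would
   become their parent, a new access 4/4 would become their sibling, whereas
   an access 2/4 would fail because it partially overlaps 0/4.  (All numbers
   are hypothetical offsets and sizes.)  */
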
1567/* Return an access describing memory access to the variable described by DESC
1568 at OFFSET with SIZE in context CTX, mark it as used in context CTX. Attempt
1569 to create if it does not exist, but fail and return NULL if there are
1570 already too many accesses, if it would create a partially overlapping access
1571 or if an access would end up in a non-call access. */

static gensum_param_access *
get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size,
	    isra_scan_context ctx)
{
  gcc_checking_assert (desc->split_candidate);

  gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset,
					       size, ctx);
  if (!access)
    {
      disqualify_split_candidate (desc,
				  "Bad access overlap or too many accesses");
      return NULL;
    }

  switch (ctx)
    {
    case ISRA_CTX_STORE:
      gcc_assert (!desc->by_ref);
      /* Fall-through */
    case ISRA_CTX_LOAD:
      access->nonarg = true;
      break;
    case ISRA_CTX_ARG:
      break;
    }

  return access;
}

/* Verify that parameter access tree starting with ACCESS is in good shape.
   PARENT_OFFSET and PARENT_SIZE are the corresponding fields of the parent of
   ACCESS or zero if there is none.  */

static bool
verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset,
		      HOST_WIDE_INT parent_size)
{
  while (access)
    {
      gcc_assert (access->offset >= 0 && access->size >= 0);

      if (parent_size != 0)
	{
	  if (access->offset < parent_offset)
	    {
	      error ("Access offset before parent offset");
	      return true;
	    }
	  if (access->size >= parent_size)
	    {
	      error ("Access size greater or equal to its parent size");
	      return true;
	    }
	  if (access->offset + access->size > parent_offset + parent_size)
	    {
	      error ("Access terminates outside of its parent");
	      return true;
	    }
	}

      if (verify_access_tree_1 (access->first_child, access->offset,
				access->size))
	return true;

      if (access->next_sibling
	  && (access->next_sibling->offset < access->offset + access->size))
	{
	  error ("Access overlaps with its sibling");
	  return true;
	}

      access = access->next_sibling;
    }
  return false;
}

/* Verify that parameter access tree starting with ACCESS is in good shape,
   halt compilation and dump the tree to stderr if not.  */

DEBUG_FUNCTION void
isra_verify_access_tree (gensum_param_access *access)
{
  if (verify_access_tree_1 (access, 0, 0))
    {
      for (; access; access = access->next_sibling)
	dump_gensum_access (stderr, access, 2);
      internal_error ("IPA-SRA access verification failed");
    }
}


/* Callback of walk_stmt_load_store_addr_ops visit_addr used to determine
   GIMPLE_ASM operands with memory constraints which cannot be scalarized.  */

static bool
asm_visit_addr (gimple *, tree op, tree, void *)
{
  op = get_base_address (op);
  if (op
      && TREE_CODE (op) == PARM_DECL)
    disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand.");

  return false;
}

/* Mark a dereference of parameter identified by DESC of distance DIST in a
   basic block BB, unless the BB has already been marked as potentially
   final.  */

static void
mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist,
			basic_block bb)
{
  gcc_assert (desc->by_ref);
  gcc_checking_assert (desc->split_candidate);

  if (desc->safe_ref
      || bitmap_bit_p (final_bbs, bb->index))
    return;

  int idx = bb->index * unsafe_by_ref_count + desc->deref_index;
  if (bb_dereferences[idx] < dist)
    bb_dereferences[idx] = dist;
}

/* Return true if any potential replacements should use NEW_TYPE as opposed to
   the previously recorded OLD_TYPE.  */
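/* For instance, a scalar prevails over a same-sized aggregate, a complex or
   vector type prevails over any other scalar, and among integral types the
   one with bigger precision wins; ties are broken by comparing TYPE_UIDs so
   that the choice is stable.  */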

static bool
type_prevails_p (tree old_type, tree new_type)
{
  if (old_type == new_type)
    return false;

  /* Non-aggregates are always better.  */
  if (!is_gimple_reg_type (old_type)
      && is_gimple_reg_type (new_type))
    return true;
  if (is_gimple_reg_type (old_type)
      && !is_gimple_reg_type (new_type))
    return false;

  /* Prefer any complex or vector type over any other scalar type.  */
  if (TREE_CODE (old_type) != COMPLEX_TYPE
      && TREE_CODE (old_type) != VECTOR_TYPE
      && (TREE_CODE (new_type) == COMPLEX_TYPE
	  || VECTOR_TYPE_P (new_type)))
    return true;
  if ((TREE_CODE (old_type) == COMPLEX_TYPE
       || VECTOR_TYPE_P (old_type))
      && TREE_CODE (new_type) != COMPLEX_TYPE
      && TREE_CODE (new_type) != VECTOR_TYPE)
    return false;

  /* Use the integral type with the bigger precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && INTEGRAL_TYPE_P (new_type))
    return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type));

  /* Attempt to disregard any integral type with non-full precision.  */
  if (INTEGRAL_TYPE_P (old_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (old_type))
	  != TYPE_PRECISION (old_type)))
    return true;
  if (INTEGRAL_TYPE_P (new_type)
      && (TREE_INT_CST_LOW (TYPE_SIZE (new_type))
	  != TYPE_PRECISION (new_type)))
    return false;
  /* Stabilize the selection.  */
  return TYPE_UID (old_type) < TYPE_UID (new_type);
}

/* When scanning an expression which is a call argument, this structure
   specifies the call and the position of the argument.  */

struct scan_call_info
{
  /* Call graph edge representing the call.  */
  cgraph_edge *cs;
  /* Total number of arguments in the call.  */
  unsigned argument_count;
  /* Number of the actual argument being scanned.  */
  unsigned arg_idx;
};

/* Record use of a part of a parameter described by DESC in a call argument
   described by CALL_INFO; the part starts at UNIT_OFFSET and has
   UNIT_SIZE.  */

static void
record_nonregister_call_use (gensum_param_desc *desc,
			     scan_call_info *call_info,
			     unsigned unit_offset, unsigned unit_size)
{
  isra_call_summary *csum = call_sums->get_create (call_info->cs);
  csum->init_inputs (call_info->argument_count);

  isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx];
  param_flow->aggregate_pass_through = true;
  set_single_param_flow_source (param_flow, desc->param_number);
  param_flow->unit_offset = unit_offset;
  param_flow->unit_size = unit_size;
  desc->call_uses++;
}

/* Callback of walk_aliased_vdefs, just mark that there was a possible
   modification.  */

static bool
mark_maybe_modified (ao_ref *, tree, void *data)
{
  bool *maybe_modified = (bool *) data;
  *maybe_modified = true;
  return true;
}

/* Analyze expression EXPR from GIMPLE for accesses to parameters.  CTX
   specifies whether EXPR is used in a load, a store or as a call argument.
   BB must be the basic block in which EXPR resides.  If CTX specifies call
   argument context, CALL_INFO must describe that call and argument position,
   otherwise it is ignored.  */
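/* For instance, for a statement 'x = p->fld;' where P is a pointer parameter,
   this records a load access with the offset and size of FLD, provided the
   dereference can be proven safe and the pointed-to data cannot have been
   modified through an alias before the statement.  */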

static void
scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx,
		  basic_block bb, scan_call_info *call_info = NULL)
{
  poly_int64 poffset, psize, pmax_size;
  HOST_WIDE_INT offset, size, max_size;
  tree base;
  bool deref = false;
  bool reverse;

  if (TREE_CODE (expr) == ADDR_EXPR)
    {
      if (ctx == ISRA_CTX_ARG)
	return;
      tree t = get_base_address (TREE_OPERAND (expr, 0));
      if (VAR_P (t) && !TREE_STATIC (t))
	loaded_decls->add (t);
      return;
    }
  if (TREE_CODE (expr) == SSA_NAME
      || CONSTANT_CLASS_P (expr))
    return;

  if (TREE_CODE (expr) == BIT_FIELD_REF
      || TREE_CODE (expr) == IMAGPART_EXPR
      || TREE_CODE (expr) == REALPART_EXPR)
    expr = TREE_OPERAND (expr, 0);

  base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse);

  if (TREE_CODE (base) == MEM_REF)
    {
      tree op = TREE_OPERAND (base, 0);
      if (TREE_CODE (op) != SSA_NAME
	  || !SSA_NAME_IS_DEFAULT_DEF (op))
	return;
      base = SSA_NAME_VAR (op);
      if (!base)
	return;
      deref = true;
    }
  else if (VAR_P (base)
	   && !TREE_STATIC (base)
	   && (ctx == ISRA_CTX_ARG
	       || ctx == ISRA_CTX_LOAD))
    loaded_decls->add (base);

  if (TREE_CODE (base) != PARM_DECL)
    return;

  gensum_param_desc *desc = get_gensum_param_desc (base);
  if (!desc || !desc->split_candidate)
    return;

  if (!poffset.is_constant (&offset)
      || !psize.is_constant (&size)
      || !pmax_size.is_constant (&max_size))
    {
      disqualify_split_candidate (desc, "Encountered a polynomial-sized "
				  "access.");
      return;
    }
  if (size < 0 || size != max_size)
    {
      disqualify_split_candidate (desc, "Encountered a variable sized access.");
      return;
    }
  if (TREE_CODE (expr) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (expr, 1)))
    {
      disqualify_split_candidate (desc, "Encountered a bit-field access.");
      return;
    }
  if (offset < 0)
    {
      disqualify_split_candidate (desc, "Encountered an access at a "
				  "negative offset.");
      return;
    }
  gcc_assert ((offset % BITS_PER_UNIT) == 0);
  gcc_assert ((size % BITS_PER_UNIT) == 0);
  if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT)
      || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT)
    {
      disqualify_split_candidate (desc, "Encountered an access with too big "
				  "offset or size");
      return;
    }

  tree type = TREE_TYPE (expr);
  unsigned int exp_align = get_object_alignment (expr);

  if (exp_align < TYPE_ALIGN (type))
    {
      disqualify_split_candidate (desc, "Underaligned access.");
      return;
    }

  if (deref)
    {
      if (!desc->by_ref)
	{
	  disqualify_split_candidate (desc, "Dereferencing a non-reference.");
	  return;
	}
      else if (ctx == ISRA_CTX_STORE)
	{
	  disqualify_split_candidate (desc, "Storing to data passed by "
				      "reference.");
	  return;
	}

      if (!aa_walking_limit)
	{
	  disqualify_split_candidate (desc, "Out of alias analysis step "
				      "limit.");
	  return;
	}

      gcc_checking_assert (gimple_vuse (stmt));
      bool maybe_modified = false;
      ao_ref ar;

      ao_ref_init (&ar, expr);
      bitmap visited = BITMAP_ALLOC (NULL);
      int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt),
				       mark_maybe_modified, &maybe_modified,
				       &visited, NULL, aa_walking_limit);
      BITMAP_FREE (visited);
      if (walked > 0)
	{
	  gcc_assert (aa_walking_limit > walked);
	  aa_walking_limit = aa_walking_limit - walked;
	}
      if (walked < 0)
	aa_walking_limit = 0;
      if (maybe_modified || walked < 0)
	{
	  disqualify_split_candidate (desc, "Data passed by reference possibly "
				      "modified through an alias.");
	  return;
	}
      else
	mark_param_dereference (desc, offset + size, bb);
    }
  else
    /* Pointer parameters with direct uses should have been ruled out by
       analyzing SSA default def when looking at the parameters.  */
    gcc_assert (!desc->by_ref);

  gensum_param_access *access = get_access (desc, offset, size, ctx);
  if (!access)
    return;

  if (ctx == ISRA_CTX_ARG)
    {
      gcc_checking_assert (call_info);

      if (!deref)
	record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT,
				     size / BITS_PER_UNIT);
      else
	/* This is not a pass-through of a pointer, this is a use like any
	   other.  */
	access->nonarg = true;
    }
  else if (ctx == ISRA_CTX_LOAD && bb->count.initialized_p ())
    access->load_count += bb->count;

  if (!access->type)
    {
      access->type = type;
      access->alias_ptr_type = reference_alias_ptr_type (expr);
      access->reverse = reverse;
    }
  else
    {
      if (exp_align < TYPE_ALIGN (access->type))
	{
	  disqualify_split_candidate (desc, "Reference has lower alignment "
				      "than a previous one.");
	  return;
	}
      if (access->alias_ptr_type != reference_alias_ptr_type (expr))
	{
	  disqualify_split_candidate (desc, "Multiple alias pointer types.");
	  return;
	}
      if (access->reverse != reverse)
	{
	  disqualify_split_candidate (desc, "Both normal and reverse "
				      "scalar storage order.");
	  return;
	}
      if (!deref
	  && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type))
	  && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type)))
	{
	  /* We need the same aggregate type on all accesses to be able to
	     distinguish transformation spots from pass-through arguments in
	     the transformation phase.  */
	  disqualify_split_candidate (desc, "We do not support aggregate "
				      "type punning.");
	  return;
	}

      if (type_prevails_p (access->type, type))
	access->type = type;
    }
}

/* Scan the body of the function described by NODE and FUN and create access
   trees for its parameters.  */

static void
scan_function (cgraph_node *node, struct function *fun)
{
  basic_block bb;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (final_bbs && stmt_can_throw_external (fun, stmt))
	    bitmap_set_bit (final_bbs, bb->index);
	  switch (gimple_code (stmt))
	    {
	    case GIMPLE_RETURN:
	      {
		tree t = gimple_return_retval (as_a <greturn *> (stmt));
		if (t != NULL_TREE)
		  scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
		if (final_bbs)
		  bitmap_set_bit (final_bbs, bb->index);
	      }
	      break;

	    case GIMPLE_ASSIGN:
	      if (gimple_assign_single_p (stmt)
		  && !gimple_clobber_p (stmt))
		{
		  tree rhs = gimple_assign_rhs1 (stmt);
		  scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb);
		  tree lhs = gimple_assign_lhs (stmt);
		  scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
		}
	      break;

	    case GIMPLE_CALL:
	      {
		unsigned argument_count = gimple_call_num_args (stmt);
		isra_scan_context ctx = ISRA_CTX_ARG;
		scan_call_info call_info, *call_info_p = &call_info;
		if (gimple_call_internal_p (stmt))
		  {
		    call_info_p = NULL;
		    ctx = ISRA_CTX_LOAD;
		    internal_fn ifn = gimple_call_internal_fn (stmt);
		    if (internal_store_fn_p (ifn))
		      ctx = ISRA_CTX_STORE;
		  }
		else
		  {
		    call_info.cs = node->get_edge (stmt);
		    call_info.argument_count = argument_count;
		  }

		for (unsigned i = 0; i < argument_count; i++)
		  {
		    call_info.arg_idx = i;
		    scan_expr_access (gimple_call_arg (stmt, i), stmt,
				      ctx, bb, call_info_p);
		  }

		tree lhs = gimple_call_lhs (stmt);
		if (lhs)
		  scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb);
		int flags = gimple_call_flags (stmt);
		if (final_bbs
		    && (((flags & (ECF_CONST | ECF_PURE)) == 0)
			|| (flags & ECF_LOOPING_CONST_OR_PURE)))
		  bitmap_set_bit (final_bbs, bb->index);
	      }
	      break;

	    case GIMPLE_ASM:
	      {
		gasm *asm_stmt = as_a <gasm *> (stmt);
		walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL,
					       asm_visit_addr);
		if (final_bbs)
		  bitmap_set_bit (final_bbs, bb->index);

		for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++)
		  {
		    tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i));
		    scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb);
		  }
		for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++)
		  {
		    tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i));
		    scan_expr_access (t, stmt, ISRA_CTX_STORE, bb);
		  }
	      }
	      break;

	    default:
	      break;
	    }
	}
    }
}

/* Return true if SSA_NAME NAME of function described by FUN is only used in
   return statements, or if results of any operations it is involved in are
   only used in return statements.  ANALYZED is a bitmap that tracks which SSA
   names we have already started investigating.  */
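/* For example, given

     _1 = param_2(D) + 1;
     return _1;

   the predicate holds both for _1 and for the default definition of the
   parameter, whereas storing _1 to memory or passing it to another call would
   make it fail.  */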

static bool
ssa_name_only_returned_p (function *fun, tree name, bitmap analyzed)
{
  bool res = true;
  imm_use_iterator imm_iter;
  gimple *stmt;

  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name)
    {
      if (is_gimple_debug (stmt))
	continue;

      if (gimple_code (stmt) == GIMPLE_RETURN)
	{
	  tree t = gimple_return_retval (as_a <greturn *> (stmt));
	  if (t != name)
	    {
	      res = false;
	      break;
	    }
	}
      else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt)
	       && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt))
		   || gimple_code (stmt) == GIMPLE_PHI))
	{
	  /* TODO: And perhaps for const function calls too?  */
	  tree lhs;
	  if (gimple_code (stmt) == GIMPLE_PHI)
	    lhs = gimple_phi_result (stmt);
	  else
	    lhs = gimple_assign_lhs (stmt);

	  if (TREE_CODE (lhs) != SSA_NAME)
	    {
	      res = false;
	      break;
	    }
	  gcc_assert (!gimple_vdef (stmt));
	  if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))
	      && !ssa_name_only_returned_p (fun, lhs, analyzed))
	    {
	      res = false;
	      break;
	    }
	}
      else
	{
	  res = false;
	  break;
	}
    }
  return res;
}

/* Inspect the uses of the return value of the call associated with CS, and if
   it is not used or if it is only used to construct the return value of the
   caller, mark it as such in the call or caller summary.  Also check for
   misaligned arguments.  */

static void
isra_analyze_call (cgraph_edge *cs)
{
  gcall *call_stmt = cs->call_stmt;
  unsigned count = gimple_call_num_args (call_stmt);
  isra_call_summary *csum = call_sums->get_create (cs);

  for (unsigned i = 0; i < count; i++)
    {
      tree arg = gimple_call_arg (call_stmt, i);
      if (TREE_CODE (arg) == ADDR_EXPR)
	{
	  poly_int64 poffset, psize, pmax_size;
	  bool reverse;
	  tree base = get_ref_base_and_extent (TREE_OPERAND (arg, 0), &poffset,
					       &psize, &pmax_size, &reverse);
	  HOST_WIDE_INT offset;
	  unsigned HOST_WIDE_INT ds;
	  if (DECL_P (base)
	      && (poffset.is_constant (&offset))
	      && tree_fits_uhwi_p (DECL_SIZE (base))
	      && ((ds = tree_to_uhwi (DECL_SIZE (base)) - offset)
		  < ISRA_ARG_SIZE_LIMIT * BITS_PER_UNIT))
	    {
	      csum->init_inputs (count);
	      gcc_assert (!csum->m_arg_flow[i].aggregate_pass_through);
	      csum->m_arg_flow[i].unit_size = ds / BITS_PER_UNIT;
	    }

	  if (TREE_CODE (base) == VAR_DECL
	      && !TREE_STATIC (base)
	      && !loaded_decls->contains (base))
	    {
	      csum->init_inputs (count);
	      csum->m_arg_flow[i].constructed_for_calls = true;
	    }
	}

      if (is_gimple_reg (arg))
	continue;

      tree offset;
      poly_int64 bitsize, bitpos;
      machine_mode mode;
      int unsignedp, reversep, volatilep = 0;
      get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode,
			   &unsignedp, &reversep, &volatilep);
      if (!multiple_p (bitpos, BITS_PER_UNIT))
	{
	  csum->m_bit_aligned_arg = true;
	  break;
	}
    }

  tree lhs = gimple_call_lhs (call_stmt);
  if (lhs)
    {
      /* TODO: Also detect aggregates on a LHS of a call that are only returned
	 from this function (without being read anywhere).  */
      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  bitmap analyzed = BITMAP_ALLOC (NULL);
	  if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs->caller->decl),
					lhs, analyzed))
	    csum->m_return_returned = true;
	  BITMAP_FREE (analyzed);
	}
    }
  else
    csum->m_return_ignored = true;
}

/* Look at all calls going out of NODE and perform all analyses necessary for
   IPA-SRA that are not done at body scan time, or that must be done even when
   the body is not scanned because the function is not a candidate.  */

static void
isra_analyze_all_outgoing_calls (cgraph_node *node)
{
  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
    isra_analyze_call (cs);
  for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
    isra_analyze_call (cs);
}

/* Dump a dereferences table with heading STR to file F.  */

static void
dump_dereferences_table (FILE *f, struct function *fun, const char *str)
{
  basic_block bb;

  fprintf (f, "%s", str);
  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun),
		  EXIT_BLOCK_PTR_FOR_FN (fun), next_bb)
    {
      fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index));
      if (bb != EXIT_BLOCK_PTR_FOR_FN (fun))
	{
	  int i;
	  for (i = 0; i < unsafe_by_ref_count; i++)
	    {
	      int idx = bb->index * unsafe_by_ref_count + i;
	      fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]);
	    }
	}
      fprintf (f, "\n");
    }
  fprintf (f, "\n");
}

/* Propagate distances in bb_dereferences against the direction of the control
   flow edges, in each step storing the maximum of the current value and the
   minimum of all successors.  These steps are repeated until the table
   stabilizes.  Note that BBs which might terminate the function (according to
   the final_bbs bitmap) are never updated in this way.  */
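/* For example, if one successor of BB is known to dereference a parameter up
   to 8 bytes and another only up to 4, BB itself can be credited with a
   4-byte dereference (the minimum), because every path from BB provably
   reaches at least that far, unless BB itself may terminate the function.  */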

static void
propagate_dereference_distances (struct function *fun)
{
  basic_block bb;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_dereferences_table (dump_file, fun,
			     "Dereference table before propagation:\n");

  auto_vec<basic_block> queue (last_basic_block_for_fn (fun));
  queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
  FOR_EACH_BB_FN (bb, fun)
    {
      queue.quick_push (bb);
      bb->aux = bb;
    }

  while (!queue.is_empty ())
    {
      edge_iterator ei;
      edge e;
      bool change = false;
      int i;

      bb = queue.pop ();
      bb->aux = NULL;

      if (bitmap_bit_p (final_bbs, bb->index))
	continue;

      for (i = 0; i < unsafe_by_ref_count; i++)
	{
	  int idx = bb->index * unsafe_by_ref_count + i;
	  bool first = true;
	  HOST_WIDE_INT inh = 0;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int succ_idx = e->dest->index * unsafe_by_ref_count + i;

	      if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
		continue;

	      if (first)
		{
		  first = false;
		  inh = bb_dereferences[succ_idx];
		}
	      else if (bb_dereferences[succ_idx] < inh)
		inh = bb_dereferences[succ_idx];
	    }

	  if (!first && bb_dereferences[idx] < inh)
	    {
	      bb_dereferences[idx] = inh;
	      change = true;
	    }
	}

      if (change)
	FOR_EACH_EDGE (e, ei, bb->preds)
	  {
	    if (e->src->aux)
	      continue;

	    e->src->aux = e->src;
	    queue.quick_push (e->src);
	  }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_dereferences_table (dump_file, fun,
			     "Dereference table after propagation:\n");
}

/* Return true if the loads of ACCESS happen frequently enough in FUN to
   justify moving them to the callers and only passing the result.  */
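/* A minimal illustration, assuming the default value (50) of the
   ipa-sra-deref-prob-threshold parameter: a load must be executed at least
   half as often as the function is entered for the dereference to be
   considered probable enough.  */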

static bool
dereference_probable_p (struct function *fun, gensum_param_access *access)
{
  int threshold = opt_for_fn (fun->decl, param_ipa_sra_deref_prob_threshold);
  return access->load_count
    >= ENTRY_BLOCK_PTR_FOR_FN (fun)->count.apply_scale (threshold, 100);
}

/* Perform basic checks on ACCESS to PARM (of FUN) described by DESC and all
   its children, return true if the parameter cannot be split, otherwise
   return false and update *NONARG_ACC_SIZE and *ONLY_CALLS.  ENTRY_BB_INDEX
   must be the index of the entry BB in the function of PARM.  */

static bool
check_gensum_access (struct function *fun, tree parm, gensum_param_desc *desc,
		     gensum_param_access *access,
		     HOST_WIDE_INT *nonarg_acc_size, bool *only_calls,
		     int entry_bb_index)
{
  if (access->nonarg)
    {
      *only_calls = false;
      *nonarg_acc_size += access->size;

      if (access->first_child)
	{
	  disqualify_split_candidate (desc, "Overlapping non-call uses.");
	  return true;
	}
    }
  /* Do not decompose a non-BLKmode param in a way that would create
     BLKmode params.  Especially for by-reference passing (thus,
     pointer-type param) this is hardly worthwhile.  */
  if (DECL_MODE (parm) != BLKmode
      && TYPE_MODE (access->type) == BLKmode)
    {
      disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK.");
      return true;
    }

  if (desc->by_ref)
    {
      if (desc->safe_ref)
	{
	  if (!dereference_probable_p (fun, access))
	    {
	      disqualify_split_candidate (desc, "Dereferences in callers "
					  "would happen much more frequently.");
	      return true;
	    }
	}
      else
	{
	  int idx = (entry_bb_index * unsafe_by_ref_count + desc->deref_index);
	  if ((access->offset + access->size) > bb_dereferences[idx])
	    {
	      if (!dereference_probable_p (fun, access))
		{
		  disqualify_split_candidate (desc, "Would create a possibly "
					      "illegal dereference in a "
					      "caller.");
		  return true;
		}
	      desc->conditionally_dereferenceable = true;
	    }
	}
    }

  for (gensum_param_access *ch = access->first_child;
       ch;
       ch = ch->next_sibling)
    if (check_gensum_access (fun, parm, desc, ch, nonarg_acc_size, only_calls,
			     entry_bb_index))
      return true;

  return false;
}

/* Copy data from FROM and all of its children to a vector of accesses in IPA
   descriptor DESC.  */

static void
copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc)
{
  param_access *to = ggc_cleared_alloc<param_access> ();
  gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0);
  gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0);
  to->unit_offset = from->offset / BITS_PER_UNIT;
  to->unit_size = from->size / BITS_PER_UNIT;
  to->type = from->type;
  to->alias_ptr_type = from->alias_ptr_type;
  to->certain = from->nonarg;
  to->reverse = from->reverse;
  vec_safe_push (desc->accesses, to);

  for (gensum_param_access *ch = from->first_child;
       ch;
       ch = ch->next_sibling)
    copy_accesses_to_ipa_desc (ch, desc);
}

/* Analyze the results of the function body scan stored in PARAM_DESCRIPTIONS,
   detect possible transformations and store information about them in the
   function summary.  NODE, FUN and IFS are all various structures describing
   the currently analyzed function.  */

static void
process_scan_results (cgraph_node *node, struct function *fun,
		      isra_func_summary *ifs,
		      vec<gensum_param_desc> *param_descriptions)
{
  bool check_pass_throughs = false;
  bool dereferences_propagated = false;
  tree parm = DECL_ARGUMENTS (node->decl);
  unsigned param_count = param_descriptions->length ();

  for (unsigned desc_index = 0;
       desc_index < param_count;
       desc_index++, parm = DECL_CHAIN (parm))
    {
      gensum_param_desc *desc = &(*param_descriptions)[desc_index];
      if (!desc->split_candidate)
	continue;

      if (flag_checking)
	isra_verify_access_tree (desc->accesses);

      if (!dereferences_propagated
	  && desc->by_ref
	  && !desc->safe_ref
	  && desc->accesses)
	{
	  propagate_dereference_distances (fun);
	  dereferences_propagated = true;
	}

      HOST_WIDE_INT nonarg_acc_size = 0;
      bool only_calls = true;
      bool check_failed = false;

      int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index;
      for (gensum_param_access *acc = desc->accesses;
	   acc;
	   acc = acc->next_sibling)
	if (check_gensum_access (fun, parm, desc, acc, &nonarg_acc_size,
				 &only_calls, entry_bb_index))
	  {
	    check_failed = true;
	    break;
	  }
      if (check_failed)
	continue;

      if (only_calls)
	desc->locally_unused = true;

      HOST_WIDE_INT cur_param_size
	= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm)));
      HOST_WIDE_INT param_size_limit, optimistic_limit;
      if (!desc->by_ref || optimize_function_for_size_p (fun))
	{
	  param_size_limit = cur_param_size;
	  optimistic_limit = cur_param_size;
	}
      else
	{
	  param_size_limit
	    = opt_for_fn (node->decl,
			  param_ipa_sra_ptr_growth_factor) * cur_param_size;
	  optimistic_limit
	    = (opt_for_fn (node->decl, param_ipa_sra_ptrwrap_growth_factor)
	       * param_size_limit);
	}

      if (nonarg_acc_size > optimistic_limit
	  || (!desc->by_ref && nonarg_acc_size == param_size_limit))
	{
	  disqualify_split_candidate (desc, "Would result in too big a set "
				      "of replacements even in the best "
				      "scenarios.");
	}
      else
	{
	  /* create_parameter_descriptors makes sure unit sizes of all
	     candidate parameters fit unsigned integers restricted to
	     ISRA_ARG_SIZE_LIMIT.  */
	  desc->param_size_limit = param_size_limit / BITS_PER_UNIT;
	  desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT;
	  if (desc->split_candidate && desc->ptr_pt_count)
	    {
	      gcc_assert (desc->by_ref);
	      check_pass_throughs = true;
	    }
	}
    }

  /* When a pointer parameter is passed-through to a callee, in which it is
     used to read only one or a few items, we can attempt to transform it to
     obtaining and passing through the items instead of the pointer.  But we
     must take extra care that 1) we do not introduce any segfault by moving
     dereferences above control flow and that 2) the data is not modified
     through an alias in this function.  The IPA analysis must not introduce
     any access candidates unless it can prove both.

     The current solution is very crude as it consists of ensuring that the
     call postdominates entry BB and that the definition of VUSE of the call
     is a default definition.  TODO: For non-recursive callees in the same
     compilation unit we could do better by doing analysis in topological
     order and looking into access candidates of callees, using their
     alias_ptr_types to attempt real AA.  We could also use the maximum known
     dereferenced offset in this function at IPA level.

     TODO: Measure the overhead and the effect of just being pessimistic.
     Maybe this is only -O3 material?  */
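  /* As a hypothetical illustration (the names are made up), this is what
     enables

       int foo (struct S *p) { return bar (p); }
       static int bar (struct S *p) { return p->x; }

     to become

       int foo (struct S *p) { return bar (p->x); }
       static int bar (int x) { return x; }

     provided the call to bar postdominates foo's entry and nothing can modify
     *p before the call.  */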

  hash_map<gimple *, bool> analyzed_stmts;
  bitmap always_executed_bbs = NULL;
  if (check_pass_throughs)
    for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
      {
	gcall *call_stmt = cs->call_stmt;
	tree vuse = gimple_vuse (call_stmt);

	/* If the callee is a const function, we don't get a VUSE.  In such a
	   case there will be no memory accesses in the called function (or
	   the const attribute is wrong) and then we just don't care.  */
	bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse);

	unsigned count = gimple_call_num_args (call_stmt);
	isra_call_summary *csum = call_sums->get_create (cs);
	csum->init_inputs (count);
	csum->m_before_any_store = uses_memory_as_obtained;
	for (unsigned argidx = 0; argidx < count; argidx++)
	  {
	    if (!csum->m_arg_flow[argidx].pointer_pass_through)
	      continue;
	    unsigned pidx
	      = get_single_param_flow_source (&csum->m_arg_flow[argidx]);
	    gensum_param_desc *desc = &(*param_descriptions)[pidx];
	    if (!desc->split_candidate)
	      {
		csum->m_arg_flow[argidx].pointer_pass_through = false;
		continue;
	      }
	    if (!uses_memory_as_obtained)
	      continue;

	    if (desc->safe_ref)
	      {
		csum->m_arg_flow[argidx].safe_to_import_accesses = true;
		continue;
	      }

	    /* Walk the basic block and see if its execution can terminate
	       earlier.  Keep the info for later re-use to avoid quadratic
	       behaviour here.  */
	    gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
	    bool safe = true;
	    int n = 0;
	    for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
	      {
		bool *b = analyzed_stmts.get (gsi_stmt (gsi));
		if (b)
		  {
		    safe = *b;
		    gsi_next (&gsi);
		    break;
		  }
		n++;
		if (stmt_may_terminate_function_p (fun, gsi_stmt (gsi), false))
		  {
		    safe = false;
		    break;
		  }
	      }
	    if (n)
	      {
		if (gsi_end_p (gsi))
		  gsi = gsi_start_bb (gimple_bb (call_stmt));
		for (; gsi_stmt (gsi) != call_stmt; gsi_next (&gsi))
		  analyzed_stmts.get_or_insert (gsi_stmt (gsi)) = safe;
	      }

	    if (safe && !always_executed_bbs)
	      {
		mark_dfs_back_edges ();
		always_executed_bbs = find_always_executed_bbs (fun, false);
	      }
	    if (safe
		&& bitmap_bit_p (always_executed_bbs,
				 gimple_bb (call_stmt)->index))
	      csum->m_arg_flow[argidx].safe_to_import_accesses = true;
	  }
      }
  BITMAP_FREE (always_executed_bbs);

  /* TODO: Add early exit if we disqualified everything.  This also requires
     that we either relax the restriction that
     ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs
     or store the number of parameters to IPA-SRA function summary and use that
     when just removing params.  */

  vec_safe_reserve_exact (ifs->m_parameters, param_count);
  ifs->m_parameters->quick_grow_cleared (param_count);
  for (unsigned desc_index = 0; desc_index < param_count; desc_index++)
    {
      gensum_param_desc *s = &(*param_descriptions)[desc_index];
      isra_param_desc *d = &(*ifs->m_parameters)[desc_index];

      d->param_size_limit = s->param_size_limit;
      d->size_reached = s->nonarg_acc_size;
      d->locally_unused = s->locally_unused;
      d->split_candidate = s->split_candidate;
      d->by_ref = s->by_ref;
      d->remove_only_when_retval_removed = s->remove_only_when_retval_removed;
      d->split_only_when_retval_removed = s->split_only_when_retval_removed;
      d->conditionally_dereferenceable = s->conditionally_dereferenceable;

      for (gensum_param_access *acc = s->accesses;
	   acc;
	   acc = acc->next_sibling)
	copy_accesses_to_ipa_desc (acc, d);
    }

  if (dump_file)
    dump_isra_param_descriptors (dump_file, node->decl, ifs, false);
}

/* Return true if there are any overlaps among certain accesses of DESC.  If
   non-NULL, set *CERTAIN_ACCESS_PRESENT_P upon encountering a certain access
   too.  DESC is assumed to be a split candidate that is not locally
   unused.  */
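/* A certain access is one that is known to be created even if no callees are
   modified, so two certain accesses replacing overlapping bits of the same
   parameter would clash with each other.  */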

static bool
overlapping_certain_accesses_p (isra_param_desc *desc,
				bool *certain_access_present_p)
{
  unsigned pclen = vec_safe_length (desc->accesses);
  for (unsigned i = 0; i < pclen; i++)
    {
      param_access *a1 = (*desc->accesses)[i];

      if (!a1->certain)
	continue;
      if (certain_access_present_p)
	*certain_access_present_p = true;
      for (unsigned j = i + 1; j < pclen; j++)
	{
	  param_access *a2 = (*desc->accesses)[j];
	  if (a2->certain
	      && a1->unit_offset < a2->unit_offset + a2->unit_size
	      && a1->unit_offset + a1->unit_size > a2->unit_offset)
	    return true;
	}
    }
  return false;
}

/* Check for any overlaps of certain param accesses among splitting candidates
   and signal an ICE if there are any.  If CERTAIN_MUST_EXIST is set, also
   check that used splitting candidates have at least one certain access.  */

static void
verify_splitting_accesses (cgraph_node *node, bool certain_must_exist)
{
  isra_func_summary *ifs = func_sums->get (node);
  if (!ifs || !ifs->m_candidate)
    return;
  unsigned param_count = vec_safe_length (ifs->m_parameters);
  for (unsigned pidx = 0; pidx < param_count; pidx++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[pidx];
      if (!desc->split_candidate || desc->locally_unused)
	continue;

      bool certain_access_present = !certain_must_exist;
      if (overlapping_certain_accesses_p (desc, &certain_access_present))
	internal_error ("function %qs, parameter %u, has IPA-SRA accesses "
			"which overlap", node->dump_name (), pidx);
      if (!certain_access_present)
	internal_error ("function %qs, parameter %u, is used but does not "
			"have any certain IPA-SRA access",
			node->dump_name (), pidx);
    }
}

/* Intraprocedural part of IPA-SRA analysis.  Scan bodies of all functions in
   this compilation unit and create summary structures describing IPA-SRA
   opportunities and constraints in them.  */

static void
ipa_sra_generate_summary (void)
{
  struct cgraph_node *node;

  gcc_checking_assert (!func_sums);
  gcc_checking_assert (!call_sums);
  func_sums
    = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
       ipa_sra_function_summaries (symtab, true));
  call_sums = new ipa_sra_call_summaries (symtab);

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    ipa_sra_summarize_function (node);
  return;
}

/* Write intraprocedural analysis information about the call graph edge E into
   a stream for LTO WPA.  */

static void
isra_write_edge_summary (output_block *ob, cgraph_edge *e)
{
  isra_call_summary *csum = call_sums->get (e);
  unsigned input_count = csum->m_arg_flow.length ();
  streamer_write_uhwi (ob, input_count);
  for (unsigned i = 0; i < input_count; i++)
    {
      isra_param_flow *ipf = &csum->m_arg_flow[i];
      streamer_write_hwi (ob, ipf->length);
      bitpack_d bp = bitpack_create (ob->main_stream);
      for (int j = 0; j < ipf->length; j++)
	bp_pack_value (&bp, ipf->inputs[j], 8);
      bp_pack_value (&bp, ipf->aggregate_pass_through, 1);
      bp_pack_value (&bp, ipf->pointer_pass_through, 1);
      bp_pack_value (&bp, ipf->safe_to_import_accesses, 1);
      bp_pack_value (&bp, ipf->constructed_for_calls, 1);
      streamer_write_bitpack (&bp);
      streamer_write_uhwi (ob, ipf->unit_offset);
      streamer_write_uhwi (ob, ipf->unit_size);
    }
  bitpack_d bp = bitpack_create (ob->main_stream);
  bp_pack_value (&bp, csum->m_return_ignored, 1);
  bp_pack_value (&bp, csum->m_return_returned, 1);
  bp_pack_value (&bp, csum->m_bit_aligned_arg, 1);
  bp_pack_value (&bp, csum->m_before_any_store, 1);
  streamer_write_bitpack (&bp);
}

/* Write intraprocedural analysis information about NODE and all of its
   outgoing edges into a stream for LTO WPA.  */

static void
isra_write_node_summary (output_block *ob, cgraph_node *node)
{
  isra_func_summary *ifs = func_sums->get (node);
  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
  int node_ref = lto_symtab_encoder_encode (encoder, node);
  streamer_write_uhwi (ob, node_ref);

  unsigned param_desc_count = vec_safe_length (ifs->m_parameters);
  streamer_write_uhwi (ob, param_desc_count);
  for (unsigned i = 0; i < param_desc_count; i++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[i];
      unsigned access_count = vec_safe_length (desc->accesses);
      streamer_write_uhwi (ob, access_count);
      for (unsigned j = 0; j < access_count; j++)
	{
	  param_access *acc = (*desc->accesses)[j];
	  stream_write_tree (ob, acc->type, true);
	  stream_write_tree (ob, acc->alias_ptr_type, true);
	  streamer_write_uhwi (ob, acc->unit_offset);
	  streamer_write_uhwi (ob, acc->unit_size);
	  bitpack_d bp = bitpack_create (ob->main_stream);
	  bp_pack_value (&bp, acc->certain, 1);
	  bp_pack_value (&bp, acc->reverse, 1);
	  streamer_write_bitpack (&bp);
	}
      streamer_write_uhwi (ob, desc->param_size_limit);
      streamer_write_uhwi (ob, desc->size_reached);
      gcc_assert (desc->safe_size == 0);
      bitpack_d bp = bitpack_create (ob->main_stream);
      bp_pack_value (&bp, desc->locally_unused, 1);
      bp_pack_value (&bp, desc->split_candidate, 1);
      bp_pack_value (&bp, desc->by_ref, 1);
      gcc_assert (!desc->not_specially_constructed);
      bp_pack_value (&bp, desc->remove_only_when_retval_removed, 1);
      bp_pack_value (&bp, desc->split_only_when_retval_removed, 1);
      bp_pack_value (&bp, desc->conditionally_dereferenceable, 1);
      gcc_assert (!desc->safe_size_set);
      streamer_write_bitpack (&bp);
    }
  bitpack_d bp = bitpack_create (ob->main_stream);
  bp_pack_value (&bp, ifs->m_candidate, 1);
  bp_pack_value (&bp, ifs->m_returns_value, 1);
  bp_pack_value (&bp, ifs->m_return_ignored, 1);
  gcc_assert (!ifs->m_queued);
  streamer_write_bitpack (&bp);

  for (cgraph_edge *e = node->callees; e; e = e->next_callee)
    isra_write_edge_summary (ob, e);
  for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
    isra_write_edge_summary (ob, e);
}

/* Write intraprocedural analysis information into a stream for LTO WPA.  */

static void
ipa_sra_write_summary (void)
{
  if (!func_sums || !call_sums)
    return;

  struct output_block *ob = create_output_block (LTO_section_ipa_sra);
  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
  ob->symbol = NULL;

  unsigned int count = 0;
  lto_symtab_encoder_iterator lsei;
  for (lsei = lsei_start_function_in_partition (encoder);
       !lsei_end_p (lsei);
       lsei_next_function_in_partition (&lsei))
    {
      cgraph_node *node = lsei_cgraph_node (lsei);
      if (node->has_gimple_body_p ()
	  && func_sums->get (node) != NULL)
	count++;
    }
  streamer_write_uhwi (ob, count);

  /* Process all of the functions.  */
  for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
       lsei_next_function_in_partition (&lsei))
    {
      cgraph_node *node = lsei_cgraph_node (lsei);
      if (node->has_gimple_body_p ()
	  && func_sums->get (node) != NULL)
	isra_write_node_summary (ob, node);
    }
  streamer_write_char_stream (ob->main_stream, 0);
  produce_asm (ob, NULL);
  destroy_output_block (ob);
}

/* Read intraprocedural analysis information about the call graph edge CS from
   the stream IB during LTO WPA.  */

static void
isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs)
{
  isra_call_summary *csum = call_sums->get_create (cs);
  unsigned input_count = streamer_read_uhwi (ib);
  csum->init_inputs (input_count);
  for (unsigned i = 0; i < input_count; i++)
    {
      isra_param_flow *ipf = &csum->m_arg_flow[i];
      ipf->length = streamer_read_hwi (ib);
      bitpack_d bp = streamer_read_bitpack (ib);
      for (int j = 0; j < ipf->length; j++)
	ipf->inputs[j] = bp_unpack_value (&bp, 8);
      ipf->aggregate_pass_through = bp_unpack_value (&bp, 1);
      ipf->pointer_pass_through = bp_unpack_value (&bp, 1);
      ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1);
      ipf->constructed_for_calls = bp_unpack_value (&bp, 1);
      ipf->unit_offset = streamer_read_uhwi (ib);
      ipf->unit_size = streamer_read_uhwi (ib);
    }
  bitpack_d bp = streamer_read_bitpack (ib);
  csum->m_return_ignored = bp_unpack_value (&bp, 1);
  csum->m_return_returned = bp_unpack_value (&bp, 1);
  csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1);
  csum->m_before_any_store = bp_unpack_value (&bp, 1);
}

/* Read intraprocedural analysis information about NODE and all of its
   outgoing edges from a stream written for LTO WPA.  */

static void
isra_read_node_info (struct lto_input_block *ib, cgraph_node *node,
		     struct data_in *data_in)
{
  isra_func_summary *ifs = func_sums->get_create (node);
  unsigned param_desc_count = streamer_read_uhwi (ib);
  if (param_desc_count > 0)
    {
      vec_safe_reserve_exact (ifs->m_parameters, param_desc_count);
      ifs->m_parameters->quick_grow_cleared (param_desc_count);
    }
  for (unsigned i = 0; i < param_desc_count; i++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[i];
      unsigned access_count = streamer_read_uhwi (ib);
      for (unsigned j = 0; j < access_count; j++)
	{
	  param_access *acc = ggc_cleared_alloc<param_access> ();
	  acc->type = stream_read_tree (ib, data_in);
	  acc->alias_ptr_type = stream_read_tree (ib, data_in);
	  acc->unit_offset = streamer_read_uhwi (ib);
	  acc->unit_size = streamer_read_uhwi (ib);
	  bitpack_d bp = streamer_read_bitpack (ib);
	  acc->certain = bp_unpack_value (&bp, 1);
	  acc->reverse = bp_unpack_value (&bp, 1);
	  vec_safe_push (desc->accesses, acc);
	}
      desc->param_size_limit = streamer_read_uhwi (ib);
      desc->size_reached = streamer_read_uhwi (ib);
      desc->safe_size = 0;
      bitpack_d bp = streamer_read_bitpack (ib);
      desc->locally_unused = bp_unpack_value (&bp, 1);
      desc->split_candidate = bp_unpack_value (&bp, 1);
      desc->by_ref = bp_unpack_value (&bp, 1);
      desc->not_specially_constructed = 0;
      desc->remove_only_when_retval_removed = bp_unpack_value (&bp, 1);
      desc->split_only_when_retval_removed = bp_unpack_value (&bp, 1);
      desc->conditionally_dereferenceable = bp_unpack_value (&bp, 1);
      desc->safe_size_set = 0;
    }
  bitpack_d bp = streamer_read_bitpack (ib);
  ifs->m_candidate = bp_unpack_value (&bp, 1);
  ifs->m_returns_value = bp_unpack_value (&bp, 1);
  ifs->m_return_ignored = bp_unpack_value (&bp, 1);
  ifs->m_queued = 0;

  for (cgraph_edge *e = node->callees; e; e = e->next_callee)
    isra_read_edge_summary (ib, e);
  for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
    isra_read_edge_summary (ib, e);
}

/* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with
   data DATA.  TODO: This function was copied almost verbatim from
   ipa-prop.cc, it should be possible to unify them somehow.  */

static void
isra_read_summary_section (struct lto_file_decl_data *file_data,
			   const char *data, size_t len)
{
  const struct lto_function_header *header =
    (const struct lto_function_header *) data;
  const int cfg_offset = sizeof (struct lto_function_header);
  const int main_offset = cfg_offset + header->cfg_size;
  const int string_offset = main_offset + header->main_size;
  struct data_in *data_in;
  unsigned int i;
  unsigned int count;

  lto_input_block ib_main ((const char *) data + main_offset,
			   header->main_size, file_data);

  data_in =
    lto_data_in_create (file_data, (const char *) data + string_offset,
			header->string_size, vNULL);
  count = streamer_read_uhwi (&ib_main);

  for (i = 0; i < count; i++)
    {
      unsigned int index;
      struct cgraph_node *node;
      lto_symtab_encoder_t encoder;

      index = streamer_read_uhwi (&ib_main);
      encoder = file_data->symtab_node_encoder;
      node = dyn_cast <cgraph_node *> (lto_symtab_encoder_deref (encoder,
								 index));
      gcc_assert (node->definition);
      isra_read_node_info (&ib_main, node, data_in);
    }
  lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data,
			 len);
  lto_data_in_delete (data_in);
}

/* Read intraprocedural analysis information from a stream for LTO WPA.  */

static void
ipa_sra_read_summary (void)
{
  struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
  struct lto_file_decl_data *file_data;
  unsigned int j = 0;

  gcc_checking_assert (!func_sums);
  gcc_checking_assert (!call_sums);
  func_sums
    = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ())
       ipa_sra_function_summaries (symtab, true));
  call_sums = new ipa_sra_call_summaries (symtab);

  while ((file_data = file_data_vec[j++]))
    {
      size_t len;
      const char *data
	= lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len);
      if (data)
	isra_read_summary_section (file_data, data, len);
    }
}

/* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F.  If
   HINTS is true, also dump IPA-analysis computed hints.  */

static void
ipa_sra_dump_all_summaries (FILE *f, bool hints)
{
  cgraph_node *node;
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      fprintf (f, "\nSummary for node %s:\n", node->dump_name ());

      isra_func_summary *ifs = func_sums->get (node);
      if (!ifs)
	fprintf (f, "  Function does not have any associated IPA-SRA "
		 "summary\n");
      else if (!ifs->m_candidate)
	fprintf (f, "  Not a candidate function\n");
      else
	{
	  if (ifs->m_returns_value)
	    fprintf (f, "  Returns value\n");
	  if (vec_safe_is_empty (ifs->m_parameters))
	    fprintf (f, "  No parameter information.\n");
	  else
	    for (unsigned i = 0; i < ifs->m_parameters->length (); ++i)
	      {
		fprintf (f, "  Descriptor for parameter %i:\n", i);
		dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i],
					    hints);
	      }
	  fprintf (f, "\n");
	}

      struct cgraph_edge *cs;
      for (cs = node->callees; cs; cs = cs->next_callee)
	{
	  fprintf (f, "  Summary for edge %s->%s:\n",
		   cs->caller->dump_name (), cs->callee->dump_name ());
	  isra_call_summary *csum = call_sums->get (cs);
	  if (csum)
	    csum->dump (f);
	  else
	    fprintf (f, "    Call summary is MISSING!\n");
	}
    }
  fprintf (f, "\n\n");
}

/* Perform function-scope viability tests that can only be made at IPA level
   and return false if the function is deemed unsuitable for IPA-SRA.  */

static bool
ipa_sra_ipa_function_checks (cgraph_node *node)
{
  if (!node->can_be_local_p ())
    {
      if (dump_file)
	fprintf (dump_file, "Function %s disqualified because it cannot be "
		 "made local.\n", node->dump_name ());
      return false;
    }
  if (!node->can_change_signature)
    {
      if (dump_file)
	fprintf (dump_file, "Function cannot change signature.\n");
      return false;
    }

  return true;
}

/* Issues found out by check_callers_for_issues.  */

struct caller_issues
{
  /* The candidate being considered.  */
  cgraph_node *candidate;
  /* There is a thunk among callers.  */
  bool thunk;
  /* Set if there is at least one caller that is OK.  */
  bool there_is_one;
  /* Call site with no available information.  */
  bool unknown_callsite;
  /* Call from outside the candidate's comdat group.  */
  bool call_from_outside_comdat;
  /* There is a bit-aligned load into one of the non-gimple-typed
     arguments.  */
  bool bit_aligned_aggregate_argument;
};

/* Worker for call_for_symbol_and_aliases, set any flags of the passed
   caller_issues that apply.  */

static bool
check_for_caller_issues (struct cgraph_node *node, void *data)
{
  struct caller_issues *issues = (struct caller_issues *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    {
      if (cs->caller->thunk)
	{
	  issues->thunk = true;
	  /* TODO: We should be able to process at least some types of
	     thunks.  */
	  return true;
	}
      if (issues->candidate->calls_comdat_local
	  && issues->candidate->same_comdat_group
	  && !issues->candidate->in_same_comdat_group_p (cs->caller))
	{
	  issues->call_from_outside_comdat = true;
	  return true;
	}

      isra_call_summary *csum = call_sums->get (cs);
      if (!csum)
	{
	  issues->unknown_callsite = true;
	  return true;
	}

      if (csum->m_bit_aligned_arg)
	issues->bit_aligned_aggregate_argument = true;

      issues->there_is_one = true;
    }
  return false;
}

/* Look at all incoming edges to NODE, including aliases and thunks, and look
   for problems.  Return true if the type of NODE should not be modified at
   all.  */

static bool
check_all_callers_for_issues (cgraph_node *node)
{
  struct caller_issues issues;
  memset (&issues, 0, sizeof (issues));
  issues.candidate = node;

  node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true);
  if (issues.unknown_callsite)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "A call of %s has not been analyzed.  Disabling "
		 "all modifications.\n", node->dump_name ());
      return true;
    }
  /* TODO: We should be able to process at least some types of thunks.  */
  if (issues.thunk)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "A call of %s is through a thunk, which is not"
		 " handled yet.  Disabling all modifications.\n",
		 node->dump_name ());
      return true;
    }
  if (issues.call_from_outside_comdat)
    {
      if (dump_file)
	fprintf (dump_file, "Function would become a private comdat called "
		 "outside of its comdat group.\n");
      return true;
    }

  if (issues.bit_aligned_aggregate_argument)
    {
      /* Let's only remove parameters/return values from such functions.
	 TODO: We could only prevent splitting the problematic parameters if
	 anybody thinks it is worth it.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "A call of %s has a bit-aligned aggregate "
		 "argument, disabling parameter splitting.\n",
		 node->dump_name ());

      isra_func_summary *ifs = func_sums->get (node);
      gcc_checking_assert (ifs);
      unsigned param_count = vec_safe_length (ifs->m_parameters);
      for (unsigned i = 0; i < param_count; i++)
	(*ifs->m_parameters)[i].split_candidate = false;
    }
  if (!issues.there_is_one)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "There is no call to %s that we can modify.  "
		 "Disabling all modifications.\n", node->dump_name ());
      return true;
    }
  return false;
}

/* Find the access with corresponding OFFSET and SIZE among accesses in
   PARAM_DESC and return it, or NULL if such an access is not there.  */

static param_access *
find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size)
{
  unsigned pclen = vec_safe_length (param_desc->accesses);

  /* The search is linear but the number of stored accesses is bound by
     PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8.  */

  for (unsigned i = 0; i < pclen; i++)
    if ((*param_desc->accesses)[i]->unit_offset == offset
	&& (*param_desc->accesses)[i]->unit_size == size)
      return (*param_desc->accesses)[i];

  return NULL;
}

/* Return true iff the total size SIZE of definite replacements would violate
   the limit set for it in DESC.  */
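/* Note that a by-value parameter must actually shrink for the splitting to be
   worthwhile, so replacements as big as the original are also rejected; for
   by-reference parameters the limit already includes the allowed growth
   factor.  */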
3267
3268static bool
3269size_would_violate_limit_p (isra_param_desc *desc, unsigned size)
3270{
3271 unsigned limit = desc->param_size_limit;
3272 if (size > limit
3273 || (!desc->by_ref && size == limit))
3274 return true;
3275 return false;
3276}
3277
3278/* Increase reached size of DESC by SIZE or disqualify it if it would violate
3279 the set limit. IDX is the parameter number which is dumped when
3280 disqualifying. */
3281
3282static void
3283bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx)
3284{
3285 unsigned after = desc->size_reached + size;
3286 if (size_would_violate_limit_p (desc, size: after))
3287 {
3288 if (dump_file && (dump_flags & TDF_DETAILS))
3289 fprintf (stream: dump_file, format: " ...size limit reached, disqualifying "
3290 "candidate parameter %u\n", idx);
3291 desc->split_candidate = false;
3292 return;
3293 }
3294 desc->size_reached = after;
3295}
3296
3297/* Take all actions required to deal with an edge CS that represents a call to
3298 an unknown or un-analyzed function, for both parameter removal and
3299 splitting. */
3300
3301static void
3302process_edge_to_unknown_caller (cgraph_edge *cs)
3303{
3304 isra_func_summary *from_ifs = func_sums->get (node: cs->caller);
3305 gcc_checking_assert (from_ifs);
3306 isra_call_summary *csum = call_sums->get (edge: cs);
3307
3308 if (dump_file && (dump_flags & TDF_DETAILS))
3309 fprintf (stream: dump_file, format: "Processing an edge to an unknown caller from %s:\n",
3310 cs->caller->dump_name ());
3311
3312 unsigned args_count = csum->m_arg_flow.length ();
3313 for (unsigned i = 0; i < args_count; i++)
3314 {
3315 isra_param_flow *ipf = &csum->m_arg_flow[i];
3316
      if (ipf->pointer_pass_through)
	{
	  isra_param_desc *param_desc
	    = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)];
	  param_desc->locally_unused = false;
	  param_desc->split_candidate = false;
	  continue;
	}
      if (ipf->aggregate_pass_through)
	{
	  unsigned idx = get_single_param_flow_source (ipf);
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];

	  param_desc->locally_unused = false;
	  if (!param_desc->split_candidate)
	    continue;
	  gcc_assert (!param_desc->by_ref);
	  param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
						  ipf->unit_size);
	  gcc_checking_assert (pacc);
	  pacc->certain = true;
	  if (overlapping_certain_accesses_p (param_desc, NULL))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " ...leading to overlap, "
			 " disqualifying candidate parameter %u\n",
			 idx);
	      param_desc->split_candidate = false;
	    }
	  else
	    bump_reached_size (param_desc, pacc->unit_size, idx);
	  ipf->aggregate_pass_through = false;
	  continue;
	}

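      /* Otherwise the argument is built from scalar uses of parameters and
	 an unknown callee can use it in any way, so conservatively mark all
	 of its inputs as used.  */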
      for (int j = 0; j < ipf->length; j++)
	{
	  int input_idx = ipf->inputs[j];
	  (*from_ifs->m_parameters)[input_idx].locally_unused = false;
	}
    }
}

/* Propagate parameter removal information through cross-SCC edge CS,
   i.e. mark each parameter of the caller which feeds an argument that the
   callee actually uses as used.  */

static void
param_removal_cross_scc_edge (cgraph_edge *cs)
{
  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);
  isra_func_summary *to_ifs = func_sums->get (callee);
  if (!to_ifs || !to_ifs->m_candidate
      || (availability < AVAIL_AVAILABLE)
      || vec_safe_is_empty (to_ifs->m_parameters))
    {
      process_edge_to_unknown_caller (cs);
      return;
    }
  isra_func_summary *from_ifs = func_sums->get (cs->caller);
  gcc_checking_assert (from_ifs);

  isra_call_summary *csum = call_sums->get (cs);
  unsigned args_count = csum->m_arg_flow.length ();
  unsigned param_count = vec_safe_length (to_ifs->m_parameters);

  for (unsigned i = 0; i < args_count; i++)
    {
      bool unused_in_callee;
      if (i < param_count)
	unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused;
      else
	unused_in_callee = false;

      if (!unused_in_callee)
	{
	  isra_param_flow *ipf = &csum->m_arg_flow[i];
	  for (int j = 0; j < ipf->length; j++)
	    {
	      int input_idx = ipf->inputs[j];
	      (*from_ifs->m_parameters)[input_idx].locally_unused = false;
	    }
	}
    }
}

/* Unless it is already there, push NODE which is also described by IFS to
   STACK.  */

static void
isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs,
			 vec<cgraph_node *> *stack)
{
  if (!ifs->m_queued)
    {
      ifs->m_queued = true;
      stack->safe_push (node);
    }
}

/* If parameter with index INPUT_IDX is marked as locally unused, mark it as
   used and push CALLER on STACK.  */

static void
isra_mark_caller_param_used (isra_func_summary *from_ifs, int input_idx,
			     cgraph_node *caller, vec<cgraph_node *> *stack)
{
  if ((*from_ifs->m_parameters)[input_idx].locally_unused)
    {
      (*from_ifs->m_parameters)[input_idx].locally_unused = false;
      isra_push_node_to_stack (caller, from_ifs, stack);
    }
}

/* Combine safe_size of DESC with SIZE and return true if it has changed.  */

static bool
update_safe_size (isra_param_desc *desc, unsigned size)
{
  if (!desc->safe_size_set)
    {
      desc->safe_size_set = 1;
      desc->safe_size = size;
      return true;
    }
  if (desc->safe_size <= size)
    return false;
  desc->safe_size = size;
  return true;
}

/* Set all param hints in DESC to the pessimistic values.  Return true if any
   hints that need to potentially trigger further propagation have changed.  */

static bool
flip_all_hints_pessimistic (isra_param_desc *desc)
{
  desc->not_specially_constructed = true;
  return update_safe_size (desc, 0);
}

/* Because we have encountered a caller which we have not analyzed or which is
   otherwise problematic, go over all parameter hint flags of IFS, which
   describes a call graph node of a callee, and make them pessimistic.  Return
   true if any hints that need to potentially trigger further propagation have
   changed.  */

static bool
flip_all_param_hints_pessimistic (isra_func_summary *ifs)
{
  if (!ifs || !ifs->m_candidate)
    return false;

  bool ret = false;
  unsigned param_count = vec_safe_length (ifs->m_parameters);

  for (unsigned i = 0; i < param_count; i++)
    ret |= flip_all_hints_pessimistic (&(*ifs->m_parameters)[i]);

  return ret;
}

/* Propagate hints across edge CS which ultimately leads to a node described
   by TO_IFS.  Return true if any hints of the callee which should potentially
   trigger further propagation have changed.  */

static bool
propagate_param_hints_accross_call (cgraph_edge *cs, isra_func_summary *to_ifs)
{
  if (!to_ifs || !to_ifs->m_candidate)
    return false;

  isra_call_summary *csum = call_sums->get (cs);
  bool ret = false;
  unsigned args_count = csum->m_arg_flow.length ();
  unsigned param_count = vec_safe_length (to_ifs->m_parameters);

  for (unsigned i = 0; i < param_count; i++)
    {
      isra_param_desc *desc = &(*to_ifs->m_parameters)[i];
      if (i >= args_count)
	{
	  ret |= flip_all_hints_pessimistic (desc);
	  continue;
	}

      if (desc->by_ref)
	{
	  isra_param_flow *ipf = &csum->m_arg_flow[i];

	  if (!ipf->constructed_for_calls)
	    desc->not_specially_constructed = true;

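	  /* When the pointer is merely passed through from one of the
	     caller's own parameters, whatever size is known to be safely
	     dereferenceable there is also safe here.  */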
	  if (ipf->pointer_pass_through)
	    {
	      isra_func_summary *from_ifs = func_sums->get (cs->caller);
	      int srcidx = get_single_param_flow_source (ipf);
	      if (vec_safe_length (from_ifs->m_parameters) > (unsigned) srcidx)
		{
		  isra_param_desc *src_d = &(*from_ifs->m_parameters)[srcidx];
		  if (src_d->safe_size_set)
		    ret |= update_safe_size (desc, src_d->safe_size);
		}
	      else
		ret |= update_safe_size (desc, 0);
	    }
	  else if (!ipf->aggregate_pass_through)
	    ret |= update_safe_size (desc, ipf->unit_size);
	  else
	    /* LTOing type-mismatched programs can end up here.  */
	    ret |= update_safe_size (desc, 0);
	}
    }
  return ret;
}

/* Propagate hints from NODE described by FROM_IFS to all its (direct)
   callees, push those that may need re-visiting onto STACK.  */

static void
propagate_hints_to_all_callees (cgraph_node *node, isra_func_summary *from_ifs,
				vec<cgraph_node *> *stack)
{
  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
    {
      enum availability availability;
      cgraph_node *callee = cs->callee->function_symbol (&availability);
      isra_func_summary *to_ifs = func_sums->get (callee);
      if (!from_ifs)
	{
	  if (flip_all_param_hints_pessimistic (to_ifs)
	      && ipa_edge_within_scc (cs))
	    isra_push_node_to_stack (callee, to_ifs, stack);
	}
      else if (propagate_param_hints_accross_call (cs, to_ifs)
	       && ipa_edge_within_scc (cs))
	isra_push_node_to_stack (callee, to_ifs, stack);
    }
}

/* Propagate information that any parameter is not used only locally within a
   SCC across CS to the caller, which must be in the same SCC as the callee.
   Push any callers that need to be re-processed to STACK.  */

static void
propagate_used_across_scc_edge (cgraph_edge *cs, vec<cgraph_node *> *stack)
{
  isra_func_summary *from_ifs = func_sums->get (cs->caller);
  if (!from_ifs || vec_safe_is_empty (from_ifs->m_parameters))
    return;

  isra_call_summary *csum = call_sums->get (cs);
  gcc_checking_assert (csum);
  unsigned args_count = csum->m_arg_flow.length ();
  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);
  isra_func_summary *to_ifs = func_sums->get (callee);

  unsigned param_count
    = (to_ifs && (availability >= AVAIL_AVAILABLE))
      ? vec_safe_length (to_ifs->m_parameters) : 0;
  for (unsigned i = 0; i < args_count; i++)
    {
      if (i < param_count
	  && (*to_ifs->m_parameters)[i].locally_unused)
	continue;

      /* The argument is needed in the callee, so we must mark the parameter
	 as used also in the caller and its callers within this SCC.  */
      isra_param_flow *ipf = &csum->m_arg_flow[i];
      for (int j = 0; j < ipf->length; j++)
	{
	  int input_idx = ipf->inputs[j];
	  isra_mark_caller_param_used (from_ifs, input_idx, cs->caller, stack);
	}
    }
}

/* Propagate information that any parameter is not used only locally within a
   SCC (i.e. is used also elsewhere) to all callers of NODE that are in the
   same SCC.  Push any callers that need to be re-processed to STACK.  */

static bool
propagate_used_to_scc_callers (cgraph_node *node, void *data)
{
  vec<cgraph_node *> *stack = (vec<cgraph_node *> *) data;
  cgraph_edge *cs;
  for (cs = node->callers; cs; cs = cs->next_caller)
    if (ipa_edge_within_scc (cs))
      propagate_used_across_scc_edge (cs, stack);
  return false;
}

/* Return true iff all certain accesses in ARG_DESC are also present as
   certain accesses in PARAM_DESC.  */

static bool
all_callee_accesses_present_p (isra_param_desc *param_desc,
			       isra_param_desc *arg_desc)
{
  unsigned aclen = vec_safe_length (arg_desc->accesses);
  for (unsigned j = 0; j < aclen; j++)
    {
      param_access *argacc = (*arg_desc->accesses)[j];
      if (!argacc->certain)
	continue;
      param_access *pacc = find_param_access (param_desc, argacc->unit_offset,
					      argacc->unit_size);
      if (!pacc
	  || !pacc->certain
	  || !types_compatible_p (argacc->type, pacc->type))
	return false;
    }
  return true;
}

/* Type internal to function pull_accesses_from_callee.  Unfortunately gcc 4.8
   does not allow instantiating an auto_vec with a type defined within a
   function so it is a global type.  */
enum acc_prop_kind {ACC_PROP_DONT, ACC_PROP_COPY, ACC_PROP_CERTAIN};


/* Attempt to propagate all definite accesses from ARG_DESC to PARAM_DESC
   (which belongs to CALLER) if they would not violate some constraint there.
   If successful, return NULL, otherwise return the string reason for failure
   (which can be written to the dump file).  DELTA_OFFSET is the known offset
   of the actual argument within the formal parameter (so of ARG_DESC within
   PARAM_DESC), ARG_SIZE is the size of the actual argument or zero, if not
   known.  In case of success, set *CHANGE_P to true if propagation actually
   changed anything.  */

static const char *
pull_accesses_from_callee (cgraph_node *caller, isra_param_desc *param_desc,
			   isra_param_desc *arg_desc,
			   unsigned delta_offset, unsigned arg_size,
			   bool *change_p)
{
  unsigned pclen = vec_safe_length (param_desc->accesses);
  unsigned aclen = vec_safe_length (arg_desc->accesses);
  unsigned prop_count = 0;
  unsigned prop_size = 0;
  bool change = false;

  auto_vec <enum acc_prop_kind, 8> prop_kinds (aclen);
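  /* First only plan what to do with each callee access, recording the
     decision in PROP_KINDS, so that either all of the propagation is
     performed below or, if any constraint is violated, none of it.  */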
  for (unsigned j = 0; j < aclen; j++)
    {
      param_access *argacc = (*arg_desc->accesses)[j];
      prop_kinds.safe_push (ACC_PROP_DONT);

      if (arg_size > 0
	  && argacc->unit_offset + argacc->unit_size > arg_size)
	return "callee access outside size boundary";

      if (!argacc->certain)
	continue;

      unsigned offset = argacc->unit_offset + delta_offset;
      /* Given that accesses are initially stored according to increasing
	 offset and decreasing size in case of equal offsets, the following
	 searches could be written more efficiently if we kept the ordering
	 when copying.  But the number of accesses is capped at
	 PARAM_IPA_SRA_MAX_REPLACEMENTS (so most likely 8) and the code gets
	 messy quickly, so let's improve on that only if necessary.  */

      bool exact_match = false;
      for (unsigned i = 0; i < pclen; i++)
	{
	  /* Check for overlaps.  */
	  param_access *pacc = (*param_desc->accesses)[i];
	  if (pacc->unit_offset == offset
	      && pacc->unit_size == argacc->unit_size)
	    {
	      if (argacc->alias_ptr_type != pacc->alias_ptr_type
		  || !types_compatible_p (argacc->type, pacc->type)
		  || argacc->reverse != pacc->reverse)
		return "propagated access types would not match existing ones";

	      exact_match = true;
	      if (!pacc->certain)
		{
		  prop_kinds[j] = ACC_PROP_CERTAIN;
		  prop_size += argacc->unit_size;
		  change = true;
		}
	      continue;
	    }

	  if (offset < pacc->unit_offset + pacc->unit_size
	      && offset + argacc->unit_size > pacc->unit_offset)
	    {
	      /* A partial overlap is never permissible with an already
		 certain access in the caller, but the propagated access may
		 fit entirely within a non-certain one.  */
	      if (pacc->certain
		  || offset < pacc->unit_offset
		  || (offset + argacc->unit_size
		      > pacc->unit_offset + pacc->unit_size))
		return "a propagated access would conflict in caller";
	    }
	}

      if (!exact_match)
	{
	  prop_kinds[j] = ACC_PROP_COPY;
	  prop_count++;
	  prop_size += argacc->unit_size;
	  change = true;
	}
    }

  if (!change)
    return NULL;

  if ((prop_count + pclen
       > (unsigned) opt_for_fn (caller->decl, param_ipa_sra_max_replacements))
      || size_would_violate_limit_p (param_desc,
				     param_desc->size_reached + prop_size))
    return "propagating accesses would violate the count or size limit";

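  /* All constraints have been checked, commit the planned changes.  */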
  *change_p = true;
  for (unsigned j = 0; j < aclen; j++)
    {
      if (prop_kinds[j] == ACC_PROP_COPY)
	{
	  param_access *argacc = (*arg_desc->accesses)[j];

	  param_access *copy = ggc_cleared_alloc<param_access> ();
	  copy->unit_offset = argacc->unit_offset + delta_offset;
	  copy->unit_size = argacc->unit_size;
	  copy->type = argacc->type;
	  copy->alias_ptr_type = argacc->alias_ptr_type;
	  copy->certain = true;
	  copy->reverse = argacc->reverse;
	  vec_safe_push (param_desc->accesses, copy);
	}
      else if (prop_kinds[j] == ACC_PROP_CERTAIN)
	{
	  param_access *argacc = (*arg_desc->accesses)[j];
	  param_access *csp
	    = find_param_access (param_desc,
				 argacc->unit_offset + delta_offset,
				 argacc->unit_size);
	  csp->certain = true;
	}
    }

  param_desc->size_reached += prop_size;

  return NULL;
}

/* Propagate parameter splitting information through call graph edge CS.
   Return true if any changes that might need to be propagated within SCCs
   have been made.  The function also clears the aggregate_pass_through and
   pointer_pass_through flags in call summaries which do not need to be
   processed again if this CS is revisited when iterating while changes are
   propagated within an SCC.  */

static bool
param_splitting_across_edge (cgraph_edge *cs)
{
  bool res = false;
  bool cross_scc = !ipa_edge_within_scc (cs);
  enum availability availability;
  cgraph_node *callee = cs->callee->function_symbol (&availability);
  isra_func_summary *from_ifs = func_sums->get (cs->caller);
  gcc_checking_assert (from_ifs && from_ifs->m_parameters);

  isra_call_summary *csum = call_sums->get (cs);
  gcc_checking_assert (csum);
  unsigned args_count = csum->m_arg_flow.length ();
  isra_func_summary *to_ifs = func_sums->get (callee);
  unsigned param_count
    = ((to_ifs && to_ifs->m_candidate && (availability >= AVAIL_AVAILABLE))
       ? vec_safe_length (to_ifs->m_parameters)
       : 0);
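  /* Note that if the callee is unavailable or is not an analyzed candidate,
     PARAM_COUNT is zero and any pass-through arguments are dealt with by the
     argument-parameter count mismatch loop at the end of this function.  */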

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Splitting across %s->%s:\n",
	     cs->caller->dump_name (), callee->dump_name ());

  unsigned i;
  for (i = 0; (i < args_count) && (i < param_count); i++)
    {
      isra_param_desc *arg_desc = &(*to_ifs->m_parameters)[i];
      isra_param_flow *ipf = &csum->m_arg_flow[i];

      if (arg_desc->locally_unused)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " ->%u: unused in callee\n", i);
	  ipf->pointer_pass_through = false;
	  continue;
	}

      if (ipf->pointer_pass_through)
	{
	  int idx = get_single_param_flow_source (ipf);
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
	  if (!param_desc->split_candidate)
	    continue;
	  gcc_assert (param_desc->by_ref);

	  if (!arg_desc->split_candidate || !arg_desc->by_ref)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " %u->%u: not candidate or not by "
			 "reference in callee\n", idx, i);
	      param_desc->split_candidate = false;
	      ipf->pointer_pass_through = false;
	      res = true;
	    }
	  else if (!ipf->safe_to_import_accesses)
	    {
	      if (!csum->m_before_any_store
		  || !all_callee_accesses_present_p (param_desc, arg_desc))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: cannot import accesses.\n",
			     idx, i);
		  param_desc->split_candidate = false;
		  ipf->pointer_pass_through = false;
		  res = true;
		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: verified callee accesses "
			     "present.\n", idx, i);
		  if (cross_scc)
		    ipf->pointer_pass_through = false;
		}
	    }
	  else
	    {
	      const char *pull_failure
		= pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
					     0, 0, &res);
	      if (pull_failure)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: by_ref access pull "
			     "failed: %s.\n", idx, i, pull_failure);
		  param_desc->split_candidate = false;
		  ipf->pointer_pass_through = false;
		  res = true;
		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: by_ref access pull "
			     "succeeded.\n", idx, i);
		  if (cross_scc)
		    ipf->pointer_pass_through = false;
		}
	    }
	}
      else if (ipf->aggregate_pass_through)
	{
	  int idx = get_single_param_flow_source (ipf);
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
	  if (!param_desc->split_candidate)
	    continue;
	  gcc_assert (!param_desc->by_ref);
	  param_access *pacc = find_param_access (param_desc, ipf->unit_offset,
						  ipf->unit_size);
	  gcc_checking_assert (pacc);

	  if (pacc->certain)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " %u->%u: already certain\n", idx, i);
	      ipf->aggregate_pass_through = false;
	    }
	  else if (!arg_desc->split_candidate || arg_desc->by_ref)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " %u->%u: not candidate or by "
			 "reference in callee\n", idx, i);

	      pacc->certain = true;
	      if (overlapping_certain_accesses_p (param_desc, NULL))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " ...leading to overlap, "
			     " disqualifying candidate parameter %u\n",
			     idx);
		  param_desc->split_candidate = false;
		}
	      else
		bump_reached_size (param_desc, pacc->unit_size, idx);

	      ipf->aggregate_pass_through = false;
	      res = true;
	    }
	  else
	    {
	      const char *pull_failure
		= pull_accesses_from_callee (cs->caller, param_desc, arg_desc,
					     ipf->unit_offset,
					     ipf->unit_size, &res);
	      if (pull_failure)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: arg access pull "
			     "failed: %s.\n", idx, i, pull_failure);

		  ipf->aggregate_pass_through = false;
		  pacc->certain = true;

		  if (overlapping_certain_accesses_p (param_desc, NULL))
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file, " ...leading to overlap, "
				 " disqualifying candidate parameter %u\n",
				 idx);
		      param_desc->split_candidate = false;
		    }
		  else
		    bump_reached_size (param_desc, pacc->unit_size, idx);

		  res = true;
		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, " %u->%u: arg access pull "
			     "succeeded.\n", idx, i);
		  if (cross_scc)
		    ipf->aggregate_pass_through = false;
		}
	    }
	}
    }

  /* Handle argument-parameter count mismatches.  */
  for (; (i < args_count); i++)
    {
      isra_param_flow *ipf = &csum->m_arg_flow[i];

      if (ipf->pointer_pass_through || ipf->aggregate_pass_through)
	{
	  int idx = get_single_param_flow_source (ipf);
	  ipf->pointer_pass_through = false;
	  ipf->aggregate_pass_through = false;
	  isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx];
	  if (!param_desc->split_candidate)
	    continue;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " %u->%u: no corresponding formal parameter\n",
		     idx, i);
	  param_desc->split_candidate = false;
	  res = true;
	}
    }
  return res;
}

/* Worker for call_for_symbol_and_aliases: return false if all callers of NODE
   either ignore its return value, or come from the same SCC and use the
   return value only to compute their own return value; otherwise return
   true.  */

static bool
retval_used_p (cgraph_node *node, void *)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    {
      isra_call_summary *csum = call_sums->get (cs);
      gcc_checking_assert (csum);
      if (csum->m_return_ignored)
	continue;
      if (!csum->m_return_returned)
	return true;

      isra_func_summary *from_ifs = func_sums->get (cs->caller);
      if (!from_ifs || !from_ifs->m_candidate)
	return true;

      if (!ipa_edge_within_scc (cs)
	  && !from_ifs->m_return_ignored)
	return true;
    }

  return false;
}

/* Push into NEW_PARAMS all required parameter adjustment entries to copy or
   modify the parameter which originally had index BASE_INDEX, which had index
   PREV_CLONE_INDEX in the adjustment vector of the parent clone (if any) and
   was described there by PREV_ADJUSTMENT.  If IPA-CP has created a
   transformation summary for the original node, it needs to be passed in
   IPCP_TS, otherwise it should be NULL.  If the parent clone is the original
   function, PREV_ADJUSTMENT is NULL and PREV_CLONE_INDEX is equal to
   BASE_INDEX.  */

static void
push_param_adjustments_for_index (isra_func_summary *ifs, unsigned base_index,
				  unsigned prev_clone_index,
				  ipa_adjusted_param *prev_adjustment,
				  ipcp_transformation *ipcp_ts,
				  vec<ipa_adjusted_param, va_gc> **new_params)
{
  isra_param_desc *desc = &(*ifs->m_parameters)[base_index];
  if (desc->locally_unused)
    {
      if (dump_file)
	fprintf (dump_file, " Will remove parameter %u\n", base_index);
      return;
    }

  if (!desc->split_candidate)
    {
      ipa_adjusted_param adj;
      if (prev_adjustment)
	{
	  adj = *prev_adjustment;
	  adj.prev_clone_adjustment = true;
	  adj.prev_clone_index = prev_clone_index;
	}
      else
	{
	  memset (&adj, 0, sizeof (adj));
	  adj.op = IPA_PARAM_OP_COPY;
	  adj.base_index = base_index;
	  adj.prev_clone_index = prev_clone_index;
	}
      vec_safe_push ((*new_params), adj);
      return;
    }

  if (dump_file)
    fprintf (dump_file, " Will split parameter %u\n", base_index);

  gcc_assert (!prev_adjustment || prev_adjustment->op == IPA_PARAM_OP_COPY);
  unsigned aclen = vec_safe_length (desc->accesses);
  for (unsigned j = 0; j < aclen; j++)
    {
      param_access *pa = (*desc->accesses)[j];
      if (!pa->certain)
	continue;

      if (ipcp_ts)
	{
	  ipa_argagg_value_list avl (ipcp_ts);
	  tree value = avl.get_value (base_index, pa->unit_offset);
	  if (value && !AGGREGATE_TYPE_P (pa->type))
	    {
	      if (dump_file)
		fprintf (dump_file, " - omitting component at byte "
			 "offset %u which is known to have a constant value\n",
			 pa->unit_offset);
	      continue;
	    }
	}

      if (dump_file)
	fprintf (dump_file, " - component at byte offset %u, "
		 "size %u\n", pa->unit_offset, pa->unit_size);

      ipa_adjusted_param adj;
      memset (&adj, 0, sizeof (adj));
      adj.op = IPA_PARAM_OP_SPLIT;
      adj.base_index = base_index;
      adj.prev_clone_index = prev_clone_index;
      adj.param_prefix_index = IPA_PARAM_PREFIX_ISRA;
      adj.reverse = pa->reverse;
      adj.type = pa->type;
      adj.alias_ptr_type = pa->alias_ptr_type;
      adj.unit_offset = pa->unit_offset;
      vec_safe_push ((*new_params), adj);
    }
}

/* Worker for call_for_symbol_thunks_and_aliases: set the calls_comdat_local
   flag of all callers of NODE.  */

static bool
mark_callers_calls_comdat_local (struct cgraph_node *node, void *)
{
  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    cs->caller->calls_comdat_local = true;
  return false;
}

/* Remove any IPA-CP results stored in TS that are associated with removed
   parameters as marked in IFS.  */

static void
zap_useless_ipcp_results (const isra_func_summary *ifs, ipcp_transformation *ts)
{
  ts->remove_argaggs_if ([ifs](const ipa_argagg_value &v)
    {
      return (*ifs->m_parameters)[v.index].locally_unused;
    });

  bool useful_vr = false;
  unsigned count = vec_safe_length (ts->m_vr);
  for (unsigned i = 0; i < count; i++)
    if ((*ts->m_vr)[i].known_p ())
      {
	const isra_param_desc *desc = &(*ifs->m_parameters)[i];
	if (desc->locally_unused)
	  (*ts->m_vr)[i].set_unknown ();
	else
	  useful_vr = true;
      }
  if (!useful_vr)
    ts->m_vr = NULL;
}

/* Do final processing of results of IPA propagation regarding NODE, clone it
   if appropriate.  */

static void
process_isra_node_results (cgraph_node *node,
			   hash_map<const char *, unsigned> *clone_num_suffixes)
{
  isra_func_summary *ifs = func_sums->get (node);
  if (!ifs || !ifs->m_candidate)
    return;

  auto_vec<bool, 16> surviving_params;
  bool check_surviving = false;
  clone_info *cinfo = clone_info::get (node);
  if (cinfo && cinfo->param_adjustments)
    {
      check_surviving = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);
    }

  unsigned param_count = vec_safe_length (ifs->m_parameters);
  bool will_change_function = false;
  if (ifs->m_returns_value && ifs->m_return_ignored)
    will_change_function = true;
  else
    for (unsigned i = 0; i < param_count; i++)
      {
	isra_param_desc *desc = &(*ifs->m_parameters)[i];
	if ((desc->locally_unused || desc->split_candidate)
	    /* Make sure we do not clone just to attempt to remove an already
	       removed unused argument.  */
	    && (!check_surviving
		|| (i < surviving_params.length ()
		    && surviving_params[i])))
	  {
	    will_change_function = true;
	    break;
	  }
      }
  if (!will_change_function)
    return;

  if (dump_file)
    {
      fprintf (dump_file, "\nEvaluating analysis results for %s\n",
	       node->dump_name ());
      if (ifs->m_returns_value && ifs->m_return_ignored)
	fprintf (dump_file, " Will remove return value.\n");
    }

  ipcp_transformation *ipcp_ts = ipcp_get_transformation_summary (node);
  if (ipcp_ts)
    zap_useless_ipcp_results (ifs, ipcp_ts);
  vec<ipa_adjusted_param, va_gc> *new_params = NULL;
  if (ipa_param_adjustments *old_adjustments
      = cinfo ? cinfo->param_adjustments : NULL)
    {
      unsigned old_adj_len = vec_safe_length (old_adjustments->m_adj_params);
      for (unsigned i = 0; i < old_adj_len; i++)
	{
	  ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
	  push_param_adjustments_for_index (ifs, old_adj->base_index, i,
					    old_adj, ipcp_ts, &new_params);
	}
    }
  else
    for (unsigned i = 0; i < param_count; i++)
      push_param_adjustments_for_index (ifs, i, i, NULL, ipcp_ts, &new_params);

  ipa_param_adjustments *new_adjustments
    = (new (ggc_alloc <ipa_param_adjustments> ())
       ipa_param_adjustments (new_params, param_count,
			      ifs->m_returns_value && ifs->m_return_ignored));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n Created adjustments:\n");
      new_adjustments->dump (dump_file);
    }

  unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
    IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
			  node->decl)));
  auto_vec<cgraph_edge *> callers = node->collect_callers ();
  cgraph_node *new_node
    = node->create_virtual_clone (callers, NULL, new_adjustments, "isra",
				  suffix_counter);
  suffix_counter++;
  if (node->calls_comdat_local && node->same_comdat_group)
    {
      new_node->add_to_same_comdat_group (node);
      new_node->call_for_symbol_and_aliases (mark_callers_calls_comdat_local,
					     NULL, true);
    }
  new_node->calls_comdat_local = node->calls_comdat_local;

  if (dump_file)
    fprintf (dump_file, " Created new node %s\n", new_node->dump_name ());
  callers.release ();
}

/* If INDICES is not empty, dump a combination of NODE's dump_name and MSG
   followed by the list of numbers in INDICES.  */

static void
dump_list_of_param_indices (const cgraph_node *node, const char *msg,
			    const vec<unsigned> &indices)
{
  if (indices.is_empty ())
    return;
  fprintf (dump_file, "The following parameters of %s %s:", node->dump_name (),
	   msg);
  for (unsigned i : indices)
    fprintf (dump_file, " %u", i);
  fprintf (dump_file, "\n");
}

/* Check which parameters of NODE described by IFS have survived until IPA-SRA
   and disable transformations for those which have not or which should not be
   transformed because the associated debug counter reached its limit.  Return
   true if none survived or if there were no candidates to begin with.
   Additionally, also adjust parameter descriptions based on debug counters
   and hints propagated earlier.  */

static bool
adjust_parameter_descriptions (cgraph_node *node, isra_func_summary *ifs)
{
  bool ret = true;
  unsigned len = vec_safe_length (ifs->m_parameters);
  if (!len)
    return true;

  auto_vec<bool, 16> surviving_params;
  bool check_surviving = false;
  clone_info *cinfo = clone_info::get (node);
  if (cinfo && cinfo->param_adjustments)
    {
      check_surviving = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);
    }
  ipcp_transformation *ipcp_ts = ipcp_get_transformation_summary (node);
  auto_vec <unsigned> dump_dead_indices;
  auto_vec <unsigned> dump_bad_cond_indices;
  for (unsigned i = 0; i < len; i++)
    {
      isra_param_desc *desc = &(*ifs->m_parameters)[i];
      if (!dbg_cnt (ipa_sra_params))
	{
	  desc->locally_unused = false;
	  desc->split_candidate = false;
	  continue;
	}

      if (desc->split_only_when_retval_removed
	  && !ifs->m_return_ignored)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS)
	      && (desc->locally_unused || desc->split_candidate))
	    dump_bad_cond_indices.safe_push (i);

	  gcc_checking_assert (!desc->locally_unused
			       || desc->remove_only_when_retval_removed);
	  desc->locally_unused = false;
	  desc->split_candidate = false;
	  continue;
	}
      if (desc->remove_only_when_retval_removed
	  && !ifs->m_return_ignored)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS)
	      && (desc->locally_unused || desc->split_candidate))
	    dump_bad_cond_indices.safe_push (i);

	  desc->locally_unused = false;
	}
      if (check_surviving
	  && (i >= surviving_params.length ()
	      || !surviving_params[i]))
	{
	  /* Even if the parameter was removed by a previous IPA pass, we do
	     not clear locally_unused because if it really is unused, this
	     information might be useful in callers.  */
	  desc->split_candidate = false;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    dump_dead_indices.safe_push (i);
	}

      if (desc->split_candidate && desc->conditionally_dereferenceable)
	{
	  gcc_assert (desc->safe_size_set);
	  for (param_access *pa : *desc->accesses)
	    if ((pa->unit_offset + pa->unit_size) > desc->safe_size)
	      {
		if (dump_file && (dump_flags & TDF_DETAILS))
		  dump_bad_cond_indices.safe_push (i);
		desc->split_candidate = false;
		break;
	      }
	}

      if (desc->split_candidate)
	{
	  if (desc->by_ref && !desc->not_specially_constructed)
	    {
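	      /* All analyzed call sites pass this pointer the address of
		 data constructed specially for the call, making splitting
		 more likely to pay off, so allow a scaled-up size limit.  */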
	      int extra_factor
		= opt_for_fn (node->decl,
			      param_ipa_sra_ptrwrap_growth_factor);
	      desc->param_size_limit = extra_factor * desc->param_size_limit;
	    }
	  if (size_would_violate_limit_p (desc, desc->size_reached))
	    desc->split_candidate = false;
	}

      /* Avoid ICEs on size-mismatched VIEW_CONVERT_EXPRs when callers and
	 callees don't agree on types in aggregates and we try to do both
	 IPA-CP and IPA-SRA.  */
      if (ipcp_ts && desc->split_candidate)
	{
	  ipa_argagg_value_list avl (ipcp_ts);
	  for (const param_access *pa : desc->accesses)
	    {
	      if (!pa->certain)
		continue;
	      tree value = avl.get_value (i, pa->unit_offset);
	      if (value
		  && ((tree_to_uhwi (TYPE_SIZE (TREE_TYPE (value)))
		       / BITS_PER_UNIT)
		      != pa->unit_size))
		{
		  desc->split_candidate = false;
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    dump_dead_indices.safe_push (i);
		  break;
		}
	    }
	}

      if (desc->locally_unused || desc->split_candidate)
	ret = false;
    }

  dump_list_of_param_indices (node, "are dead on arrival or have a type "
			      "mismatch with IPA-CP", dump_dead_indices);
  dump_list_of_param_indices (node, "fail additional requirements",
			      dump_bad_cond_indices);

  return ret;
}


/* Run the interprocedural part of IPA-SRA.  */

static unsigned int
ipa_sra_analysis (void)
{
  if (dump_file)
    {
      fprintf (dump_file, "\n========== IPA-SRA IPA stage ==========\n");
      ipa_sra_dump_all_summaries (dump_file, false);
    }

  gcc_checking_assert (func_sums);
  gcc_checking_assert (call_sums);
  cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
  auto_vec <cgraph_node *, 16> stack;
  int node_scc_count = ipa_reduced_postorder (order, true, NULL);
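  /* ORDER is a postorder of the reduced (SCC-collapsed) call graph, so
     iterating it forward visits callees before their callers; the
     return-value sweep needs the opposite direction and therefore walks it
     backwards.  */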

  /* One sweep from callers to callees for return value removal.  */
  for (int i = node_scc_count - 1; i >= 0 ; i--)
    {
      cgraph_node *scc_rep = order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);

      /* Preliminary IPA function level checks.  */
      for (cgraph_node *v : cycle_nodes)
	{
	  isra_func_summary *ifs = func_sums->get (v);
	  if (!ifs || !ifs->m_candidate)
	    continue;
	  if (!ipa_sra_ipa_function_checks (v)
	      || check_all_callers_for_issues (v))
	    ifs->zap ();
	}

      for (cgraph_node *v : cycle_nodes)
	{
	  isra_func_summary *ifs = func_sums->get (v);
	  if (!ifs || !ifs->m_candidate)
	    continue;
	  bool return_needed
	    = (ifs->m_returns_value
	       && (!dbg_cnt (ipa_sra_retvalues)
		   || v->call_for_symbol_and_aliases (retval_used_p,
						      NULL, true)));
	  ifs->m_return_ignored = !return_needed;
	  if (return_needed)
	    isra_push_node_to_stack (v, ifs, &stack);
	}

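      /* Iterate within the SCC: a return value found to be needed makes the
	 return values of callees whose result is merely returned onwards
	 needed as well.  */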
      while (!stack.is_empty ())
	{
	  cgraph_node *node = stack.pop ();
	  isra_func_summary *ifs = func_sums->get (node);
	  gcc_checking_assert (ifs && ifs->m_queued);
	  ifs->m_queued = false;

	  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
	    if (ipa_edge_within_scc (cs)
		&& call_sums->get (cs)->m_return_returned)
	      {
		enum availability av;
		cgraph_node *callee = cs->callee->function_symbol (&av);
		isra_func_summary *to_ifs = func_sums->get (callee);
		if (to_ifs && to_ifs->m_return_ignored)
		  {
		    to_ifs->m_return_ignored = false;
		    isra_push_node_to_stack (callee, to_ifs, &stack);
		  }
	      }
	}

      /* Parameter hint propagation.  */
      for (cgraph_node *v : cycle_nodes)
	{
	  isra_func_summary *ifs = func_sums->get (v);
	  propagate_hints_to_all_callees (v, ifs, &stack);
	}

      while (!stack.is_empty ())
	{
	  cgraph_node *node = stack.pop ();
	  isra_func_summary *ifs = func_sums->get (node);
	  gcc_checking_assert (ifs && ifs->m_queued);
	  ifs->m_queued = false;
	  propagate_hints_to_all_callees (node, ifs, &stack);
	}

      cycle_nodes.release ();
    }

  /* One sweep from callees to callers for parameter removal and splitting.  */
  for (int i = 0; i < node_scc_count; i++)
    {
      cgraph_node *scc_rep = order[i];
      vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (scc_rep);

      /* First step of parameter removal.  */
      for (cgraph_node *v : cycle_nodes)
	{
	  isra_func_summary *ifs = func_sums->get (v);
	  if (!ifs || !ifs->m_candidate)
	    continue;
	  if (adjust_parameter_descriptions (v, ifs))
	    continue;
	  for (cgraph_edge *cs = v->indirect_calls; cs; cs = cs->next_callee)
	    process_edge_to_unknown_caller (cs);
	  for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
	    if (!ipa_edge_within_scc (cs))
	      param_removal_cross_scc_edge (cs);
	}

      /* Look at edges within the current SCC and propagate used-ness across
	 them, pushing onto the stack all nodes which might need to be
	 revisited.  */
      for (cgraph_node *v : cycle_nodes)
	v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
					       &stack, true);

      /* Keep revisiting and pushing until nothing changes.  */
      while (!stack.is_empty ())
	{
	  cgraph_node *v = stack.pop ();
	  isra_func_summary *ifs = func_sums->get (v);
	  gcc_checking_assert (ifs && ifs->m_queued);
	  ifs->m_queued = false;

	  v->call_for_symbol_thunks_and_aliases (propagate_used_to_scc_callers,
						 &stack, true);
	}

      /* Parameter splitting.  */
      bool repeat_scc_access_propagation;
      do
	{
	  repeat_scc_access_propagation = false;
	  for (cgraph_node *v : cycle_nodes)
	    {
	      isra_func_summary *ifs = func_sums->get (v);
	      if (!ifs
		  || !ifs->m_candidate
		  || vec_safe_is_empty (ifs->m_parameters))
		continue;
	      for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
		if (param_splitting_across_edge (cs))
		  repeat_scc_access_propagation = true;
	    }
	}
      while (repeat_scc_access_propagation);

      if (flag_checking)
	for (cgraph_node *v : cycle_nodes)
	  verify_splitting_accesses (v, true);

      cycle_nodes.release ();
    }

  ipa_free_postorder_info ();
  free (order);

  if (dump_file)
    {
      if (dump_flags & TDF_DETAILS)
	{
	  fprintf (dump_file, "\n========== IPA-SRA propagation final state "
		   " ==========\n");
	  ipa_sra_dump_all_summaries (dump_file, true);
	}
      fprintf (dump_file, "\n========== IPA-SRA decisions ==========\n");
    }

  hash_map<const char *, unsigned> *clone_num_suffixes
    = new hash_map<const char *, unsigned>;

  cgraph_node *node;
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    process_isra_node_results (node, clone_num_suffixes);

  delete clone_num_suffixes;
  ggc_delete (func_sums);
  func_sums = NULL;
  delete call_sums;
  call_sums = NULL;

  if (dump_file)
    fprintf (dump_file, "\n========== IPA-SRA IPA analysis done "
	     "==========\n\n");
  return 0;
}


const pass_data pass_data_ipa_sra =
{
  IPA_PASS, /* type */
  "sra", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_SRA, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};

class pass_ipa_sra : public ipa_opt_pass_d
{
public:
  pass_ipa_sra (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_sra, ctxt,
		      ipa_sra_generate_summary, /* generate_summary */
		      ipa_sra_write_summary, /* write_summary */
		      ipa_sra_read_summary, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
    {}

  /* opt_pass methods: */
  bool gate (function *) final override
    {
      /* TODO: We should remove the optimize check after we ensure we never
	 run IPA passes when not optimizing.  */
      return (flag_ipa_sra && optimize);
    }

  unsigned int execute (function *) final override
    {
      return ipa_sra_analysis ();
    }

}; // class pass_ipa_sra

} // anon namespace

/* Intraprocedural part of IPA-SRA analysis.  Scan function body of NODE and
   create a summary structure describing IPA-SRA opportunities and constraints
   in it.  */

static void
ipa_sra_summarize_function (cgraph_node *node)
{
  if (dump_file)
    fprintf (dump_file, "Creating summary for %s/%i:\n", node->name (),
	     node->order);
  gcc_obstack_init (&gensum_obstack);
  loaded_decls = new hash_set<tree>;

  isra_func_summary *ifs = NULL;
  unsigned count = 0;
  if (ipa_sra_preliminary_function_checks (node))
    {
      ifs = func_sums->get_create (node);
      ifs->m_candidate = true;
      tree ret = TREE_TYPE (TREE_TYPE (node->decl));
      ifs->m_returns_value = (TREE_CODE (ret) != VOID_TYPE);
      for (tree parm = DECL_ARGUMENTS (node->decl);
	   parm;
	   parm = DECL_CHAIN (parm))
	count++;
    }
  auto_vec<gensum_param_desc, 16> param_descriptions (count);

  struct function *fun = DECL_STRUCT_FUNCTION (node->decl);
  bool cfun_pushed = false;
  if (count > 0)
    {
      decl2desc = new hash_map<tree, gensum_param_desc *>;
      param_descriptions.reserve_exact (count);
      param_descriptions.quick_grow_cleared (count);

      if (create_parameter_descriptors (node, &param_descriptions))
	{
	  push_cfun (fun);
	  cfun_pushed = true;
	  final_bbs = BITMAP_ALLOC (NULL);
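	  /* BB_DEREFERENCES is used as a flat matrix holding, for each
	     unsafe by-ref candidate parameter and each basic block, the
	     number of bytes of the pointed-to data that the block is known
	     to dereference.  */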
	  bb_dereferences = XCNEWVEC (HOST_WIDE_INT,
				      unsafe_by_ref_count
				      * last_basic_block_for_fn (fun));
	  aa_walking_limit = opt_for_fn (node->decl, param_ipa_max_aa_steps);
	}
    }
  /* scan_function is run even when there are no removal or splitting
     candidates so that we can calculate hints on call edges which can be
     useful in callees.  */
  scan_function (node, fun);

  if (count > 0)
    {
      if (dump_file)
	{
	  dump_gensum_param_descriptors (dump_file, node->decl,
					 &param_descriptions);
	  fprintf (dump_file, "----------------------------------------\n");
	}

      process_scan_results (node, fun, ifs, &param_descriptions);

      if (cfun_pushed)
	pop_cfun ();
      if (bb_dereferences)
	{
	  free (bb_dereferences);
	  bb_dereferences = NULL;
	  BITMAP_FREE (final_bbs);
	  final_bbs = NULL;
	}
    }
  isra_analyze_all_outgoing_calls (node);

  delete loaded_decls;
  loaded_decls = NULL;
  if (decl2desc)
    {
      delete decl2desc;
      decl2desc = NULL;
    }
  obstack_free (&gensum_obstack, NULL);
  if (dump_file)
    fprintf (dump_file, "\n\n");
  if (flag_checking)
    verify_splitting_accesses (node, false);
  return;
}

ipa_opt_pass_d *
make_pass_ipa_sra (gcc::context *ctxt)
{
  return new pass_ipa_sra (ctxt);
}


#include "gt-ipa-sra.h"
