1/* Basic IPA optimizations based on profile.
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
19
/* ipa-profile pass implements the following analysis propagating profile
   inter-procedurally.
22
23 - Count histogram construction. This is a histogram analyzing how much
24 time is spent executing statements with a given execution count read
25 from profile feedback. This histogram is complete only with LTO,
26 otherwise it contains information only about the current unit.
27
28 The information is used to set hot/cold thresholds.
   - Next speculative indirect call resolution is performed:  the local
     profile pass assigns a profile-id to each function and provides us with
     a histogram specifying the most common target.  We look up the callgraph
     node corresponding to the target and produce a speculative call.
33
34 This call may or may not survive through IPA optimization based on decision
35 of inliner.
36 - Finally we propagate the following flags: unlikely executed, executed
37 once, executed at startup and executed at exit. These flags are used to
38 control code size/performance threshold and code placement (by producing
39 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
40#include "config.h"
41#include "system.h"
42#include "coretypes.h"
43#include "backend.h"
44#include "tree.h"
45#include "gimple.h"
46#include "predict.h"
47#include "alloc-pool.h"
48#include "tree-pass.h"
49#include "cgraph.h"
50#include "data-streamer.h"
51#include "gimple-iterator.h"
52#include "ipa-utils.h"
53#include "profile.h"
54#include "value-prof.h"
55#include "tree-inline.h"
56#include "symbol-summary.h"
57#include "tree-vrp.h"
58#include "ipa-prop.h"
59#include "ipa-fnsummary.h"
60
61/* Entry in the histogram. */
62
63struct histogram_entry
64{
65 gcov_type count;
66 int time;
67 int size;
68};
69
70/* Histogram of profile values.
71 The histogram is represented as an ordered vector of entries allocated via
72 histogram_pool. During construction a separate hashtable is kept to lookup
73 duplicate entries. */
74
75vec<histogram_entry *> histogram;
76static object_allocator<histogram_entry> histogram_pool ("IPA histogram");
77
78/* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */
79
80struct histogram_hash : nofree_ptr_hash <histogram_entry>
81{
82 static inline hashval_t hash (const histogram_entry *);
83 static inline int equal (const histogram_entry *, const histogram_entry *);
84};
85
86inline hashval_t
87histogram_hash::hash (const histogram_entry *val)
88{
89 return val->count;
90}
91
92inline int
93histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
94{
95 return val->count == val2->count;
96}
97
98/* Account TIME and SIZE executed COUNT times into HISTOGRAM.
99 HASHTABLE is the on-side hash kept to avoid duplicates. */
100
101static void
102account_time_size (hash_table<histogram_hash> *hashtable,
103 vec<histogram_entry *> &histogram,
104 gcov_type count, int time, int size)
105{
106 histogram_entry key = {.count: count, .time: 0, .size: 0};
107 histogram_entry **val = hashtable->find_slot (value: &key, insert: INSERT);
108
109 if (!*val)
110 {
111 *val = histogram_pool.allocate ();
112 **val = key;
113 histogram.safe_push (obj: *val);
114 }
115 (*val)->time += time;
116 (*val)->size += size;
117}
118
119int
120cmp_counts (const void *v1, const void *v2)
121{
122 const histogram_entry *h1 = *(const histogram_entry * const *)v1;
123 const histogram_entry *h2 = *(const histogram_entry * const *)v2;
124 if (h1->count < h2->count)
125 return 1;
126 if (h1->count > h2->count)
127 return -1;
128 return 0;
129}
130
131/* Dump HISTOGRAM to FILE. */
132
133static void
134dump_histogram (FILE *file, vec<histogram_entry *> histogram)
135{
136 unsigned int i;
137 gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0,
138 overall_size = 0;
139
140 fprintf (stream: dump_file, format: "Histogram:\n");
141 for (i = 0; i < histogram.length (); i++)
142 {
143 overall_time += histogram[i]->count * histogram[i]->time;
144 overall_size += histogram[i]->size;
145 }
146 if (!overall_time)
147 overall_time = 1;
148 if (!overall_size)
149 overall_size = 1;
150 for (i = 0; i < histogram.length (); i++)
151 {
152 cumulated_time += histogram[i]->count * histogram[i]->time;
153 cumulated_size += histogram[i]->size;
154 fprintf (stream: file, format: " %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n",
155 (int64_t) histogram[i]->count,
156 histogram[i]->time,
157 cumulated_time * 100.0 / overall_time,
158 histogram[i]->size,
159 cumulated_size * 100.0 / overall_size);
160 }
161}
162
/* Speculative target information obtained from profile: which function an
   indirect call is expected to reach and how likely that is.  */

struct speculative_call_target
{
  /* Profile_id of target obtained from profile.  */
  unsigned int target_id;
  /* Probability that call will land in function with target_id.  */
  unsigned int target_probability;

  /* Build a target with profile id ID and probability PROB; both default
     to zero.  */
  speculative_call_target (unsigned int id = 0, int prob = 0)
  {
    target_id = id;
    target_probability = prob;
  }
};
177
178class speculative_call_summary
179{
180public:
181 speculative_call_summary () : speculative_call_targets ()
182 {}
183
184 auto_vec<speculative_call_target> speculative_call_targets;
185
186 void dump (FILE *f);
187
188};
189
190 /* Class to manage call summaries. */
191
192class ipa_profile_call_summaries
193 : public call_summary<speculative_call_summary *>
194{
195public:
196 ipa_profile_call_summaries (symbol_table *table)
197 : call_summary<speculative_call_summary *> (table)
198 {}
199
200 /* Duplicate info when an edge is cloned. */
201 void duplicate (cgraph_edge *, cgraph_edge *,
202 speculative_call_summary *old_sum,
203 speculative_call_summary *new_sum) final override;
204};
205
206static ipa_profile_call_summaries *call_sums = NULL;
207
208/* Dump all information in speculative call summary to F. */
209
210void
211speculative_call_summary::dump (FILE *f)
212{
213 cgraph_node *n2;
214
215 unsigned spec_count = speculative_call_targets.length ();
216 for (unsigned i = 0; i < spec_count; i++)
217 {
218 speculative_call_target item = speculative_call_targets[i];
219 n2 = find_func_by_profile_id (func_id: item.target_id);
220 if (n2)
221 fprintf (stream: f, format: " The %i speculative target is %s with prob %3.2f\n", i,
222 n2->dump_name (),
223 item.target_probability / (float) REG_BR_PROB_BASE);
224 else
225 fprintf (stream: f, format: " The %i speculative target is %u with prob %3.2f\n", i,
226 item.target_id,
227 item.target_probability / (float) REG_BR_PROB_BASE);
228 }
229}
230
231/* Duplicate info when an edge is cloned. */
232
233void
234ipa_profile_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
235 speculative_call_summary *old_sum,
236 speculative_call_summary *new_sum)
237{
238 if (!old_sum)
239 return;
240
241 unsigned old_count = old_sum->speculative_call_targets.length ();
242 if (!old_count)
243 return;
244
245 new_sum->speculative_call_targets.reserve_exact (nelems: old_count);
246 new_sum->speculative_call_targets.quick_grow_cleared (len: old_count);
247
248 for (unsigned i = 0; i < old_count; i++)
249 {
250 new_sum->speculative_call_targets[i]
251 = old_sum->speculative_call_targets[i];
252 }
253}
254
255/* Collect histogram and speculative target summaries from CFG profiles. */
256
257static void
258ipa_profile_generate_summary (void)
259{
260 struct cgraph_node *node;
261 gimple_stmt_iterator gsi;
262 basic_block bb;
263
264 hash_table<histogram_hash> hashtable (10);
265
266 gcc_checking_assert (!call_sums);
267 call_sums = new ipa_profile_call_summaries (symtab);
268
269 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
270 if (ENTRY_BLOCK_PTR_FOR_FN
271 (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ())
272 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
273 {
274 int time = 0;
275 int size = 0;
276 for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi))
277 {
278 gimple *stmt = gsi_stmt (i: gsi);
279 if (gimple_code (g: stmt) == GIMPLE_CALL
280 && !gimple_call_fndecl (gs: stmt))
281 {
282 histogram_value h;
283 h = gimple_histogram_value_of_type
284 (DECL_STRUCT_FUNCTION (node->decl),
285 stmt, HIST_TYPE_INDIR_CALL);
286 /* No need to do sanity check: gimple_ic_transform already
287 takes away bad histograms. */
288 if (h)
289 {
290 gcov_type val, count, all;
291 struct cgraph_edge *e = node->get_edge (call_stmt: stmt);
292 if (e && !e->indirect_unknown_callee)
293 continue;
294
295 speculative_call_summary *csum
296 = call_sums->get_create (edge: e);
297
298 for (unsigned j = 0; j < GCOV_TOPN_MAXIMUM_TRACKED_VALUES;
299 j++)
300 {
301 if (!get_nth_most_common_value (NULL, counter_type: "indirect call",
302 hist: h, value: &val, count: &count, all: &all,
303 n: j))
304 continue;
305
306 if (val == 0 || count == 0)
307 continue;
308
309 if (count > all)
310 {
311 if (dump_file)
312 fprintf (stream: dump_file,
313 format: "Probability capped to 1\n");
314 count = all;
315 }
316 speculative_call_target item (
317 val, GCOV_COMPUTE_SCALE (count, all));
318 csum->speculative_call_targets.safe_push (obj: item);
319 }
320
321 gimple_remove_histogram_value
322 (DECL_STRUCT_FUNCTION (node->decl), stmt, h);
323 }
324 }
325 time += estimate_num_insns (stmt, &eni_time_weights);
326 size += estimate_num_insns (stmt, &eni_size_weights);
327 }
328 if (bb->count.ipa_p () && bb->count.initialized_p ())
329 account_time_size (hashtable: &hashtable, histogram,
330 count: bb->count.ipa ().to_gcov_type (),
331 time, size);
332 }
333 histogram.qsort (cmp_counts);
334}
335
336/* Serialize the speculative summary info for LTO. */
337
338static void
339ipa_profile_write_edge_summary (lto_simple_output_block *ob,
340 speculative_call_summary *csum)
341{
342 unsigned len = 0;
343
344 len = csum->speculative_call_targets.length ();
345
346 gcc_assert (len <= GCOV_TOPN_MAXIMUM_TRACKED_VALUES);
347
348 streamer_write_hwi_stream (ob->main_stream, len);
349
350 if (len)
351 {
352 unsigned spec_count = csum->speculative_call_targets.length ();
353 for (unsigned i = 0; i < spec_count; i++)
354 {
355 speculative_call_target item = csum->speculative_call_targets[i];
356 gcc_assert (item.target_id);
357 streamer_write_hwi_stream (ob->main_stream, item.target_id);
358 streamer_write_hwi_stream (ob->main_stream, item.target_probability);
359 }
360 }
361}
362
363/* Serialize the ipa info for lto. */
364
365static void
366ipa_profile_write_summary (void)
367{
368 struct lto_simple_output_block *ob
369 = lto_create_simple_output_block (LTO_section_ipa_profile);
370 unsigned int i;
371
372 streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
373 for (i = 0; i < histogram.length (); i++)
374 {
375 streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
376 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
377 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
378 }
379
380 if (!call_sums)
381 return;
382
383 /* Serialize speculative targets information. */
384 unsigned int count = 0;
385 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
386 lto_symtab_encoder_iterator lsei;
387 cgraph_node *node;
388
389 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
390 lsei_next_function_in_partition (lsei: &lsei))
391 {
392 node = lsei_cgraph_node (lsei);
393 if (node->definition && node->has_gimple_body_p ()
394 && node->indirect_calls)
395 count++;
396 }
397
398 streamer_write_uhwi_stream (ob->main_stream, count);
399
400 /* Process all of the functions. */
401 for (lsei = lsei_start_function_in_partition (encoder);
402 !lsei_end_p (lsei) && count; lsei_next_function_in_partition (lsei: &lsei))
403 {
404 cgraph_node *node = lsei_cgraph_node (lsei);
405 if (node->definition && node->has_gimple_body_p ()
406 && node->indirect_calls)
407 {
408 int node_ref = lto_symtab_encoder_encode (encoder, node);
409 streamer_write_uhwi_stream (ob->main_stream, node_ref);
410
411 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
412 {
413 speculative_call_summary *csum = call_sums->get_create (edge: e);
414 ipa_profile_write_edge_summary (ob, csum);
415 }
416 }
417 }
418
419 lto_destroy_simple_output_block (ob);
420}
421
422/* Dump all profile summary data for all cgraph nodes and edges to file F. */
423
424static void
425ipa_profile_dump_all_summaries (FILE *f)
426{
427 fprintf (stream: dump_file,
428 format: "\n========== IPA-profile speculative targets: ==========\n");
429 cgraph_node *node;
430 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
431 {
432 fprintf (stream: f, format: "\nSummary for node %s:\n", node->dump_name ());
433 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
434 {
435 fprintf (stream: f, format: " Summary for %s of indirect edge %d:\n",
436 e->caller->dump_name (), e->lto_stmt_uid);
437 speculative_call_summary *csum = call_sums->get_create (edge: e);
438 csum->dump (f);
439 }
440 }
441 fprintf (stream: f, format: "\n\n");
442}
443
444/* Read speculative targets information about edge for LTO WPA. */
445
446static void
447ipa_profile_read_edge_summary (class lto_input_block *ib, cgraph_edge *edge)
448{
449 unsigned i, len;
450
451 len = streamer_read_hwi (ib);
452 gcc_assert (len <= GCOV_TOPN_MAXIMUM_TRACKED_VALUES);
453 speculative_call_summary *csum = call_sums->get_create (edge);
454
455 for (i = 0; i < len; i++)
456 {
457 unsigned int target_id = streamer_read_hwi (ib);
458 int target_probability = streamer_read_hwi (ib);
459 speculative_call_target item (target_id, target_probability);
460 csum->speculative_call_targets.safe_push (obj: item);
461 }
462}
463
464/* Read profile speculative targets section information for LTO WPA. */
465
466static void
467ipa_profile_read_summary_section (struct lto_file_decl_data *file_data,
468 class lto_input_block *ib)
469{
470 if (!ib)
471 return;
472
473 lto_symtab_encoder_t encoder = file_data->symtab_node_encoder;
474
475 unsigned int count = streamer_read_uhwi (ib);
476
477 unsigned int i;
478 unsigned int index;
479 cgraph_node * node;
480
481 for (i = 0; i < count; i++)
482 {
483 index = streamer_read_uhwi (ib);
484 node
485 = dyn_cast<cgraph_node *> (p: lto_symtab_encoder_deref (encoder, ref: index));
486
487 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
488 ipa_profile_read_edge_summary (ib, edge: e);
489 }
490}
491
492/* Deserialize the IPA histogram and speculative targets summary info for LTO.
493 */
494
495static void
496ipa_profile_read_summary (void)
497{
498 struct lto_file_decl_data ** file_data_vec
499 = lto_get_file_decl_data ();
500 struct lto_file_decl_data * file_data;
501 int j = 0;
502
503 hash_table<histogram_hash> hashtable (10);
504
505 gcc_checking_assert (!call_sums);
506 call_sums = new ipa_profile_call_summaries (symtab);
507
508 while ((file_data = file_data_vec[j++]))
509 {
510 const char *data;
511 size_t len;
512 class lto_input_block *ib
513 = lto_create_simple_input_block (file_data,
514 LTO_section_ipa_profile,
515 &data, &len);
516 if (ib)
517 {
518 unsigned int num = streamer_read_uhwi (ib);
519 unsigned int n;
520 for (n = 0; n < num; n++)
521 {
522 gcov_type count = streamer_read_gcov_count (ib);
523 int time = streamer_read_uhwi (ib);
524 int size = streamer_read_uhwi (ib);
525 account_time_size (hashtable: &hashtable, histogram,
526 count, time, size);
527 }
528
529 ipa_profile_read_summary_section (file_data, ib);
530
531 lto_destroy_simple_input_block (file_data,
532 LTO_section_ipa_profile,
533 ib, data, len);
534 }
535 }
536 histogram.qsort (cmp_counts);
537}
538
539/* Data used by ipa_propagate_frequency. */
540
541struct ipa_propagate_frequency_data
542{
543 cgraph_node *function_symbol;
544 bool maybe_unlikely_executed;
545 bool maybe_executed_once;
546 bool only_called_at_startup;
547 bool only_called_at_exit;
548};
549
550/* Worker for ipa_propagate_frequency_1. */
551
552static bool
553ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
554{
555 struct ipa_propagate_frequency_data *d;
556 struct cgraph_edge *edge;
557
558 d = (struct ipa_propagate_frequency_data *)data;
559 for (edge = node->callers;
560 edge && (d->maybe_unlikely_executed || d->maybe_executed_once
561 || d->only_called_at_startup || d->only_called_at_exit);
562 edge = edge->next_caller)
563 {
564 if (edge->caller != d->function_symbol)
565 {
566 d->only_called_at_startup &= edge->caller->only_called_at_startup;
567 /* It makes sense to put main() together with the static constructors.
568 It will be executed for sure, but rest of functions called from
569 main are definitely not at startup only. */
570 if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
571 d->only_called_at_startup = 0;
572 d->only_called_at_exit &= edge->caller->only_called_at_exit;
573 }
574
575 /* When profile feedback is available, do not try to propagate too hard;
576 counts are already good guide on function frequencies and roundoff
577 errors can make us to push function into unlikely section even when
578 it is executed by the train run. Transfer the function only if all
579 callers are unlikely executed. */
580 if (profile_info
581 && !(edge->callee->count.ipa () == profile_count::zero ())
582 && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
583 || (edge->caller->inlined_to
584 && edge->caller->inlined_to->frequency
585 != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
586 d->maybe_unlikely_executed = false;
587 if (edge->count.ipa ().initialized_p ()
588 && !edge->count.ipa ().nonzero_p ())
589 continue;
590 switch (edge->caller->frequency)
591 {
592 case NODE_FREQUENCY_UNLIKELY_EXECUTED:
593 break;
594 case NODE_FREQUENCY_EXECUTED_ONCE:
595 {
596 if (dump_file && (dump_flags & TDF_DETAILS))
597 fprintf (stream: dump_file, format: " Called by %s that is executed once\n",
598 edge->caller->dump_name ());
599 d->maybe_unlikely_executed = false;
600 ipa_call_summary *s = ipa_call_summaries->get (edge);
601 if (s != NULL && s->loop_depth)
602 {
603 d->maybe_executed_once = false;
604 if (dump_file && (dump_flags & TDF_DETAILS))
605 fprintf (stream: dump_file, format: " Called in loop\n");
606 }
607 break;
608 }
609 case NODE_FREQUENCY_HOT:
610 case NODE_FREQUENCY_NORMAL:
611 if (dump_file && (dump_flags & TDF_DETAILS))
612 fprintf (stream: dump_file, format: " Called by %s that is normal or hot\n",
613 edge->caller->dump_name ());
614 d->maybe_unlikely_executed = false;
615 d->maybe_executed_once = false;
616 break;
617 }
618 }
619 return edge != NULL;
620}
621
622/* Return ture if NODE contains hot calls. */
623
624bool
625contains_hot_call_p (struct cgraph_node *node)
626{
627 struct cgraph_edge *e;
628 for (e = node->callees; e; e = e->next_callee)
629 if (e->maybe_hot_p ())
630 return true;
631 else if (!e->inline_failed
632 && contains_hot_call_p (node: e->callee))
633 return true;
634 for (e = node->indirect_calls; e; e = e->next_callee)
635 if (e->maybe_hot_p ())
636 return true;
637 return false;
638}
639
640/* See if the frequency of NODE can be updated based on frequencies of its
641 callers. */
642bool
643ipa_propagate_frequency (struct cgraph_node *node)
644{
645 struct ipa_propagate_frequency_data d = {.function_symbol: node, .maybe_unlikely_executed: true, .maybe_executed_once: true, .only_called_at_startup: true, .only_called_at_exit: true};
646 bool changed = false;
647
648 /* We cannot propagate anything useful about externally visible functions
649 nor about virtuals. */
650 if (!node->local
651 || node->alias
652 || (opt_for_fn (node->decl, flag_devirtualize)
653 && DECL_VIRTUAL_P (node->decl)))
654 return false;
655 gcc_assert (node->analyzed);
656 if (dump_file && (dump_flags & TDF_DETAILS))
657 fprintf (stream: dump_file, format: "Processing frequency %s\n", node->dump_name ());
658
659 node->call_for_symbol_and_aliases (callback: ipa_propagate_frequency_1, data: &d,
660 include_overwritable: true);
661
662 if ((d.only_called_at_startup && !d.only_called_at_exit)
663 && !node->only_called_at_startup)
664 {
665 node->only_called_at_startup = true;
666 if (dump_file)
667 fprintf (stream: dump_file, format: "Node %s promoted to only called at startup.\n",
668 node->dump_name ());
669 changed = true;
670 }
671 if ((d.only_called_at_exit && !d.only_called_at_startup)
672 && !node->only_called_at_exit)
673 {
674 node->only_called_at_exit = true;
675 if (dump_file)
676 fprintf (stream: dump_file, format: "Node %s promoted to only called at exit.\n",
677 node->dump_name ());
678 changed = true;
679 }
680
681 /* With profile we can decide on hot/normal based on count. */
682 if (node->count. ipa().initialized_p ())
683 {
684 bool hot = false;
685 if (!(node->count. ipa() == profile_count::zero ())
686 && node->count. ipa() >= get_hot_bb_threshold ())
687 hot = true;
688 if (!hot)
689 hot |= contains_hot_call_p (node);
690 if (hot)
691 {
692 if (node->frequency != NODE_FREQUENCY_HOT)
693 {
694 if (dump_file)
695 fprintf (stream: dump_file, format: "Node %s promoted to hot.\n",
696 node->dump_name ());
697 node->frequency = NODE_FREQUENCY_HOT;
698 return true;
699 }
700 return false;
701 }
702 else if (node->frequency == NODE_FREQUENCY_HOT)
703 {
704 if (dump_file)
705 fprintf (stream: dump_file, format: "Node %s reduced to normal.\n",
706 node->dump_name ());
707 node->frequency = NODE_FREQUENCY_NORMAL;
708 changed = true;
709 }
710 }
711 /* These come either from profile or user hints; never update them. */
712 if (node->frequency == NODE_FREQUENCY_HOT
713 || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
714 return changed;
715 if (d.maybe_unlikely_executed)
716 {
717 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
718 if (dump_file)
719 fprintf (stream: dump_file, format: "Node %s promoted to unlikely executed.\n",
720 node->dump_name ());
721 changed = true;
722 }
723 else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
724 {
725 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
726 if (dump_file)
727 fprintf (stream: dump_file, format: "Node %s promoted to executed once.\n",
728 node->dump_name ());
729 changed = true;
730 }
731 return changed;
732}
733
734/* Check that number of arguments of N agrees with E.
735 Be conservative when summaries are not present. */
736
737static bool
738check_argument_count (struct cgraph_node *n, struct cgraph_edge *e)
739{
740 if (!ipa_node_params_sum || !ipa_edge_args_sum)
741 return true;
742 ipa_node_params *info = ipa_node_params_sum->get (node: n->function_symbol ());
743 if (!info)
744 return true;
745 ipa_edge_args *e_info = ipa_edge_args_sum->get (edge: e);
746 if (!e_info)
747 return true;
748 if (ipa_get_param_count (info) != ipa_get_cs_argument_count (args: e_info)
749 && (ipa_get_param_count (info) >= ipa_get_cs_argument_count (args: e_info)
750 || !stdarg_p (TREE_TYPE (n->decl))))
751 return false;
752 return true;
753}
754
755/* Simple ipa profile pass propagating frequencies across the callgraph. */
756
757static unsigned int
758ipa_profile (void)
759{
760 struct cgraph_node **order;
761 struct cgraph_edge *e;
762 int order_pos;
763 bool something_changed = false;
764 int i;
765 gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
766 struct cgraph_node *n,*n2;
767 int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
768 int nmismatch = 0, nimpossible = 0;
769 bool node_map_initialized = false;
770 gcov_type threshold;
771
772 if (dump_file)
773 dump_histogram (file: dump_file, histogram);
774 for (i = 0; i < (int)histogram.length (); i++)
775 {
776 overall_time += histogram[i]->count * histogram[i]->time;
777 overall_size += histogram[i]->size;
778 }
779 threshold = 0;
780 if (overall_time)
781 {
782 gcc_assert (overall_size);
783
784 cutoff = (overall_time * param_hot_bb_count_ws_permille + 500) / 1000;
785 for (i = 0; cumulated < cutoff; i++)
786 {
787 cumulated += histogram[i]->count * histogram[i]->time;
788 threshold = histogram[i]->count;
789 }
790 if (!threshold)
791 threshold = 1;
792 if (dump_file)
793 {
794 gcov_type cumulated_time = 0, cumulated_size = 0;
795
796 for (i = 0;
797 i < (int)histogram.length () && histogram[i]->count >= threshold;
798 i++)
799 {
800 cumulated_time += histogram[i]->count * histogram[i]->time;
801 cumulated_size += histogram[i]->size;
802 }
803 fprintf (stream: dump_file, format: "Determined min count: %" PRId64
804 " Time:%3.2f%% Size:%3.2f%%\n",
805 (int64_t)threshold,
806 cumulated_time * 100.0 / overall_time,
807 cumulated_size * 100.0 / overall_size);
808 }
809
810 if (in_lto_p)
811 {
812 if (dump_file)
813 fprintf (stream: dump_file, format: "Setting hotness threshold in LTO mode.\n");
814 set_hot_bb_threshold (threshold);
815 }
816 }
817 histogram.release ();
818 histogram_pool.release ();
819
820 /* Produce speculative calls: we saved common target from profiling into
821 e->target_id. Now, at link time, we can look up corresponding
822 function node and produce speculative call. */
823
824 gcc_checking_assert (call_sums);
825
826 if (dump_file)
827 {
828 if (!node_map_initialized)
829 init_node_map (false);
830 node_map_initialized = true;
831
832 ipa_profile_dump_all_summaries (f: dump_file);
833 }
834
835 FOR_EACH_DEFINED_FUNCTION (n)
836 {
837 bool update = false;
838
839 if (!opt_for_fn (n->decl, flag_ipa_profile))
840 continue;
841
842 for (e = n->indirect_calls; e; e = e->next_callee)
843 {
844 if (n->count.initialized_p ())
845 nindirect++;
846
847 speculative_call_summary *csum = call_sums->get_create (edge: e);
848 unsigned spec_count = csum->speculative_call_targets.length ();
849 if (spec_count)
850 {
851 if (!node_map_initialized)
852 init_node_map (false);
853 node_map_initialized = true;
854 ncommon++;
855
856 unsigned speculative_id = 0;
857 profile_count orig = e->count;
858 for (unsigned i = 0; i < spec_count; i++)
859 {
860 speculative_call_target item
861 = csum->speculative_call_targets[i];
862 n2 = find_func_by_profile_id (func_id: item.target_id);
863 if (n2)
864 {
865 if (dump_file)
866 {
867 fprintf (stream: dump_file,
868 format: "Indirect call -> direct call from"
869 " other module %s => %s, prob %3.2f\n",
870 n->dump_name (),
871 n2->dump_name (),
872 item.target_probability
873 / (float) REG_BR_PROB_BASE);
874 }
875 if (item.target_probability < REG_BR_PROB_BASE / 2)
876 {
877 nuseless++;
878 if (dump_file)
879 fprintf (stream: dump_file,
880 format: "Not speculating: "
881 "probability is too low.\n");
882 }
883 else if (!e->maybe_hot_p ())
884 {
885 nuseless++;
886 if (dump_file)
887 fprintf (stream: dump_file,
888 format: "Not speculating: call is cold.\n");
889 }
890 else if (n2->get_availability () <= AVAIL_INTERPOSABLE
891 && n2->can_be_discarded_p ())
892 {
893 nuseless++;
894 if (dump_file)
895 fprintf (stream: dump_file,
896 format: "Not speculating: target is overwritable "
897 "and can be discarded.\n");
898 }
899 else if (!check_argument_count (n: n2, e))
900 {
901 nmismatch++;
902 if (dump_file)
903 fprintf (stream: dump_file,
904 format: "Not speculating: "
905 "parameter count mismatch\n");
906 }
907 else if (e->indirect_info->polymorphic
908 && !opt_for_fn (n->decl, flag_devirtualize)
909 && !possible_polymorphic_call_target_p (e, n: n2))
910 {
911 nimpossible++;
912 if (dump_file)
913 fprintf (stream: dump_file,
914 format: "Not speculating: "
915 "function is not in the polymorphic "
916 "call target list\n");
917 }
918 else
919 {
920 /* Target may be overwritable, but profile says that
921 control flow goes to this particular implementation
922 of N2. Speculate on the local alias to allow
923 inlining. */
924 if (!n2->can_be_discarded_p ())
925 {
926 cgraph_node *alias;
927 alias = dyn_cast<cgraph_node *>
928 (p: n2->noninterposable_alias ());
929 if (alias)
930 n2 = alias;
931 }
932 nconverted++;
933 profile_probability prob
934 = profile_probability::from_reg_br_prob_base
935 (v: item.target_probability).adjusted ();
936 e->make_speculative (n2,
937 direct_count: orig.apply_probability (prob),
938 speculative_id);
939 update = true;
940 speculative_id++;
941 }
942 }
943 else
944 {
945 if (dump_file)
946 fprintf (stream: dump_file,
947 format: "Function with profile-id %i not found.\n",
948 item.target_id);
949 nunknown++;
950 }
951 }
952 }
953 }
954 if (update)
955 ipa_update_overall_fn_summary (node: n);
956 }
957 if (node_map_initialized)
958 del_node_map ();
959 if (dump_file && nindirect)
960 fprintf (stream: dump_file,
961 format: "%i indirect calls trained.\n"
962 "%i (%3.2f%%) have common target.\n"
963 "%i (%3.2f%%) targets was not found.\n"
964 "%i (%3.2f%%) targets had parameter count mismatch.\n"
965 "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
966 "%i (%3.2f%%) speculations seems useless.\n"
967 "%i (%3.2f%%) speculations produced.\n",
968 nindirect,
969 ncommon, ncommon * 100.0 / nindirect,
970 nunknown, nunknown * 100.0 / nindirect,
971 nmismatch, nmismatch * 100.0 / nindirect,
972 nimpossible, nimpossible * 100.0 / nindirect,
973 nuseless, nuseless * 100.0 / nindirect,
974 nconverted, nconverted * 100.0 / nindirect);
975
976 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
977 order_pos = ipa_reverse_postorder (order);
978 for (i = order_pos - 1; i >= 0; i--)
979 {
980 if (order[i]->local
981 && opt_for_fn (order[i]->decl, flag_ipa_profile)
982 && ipa_propagate_frequency (node: order[i]))
983 {
984 for (e = order[i]->callees; e; e = e->next_callee)
985 if (e->callee->local && !e->callee->aux)
986 {
987 something_changed = true;
988 e->callee->aux = (void *)1;
989 }
990 }
991 order[i]->aux = NULL;
992 }
993
994 while (something_changed)
995 {
996 something_changed = false;
997 for (i = order_pos - 1; i >= 0; i--)
998 {
999 if (order[i]->aux
1000 && opt_for_fn (order[i]->decl, flag_ipa_profile)
1001 && ipa_propagate_frequency (node: order[i]))
1002 {
1003 for (e = order[i]->callees; e; e = e->next_callee)
1004 if (e->callee->local && !e->callee->aux)
1005 {
1006 something_changed = true;
1007 e->callee->aux = (void *)1;
1008 }
1009 }
1010 order[i]->aux = NULL;
1011 }
1012 }
1013 free (ptr: order);
1014
1015 if (dump_file && (dump_flags & TDF_DETAILS))
1016 symtab->dump (f: dump_file);
1017
1018 delete call_sums;
1019 call_sums = NULL;
1020
1021 return 0;
1022}
1023
1024namespace {
1025
1026const pass_data pass_data_ipa_profile =
1027{
1028 .type: IPA_PASS, /* type */
1029 .name: "profile_estimate", /* name */
1030 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
1031 .tv_id: TV_IPA_PROFILE, /* tv_id */
1032 .properties_required: 0, /* properties_required */
1033 .properties_provided: 0, /* properties_provided */
1034 .properties_destroyed: 0, /* properties_destroyed */
1035 .todo_flags_start: 0, /* todo_flags_start */
1036 .todo_flags_finish: 0, /* todo_flags_finish */
1037};
1038
1039class pass_ipa_profile : public ipa_opt_pass_d
1040{
1041public:
1042 pass_ipa_profile (gcc::context *ctxt)
1043 : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
1044 ipa_profile_generate_summary, /* generate_summary */
1045 ipa_profile_write_summary, /* write_summary */
1046 ipa_profile_read_summary, /* read_summary */
1047 NULL, /* write_optimization_summary */
1048 NULL, /* read_optimization_summary */
1049 NULL, /* stmt_fixup */
1050 0, /* function_transform_todo_flags_start */
1051 NULL, /* function_transform */
1052 NULL) /* variable_transform */
1053 {}
1054
1055 /* opt_pass methods: */
1056 bool gate (function *) final override { return flag_ipa_profile || in_lto_p; }
1057 unsigned int execute (function *) final override { return ipa_profile (); }
1058
1059}; // class pass_ipa_profile
1060
1061} // anon namespace
1062
1063ipa_opt_pass_d *
1064make_pass_ipa_profile (gcc::context *ctxt)
1065{
1066 return new pass_ipa_profile (ctxt);
1067}
1068

source code of gcc/ipa-profile.cc