1 | /* Basic IPA optimizations based on profile. |
2 | Copyright (C) 2003-2023 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free |
8 | Software Foundation; either version 3, or (at your option) any later |
9 | version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
/* ipa-profile pass implements the following analysis propagating profile
   inter-procedurally.
22 | |
23 | - Count histogram construction. This is a histogram analyzing how much |
24 | time is spent executing statements with a given execution count read |
25 | from profile feedback. This histogram is complete only with LTO, |
26 | otherwise it contains information only about the current unit. |
27 | |
28 | The information is used to set hot/cold thresholds. |
29 | - Next speculative indirect call resolution is performed: the local |
30 | profile pass assigns profile-id to each function and provide us with a |
31 | histogram specifying the most common target. We look up the callgraph |
32 | node corresponding to the target and produce a speculative call. |
33 | |
34 | This call may or may not survive through IPA optimization based on decision |
35 | of inliner. |
36 | - Finally we propagate the following flags: unlikely executed, executed |
37 | once, executed at startup and executed at exit. These flags are used to |
38 | control code size/performance threshold and code placement (by producing |
39 | .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */ |
40 | #include "config.h" |
41 | #include "system.h" |
42 | #include "coretypes.h" |
43 | #include "backend.h" |
44 | #include "tree.h" |
45 | #include "gimple.h" |
46 | #include "predict.h" |
47 | #include "alloc-pool.h" |
48 | #include "tree-pass.h" |
49 | #include "cgraph.h" |
50 | #include "data-streamer.h" |
51 | #include "gimple-iterator.h" |
52 | #include "ipa-utils.h" |
53 | #include "profile.h" |
54 | #include "value-prof.h" |
55 | #include "tree-inline.h" |
56 | #include "symbol-summary.h" |
57 | #include "tree-vrp.h" |
58 | #include "ipa-prop.h" |
59 | #include "ipa-fnsummary.h" |
60 | |
/* Entry in the histogram.  account_time_size keeps one entry per distinct
   execution count and sums TIME and SIZE into it.  */

struct histogram_entry
{
  /* Execution count this entry stands for.  */
  gcov_type count;
  /* Sum of the time estimates accounted to COUNT.  */
  int time;
  /* Sum of the size estimates accounted to COUNT.  */
  int size;
};
69 | |
/* Histogram of profile values.
   The histogram is represented as an ordered vector of entries allocated via
   histogram_pool.  During construction a separate hashtable is kept to lookup
   duplicate entries.  */

vec<histogram_entry *> histogram;
/* Allocator the entries pushed to HISTOGRAM are taken from.  */
static object_allocator<histogram_entry> histogram_pool ("IPA histogram");
77 | |
78 | /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */ |
79 | |
80 | struct histogram_hash : nofree_ptr_hash <histogram_entry> |
81 | { |
82 | static inline hashval_t hash (const histogram_entry *); |
83 | static inline int equal (const histogram_entry *, const histogram_entry *); |
84 | }; |
85 | |
86 | inline hashval_t |
87 | histogram_hash::hash (const histogram_entry *val) |
88 | { |
89 | return val->count; |
90 | } |
91 | |
92 | inline int |
93 | histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2) |
94 | { |
95 | return val->count == val2->count; |
96 | } |
97 | |
98 | /* Account TIME and SIZE executed COUNT times into HISTOGRAM. |
99 | HASHTABLE is the on-side hash kept to avoid duplicates. */ |
100 | |
101 | static void |
102 | account_time_size (hash_table<histogram_hash> *hashtable, |
103 | vec<histogram_entry *> &histogram, |
104 | gcov_type count, int time, int size) |
105 | { |
106 | histogram_entry key = {.count: count, .time: 0, .size: 0}; |
107 | histogram_entry **val = hashtable->find_slot (value: &key, insert: INSERT); |
108 | |
109 | if (!*val) |
110 | { |
111 | *val = histogram_pool.allocate (); |
112 | **val = key; |
113 | histogram.safe_push (obj: *val); |
114 | } |
115 | (*val)->time += time; |
116 | (*val)->size += size; |
117 | } |
118 | |
119 | int |
120 | cmp_counts (const void *v1, const void *v2) |
121 | { |
122 | const histogram_entry *h1 = *(const histogram_entry * const *)v1; |
123 | const histogram_entry *h2 = *(const histogram_entry * const *)v2; |
124 | if (h1->count < h2->count) |
125 | return 1; |
126 | if (h1->count > h2->count) |
127 | return -1; |
128 | return 0; |
129 | } |
130 | |
131 | /* Dump HISTOGRAM to FILE. */ |
132 | |
133 | static void |
134 | dump_histogram (FILE *file, vec<histogram_entry *> histogram) |
135 | { |
136 | unsigned int i; |
137 | gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, |
138 | overall_size = 0; |
139 | |
140 | fprintf (stream: dump_file, format: "Histogram:\n" ); |
141 | for (i = 0; i < histogram.length (); i++) |
142 | { |
143 | overall_time += histogram[i]->count * histogram[i]->time; |
144 | overall_size += histogram[i]->size; |
145 | } |
146 | if (!overall_time) |
147 | overall_time = 1; |
148 | if (!overall_size) |
149 | overall_size = 1; |
150 | for (i = 0; i < histogram.length (); i++) |
151 | { |
152 | cumulated_time += histogram[i]->count * histogram[i]->time; |
153 | cumulated_size += histogram[i]->size; |
154 | fprintf (stream: file, format: " %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n" , |
155 | (int64_t) histogram[i]->count, |
156 | histogram[i]->time, |
157 | cumulated_time * 100.0 / overall_time, |
158 | histogram[i]->size, |
159 | cumulated_size * 100.0 / overall_size); |
160 | } |
161 | } |
162 | |
/* One speculative target of an indirect call, as obtained from the
   profile.  */

struct speculative_call_target
{
  /* Profile_id of target obtained from profile.  */
  unsigned int target_id;
  /* Probability that call will land in function with target_id.  */
  unsigned int target_probability;

  /* Build a target for profile id ID with probability PROB; both default
     to zero.  */
  speculative_call_target (unsigned int id = 0, int prob = 0)
    : target_id (id), target_probability (prob)
  {}
};
177 | |
178 | class speculative_call_summary |
179 | { |
180 | public: |
181 | speculative_call_summary () : speculative_call_targets () |
182 | {} |
183 | |
184 | auto_vec<speculative_call_target> speculative_call_targets; |
185 | |
186 | void dump (FILE *f); |
187 | |
188 | }; |
189 | |
190 | /* Class to manage call summaries. */ |
191 | |
192 | class ipa_profile_call_summaries |
193 | : public call_summary<speculative_call_summary *> |
194 | { |
195 | public: |
196 | ipa_profile_call_summaries (symbol_table *table) |
197 | : call_summary<speculative_call_summary *> (table) |
198 | {} |
199 | |
200 | /* Duplicate info when an edge is cloned. */ |
201 | void duplicate (cgraph_edge *, cgraph_edge *, |
202 | speculative_call_summary *old_sum, |
203 | speculative_call_summary *new_sum) final override; |
204 | }; |
205 | |
/* Speculative call summaries of the current unit.  Allocated by
   ipa_profile_generate_summary or ipa_profile_read_summary; NULL before.  */
static ipa_profile_call_summaries *call_sums = NULL;
207 | |
208 | /* Dump all information in speculative call summary to F. */ |
209 | |
210 | void |
211 | speculative_call_summary::dump (FILE *f) |
212 | { |
213 | cgraph_node *n2; |
214 | |
215 | unsigned spec_count = speculative_call_targets.length (); |
216 | for (unsigned i = 0; i < spec_count; i++) |
217 | { |
218 | speculative_call_target item = speculative_call_targets[i]; |
219 | n2 = find_func_by_profile_id (func_id: item.target_id); |
220 | if (n2) |
221 | fprintf (stream: f, format: " The %i speculative target is %s with prob %3.2f\n" , i, |
222 | n2->dump_name (), |
223 | item.target_probability / (float) REG_BR_PROB_BASE); |
224 | else |
225 | fprintf (stream: f, format: " The %i speculative target is %u with prob %3.2f\n" , i, |
226 | item.target_id, |
227 | item.target_probability / (float) REG_BR_PROB_BASE); |
228 | } |
229 | } |
230 | |
231 | /* Duplicate info when an edge is cloned. */ |
232 | |
233 | void |
234 | ipa_profile_call_summaries::duplicate (cgraph_edge *, cgraph_edge *, |
235 | speculative_call_summary *old_sum, |
236 | speculative_call_summary *new_sum) |
237 | { |
238 | if (!old_sum) |
239 | return; |
240 | |
241 | unsigned old_count = old_sum->speculative_call_targets.length (); |
242 | if (!old_count) |
243 | return; |
244 | |
245 | new_sum->speculative_call_targets.reserve_exact (nelems: old_count); |
246 | new_sum->speculative_call_targets.quick_grow_cleared (len: old_count); |
247 | |
248 | for (unsigned i = 0; i < old_count; i++) |
249 | { |
250 | new_sum->speculative_call_targets[i] |
251 | = old_sum->speculative_call_targets[i]; |
252 | } |
253 | } |
254 | |
255 | /* Collect histogram and speculative target summaries from CFG profiles. */ |
256 | |
257 | static void |
258 | ipa_profile_generate_summary (void) |
259 | { |
260 | struct cgraph_node *node; |
261 | gimple_stmt_iterator gsi; |
262 | basic_block bb; |
263 | |
264 | hash_table<histogram_hash> hashtable (10); |
265 | |
266 | gcc_checking_assert (!call_sums); |
267 | call_sums = new ipa_profile_call_summaries (symtab); |
268 | |
269 | FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) |
270 | if (ENTRY_BLOCK_PTR_FOR_FN |
271 | (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ()) |
272 | FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) |
273 | { |
274 | int time = 0; |
275 | int size = 0; |
276 | for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
277 | { |
278 | gimple *stmt = gsi_stmt (i: gsi); |
279 | if (gimple_code (g: stmt) == GIMPLE_CALL |
280 | && !gimple_call_fndecl (gs: stmt)) |
281 | { |
282 | histogram_value h; |
283 | h = gimple_histogram_value_of_type |
284 | (DECL_STRUCT_FUNCTION (node->decl), |
285 | stmt, HIST_TYPE_INDIR_CALL); |
286 | /* No need to do sanity check: gimple_ic_transform already |
287 | takes away bad histograms. */ |
288 | if (h) |
289 | { |
290 | gcov_type val, count, all; |
291 | struct cgraph_edge *e = node->get_edge (call_stmt: stmt); |
292 | if (e && !e->indirect_unknown_callee) |
293 | continue; |
294 | |
295 | speculative_call_summary *csum |
296 | = call_sums->get_create (edge: e); |
297 | |
298 | for (unsigned j = 0; j < GCOV_TOPN_MAXIMUM_TRACKED_VALUES; |
299 | j++) |
300 | { |
301 | if (!get_nth_most_common_value (NULL, counter_type: "indirect call" , |
302 | hist: h, value: &val, count: &count, all: &all, |
303 | n: j)) |
304 | continue; |
305 | |
306 | if (val == 0 || count == 0) |
307 | continue; |
308 | |
309 | if (count > all) |
310 | { |
311 | if (dump_file) |
312 | fprintf (stream: dump_file, |
313 | format: "Probability capped to 1\n" ); |
314 | count = all; |
315 | } |
316 | speculative_call_target item ( |
317 | val, GCOV_COMPUTE_SCALE (count, all)); |
318 | csum->speculative_call_targets.safe_push (obj: item); |
319 | } |
320 | |
321 | gimple_remove_histogram_value |
322 | (DECL_STRUCT_FUNCTION (node->decl), stmt, h); |
323 | } |
324 | } |
325 | time += estimate_num_insns (stmt, &eni_time_weights); |
326 | size += estimate_num_insns (stmt, &eni_size_weights); |
327 | } |
328 | if (bb->count.ipa_p () && bb->count.initialized_p ()) |
329 | account_time_size (hashtable: &hashtable, histogram, |
330 | count: bb->count.ipa ().to_gcov_type (), |
331 | time, size); |
332 | } |
333 | histogram.qsort (cmp_counts); |
334 | } |
335 | |
336 | /* Serialize the speculative summary info for LTO. */ |
337 | |
338 | static void |
339 | ipa_profile_write_edge_summary (lto_simple_output_block *ob, |
340 | speculative_call_summary *csum) |
341 | { |
342 | unsigned len = 0; |
343 | |
344 | len = csum->speculative_call_targets.length (); |
345 | |
346 | gcc_assert (len <= GCOV_TOPN_MAXIMUM_TRACKED_VALUES); |
347 | |
348 | streamer_write_hwi_stream (ob->main_stream, len); |
349 | |
350 | if (len) |
351 | { |
352 | unsigned spec_count = csum->speculative_call_targets.length (); |
353 | for (unsigned i = 0; i < spec_count; i++) |
354 | { |
355 | speculative_call_target item = csum->speculative_call_targets[i]; |
356 | gcc_assert (item.target_id); |
357 | streamer_write_hwi_stream (ob->main_stream, item.target_id); |
358 | streamer_write_hwi_stream (ob->main_stream, item.target_probability); |
359 | } |
360 | } |
361 | } |
362 | |
363 | /* Serialize the ipa info for lto. */ |
364 | |
365 | static void |
366 | ipa_profile_write_summary (void) |
367 | { |
368 | struct lto_simple_output_block *ob |
369 | = lto_create_simple_output_block (LTO_section_ipa_profile); |
370 | unsigned int i; |
371 | |
372 | streamer_write_uhwi_stream (ob->main_stream, histogram.length ()); |
373 | for (i = 0; i < histogram.length (); i++) |
374 | { |
375 | streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count); |
376 | streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time); |
377 | streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size); |
378 | } |
379 | |
380 | if (!call_sums) |
381 | return; |
382 | |
383 | /* Serialize speculative targets information. */ |
384 | unsigned int count = 0; |
385 | lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder; |
386 | lto_symtab_encoder_iterator lsei; |
387 | cgraph_node *node; |
388 | |
389 | for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei); |
390 | lsei_next_function_in_partition (lsei: &lsei)) |
391 | { |
392 | node = lsei_cgraph_node (lsei); |
393 | if (node->definition && node->has_gimple_body_p () |
394 | && node->indirect_calls) |
395 | count++; |
396 | } |
397 | |
398 | streamer_write_uhwi_stream (ob->main_stream, count); |
399 | |
400 | /* Process all of the functions. */ |
401 | for (lsei = lsei_start_function_in_partition (encoder); |
402 | !lsei_end_p (lsei) && count; lsei_next_function_in_partition (lsei: &lsei)) |
403 | { |
404 | cgraph_node *node = lsei_cgraph_node (lsei); |
405 | if (node->definition && node->has_gimple_body_p () |
406 | && node->indirect_calls) |
407 | { |
408 | int node_ref = lto_symtab_encoder_encode (encoder, node); |
409 | streamer_write_uhwi_stream (ob->main_stream, node_ref); |
410 | |
411 | for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee) |
412 | { |
413 | speculative_call_summary *csum = call_sums->get_create (edge: e); |
414 | ipa_profile_write_edge_summary (ob, csum); |
415 | } |
416 | } |
417 | } |
418 | |
419 | lto_destroy_simple_output_block (ob); |
420 | } |
421 | |
422 | /* Dump all profile summary data for all cgraph nodes and edges to file F. */ |
423 | |
424 | static void |
425 | ipa_profile_dump_all_summaries (FILE *f) |
426 | { |
427 | fprintf (stream: dump_file, |
428 | format: "\n========== IPA-profile speculative targets: ==========\n" ); |
429 | cgraph_node *node; |
430 | FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) |
431 | { |
432 | fprintf (stream: f, format: "\nSummary for node %s:\n" , node->dump_name ()); |
433 | for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee) |
434 | { |
435 | fprintf (stream: f, format: " Summary for %s of indirect edge %d:\n" , |
436 | e->caller->dump_name (), e->lto_stmt_uid); |
437 | speculative_call_summary *csum = call_sums->get_create (edge: e); |
438 | csum->dump (f); |
439 | } |
440 | } |
441 | fprintf (stream: f, format: "\n\n" ); |
442 | } |
443 | |
444 | /* Read speculative targets information about edge for LTO WPA. */ |
445 | |
446 | static void |
447 | ipa_profile_read_edge_summary (class lto_input_block *ib, cgraph_edge *edge) |
448 | { |
449 | unsigned i, len; |
450 | |
451 | len = streamer_read_hwi (ib); |
452 | gcc_assert (len <= GCOV_TOPN_MAXIMUM_TRACKED_VALUES); |
453 | speculative_call_summary *csum = call_sums->get_create (edge); |
454 | |
455 | for (i = 0; i < len; i++) |
456 | { |
457 | unsigned int target_id = streamer_read_hwi (ib); |
458 | int target_probability = streamer_read_hwi (ib); |
459 | speculative_call_target item (target_id, target_probability); |
460 | csum->speculative_call_targets.safe_push (obj: item); |
461 | } |
462 | } |
463 | |
464 | /* Read profile speculative targets section information for LTO WPA. */ |
465 | |
466 | static void |
467 | ipa_profile_read_summary_section (struct lto_file_decl_data *file_data, |
468 | class lto_input_block *ib) |
469 | { |
470 | if (!ib) |
471 | return; |
472 | |
473 | lto_symtab_encoder_t encoder = file_data->symtab_node_encoder; |
474 | |
475 | unsigned int count = streamer_read_uhwi (ib); |
476 | |
477 | unsigned int i; |
478 | unsigned int index; |
479 | cgraph_node * node; |
480 | |
481 | for (i = 0; i < count; i++) |
482 | { |
483 | index = streamer_read_uhwi (ib); |
484 | node |
485 | = dyn_cast<cgraph_node *> (p: lto_symtab_encoder_deref (encoder, ref: index)); |
486 | |
487 | for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee) |
488 | ipa_profile_read_edge_summary (ib, edge: e); |
489 | } |
490 | } |
491 | |
492 | /* Deserialize the IPA histogram and speculative targets summary info for LTO. |
493 | */ |
494 | |
495 | static void |
496 | ipa_profile_read_summary (void) |
497 | { |
498 | struct lto_file_decl_data ** file_data_vec |
499 | = lto_get_file_decl_data (); |
500 | struct lto_file_decl_data * file_data; |
501 | int j = 0; |
502 | |
503 | hash_table<histogram_hash> hashtable (10); |
504 | |
505 | gcc_checking_assert (!call_sums); |
506 | call_sums = new ipa_profile_call_summaries (symtab); |
507 | |
508 | while ((file_data = file_data_vec[j++])) |
509 | { |
510 | const char *data; |
511 | size_t len; |
512 | class lto_input_block *ib |
513 | = lto_create_simple_input_block (file_data, |
514 | LTO_section_ipa_profile, |
515 | &data, &len); |
516 | if (ib) |
517 | { |
518 | unsigned int num = streamer_read_uhwi (ib); |
519 | unsigned int n; |
520 | for (n = 0; n < num; n++) |
521 | { |
522 | gcov_type count = streamer_read_gcov_count (ib); |
523 | int time = streamer_read_uhwi (ib); |
524 | int size = streamer_read_uhwi (ib); |
525 | account_time_size (hashtable: &hashtable, histogram, |
526 | count, time, size); |
527 | } |
528 | |
529 | ipa_profile_read_summary_section (file_data, ib); |
530 | |
531 | lto_destroy_simple_input_block (file_data, |
532 | LTO_section_ipa_profile, |
533 | ib, data, len); |
534 | } |
535 | } |
536 | histogram.qsort (cmp_counts); |
537 | } |
538 | |
/* Data used by ipa_propagate_frequency.  The flags start out true and are
   cleared by ipa_propagate_frequency_1 as callers are examined.  */

struct ipa_propagate_frequency_data
{
  /* Function being analyzed; calls from it to itself do not affect the
     startup/exit flags.  */
  cgraph_node *function_symbol;
  /* Still possible that the function is unlikely executed.  */
  bool maybe_unlikely_executed;
  /* Still possible that the function is executed once.  */
  bool maybe_executed_once;
  /* All callers seen so far are only called at startup.  */
  bool only_called_at_startup;
  /* All callers seen so far are only called at exit.  */
  bool only_called_at_exit;
};
549 | |
550 | /* Worker for ipa_propagate_frequency_1. */ |
551 | |
552 | static bool |
553 | ipa_propagate_frequency_1 (struct cgraph_node *node, void *data) |
554 | { |
555 | struct ipa_propagate_frequency_data *d; |
556 | struct cgraph_edge *edge; |
557 | |
558 | d = (struct ipa_propagate_frequency_data *)data; |
559 | for (edge = node->callers; |
560 | edge && (d->maybe_unlikely_executed || d->maybe_executed_once |
561 | || d->only_called_at_startup || d->only_called_at_exit); |
562 | edge = edge->next_caller) |
563 | { |
564 | if (edge->caller != d->function_symbol) |
565 | { |
566 | d->only_called_at_startup &= edge->caller->only_called_at_startup; |
567 | /* It makes sense to put main() together with the static constructors. |
568 | It will be executed for sure, but rest of functions called from |
569 | main are definitely not at startup only. */ |
570 | if (MAIN_NAME_P (DECL_NAME (edge->caller->decl))) |
571 | d->only_called_at_startup = 0; |
572 | d->only_called_at_exit &= edge->caller->only_called_at_exit; |
573 | } |
574 | |
575 | /* When profile feedback is available, do not try to propagate too hard; |
576 | counts are already good guide on function frequencies and roundoff |
577 | errors can make us to push function into unlikely section even when |
578 | it is executed by the train run. Transfer the function only if all |
579 | callers are unlikely executed. */ |
580 | if (profile_info |
581 | && !(edge->callee->count.ipa () == profile_count::zero ()) |
582 | && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED |
583 | || (edge->caller->inlined_to |
584 | && edge->caller->inlined_to->frequency |
585 | != NODE_FREQUENCY_UNLIKELY_EXECUTED))) |
586 | d->maybe_unlikely_executed = false; |
587 | if (edge->count.ipa ().initialized_p () |
588 | && !edge->count.ipa ().nonzero_p ()) |
589 | continue; |
590 | switch (edge->caller->frequency) |
591 | { |
592 | case NODE_FREQUENCY_UNLIKELY_EXECUTED: |
593 | break; |
594 | case NODE_FREQUENCY_EXECUTED_ONCE: |
595 | { |
596 | if (dump_file && (dump_flags & TDF_DETAILS)) |
597 | fprintf (stream: dump_file, format: " Called by %s that is executed once\n" , |
598 | edge->caller->dump_name ()); |
599 | d->maybe_unlikely_executed = false; |
600 | ipa_call_summary *s = ipa_call_summaries->get (edge); |
601 | if (s != NULL && s->loop_depth) |
602 | { |
603 | d->maybe_executed_once = false; |
604 | if (dump_file && (dump_flags & TDF_DETAILS)) |
605 | fprintf (stream: dump_file, format: " Called in loop\n" ); |
606 | } |
607 | break; |
608 | } |
609 | case NODE_FREQUENCY_HOT: |
610 | case NODE_FREQUENCY_NORMAL: |
611 | if (dump_file && (dump_flags & TDF_DETAILS)) |
612 | fprintf (stream: dump_file, format: " Called by %s that is normal or hot\n" , |
613 | edge->caller->dump_name ()); |
614 | d->maybe_unlikely_executed = false; |
615 | d->maybe_executed_once = false; |
616 | break; |
617 | } |
618 | } |
619 | return edge != NULL; |
620 | } |
621 | |
622 | /* Return ture if NODE contains hot calls. */ |
623 | |
624 | bool |
625 | contains_hot_call_p (struct cgraph_node *node) |
626 | { |
627 | struct cgraph_edge *e; |
628 | for (e = node->callees; e; e = e->next_callee) |
629 | if (e->maybe_hot_p ()) |
630 | return true; |
631 | else if (!e->inline_failed |
632 | && contains_hot_call_p (node: e->callee)) |
633 | return true; |
634 | for (e = node->indirect_calls; e; e = e->next_callee) |
635 | if (e->maybe_hot_p ()) |
636 | return true; |
637 | return false; |
638 | } |
639 | |
640 | /* See if the frequency of NODE can be updated based on frequencies of its |
641 | callers. */ |
642 | bool |
643 | ipa_propagate_frequency (struct cgraph_node *node) |
644 | { |
645 | struct ipa_propagate_frequency_data d = {.function_symbol: node, .maybe_unlikely_executed: true, .maybe_executed_once: true, .only_called_at_startup: true, .only_called_at_exit: true}; |
646 | bool changed = false; |
647 | |
648 | /* We cannot propagate anything useful about externally visible functions |
649 | nor about virtuals. */ |
650 | if (!node->local |
651 | || node->alias |
652 | || (opt_for_fn (node->decl, flag_devirtualize) |
653 | && DECL_VIRTUAL_P (node->decl))) |
654 | return false; |
655 | gcc_assert (node->analyzed); |
656 | if (dump_file && (dump_flags & TDF_DETAILS)) |
657 | fprintf (stream: dump_file, format: "Processing frequency %s\n" , node->dump_name ()); |
658 | |
659 | node->call_for_symbol_and_aliases (callback: ipa_propagate_frequency_1, data: &d, |
660 | include_overwritable: true); |
661 | |
662 | if ((d.only_called_at_startup && !d.only_called_at_exit) |
663 | && !node->only_called_at_startup) |
664 | { |
665 | node->only_called_at_startup = true; |
666 | if (dump_file) |
667 | fprintf (stream: dump_file, format: "Node %s promoted to only called at startup.\n" , |
668 | node->dump_name ()); |
669 | changed = true; |
670 | } |
671 | if ((d.only_called_at_exit && !d.only_called_at_startup) |
672 | && !node->only_called_at_exit) |
673 | { |
674 | node->only_called_at_exit = true; |
675 | if (dump_file) |
676 | fprintf (stream: dump_file, format: "Node %s promoted to only called at exit.\n" , |
677 | node->dump_name ()); |
678 | changed = true; |
679 | } |
680 | |
681 | /* With profile we can decide on hot/normal based on count. */ |
682 | if (node->count. ipa().initialized_p ()) |
683 | { |
684 | bool hot = false; |
685 | if (!(node->count. ipa() == profile_count::zero ()) |
686 | && node->count. ipa() >= get_hot_bb_threshold ()) |
687 | hot = true; |
688 | if (!hot) |
689 | hot |= contains_hot_call_p (node); |
690 | if (hot) |
691 | { |
692 | if (node->frequency != NODE_FREQUENCY_HOT) |
693 | { |
694 | if (dump_file) |
695 | fprintf (stream: dump_file, format: "Node %s promoted to hot.\n" , |
696 | node->dump_name ()); |
697 | node->frequency = NODE_FREQUENCY_HOT; |
698 | return true; |
699 | } |
700 | return false; |
701 | } |
702 | else if (node->frequency == NODE_FREQUENCY_HOT) |
703 | { |
704 | if (dump_file) |
705 | fprintf (stream: dump_file, format: "Node %s reduced to normal.\n" , |
706 | node->dump_name ()); |
707 | node->frequency = NODE_FREQUENCY_NORMAL; |
708 | changed = true; |
709 | } |
710 | } |
711 | /* These come either from profile or user hints; never update them. */ |
712 | if (node->frequency == NODE_FREQUENCY_HOT |
713 | || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) |
714 | return changed; |
715 | if (d.maybe_unlikely_executed) |
716 | { |
717 | node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; |
718 | if (dump_file) |
719 | fprintf (stream: dump_file, format: "Node %s promoted to unlikely executed.\n" , |
720 | node->dump_name ()); |
721 | changed = true; |
722 | } |
723 | else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE) |
724 | { |
725 | node->frequency = NODE_FREQUENCY_EXECUTED_ONCE; |
726 | if (dump_file) |
727 | fprintf (stream: dump_file, format: "Node %s promoted to executed once.\n" , |
728 | node->dump_name ()); |
729 | changed = true; |
730 | } |
731 | return changed; |
732 | } |
733 | |
734 | /* Check that number of arguments of N agrees with E. |
735 | Be conservative when summaries are not present. */ |
736 | |
737 | static bool |
738 | check_argument_count (struct cgraph_node *n, struct cgraph_edge *e) |
739 | { |
740 | if (!ipa_node_params_sum || !ipa_edge_args_sum) |
741 | return true; |
742 | ipa_node_params *info = ipa_node_params_sum->get (node: n->function_symbol ()); |
743 | if (!info) |
744 | return true; |
745 | ipa_edge_args *e_info = ipa_edge_args_sum->get (edge: e); |
746 | if (!e_info) |
747 | return true; |
748 | if (ipa_get_param_count (info) != ipa_get_cs_argument_count (args: e_info) |
749 | && (ipa_get_param_count (info) >= ipa_get_cs_argument_count (args: e_info) |
750 | || !stdarg_p (TREE_TYPE (n->decl)))) |
751 | return false; |
752 | return true; |
753 | } |
754 | |
755 | /* Simple ipa profile pass propagating frequencies across the callgraph. */ |
756 | |
757 | static unsigned int |
758 | ipa_profile (void) |
759 | { |
760 | struct cgraph_node **order; |
761 | struct cgraph_edge *e; |
762 | int order_pos; |
763 | bool something_changed = false; |
764 | int i; |
765 | gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0; |
766 | struct cgraph_node *n,*n2; |
767 | int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0; |
768 | int nmismatch = 0, nimpossible = 0; |
769 | bool node_map_initialized = false; |
770 | gcov_type threshold; |
771 | |
772 | if (dump_file) |
773 | dump_histogram (file: dump_file, histogram); |
774 | for (i = 0; i < (int)histogram.length (); i++) |
775 | { |
776 | overall_time += histogram[i]->count * histogram[i]->time; |
777 | overall_size += histogram[i]->size; |
778 | } |
779 | threshold = 0; |
780 | if (overall_time) |
781 | { |
782 | gcc_assert (overall_size); |
783 | |
784 | cutoff = (overall_time * param_hot_bb_count_ws_permille + 500) / 1000; |
785 | for (i = 0; cumulated < cutoff; i++) |
786 | { |
787 | cumulated += histogram[i]->count * histogram[i]->time; |
788 | threshold = histogram[i]->count; |
789 | } |
790 | if (!threshold) |
791 | threshold = 1; |
792 | if (dump_file) |
793 | { |
794 | gcov_type cumulated_time = 0, cumulated_size = 0; |
795 | |
796 | for (i = 0; |
797 | i < (int)histogram.length () && histogram[i]->count >= threshold; |
798 | i++) |
799 | { |
800 | cumulated_time += histogram[i]->count * histogram[i]->time; |
801 | cumulated_size += histogram[i]->size; |
802 | } |
803 | fprintf (stream: dump_file, format: "Determined min count: %" PRId64 |
804 | " Time:%3.2f%% Size:%3.2f%%\n" , |
805 | (int64_t)threshold, |
806 | cumulated_time * 100.0 / overall_time, |
807 | cumulated_size * 100.0 / overall_size); |
808 | } |
809 | |
810 | if (in_lto_p) |
811 | { |
812 | if (dump_file) |
813 | fprintf (stream: dump_file, format: "Setting hotness threshold in LTO mode.\n" ); |
814 | set_hot_bb_threshold (threshold); |
815 | } |
816 | } |
817 | histogram.release (); |
818 | histogram_pool.release (); |
819 | |
820 | /* Produce speculative calls: we saved common target from profiling into |
821 | e->target_id. Now, at link time, we can look up corresponding |
822 | function node and produce speculative call. */ |
823 | |
824 | gcc_checking_assert (call_sums); |
825 | |
826 | if (dump_file) |
827 | { |
828 | if (!node_map_initialized) |
829 | init_node_map (false); |
830 | node_map_initialized = true; |
831 | |
832 | ipa_profile_dump_all_summaries (f: dump_file); |
833 | } |
834 | |
835 | FOR_EACH_DEFINED_FUNCTION (n) |
836 | { |
837 | bool update = false; |
838 | |
839 | if (!opt_for_fn (n->decl, flag_ipa_profile)) |
840 | continue; |
841 | |
842 | for (e = n->indirect_calls; e; e = e->next_callee) |
843 | { |
844 | if (n->count.initialized_p ()) |
845 | nindirect++; |
846 | |
847 | speculative_call_summary *csum = call_sums->get_create (edge: e); |
848 | unsigned spec_count = csum->speculative_call_targets.length (); |
849 | if (spec_count) |
850 | { |
851 | if (!node_map_initialized) |
852 | init_node_map (false); |
853 | node_map_initialized = true; |
854 | ncommon++; |
855 | |
856 | unsigned speculative_id = 0; |
857 | profile_count orig = e->count; |
858 | for (unsigned i = 0; i < spec_count; i++) |
859 | { |
860 | speculative_call_target item |
861 | = csum->speculative_call_targets[i]; |
862 | n2 = find_func_by_profile_id (func_id: item.target_id); |
863 | if (n2) |
864 | { |
865 | if (dump_file) |
866 | { |
867 | fprintf (stream: dump_file, |
868 | format: "Indirect call -> direct call from" |
869 | " other module %s => %s, prob %3.2f\n" , |
870 | n->dump_name (), |
871 | n2->dump_name (), |
872 | item.target_probability |
873 | / (float) REG_BR_PROB_BASE); |
874 | } |
875 | if (item.target_probability < REG_BR_PROB_BASE / 2) |
876 | { |
877 | nuseless++; |
878 | if (dump_file) |
879 | fprintf (stream: dump_file, |
880 | format: "Not speculating: " |
881 | "probability is too low.\n" ); |
882 | } |
883 | else if (!e->maybe_hot_p ()) |
884 | { |
885 | nuseless++; |
886 | if (dump_file) |
887 | fprintf (stream: dump_file, |
888 | format: "Not speculating: call is cold.\n" ); |
889 | } |
890 | else if (n2->get_availability () <= AVAIL_INTERPOSABLE |
891 | && n2->can_be_discarded_p ()) |
892 | { |
893 | nuseless++; |
894 | if (dump_file) |
895 | fprintf (stream: dump_file, |
896 | format: "Not speculating: target is overwritable " |
897 | "and can be discarded.\n" ); |
898 | } |
899 | else if (!check_argument_count (n: n2, e)) |
900 | { |
901 | nmismatch++; |
902 | if (dump_file) |
903 | fprintf (stream: dump_file, |
904 | format: "Not speculating: " |
905 | "parameter count mismatch\n" ); |
906 | } |
907 | else if (e->indirect_info->polymorphic |
908 | && !opt_for_fn (n->decl, flag_devirtualize) |
909 | && !possible_polymorphic_call_target_p (e, n: n2)) |
910 | { |
911 | nimpossible++; |
912 | if (dump_file) |
913 | fprintf (stream: dump_file, |
914 | format: "Not speculating: " |
915 | "function is not in the polymorphic " |
916 | "call target list\n" ); |
917 | } |
918 | else |
919 | { |
920 | /* Target may be overwritable, but profile says that |
921 | control flow goes to this particular implementation |
922 | of N2. Speculate on the local alias to allow |
923 | inlining. */ |
924 | if (!n2->can_be_discarded_p ()) |
925 | { |
926 | cgraph_node *alias; |
927 | alias = dyn_cast<cgraph_node *> |
928 | (p: n2->noninterposable_alias ()); |
929 | if (alias) |
930 | n2 = alias; |
931 | } |
932 | nconverted++; |
933 | profile_probability prob |
934 | = profile_probability::from_reg_br_prob_base |
935 | (v: item.target_probability).adjusted (); |
936 | e->make_speculative (n2, |
937 | direct_count: orig.apply_probability (prob), |
938 | speculative_id); |
939 | update = true; |
940 | speculative_id++; |
941 | } |
942 | } |
943 | else |
944 | { |
945 | if (dump_file) |
946 | fprintf (stream: dump_file, |
947 | format: "Function with profile-id %i not found.\n" , |
948 | item.target_id); |
949 | nunknown++; |
950 | } |
951 | } |
952 | } |
953 | } |
954 | if (update) |
955 | ipa_update_overall_fn_summary (node: n); |
956 | } |
957 | if (node_map_initialized) |
958 | del_node_map (); |
959 | if (dump_file && nindirect) |
960 | fprintf (stream: dump_file, |
961 | format: "%i indirect calls trained.\n" |
962 | "%i (%3.2f%%) have common target.\n" |
963 | "%i (%3.2f%%) targets was not found.\n" |
964 | "%i (%3.2f%%) targets had parameter count mismatch.\n" |
965 | "%i (%3.2f%%) targets was not in polymorphic call target list.\n" |
966 | "%i (%3.2f%%) speculations seems useless.\n" |
967 | "%i (%3.2f%%) speculations produced.\n" , |
968 | nindirect, |
969 | ncommon, ncommon * 100.0 / nindirect, |
970 | nunknown, nunknown * 100.0 / nindirect, |
971 | nmismatch, nmismatch * 100.0 / nindirect, |
972 | nimpossible, nimpossible * 100.0 / nindirect, |
973 | nuseless, nuseless * 100.0 / nindirect, |
974 | nconverted, nconverted * 100.0 / nindirect); |
975 | |
976 | order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count); |
977 | order_pos = ipa_reverse_postorder (order); |
978 | for (i = order_pos - 1; i >= 0; i--) |
979 | { |
980 | if (order[i]->local |
981 | && opt_for_fn (order[i]->decl, flag_ipa_profile) |
982 | && ipa_propagate_frequency (node: order[i])) |
983 | { |
984 | for (e = order[i]->callees; e; e = e->next_callee) |
985 | if (e->callee->local && !e->callee->aux) |
986 | { |
987 | something_changed = true; |
988 | e->callee->aux = (void *)1; |
989 | } |
990 | } |
991 | order[i]->aux = NULL; |
992 | } |
993 | |
994 | while (something_changed) |
995 | { |
996 | something_changed = false; |
997 | for (i = order_pos - 1; i >= 0; i--) |
998 | { |
999 | if (order[i]->aux |
1000 | && opt_for_fn (order[i]->decl, flag_ipa_profile) |
1001 | && ipa_propagate_frequency (node: order[i])) |
1002 | { |
1003 | for (e = order[i]->callees; e; e = e->next_callee) |
1004 | if (e->callee->local && !e->callee->aux) |
1005 | { |
1006 | something_changed = true; |
1007 | e->callee->aux = (void *)1; |
1008 | } |
1009 | } |
1010 | order[i]->aux = NULL; |
1011 | } |
1012 | } |
1013 | free (ptr: order); |
1014 | |
1015 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1016 | symtab->dump (f: dump_file); |
1017 | |
1018 | delete call_sums; |
1019 | call_sums = NULL; |
1020 | |
1021 | return 0; |
1022 | } |
1023 | |
1024 | namespace { |
1025 | |
1026 | const pass_data pass_data_ipa_profile = |
1027 | { |
1028 | .type: IPA_PASS, /* type */ |
1029 | .name: "profile_estimate" , /* name */ |
1030 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
1031 | .tv_id: TV_IPA_PROFILE, /* tv_id */ |
1032 | .properties_required: 0, /* properties_required */ |
1033 | .properties_provided: 0, /* properties_provided */ |
1034 | .properties_destroyed: 0, /* properties_destroyed */ |
1035 | .todo_flags_start: 0, /* todo_flags_start */ |
1036 | .todo_flags_finish: 0, /* todo_flags_finish */ |
1037 | }; |
1038 | |
1039 | class pass_ipa_profile : public ipa_opt_pass_d |
1040 | { |
1041 | public: |
1042 | pass_ipa_profile (gcc::context *ctxt) |
1043 | : ipa_opt_pass_d (pass_data_ipa_profile, ctxt, |
1044 | ipa_profile_generate_summary, /* generate_summary */ |
1045 | ipa_profile_write_summary, /* write_summary */ |
1046 | ipa_profile_read_summary, /* read_summary */ |
1047 | NULL, /* write_optimization_summary */ |
1048 | NULL, /* read_optimization_summary */ |
1049 | NULL, /* stmt_fixup */ |
1050 | 0, /* function_transform_todo_flags_start */ |
1051 | NULL, /* function_transform */ |
1052 | NULL) /* variable_transform */ |
1053 | {} |
1054 | |
1055 | /* opt_pass methods: */ |
1056 | bool gate (function *) final override { return flag_ipa_profile || in_lto_p; } |
1057 | unsigned int execute (function *) final override { return ipa_profile (); } |
1058 | |
1059 | }; // class pass_ipa_profile |
1060 | |
1061 | } // anon namespace |
1062 | |
1063 | ipa_opt_pass_d * |
1064 | make_pass_ipa_profile (gcc::context *ctxt) |
1065 | { |
1066 | return new pass_ipa_profile (ctxt); |
1067 | } |
1068 | |