1 | /* Read and annotate call graph profile from the auto profile data file. |
2 | Copyright (C) 2014-2023 Free Software Foundation, Inc. |
3 | Contributed by Dehao Chen (dehao@google.com) |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free |
9 | Software Foundation; either version 3, or (at your option) any later |
10 | version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
15 | for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | #include "config.h" |
22 | #define INCLUDE_MAP |
23 | #define INCLUDE_SET |
24 | #include "system.h" |
25 | #include "coretypes.h" |
26 | #include "backend.h" |
27 | #include "tree.h" |
28 | #include "gimple.h" |
29 | #include "predict.h" |
30 | #include "alloc-pool.h" |
31 | #include "tree-pass.h" |
32 | #include "ssa.h" |
33 | #include "cgraph.h" |
34 | #include "gcov-io.h" |
35 | #include "diagnostic-core.h" |
36 | #include "profile.h" |
37 | #include "langhooks.h" |
38 | #include "cfgloop.h" |
39 | #include "tree-cfg.h" |
40 | #include "tree-cfgcleanup.h" |
41 | #include "tree-into-ssa.h" |
42 | #include "gimple-iterator.h" |
43 | #include "value-prof.h" |
44 | #include "symbol-summary.h" |
45 | #include "ipa-prop.h" |
46 | #include "ipa-fnsummary.h" |
47 | #include "ipa-inline.h" |
48 | #include "tree-inline.h" |
49 | #include "auto-profile.h" |
50 | #include "tree-pretty-print.h" |
51 | #include "gimple-pretty-print.h" |
52 | |
53 | /* The following routines implements AutoFDO optimization. |
54 | |
55 | This optimization uses sampling profiles to annotate basic block counts |
56 | and uses heuristics to estimate branch probabilities. |
57 | |
58 | There are three phases in AutoFDO: |
59 | |
60 | Phase 1: Read profile from the profile data file. |
61 | The following info is read from the profile datafile: |
62 | * string_table: a map between function name and its index. |
63 | * autofdo_source_profile: a map from function_instance name to |
64 | function_instance. This is represented as a forest of |
65 | function_instances. |
66 | * WorkingSet: a histogram of how many instructions are covered for a |
67 | given percentage of total cycles. This is describing the binary |
68 | level information (not source level). This info is used to help |
69 | decide if we want aggressive optimizations that could increase |
70 | code footprint (e.g. loop unroll etc.) |
71 | A function instance is an instance of function that could either be a |
72 | standalone symbol, or a clone of a function that is inlined into another |
73 | function. |
74 | |
75 | Phase 2: Early inline + value profile transformation. |
76 | Early inline uses autofdo_source_profile to find if a callsite is: |
77 | * inlined in the profiled binary. |
78 | * callee body is hot in the profiling run. |
79 | If both condition satisfies, early inline will inline the callsite |
80 | regardless of the code growth. |
81 | Phase 2 is an iterative process. During each iteration, we also check |
82 | if an indirect callsite is promoted and inlined in the profiling run. |
83 | If yes, vpt will happen to force promote it and in the next iteration, |
84 | einline will inline the promoted callsite in the next iteration. |
85 | |
86 | Phase 3: Annotate control flow graph. |
87 | AutoFDO uses a separate pass to: |
88 | * Annotate basic block count |
89 | * Estimate branch probability |
90 | |
91 | After the above 3 phases, all profile is readily annotated on the GCC IR. |
92 | AutoFDO tries to reuse all FDO infrastructure as much as possible to make |
93 | use of the profile. E.g. it uses existing mechanism to calculate the basic |
94 | block/edge frequency, as well as the cgraph node/edge count. |
95 | */ |
96 | |
97 | #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo" |
98 | #define AUTO_PROFILE_VERSION 2 |
99 | |
100 | namespace autofdo |
101 | { |
102 | |
103 | /* Intermediate edge info used when propagating AutoFDO profile information. |
104 | We can't edge->count() directly since it's computed from edge's probability |
105 | while probability is yet not decided during propagation. */ |
106 | #define AFDO_EINFO(e) ((class edge_info *) e->aux) |
107 | class edge_info |
108 | { |
109 | public: |
110 | edge_info () : count_ (profile_count::zero ().afdo ()), annotated_ (false) {} |
111 | bool is_annotated () const { return annotated_; } |
112 | void set_annotated () { annotated_ = true; } |
113 | profile_count get_count () const { return count_; } |
114 | void set_count (profile_count count) { count_ = count; } |
115 | private: |
116 | profile_count count_; |
117 | bool annotated_; |
118 | }; |
119 | |
120 | /* Represent a source location: (function_decl, lineno). */ |
121 | typedef std::pair<tree, unsigned> decl_lineno; |
122 | |
123 | /* Represent an inline stack. vector[0] is the leaf node. */ |
124 | typedef auto_vec<decl_lineno> inline_stack; |
125 | |
126 | /* String array that stores function names. */ |
127 | typedef auto_vec<char *> string_vector; |
128 | |
129 | /* Map from function name's index in string_table to target's |
130 | execution count. */ |
131 | typedef std::map<unsigned, gcov_type> icall_target_map; |
132 | |
133 | /* Set of gimple stmts. Used to track if the stmt has already been promoted |
134 | to direct call. */ |
135 | typedef std::set<gimple *> stmt_set; |
136 | |
137 | /* Represent count info of an inline stack. */ |
138 | class count_info |
139 | { |
140 | public: |
141 | /* Sampled count of the inline stack. */ |
142 | gcov_type count; |
143 | |
144 | /* Map from indirect call target to its sample count. */ |
145 | icall_target_map targets; |
146 | |
147 | /* Whether this inline stack is already used in annotation. |
148 | |
149 | Each inline stack should only be used to annotate IR once. |
150 | This will be enforced when instruction-level discriminator |
151 | is supported. */ |
152 | bool annotated; |
153 | }; |
154 | |
155 | /* operator< for "const char *". */ |
156 | struct string_compare |
157 | { |
158 | bool operator()(const char *a, const char *b) const |
159 | { |
160 | return strcmp (s1: a, s2: b) < 0; |
161 | } |
162 | }; |
163 | |
164 | /* Store a string array, indexed by string position in the array. */ |
165 | class string_table |
166 | { |
167 | public: |
168 | string_table () |
169 | {} |
170 | |
171 | ~string_table (); |
172 | |
173 | /* For a given string, returns its index. */ |
174 | int get_index (const char *name) const; |
175 | |
176 | /* For a given decl, returns the index of the decl name. */ |
177 | int get_index_by_decl (tree decl) const; |
178 | |
179 | /* For a given index, returns the string. */ |
180 | const char *get_name (int index) const; |
181 | |
182 | /* Read profile, return TRUE on success. */ |
183 | bool read (); |
184 | |
185 | private: |
186 | typedef std::map<const char *, unsigned, string_compare> string_index_map; |
187 | string_vector vector_; |
188 | string_index_map map_; |
189 | }; |
190 | |
191 | /* Profile of a function instance: |
192 | 1. total_count of the function. |
193 | 2. head_count (entry basic block count) of the function (only valid when |
194 | function is a top-level function_instance, i.e. it is the original copy |
195 | instead of the inlined copy). |
196 | 3. map from source location (decl_lineno) to profile (count_info). |
197 | 4. map from callsite to callee function_instance. */ |
198 | class function_instance |
199 | { |
200 | public: |
201 | typedef auto_vec<function_instance *> function_instance_stack; |
202 | |
203 | /* Read the profile and return a function_instance with head count as |
204 | HEAD_COUNT. Recursively read callsites to create nested function_instances |
205 | too. STACK is used to track the recursive creation process. */ |
206 | static function_instance * |
207 | read_function_instance (function_instance_stack *stack, |
208 | gcov_type head_count); |
209 | |
210 | /* Recursively deallocate all callsites (nested function_instances). */ |
211 | ~function_instance (); |
212 | |
213 | /* Accessors. */ |
214 | int |
215 | name () const |
216 | { |
217 | return name_; |
218 | } |
219 | gcov_type |
220 | total_count () const |
221 | { |
222 | return total_count_; |
223 | } |
224 | gcov_type |
225 | head_count () const |
226 | { |
227 | return head_count_; |
228 | } |
229 | |
230 | /* Traverse callsites of the current function_instance to find one at the |
231 | location of LINENO and callee name represented in DECL. */ |
232 | function_instance *get_function_instance_by_decl (unsigned lineno, |
233 | tree decl) const; |
234 | |
235 | /* Store the profile info for LOC in INFO. Return TRUE if profile info |
236 | is found. */ |
237 | bool get_count_info (location_t loc, count_info *info) const; |
238 | |
239 | /* Read the inlined indirect call target profile for STMT and store it in |
240 | MAP, return the total count for all inlined indirect calls. */ |
241 | gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const; |
242 | |
243 | /* Sum of counts that is used during annotation. */ |
244 | gcov_type total_annotated_count () const; |
245 | |
246 | /* Mark LOC as annotated. */ |
247 | void mark_annotated (location_t loc); |
248 | |
249 | private: |
250 | /* Callsite, represented as (decl_lineno, callee_function_name_index). */ |
251 | typedef std::pair<unsigned, unsigned> callsite; |
252 | |
253 | /* Map from callsite to callee function_instance. */ |
254 | typedef std::map<callsite, function_instance *> callsite_map; |
255 | |
256 | function_instance (unsigned name, gcov_type head_count) |
257 | : name_ (name), total_count_ (0), head_count_ (head_count) |
258 | { |
259 | } |
260 | |
261 | /* Map from source location (decl_lineno) to profile (count_info). */ |
262 | typedef std::map<unsigned, count_info> position_count_map; |
263 | |
264 | /* function_instance name index in the string_table. */ |
265 | unsigned name_; |
266 | |
267 | /* Total sample count. */ |
268 | gcov_type total_count_; |
269 | |
270 | /* Entry BB's sample count. */ |
271 | gcov_type head_count_; |
272 | |
273 | /* Map from callsite location to callee function_instance. */ |
274 | callsite_map callsites; |
275 | |
276 | /* Map from source location to count_info. */ |
277 | position_count_map pos_counts; |
278 | }; |
279 | |
280 | /* Profile for all functions. */ |
281 | class autofdo_source_profile |
282 | { |
283 | public: |
284 | static autofdo_source_profile * |
285 | create () |
286 | { |
287 | autofdo_source_profile *map = new autofdo_source_profile (); |
288 | |
289 | if (map->read ()) |
290 | return map; |
291 | delete map; |
292 | return NULL; |
293 | } |
294 | |
295 | ~autofdo_source_profile (); |
296 | |
297 | /* For a given DECL, returns the top-level function_instance. */ |
298 | function_instance *get_function_instance_by_decl (tree decl) const; |
299 | |
300 | /* Find count_info for a given gimple STMT. If found, store the count_info |
301 | in INFO and return true; otherwise return false. */ |
302 | bool get_count_info (gimple *stmt, count_info *info) const; |
303 | |
304 | /* Find total count of the callee of EDGE. */ |
305 | gcov_type get_callsite_total_count (struct cgraph_edge *edge) const; |
306 | |
307 | /* Update value profile INFO for STMT from the inlined indirect callsite. |
308 | Return true if INFO is updated. */ |
309 | bool update_inlined_ind_target (gcall *stmt, count_info *info); |
310 | |
311 | /* Mark LOC as annotated. */ |
312 | void mark_annotated (location_t loc); |
313 | |
314 | private: |
315 | /* Map from function_instance name index (in string_table) to |
316 | function_instance. */ |
317 | typedef std::map<unsigned, function_instance *> name_function_instance_map; |
318 | |
319 | autofdo_source_profile () {} |
320 | |
321 | /* Read AutoFDO profile and returns TRUE on success. */ |
322 | bool read (); |
323 | |
324 | /* Return the function_instance in the profile that correspond to the |
325 | inline STACK. */ |
326 | function_instance * |
327 | get_function_instance_by_inline_stack (const inline_stack &stack) const; |
328 | |
329 | name_function_instance_map map_; |
330 | }; |
331 | |
332 | /* Store the strings read from the profile data file. */ |
333 | static string_table *afdo_string_table; |
334 | |
335 | /* Store the AutoFDO source profile. */ |
336 | static autofdo_source_profile *afdo_source_profile; |
337 | |
338 | /* gcov_summary structure to store the profile_info. */ |
339 | static gcov_summary *afdo_profile_info; |
340 | |
341 | /* Helper functions. */ |
342 | |
343 | /* Return the original name of NAME: strip the suffix that starts |
344 | with '.' Caller is responsible for freeing RET. */ |
345 | |
346 | static char * |
347 | get_original_name (const char *name) |
348 | { |
349 | char *ret = xstrdup (name); |
350 | char *find = strchr (s: ret, c: '.'); |
351 | if (find != NULL) |
352 | *find = 0; |
353 | return ret; |
354 | } |
355 | |
356 | /* Return the combined location, which is a 32bit integer in which |
357 | higher 16 bits stores the line offset of LOC to the start lineno |
358 | of DECL, The lower 16 bits stores the discriminator. */ |
359 | |
360 | static unsigned |
361 | get_combined_location (location_t loc, tree decl) |
362 | { |
363 | /* TODO: allow more bits for line and less bits for discriminator. */ |
364 | if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16)) |
365 | warning_at (loc, OPT_Woverflow, "offset exceeds 16 bytes" ); |
366 | return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16) |
367 | | get_discriminator_from_loc (loc); |
368 | } |
369 | |
370 | /* Return the function decl of a given lexical BLOCK. */ |
371 | |
372 | static tree |
373 | get_function_decl_from_block (tree block) |
374 | { |
375 | if (!inlined_function_outer_scope_p (block)) |
376 | return NULL_TREE; |
377 | |
378 | return BLOCK_ABSTRACT_ORIGIN (block); |
379 | } |
380 | |
381 | /* Store inline stack for STMT in STACK. */ |
382 | |
383 | static void |
384 | get_inline_stack (location_t locus, inline_stack *stack) |
385 | { |
386 | if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION) |
387 | return; |
388 | |
389 | tree block = LOCATION_BLOCK (locus); |
390 | if (block && TREE_CODE (block) == BLOCK) |
391 | { |
392 | for (block = BLOCK_SUPERCONTEXT (block); |
393 | block && (TREE_CODE (block) == BLOCK); |
394 | block = BLOCK_SUPERCONTEXT (block)) |
395 | { |
396 | location_t tmp_locus = BLOCK_SOURCE_LOCATION (block); |
397 | if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION) |
398 | continue; |
399 | |
400 | tree decl = get_function_decl_from_block (block); |
401 | stack->safe_push ( |
402 | obj: std::make_pair (x&: decl, y: get_combined_location (loc: locus, decl))); |
403 | locus = tmp_locus; |
404 | } |
405 | } |
406 | stack->safe_push ( |
407 | obj: std::make_pair (x&: current_function_decl, |
408 | y: get_combined_location (loc: locus, decl: current_function_decl))); |
409 | } |
410 | |
411 | /* Return STMT's combined location, which is a 32bit integer in which |
412 | higher 16 bits stores the line offset of LOC to the start lineno |
413 | of DECL, The lower 16 bits stores the discriminator. */ |
414 | |
415 | static unsigned |
416 | get_relative_location_for_stmt (gimple *stmt) |
417 | { |
418 | location_t locus = gimple_location (g: stmt); |
419 | if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION) |
420 | return UNKNOWN_LOCATION; |
421 | |
422 | for (tree block = gimple_block (g: stmt); block && (TREE_CODE (block) == BLOCK); |
423 | block = BLOCK_SUPERCONTEXT (block)) |
424 | if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION) |
425 | return get_combined_location (loc: locus, |
426 | decl: get_function_decl_from_block (block)); |
427 | return get_combined_location (loc: locus, decl: current_function_decl); |
428 | } |
429 | |
430 | /* Return true if BB contains indirect call. */ |
431 | |
432 | static bool |
433 | has_indirect_call (basic_block bb) |
434 | { |
435 | gimple_stmt_iterator gsi; |
436 | |
437 | for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
438 | { |
439 | gimple *stmt = gsi_stmt (i: gsi); |
440 | if (gimple_code (g: stmt) == GIMPLE_CALL && !gimple_call_internal_p (gs: stmt) |
441 | && (gimple_call_fn (gs: stmt) == NULL |
442 | || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL)) |
443 | return true; |
444 | } |
445 | return false; |
446 | } |
447 | |
448 | /* Member functions for string_table. */ |
449 | |
450 | /* Deconstructor. */ |
451 | |
452 | string_table::~string_table () |
453 | { |
454 | for (unsigned i = 0; i < vector_.length (); i++) |
455 | free (ptr: vector_[i]); |
456 | } |
457 | |
458 | |
459 | /* Return the index of a given function NAME. Return -1 if NAME is not |
460 | found in string table. */ |
461 | |
462 | int |
463 | string_table::get_index (const char *name) const |
464 | { |
465 | if (name == NULL) |
466 | return -1; |
467 | string_index_map::const_iterator iter = map_.find (x: name); |
468 | if (iter == map_.end ()) |
469 | return -1; |
470 | |
471 | return iter->second; |
472 | } |
473 | |
474 | /* Return the index of a given function DECL. Return -1 if DECL is not |
475 | found in string table. */ |
476 | |
477 | int |
478 | string_table::get_index_by_decl (tree decl) const |
479 | { |
480 | char *name |
481 | = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))); |
482 | int ret = get_index (name); |
483 | free (ptr: name); |
484 | if (ret != -1) |
485 | return ret; |
486 | ret = get_index (name: lang_hooks.dwarf_name (decl, 0)); |
487 | if (ret != -1) |
488 | return ret; |
489 | if (DECL_FROM_INLINE (decl)) |
490 | return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl)); |
491 | |
492 | return -1; |
493 | } |
494 | |
495 | /* Return the function name of a given INDEX. */ |
496 | |
497 | const char * |
498 | string_table::get_name (int index) const |
499 | { |
500 | gcc_assert (index > 0 && index < (int)vector_.length ()); |
501 | return vector_[index]; |
502 | } |
503 | |
504 | /* Read the string table. Return TRUE if reading is successful. */ |
505 | |
506 | bool |
507 | string_table::read () |
508 | { |
509 | if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES) |
510 | return false; |
511 | /* Skip the length of the section. */ |
512 | gcov_read_unsigned (); |
513 | /* Read in the file name table. */ |
514 | unsigned string_num = gcov_read_unsigned (); |
515 | for (unsigned i = 0; i < string_num; i++) |
516 | { |
517 | vector_.safe_push (obj: get_original_name (name: gcov_read_string ())); |
518 | map_[vector_.last ()] = i; |
519 | } |
520 | return true; |
521 | } |
522 | |
523 | /* Member functions for function_instance. */ |
524 | |
525 | function_instance::~function_instance () |
526 | { |
527 | for (callsite_map::iterator iter = callsites.begin (); |
528 | iter != callsites.end (); ++iter) |
529 | delete iter->second; |
530 | } |
531 | |
532 | /* Traverse callsites of the current function_instance to find one at the |
533 | location of LINENO and callee name represented in DECL. */ |
534 | |
535 | function_instance * |
536 | function_instance::get_function_instance_by_decl (unsigned lineno, |
537 | tree decl) const |
538 | { |
539 | int func_name_idx = afdo_string_table->get_index_by_decl (decl); |
540 | if (func_name_idx != -1) |
541 | { |
542 | callsite_map::const_iterator ret |
543 | = callsites.find (x: std::make_pair (x&: lineno, y&: func_name_idx)); |
544 | if (ret != callsites.end ()) |
545 | return ret->second; |
546 | } |
547 | func_name_idx |
548 | = afdo_string_table->get_index (name: lang_hooks.dwarf_name (decl, 0)); |
549 | if (func_name_idx != -1) |
550 | { |
551 | callsite_map::const_iterator ret |
552 | = callsites.find (x: std::make_pair (x&: lineno, y&: func_name_idx)); |
553 | if (ret != callsites.end ()) |
554 | return ret->second; |
555 | } |
556 | if (DECL_FROM_INLINE (decl)) |
557 | return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl)); |
558 | |
559 | return NULL; |
560 | } |
561 | |
562 | /* Store the profile info for LOC in INFO. Return TRUE if profile info |
563 | is found. */ |
564 | |
565 | bool |
566 | function_instance::get_count_info (location_t loc, count_info *info) const |
567 | { |
568 | position_count_map::const_iterator iter = pos_counts.find (x: loc); |
569 | if (iter == pos_counts.end ()) |
570 | return false; |
571 | *info = iter->second; |
572 | return true; |
573 | } |
574 | |
575 | /* Mark LOC as annotated. */ |
576 | |
577 | void |
578 | function_instance::mark_annotated (location_t loc) |
579 | { |
580 | position_count_map::iterator iter = pos_counts.find (x: loc); |
581 | if (iter == pos_counts.end ()) |
582 | return; |
583 | iter->second.annotated = true; |
584 | } |
585 | |
586 | /* Read the inlined indirect call target profile for STMT and store it in |
587 | MAP, return the total count for all inlined indirect calls. */ |
588 | |
589 | gcov_type |
590 | function_instance::find_icall_target_map (gcall *stmt, |
591 | icall_target_map *map) const |
592 | { |
593 | gcov_type ret = 0; |
594 | unsigned stmt_offset = get_relative_location_for_stmt (stmt); |
595 | |
596 | for (callsite_map::const_iterator iter = callsites.begin (); |
597 | iter != callsites.end (); ++iter) |
598 | { |
599 | unsigned callee = iter->second->name (); |
600 | /* Check if callsite location match the stmt. */ |
601 | if (iter->first.first != stmt_offset) |
602 | continue; |
603 | struct cgraph_node *node = cgraph_node::get_for_asmname ( |
604 | get_identifier (afdo_string_table->get_name (callee))); |
605 | if (node == NULL) |
606 | continue; |
607 | (*map)[callee] = iter->second->total_count (); |
608 | ret += iter->second->total_count (); |
609 | } |
610 | return ret; |
611 | } |
612 | |
613 | /* Read the profile and create a function_instance with head count as |
614 | HEAD_COUNT. Recursively read callsites to create nested function_instances |
615 | too. STACK is used to track the recursive creation process. */ |
616 | |
617 | /* function instance profile format: |
618 | |
619 | ENTRY_COUNT: 8 bytes |
620 | NAME_INDEX: 4 bytes |
621 | NUM_POS_COUNTS: 4 bytes |
622 | NUM_CALLSITES: 4 byte |
623 | POS_COUNT_1: |
624 | POS_1_OFFSET: 4 bytes |
625 | NUM_TARGETS: 4 bytes |
626 | COUNT: 8 bytes |
627 | TARGET_1: |
628 | VALUE_PROFILE_TYPE: 4 bytes |
629 | TARGET_IDX: 8 bytes |
630 | COUNT: 8 bytes |
631 | TARGET_2 |
632 | ... |
633 | TARGET_n |
634 | POS_COUNT_2 |
635 | ... |
636 | POS_COUNT_N |
637 | CALLSITE_1: |
638 | CALLSITE_1_OFFSET: 4 bytes |
639 | FUNCTION_INSTANCE_PROFILE (nested) |
640 | CALLSITE_2 |
641 | ... |
642 | CALLSITE_n. */ |
643 | |
644 | function_instance * |
645 | function_instance::read_function_instance (function_instance_stack *stack, |
646 | gcov_type head_count) |
647 | { |
648 | unsigned name = gcov_read_unsigned (); |
649 | unsigned num_pos_counts = gcov_read_unsigned (); |
650 | unsigned num_callsites = gcov_read_unsigned (); |
651 | function_instance *s = new function_instance (name, head_count); |
652 | stack->safe_push (obj: s); |
653 | |
654 | for (unsigned i = 0; i < num_pos_counts; i++) |
655 | { |
656 | unsigned offset = gcov_read_unsigned (); |
657 | unsigned num_targets = gcov_read_unsigned (); |
658 | gcov_type count = gcov_read_counter (); |
659 | s->pos_counts[offset].count = count; |
660 | for (unsigned j = 0; j < stack->length (); j++) |
661 | (*stack)[j]->total_count_ += count; |
662 | for (unsigned j = 0; j < num_targets; j++) |
663 | { |
664 | /* Only indirect call target histogram is supported now. */ |
665 | gcov_read_unsigned (); |
666 | gcov_type target_idx = gcov_read_counter (); |
667 | s->pos_counts[offset].targets[target_idx] = gcov_read_counter (); |
668 | } |
669 | } |
670 | for (unsigned i = 0; i < num_callsites; i++) |
671 | { |
672 | unsigned offset = gcov_read_unsigned (); |
673 | function_instance *callee_function_instance |
674 | = read_function_instance (stack, head_count: 0); |
675 | s->callsites[std::make_pair (x&: offset, y: callee_function_instance->name ())] |
676 | = callee_function_instance; |
677 | } |
678 | stack->pop (); |
679 | return s; |
680 | } |
681 | |
682 | /* Sum of counts that is used during annotation. */ |
683 | |
684 | gcov_type |
685 | function_instance::total_annotated_count () const |
686 | { |
687 | gcov_type ret = 0; |
688 | for (callsite_map::const_iterator iter = callsites.begin (); |
689 | iter != callsites.end (); ++iter) |
690 | ret += iter->second->total_annotated_count (); |
691 | for (position_count_map::const_iterator iter = pos_counts.begin (); |
692 | iter != pos_counts.end (); ++iter) |
693 | if (iter->second.annotated) |
694 | ret += iter->second.count; |
695 | return ret; |
696 | } |
697 | |
698 | /* Member functions for autofdo_source_profile. */ |
699 | |
700 | autofdo_source_profile::~autofdo_source_profile () |
701 | { |
702 | for (name_function_instance_map::const_iterator iter = map_.begin (); |
703 | iter != map_.end (); ++iter) |
704 | delete iter->second; |
705 | } |
706 | |
707 | /* For a given DECL, returns the top-level function_instance. */ |
708 | |
709 | function_instance * |
710 | autofdo_source_profile::get_function_instance_by_decl (tree decl) const |
711 | { |
712 | int index = afdo_string_table->get_index_by_decl (decl); |
713 | if (index == -1) |
714 | return NULL; |
715 | name_function_instance_map::const_iterator ret = map_.find (x: index); |
716 | return ret == map_.end () ? NULL : ret->second; |
717 | } |
718 | |
719 | /* Find count_info for a given gimple STMT. If found, store the count_info |
720 | in INFO and return true; otherwise return false. */ |
721 | |
722 | bool |
723 | autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const |
724 | { |
725 | if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus) |
726 | return false; |
727 | |
728 | inline_stack stack; |
729 | get_inline_stack (locus: gimple_location (g: stmt), stack: &stack); |
730 | if (stack.length () == 0) |
731 | return false; |
732 | function_instance *s = get_function_instance_by_inline_stack (stack); |
733 | if (s == NULL) |
734 | return false; |
735 | return s->get_count_info (loc: stack[0].second, info); |
736 | } |
737 | |
738 | /* Mark LOC as annotated. */ |
739 | |
740 | void |
741 | autofdo_source_profile::mark_annotated (location_t loc) |
742 | { |
743 | inline_stack stack; |
744 | get_inline_stack (locus: loc, stack: &stack); |
745 | if (stack.length () == 0) |
746 | return; |
747 | function_instance *s = get_function_instance_by_inline_stack (stack); |
748 | if (s == NULL) |
749 | return; |
750 | s->mark_annotated (loc: stack[0].second); |
751 | } |
752 | |
753 | /* Update value profile INFO for STMT from the inlined indirect callsite. |
754 | Return true if INFO is updated. */ |
755 | |
756 | bool |
757 | autofdo_source_profile::update_inlined_ind_target (gcall *stmt, |
758 | count_info *info) |
759 | { |
760 | if (dump_file) |
761 | { |
762 | fprintf (stream: dump_file, format: "Checking indirect call -> direct call " ); |
763 | print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); |
764 | } |
765 | |
766 | if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus) |
767 | { |
768 | if (dump_file) |
769 | fprintf (stream: dump_file, format: " good locus\n" ); |
770 | return false; |
771 | } |
772 | |
773 | count_info old_info; |
774 | get_count_info (stmt, info: &old_info); |
775 | gcov_type total = 0; |
776 | for (icall_target_map::const_iterator iter = old_info.targets.begin (); |
777 | iter != old_info.targets.end (); ++iter) |
778 | total += iter->second; |
779 | |
780 | /* Program behavior changed, original promoted (and inlined) target is not |
781 | hot any more. Will avoid promote the original target. |
782 | |
783 | To check if original promoted target is still hot, we check the total |
784 | count of the unpromoted targets (stored in TOTAL). If a callsite count |
785 | (stored in INFO) is smaller than half of the total count, the original |
786 | promoted target is considered not hot any more. */ |
787 | if (info->count < total / 2) |
788 | { |
789 | if (dump_file) |
790 | fprintf (stream: dump_file, format: " not hot anymore %ld < %ld" , |
791 | (long)info->count, |
792 | (long)total /2); |
793 | return false; |
794 | } |
795 | |
796 | inline_stack stack; |
797 | get_inline_stack (locus: gimple_location (g: stmt), stack: &stack); |
798 | if (stack.length () == 0) |
799 | { |
800 | if (dump_file) |
801 | fprintf (stream: dump_file, format: " no inline stack\n" ); |
802 | return false; |
803 | } |
804 | function_instance *s = get_function_instance_by_inline_stack (stack); |
805 | if (s == NULL) |
806 | { |
807 | if (dump_file) |
808 | fprintf (stream: dump_file, format: " function not found in inline stack\n" ); |
809 | return false; |
810 | } |
811 | icall_target_map map; |
812 | if (s->find_icall_target_map (stmt, map: &map) == 0) |
813 | { |
814 | if (dump_file) |
815 | fprintf (stream: dump_file, format: " no target map\n" ); |
816 | return false; |
817 | } |
818 | for (icall_target_map::const_iterator iter = map.begin (); |
819 | iter != map.end (); ++iter) |
820 | info->targets[iter->first] = iter->second; |
821 | if (dump_file) |
822 | fprintf (stream: dump_file, format: " looks good\n" ); |
823 | return true; |
824 | } |
825 | |
826 | /* Find total count of the callee of EDGE. */ |
827 | |
828 | gcov_type |
829 | autofdo_source_profile::get_callsite_total_count ( |
830 | struct cgraph_edge *edge) const |
831 | { |
832 | inline_stack stack; |
833 | stack.safe_push (obj: std::make_pair (x&: edge->callee->decl, y: 0)); |
834 | get_inline_stack (locus: gimple_location (g: edge->call_stmt), stack: &stack); |
835 | |
836 | function_instance *s = get_function_instance_by_inline_stack (stack); |
837 | if (s == NULL |
838 | || afdo_string_table->get_index (IDENTIFIER_POINTER ( |
839 | DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ()) |
840 | return 0; |
841 | |
842 | return s->total_count (); |
843 | } |
844 | |
845 | /* Read AutoFDO profile and returns TRUE on success. */ |
846 | |
847 | /* source profile format: |
848 | |
849 | GCOV_TAG_AFDO_FUNCTION: 4 bytes |
850 | LENGTH: 4 bytes |
851 | NUM_FUNCTIONS: 4 bytes |
852 | FUNCTION_INSTANCE_1 |
853 | FUNCTION_INSTANCE_2 |
854 | ... |
855 | FUNCTION_INSTANCE_N. */ |
856 | |
857 | bool |
858 | autofdo_source_profile::read () |
859 | { |
860 | if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION) |
861 | { |
862 | inform (UNKNOWN_LOCATION, "Not expected TAG." ); |
863 | return false; |
864 | } |
865 | |
866 | /* Skip the length of the section. */ |
867 | gcov_read_unsigned (); |
868 | |
869 | /* Read in the function/callsite profile, and store it in local |
870 | data structure. */ |
871 | unsigned function_num = gcov_read_unsigned (); |
872 | for (unsigned i = 0; i < function_num; i++) |
873 | { |
874 | function_instance::function_instance_stack stack; |
875 | function_instance *s = function_instance::read_function_instance ( |
876 | stack: &stack, head_count: gcov_read_counter ()); |
877 | map_[s->name ()] = s; |
878 | } |
879 | return true; |
880 | } |
881 | |
882 | /* Return the function_instance in the profile that correspond to the |
883 | inline STACK. */ |
884 | |
885 | function_instance * |
886 | autofdo_source_profile::get_function_instance_by_inline_stack ( |
887 | const inline_stack &stack) const |
888 | { |
889 | name_function_instance_map::const_iterator iter = map_.find ( |
890 | x: afdo_string_table->get_index_by_decl (decl: stack[stack.length () - 1].first)); |
891 | if (iter == map_.end()) |
892 | return NULL; |
893 | function_instance *s = iter->second; |
894 | for (unsigned i = stack.length() - 1; i > 0; i--) |
895 | { |
896 | s = s->get_function_instance_by_decl ( |
897 | lineno: stack[i].second, decl: stack[i - 1].first); |
898 | if (s == NULL) |
899 | return NULL; |
900 | } |
901 | return s; |
902 | } |
903 | |
904 | /* Module profile is only used by LIPO. Here we simply ignore it. */ |
905 | |
906 | static void |
907 | fake_read_autofdo_module_profile () |
908 | { |
909 | /* Read in the module info. */ |
910 | gcov_read_unsigned (); |
911 | |
912 | /* Skip the length of the section. */ |
913 | gcov_read_unsigned (); |
914 | |
915 | /* Read in the file name table. */ |
916 | unsigned total_module_num = gcov_read_unsigned (); |
917 | gcc_assert (total_module_num == 0); |
918 | } |
919 | |
920 | /* Read data from profile data file. */ |
921 | |
922 | static void |
923 | read_profile (void) |
924 | { |
925 | if (gcov_open (auto_profile_file, 1) == 0) |
926 | { |
927 | error ("cannot open profile file %s" , auto_profile_file); |
928 | return; |
929 | } |
930 | |
931 | if (gcov_read_unsigned () != GCOV_DATA_MAGIC) |
932 | { |
933 | error ("AutoFDO profile magic number does not match" ); |
934 | return; |
935 | } |
936 | |
937 | /* Skip the version number. */ |
938 | unsigned version = gcov_read_unsigned (); |
939 | if (version != AUTO_PROFILE_VERSION) |
940 | { |
941 | error ("AutoFDO profile version %u does not match %u" , |
942 | version, AUTO_PROFILE_VERSION); |
943 | return; |
944 | } |
945 | |
946 | /* Skip the empty integer. */ |
947 | gcov_read_unsigned (); |
948 | |
949 | /* string_table. */ |
950 | afdo_string_table = new string_table (); |
951 | if (!afdo_string_table->read()) |
952 | { |
953 | error ("cannot read string table from %s" , auto_profile_file); |
954 | return; |
955 | } |
956 | |
957 | /* autofdo_source_profile. */ |
958 | afdo_source_profile = autofdo_source_profile::create (); |
959 | if (afdo_source_profile == NULL) |
960 | { |
961 | error ("cannot read function profile from %s" , auto_profile_file); |
962 | return; |
963 | } |
964 | |
965 | /* autofdo_module_profile. */ |
966 | fake_read_autofdo_module_profile (); |
967 | } |
968 | |
969 | /* From AutoFDO profiles, find values inside STMT for that we want to measure |
970 | histograms for indirect-call optimization. |
971 | |
972 | This function is actually served for 2 purposes: |
973 | * before annotation, we need to mark histogram, promote and inline |
974 | * after annotation, we just need to mark, and let follow-up logic to |
975 | decide if it needs to promote and inline. */ |
976 | |
977 | static bool |
978 | afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map, |
979 | bool transform) |
980 | { |
981 | gimple *gs = gsi_stmt (i: *gsi); |
982 | tree callee; |
983 | |
984 | if (map.size () == 0) |
985 | return false; |
986 | gcall *stmt = dyn_cast <gcall *> (p: gs); |
987 | if (!stmt |
988 | || gimple_call_internal_p (gs: stmt) |
989 | || gimple_call_fndecl (gs: stmt) != NULL_TREE) |
990 | return false; |
991 | |
992 | gcov_type total = 0; |
993 | icall_target_map::const_iterator max_iter = map.end (); |
994 | |
995 | for (icall_target_map::const_iterator iter = map.begin (); |
996 | iter != map.end (); ++iter) |
997 | { |
998 | total += iter->second; |
999 | if (max_iter == map.end () || max_iter->second < iter->second) |
1000 | max_iter = iter; |
1001 | } |
1002 | struct cgraph_node *direct_call = cgraph_node::get_for_asmname ( |
1003 | get_identifier (afdo_string_table->get_name (max_iter->first))); |
1004 | if (direct_call == NULL || !direct_call->profile_id) |
1005 | return false; |
1006 | |
1007 | callee = gimple_call_fn (gs: stmt); |
1008 | |
1009 | histogram_value hist = gimple_alloc_histogram_value ( |
1010 | cfun, HIST_TYPE_INDIR_CALL, stmt, value: callee); |
1011 | hist->n_counters = 4; |
1012 | hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters); |
1013 | gimple_add_histogram_value (cfun, stmt, hist); |
1014 | |
1015 | /* Total counter */ |
1016 | hist->hvalue.counters[0] = total; |
1017 | /* Number of value/counter pairs */ |
1018 | hist->hvalue.counters[1] = 1; |
1019 | /* Value */ |
1020 | hist->hvalue.counters[2] = direct_call->profile_id; |
1021 | /* Counter */ |
1022 | hist->hvalue.counters[3] = max_iter->second; |
1023 | |
1024 | if (!transform) |
1025 | return false; |
1026 | |
1027 | cgraph_node* current_function_node = cgraph_node::get (decl: current_function_decl); |
1028 | |
1029 | /* If the direct call is a recursive call, don't promote it since |
1030 | we are not set up to inline recursive calls at this stage. */ |
1031 | if (direct_call == current_function_node) |
1032 | return false; |
1033 | |
1034 | struct cgraph_edge *indirect_edge |
1035 | = current_function_node->get_edge (call_stmt: stmt); |
1036 | |
1037 | if (dump_file) |
1038 | { |
1039 | fprintf (stream: dump_file, format: "Indirect call -> direct call " ); |
1040 | print_generic_expr (dump_file, callee, TDF_SLIM); |
1041 | fprintf (stream: dump_file, format: " => " ); |
1042 | print_generic_expr (dump_file, direct_call->decl, TDF_SLIM); |
1043 | } |
1044 | |
1045 | if (direct_call == NULL) |
1046 | { |
1047 | if (dump_file) |
1048 | fprintf (stream: dump_file, format: " not transforming\n" ); |
1049 | return false; |
1050 | } |
1051 | if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL) |
1052 | { |
1053 | if (dump_file) |
1054 | fprintf (stream: dump_file, format: " no declaration\n" ); |
1055 | return false; |
1056 | } |
1057 | |
1058 | if (dump_file) |
1059 | { |
1060 | fprintf (stream: dump_file, format: " transformation on insn " ); |
1061 | print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); |
1062 | fprintf (stream: dump_file, format: "\n" ); |
1063 | } |
1064 | |
1065 | /* FIXME: Count should be initialized. */ |
1066 | struct cgraph_edge *new_edge |
1067 | = indirect_edge->make_speculative (n2: direct_call, |
1068 | direct_count: profile_count::uninitialized ()); |
1069 | cgraph_edge::redirect_call_stmt_to_callee (e: new_edge); |
1070 | gimple_remove_histogram_value (cfun, stmt, hist); |
1071 | inline_call (new_edge, true, NULL, NULL, false); |
1072 | return true; |
1073 | } |
1074 | |
1075 | /* From AutoFDO profiles, find values inside STMT for that we want to measure |
1076 | histograms and adds them to list VALUES. */ |
1077 | |
1078 | static bool |
1079 | afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map, |
1080 | bool transform) |
1081 | { |
1082 | return afdo_indirect_call (gsi, map, transform); |
1083 | } |
1084 | |
1085 | typedef std::set<basic_block> bb_set; |
1086 | typedef std::set<edge> edge_set; |
1087 | |
1088 | static bool |
1089 | is_bb_annotated (const basic_block bb, const bb_set &annotated) |
1090 | { |
1091 | return annotated.find (x: bb) != annotated.end (); |
1092 | } |
1093 | |
1094 | static void |
1095 | set_bb_annotated (basic_block bb, bb_set *annotated) |
1096 | { |
1097 | annotated->insert (x: bb); |
1098 | } |
1099 | |
1100 | /* For a given BB, set its execution count. Attach value profile if a stmt |
1101 | is not in PROMOTED, because we only want to promote an indirect call once. |
1102 | Return TRUE if BB is annotated. */ |
1103 | |
1104 | static bool |
1105 | afdo_set_bb_count (basic_block bb, const stmt_set &promoted) |
1106 | { |
1107 | gimple_stmt_iterator gsi; |
1108 | edge e; |
1109 | edge_iterator ei; |
1110 | gcov_type max_count = 0; |
1111 | bool has_annotated = false; |
1112 | |
1113 | for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
1114 | { |
1115 | count_info info; |
1116 | gimple *stmt = gsi_stmt (i: gsi); |
1117 | if (gimple_clobber_p (s: stmt) || is_gimple_debug (gs: stmt)) |
1118 | continue; |
1119 | if (afdo_source_profile->get_count_info (stmt, info: &info)) |
1120 | { |
1121 | if (info.count > max_count) |
1122 | max_count = info.count; |
1123 | has_annotated = true; |
1124 | if (info.targets.size () > 0 |
1125 | && promoted.find (x: stmt) == promoted.end ()) |
1126 | afdo_vpt (gsi: &gsi, map: info.targets, transform: false); |
1127 | } |
1128 | } |
1129 | |
1130 | if (!has_annotated) |
1131 | return false; |
1132 | |
1133 | for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
1134 | afdo_source_profile->mark_annotated (loc: gimple_location (g: gsi_stmt (i: gsi))); |
1135 | for (gphi_iterator gpi = gsi_start_phis (bb); |
1136 | !gsi_end_p (i: gpi); |
1137 | gsi_next (i: &gpi)) |
1138 | { |
1139 | gphi *phi = gpi.phi (); |
1140 | size_t i; |
1141 | for (i = 0; i < gimple_phi_num_args (gs: phi); i++) |
1142 | afdo_source_profile->mark_annotated (loc: gimple_phi_arg_location (phi, i)); |
1143 | } |
1144 | FOR_EACH_EDGE (e, ei, bb->succs) |
1145 | afdo_source_profile->mark_annotated (loc: e->goto_locus); |
1146 | |
1147 | bb->count = profile_count::from_gcov_type (v: max_count).afdo (); |
1148 | return true; |
1149 | } |
1150 | |
1151 | /* BB1 and BB2 are in an equivalent class iff: |
1152 | 1. BB1 dominates BB2. |
1153 | 2. BB2 post-dominates BB1. |
1154 | 3. BB1 and BB2 are in the same loop nest. |
1155 | This function finds the equivalent class for each basic block, and |
1156 | stores a pointer to the first BB in its equivalent class. Meanwhile, |
1157 | set bb counts for the same equivalent class to be idenical. Update |
1158 | ANNOTATED_BB for the first BB in its equivalent class. */ |
1159 | |
1160 | static void |
1161 | afdo_find_equiv_class (bb_set *annotated_bb) |
1162 | { |
1163 | basic_block bb; |
1164 | |
1165 | FOR_ALL_BB_FN (bb, cfun) |
1166 | bb->aux = NULL; |
1167 | |
1168 | FOR_ALL_BB_FN (bb, cfun) |
1169 | { |
1170 | if (bb->aux != NULL) |
1171 | continue; |
1172 | bb->aux = bb; |
1173 | for (basic_block bb1 : get_dominated_by (CDI_DOMINATORS, bb)) |
1174 | if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1) |
1175 | && bb1->loop_father == bb->loop_father) |
1176 | { |
1177 | bb1->aux = bb; |
1178 | if (bb1->count > bb->count && is_bb_annotated (bb: bb1, annotated: *annotated_bb)) |
1179 | { |
1180 | bb->count = bb1->count; |
1181 | set_bb_annotated (bb, annotated: annotated_bb); |
1182 | } |
1183 | } |
1184 | |
1185 | for (basic_block bb1 : get_dominated_by (CDI_POST_DOMINATORS, bb)) |
1186 | if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1) |
1187 | && bb1->loop_father == bb->loop_father) |
1188 | { |
1189 | bb1->aux = bb; |
1190 | if (bb1->count > bb->count && is_bb_annotated (bb: bb1, annotated: *annotated_bb)) |
1191 | { |
1192 | bb->count = bb1->count; |
1193 | set_bb_annotated (bb, annotated: annotated_bb); |
1194 | } |
1195 | } |
1196 | } |
1197 | } |
1198 | |
1199 | /* If a basic block's count is known, and only one of its in/out edges' count |
1200 | is unknown, its count can be calculated. Meanwhile, if all of the in/out |
1201 | edges' counts are known, then the basic block's unknown count can also be |
1202 | calculated. Also, if a block has a single predecessor or successor, the block's |
1203 | count can be propagated to that predecessor or successor. |
1204 | IS_SUCC is true if out edges of a basic blocks are examined. |
1205 | Update ANNOTATED_BB accordingly. |
1206 | Return TRUE if any basic block/edge count is changed. */ |
1207 | |
1208 | static bool |
1209 | afdo_propagate_edge (bool is_succ, bb_set *annotated_bb) |
1210 | { |
1211 | basic_block bb; |
1212 | bool changed = false; |
1213 | |
1214 | FOR_EACH_BB_FN (bb, cfun) |
1215 | { |
1216 | edge e, unknown_edge = NULL; |
1217 | edge_iterator ei; |
1218 | int num_unknown_edge = 0; |
1219 | int num_edge = 0; |
1220 | profile_count total_known_count = profile_count::zero ().afdo (); |
1221 | |
1222 | FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds) |
1223 | { |
1224 | gcc_assert (AFDO_EINFO (e) != NULL); |
1225 | if (! AFDO_EINFO (e)->is_annotated ()) |
1226 | num_unknown_edge++, unknown_edge = e; |
1227 | else |
1228 | total_known_count += AFDO_EINFO (e)->get_count (); |
1229 | num_edge++; |
1230 | } |
1231 | |
1232 | /* Be careful not to annotate block with no successor in special cases. */ |
1233 | if (num_unknown_edge == 0 && total_known_count > bb->count) |
1234 | { |
1235 | bb->count = total_known_count; |
1236 | if (!is_bb_annotated (bb, annotated: *annotated_bb)) |
1237 | set_bb_annotated (bb, annotated: annotated_bb); |
1238 | changed = true; |
1239 | } |
1240 | else if (num_unknown_edge == 1 && is_bb_annotated (bb, annotated: *annotated_bb)) |
1241 | { |
1242 | if (bb->count > total_known_count) |
1243 | { |
1244 | profile_count new_count = bb->count - total_known_count; |
1245 | AFDO_EINFO(unknown_edge)->set_count(new_count); |
1246 | if (num_edge == 1) |
1247 | { |
1248 | basic_block succ_or_pred_bb = is_succ ? unknown_edge->dest : unknown_edge->src; |
1249 | if (new_count > succ_or_pred_bb->count) |
1250 | { |
1251 | succ_or_pred_bb->count = new_count; |
1252 | if (!is_bb_annotated (bb: succ_or_pred_bb, annotated: *annotated_bb)) |
1253 | set_bb_annotated (bb: succ_or_pred_bb, annotated: annotated_bb); |
1254 | } |
1255 | } |
1256 | } |
1257 | else |
1258 | AFDO_EINFO (unknown_edge)->set_count (profile_count::zero().afdo ()); |
1259 | AFDO_EINFO (unknown_edge)->set_annotated (); |
1260 | changed = true; |
1261 | } |
1262 | } |
1263 | return changed; |
1264 | } |
1265 | |
1266 | /* Special propagation for circuit expressions. Because GCC translates |
1267 | control flow into data flow for circuit expressions. E.g. |
1268 | BB1: |
1269 | if (a && b) |
1270 | BB2 |
1271 | else |
1272 | BB3 |
1273 | |
1274 | will be translated into: |
1275 | |
1276 | BB1: |
1277 | if (a) |
1278 | goto BB.t1 |
1279 | else |
1280 | goto BB.t3 |
1281 | BB.t1: |
1282 | if (b) |
1283 | goto BB.t2 |
1284 | else |
1285 | goto BB.t3 |
1286 | BB.t2: |
1287 | goto BB.t3 |
1288 | BB.t3: |
1289 | tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2) |
1290 | if (tmp) |
1291 | goto BB2 |
1292 | else |
1293 | goto BB3 |
1294 | |
1295 | In this case, we need to propagate through PHI to determine the edge |
1296 | count of BB1->BB.t1, BB.t1->BB.t2. */ |
1297 | |
1298 | static void |
1299 | afdo_propagate_circuit (const bb_set &annotated_bb) |
1300 | { |
1301 | basic_block bb; |
1302 | FOR_ALL_BB_FN (bb, cfun) |
1303 | { |
1304 | gimple *def_stmt; |
1305 | tree cmp_rhs, cmp_lhs; |
1306 | gimple *cmp_stmt = last_nondebug_stmt (bb); |
1307 | edge e; |
1308 | edge_iterator ei; |
1309 | |
1310 | if (!cmp_stmt || gimple_code (g: cmp_stmt) != GIMPLE_COND) |
1311 | continue; |
1312 | cmp_rhs = gimple_cond_rhs (gs: cmp_stmt); |
1313 | cmp_lhs = gimple_cond_lhs (gs: cmp_stmt); |
1314 | if (!TREE_CONSTANT (cmp_rhs) |
1315 | || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs))) |
1316 | continue; |
1317 | if (TREE_CODE (cmp_lhs) != SSA_NAME) |
1318 | continue; |
1319 | if (!is_bb_annotated (bb, annotated: annotated_bb)) |
1320 | continue; |
1321 | def_stmt = SSA_NAME_DEF_STMT (cmp_lhs); |
1322 | while (def_stmt && gimple_code (g: def_stmt) == GIMPLE_ASSIGN |
1323 | && gimple_assign_single_p (gs: def_stmt) |
1324 | && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME) |
1325 | def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)); |
1326 | if (!def_stmt) |
1327 | continue; |
1328 | gphi *phi_stmt = dyn_cast <gphi *> (p: def_stmt); |
1329 | if (!phi_stmt) |
1330 | continue; |
1331 | FOR_EACH_EDGE (e, ei, bb->succs) |
1332 | { |
1333 | unsigned i, total = 0; |
1334 | edge only_one; |
1335 | bool check_value_one = (((integer_onep (cmp_rhs)) |
1336 | ^ (gimple_cond_code (gs: cmp_stmt) == EQ_EXPR)) |
1337 | ^ ((e->flags & EDGE_TRUE_VALUE) != 0)); |
1338 | if (! AFDO_EINFO (e)->is_annotated ()) |
1339 | continue; |
1340 | for (i = 0; i < gimple_phi_num_args (gs: phi_stmt); i++) |
1341 | { |
1342 | tree val = gimple_phi_arg_def (gs: phi_stmt, index: i); |
1343 | edge ep = gimple_phi_arg_edge (phi: phi_stmt, i); |
1344 | |
1345 | if (!TREE_CONSTANT (val) |
1346 | || !(integer_zerop (val) || integer_onep (val))) |
1347 | continue; |
1348 | if (check_value_one ^ integer_onep (val)) |
1349 | continue; |
1350 | total++; |
1351 | only_one = ep; |
1352 | if (! (AFDO_EINFO (e)->get_count ()).nonzero_p () |
1353 | && ! AFDO_EINFO (ep)->is_annotated ()) |
1354 | { |
1355 | AFDO_EINFO (ep)->set_count (profile_count::zero ().afdo ()); |
1356 | AFDO_EINFO (ep)->set_annotated (); |
1357 | } |
1358 | } |
1359 | if (total == 1 && ! AFDO_EINFO (only_one)->is_annotated ()) |
1360 | { |
1361 | AFDO_EINFO (only_one)->set_count (AFDO_EINFO (e)->get_count ()); |
1362 | AFDO_EINFO (only_one)->set_annotated (); |
1363 | } |
1364 | } |
1365 | } |
1366 | } |
1367 | |
1368 | /* Propagate the basic block count and edge count on the control flow |
1369 | graph. We do the propagation iteratively until stablize. */ |
1370 | |
1371 | static void |
1372 | afdo_propagate (bb_set *annotated_bb) |
1373 | { |
1374 | basic_block bb; |
1375 | bool changed = true; |
1376 | int i = 0; |
1377 | |
1378 | FOR_ALL_BB_FN (bb, cfun) |
1379 | { |
1380 | bb->count = ((basic_block)bb->aux)->count; |
1381 | if (is_bb_annotated (bb: (basic_block)bb->aux, annotated: *annotated_bb)) |
1382 | set_bb_annotated (bb, annotated: annotated_bb); |
1383 | } |
1384 | |
1385 | while (changed && i++ < 10) |
1386 | { |
1387 | changed = false; |
1388 | |
1389 | if (afdo_propagate_edge (is_succ: true, annotated_bb)) |
1390 | changed = true; |
1391 | if (afdo_propagate_edge (is_succ: false, annotated_bb)) |
1392 | changed = true; |
1393 | afdo_propagate_circuit (annotated_bb: *annotated_bb); |
1394 | } |
1395 | } |
1396 | |
1397 | /* Propagate counts on control flow graph and calculate branch |
1398 | probabilities. */ |
1399 | |
1400 | static void |
1401 | afdo_calculate_branch_prob (bb_set *annotated_bb) |
1402 | { |
1403 | edge e; |
1404 | edge_iterator ei; |
1405 | basic_block bb; |
1406 | |
1407 | calculate_dominance_info (CDI_POST_DOMINATORS); |
1408 | calculate_dominance_info (CDI_DOMINATORS); |
1409 | loop_optimizer_init (0); |
1410 | |
1411 | FOR_ALL_BB_FN (bb, cfun) |
1412 | { |
1413 | gcc_assert (bb->aux == NULL); |
1414 | FOR_EACH_EDGE (e, ei, bb->succs) |
1415 | { |
1416 | gcc_assert (e->aux == NULL); |
1417 | e->aux = new edge_info (); |
1418 | } |
1419 | } |
1420 | |
1421 | afdo_find_equiv_class (annotated_bb); |
1422 | afdo_propagate (annotated_bb); |
1423 | |
1424 | FOR_EACH_BB_FN (bb, cfun) |
1425 | { |
1426 | int num_unknown_succ = 0; |
1427 | profile_count total_count = profile_count::zero ().afdo (); |
1428 | |
1429 | FOR_EACH_EDGE (e, ei, bb->succs) |
1430 | { |
1431 | gcc_assert (AFDO_EINFO (e) != NULL); |
1432 | if (! AFDO_EINFO (e)->is_annotated ()) |
1433 | num_unknown_succ++; |
1434 | else |
1435 | total_count += AFDO_EINFO (e)->get_count (); |
1436 | } |
1437 | if (num_unknown_succ == 0 && total_count.nonzero_p()) |
1438 | { |
1439 | FOR_EACH_EDGE (e, ei, bb->succs) |
1440 | e->probability |
1441 | = AFDO_EINFO (e)->get_count ().probability_in (overall: total_count); |
1442 | } |
1443 | } |
1444 | FOR_ALL_BB_FN (bb, cfun) |
1445 | { |
1446 | bb->aux = NULL; |
1447 | FOR_EACH_EDGE (e, ei, bb->succs) |
1448 | if (AFDO_EINFO (e) != NULL) |
1449 | { |
1450 | delete AFDO_EINFO (e); |
1451 | e->aux = NULL; |
1452 | } |
1453 | } |
1454 | |
1455 | loop_optimizer_finalize (); |
1456 | free_dominance_info (CDI_DOMINATORS); |
1457 | free_dominance_info (CDI_POST_DOMINATORS); |
1458 | } |
1459 | |
1460 | /* Perform value profile transformation using AutoFDO profile. Add the |
1461 | promoted stmts to PROMOTED_STMTS. Return TRUE if there is any |
1462 | indirect call promoted. */ |
1463 | |
1464 | static bool |
1465 | afdo_vpt_for_early_inline (stmt_set *promoted_stmts) |
1466 | { |
1467 | basic_block bb; |
1468 | if (afdo_source_profile->get_function_instance_by_decl ( |
1469 | decl: current_function_decl) == NULL) |
1470 | return false; |
1471 | |
1472 | compute_fn_summary (cgraph_node::get (decl: current_function_decl), true); |
1473 | |
1474 | bool has_vpt = false; |
1475 | FOR_EACH_BB_FN (bb, cfun) |
1476 | { |
1477 | if (!has_indirect_call (bb)) |
1478 | continue; |
1479 | gimple_stmt_iterator gsi; |
1480 | |
1481 | gcov_type bb_count = 0; |
1482 | for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
1483 | { |
1484 | count_info info; |
1485 | gimple *stmt = gsi_stmt (i: gsi); |
1486 | if (afdo_source_profile->get_count_info (stmt, info: &info)) |
1487 | bb_count = MAX (bb_count, info.count); |
1488 | } |
1489 | |
1490 | for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
1491 | { |
1492 | gcall *stmt = dyn_cast <gcall *> (p: gsi_stmt (i: gsi)); |
1493 | /* IC_promotion and early_inline_2 is done in multiple iterations. |
1494 | No need to promoted the stmt if its in promoted_stmts (means |
1495 | it is already been promoted in the previous iterations). */ |
1496 | if ((!stmt) || gimple_call_fn (gs: stmt) == NULL |
1497 | || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL |
1498 | || promoted_stmts->find (x: stmt) != promoted_stmts->end ()) |
1499 | continue; |
1500 | |
1501 | count_info info; |
1502 | afdo_source_profile->get_count_info (stmt, info: &info); |
1503 | info.count = bb_count; |
1504 | if (afdo_source_profile->update_inlined_ind_target (stmt, info: &info)) |
1505 | { |
1506 | /* Promote the indirect call and update the promoted_stmts. */ |
1507 | promoted_stmts->insert (x: stmt); |
1508 | if (afdo_vpt (gsi: &gsi, map: info.targets, transform: true)) |
1509 | has_vpt = true; |
1510 | } |
1511 | } |
1512 | } |
1513 | |
1514 | if (has_vpt) |
1515 | { |
1516 | unsigned todo = optimize_inline_calls (current_function_decl); |
1517 | if (todo & TODO_update_ssa_any) |
1518 | update_ssa (TODO_update_ssa); |
1519 | return true; |
1520 | } |
1521 | |
1522 | return false; |
1523 | } |
1524 | |
1525 | /* Annotate auto profile to the control flow graph. Do not annotate value |
1526 | profile for stmts in PROMOTED_STMTS. */ |
1527 | |
1528 | static void |
1529 | afdo_annotate_cfg (const stmt_set &promoted_stmts) |
1530 | { |
1531 | basic_block bb; |
1532 | bb_set annotated_bb; |
1533 | const function_instance *s |
1534 | = afdo_source_profile->get_function_instance_by_decl ( |
1535 | decl: current_function_decl); |
1536 | |
1537 | if (s == NULL) |
1538 | return; |
1539 | cgraph_node::get (decl: current_function_decl)->count |
1540 | = profile_count::from_gcov_type (v: s->head_count ()).afdo (); |
1541 | ENTRY_BLOCK_PTR_FOR_FN (cfun)->count |
1542 | = profile_count::from_gcov_type (v: s->head_count ()).afdo (); |
1543 | EXIT_BLOCK_PTR_FOR_FN (cfun)->count = profile_count::zero ().afdo (); |
1544 | profile_count max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; |
1545 | |
1546 | FOR_EACH_BB_FN (bb, cfun) |
1547 | { |
1548 | /* As autoFDO uses sampling approach, we have to assume that all |
1549 | counters are zero when not seen by autoFDO. */ |
1550 | bb->count = profile_count::zero ().afdo (); |
1551 | if (afdo_set_bb_count (bb, promoted: promoted_stmts)) |
1552 | set_bb_annotated (bb, annotated: &annotated_bb); |
1553 | if (bb->count > max_count) |
1554 | max_count = bb->count; |
1555 | } |
1556 | if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count |
1557 | > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count) |
1558 | { |
1559 | ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count |
1560 | = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; |
1561 | set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, annotated: &annotated_bb); |
1562 | } |
1563 | if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count |
1564 | > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count) |
1565 | { |
1566 | EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count |
1567 | = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; |
1568 | set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, annotated: &annotated_bb); |
1569 | } |
1570 | afdo_source_profile->mark_annotated ( |
1571 | DECL_SOURCE_LOCATION (current_function_decl)); |
1572 | afdo_source_profile->mark_annotated (cfun->function_start_locus); |
1573 | afdo_source_profile->mark_annotated (cfun->function_end_locus); |
1574 | if (max_count.nonzero_p()) |
1575 | { |
1576 | /* Calculate, propagate count and probability information on CFG. */ |
1577 | afdo_calculate_branch_prob (annotated_bb: &annotated_bb); |
1578 | } |
1579 | update_max_bb_count (); |
1580 | profile_status_for_fn (cfun) = PROFILE_READ; |
1581 | cfun->cfg->full_profile = true; |
1582 | if (flag_value_profile_transformations) |
1583 | { |
1584 | gimple_value_profile_transformations (); |
1585 | free_dominance_info (CDI_DOMINATORS); |
1586 | free_dominance_info (CDI_POST_DOMINATORS); |
1587 | update_ssa (TODO_update_ssa); |
1588 | } |
1589 | } |
1590 | |
1591 | /* Wrapper function to invoke early inliner. */ |
1592 | |
1593 | static unsigned int |
1594 | early_inline () |
1595 | { |
1596 | compute_fn_summary (cgraph_node::get (decl: current_function_decl), true); |
1597 | unsigned int todo = early_inliner (cfun); |
1598 | if (todo & TODO_update_ssa_any) |
1599 | update_ssa (TODO_update_ssa); |
1600 | return todo; |
1601 | } |
1602 | |
1603 | /* Use AutoFDO profile to annoate the control flow graph. |
1604 | Return the todo flag. */ |
1605 | |
1606 | static unsigned int |
1607 | auto_profile (void) |
1608 | { |
1609 | struct cgraph_node *node; |
1610 | |
1611 | if (symtab->state == FINISHED) |
1612 | return 0; |
1613 | |
1614 | init_node_map (true); |
1615 | profile_info = autofdo::afdo_profile_info; |
1616 | |
1617 | FOR_EACH_FUNCTION (node) |
1618 | { |
1619 | if (!gimple_has_body_p (node->decl)) |
1620 | continue; |
1621 | |
1622 | /* Don't profile functions produced for builtin stuff. */ |
1623 | if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION) |
1624 | continue; |
1625 | |
1626 | push_cfun (DECL_STRUCT_FUNCTION (node->decl)); |
1627 | |
1628 | /* First do indirect call promotion and early inline to make the |
1629 | IR match the profiled binary before actual annotation. |
1630 | |
1631 | This is needed because an indirect call might have been promoted |
1632 | and inlined in the profiled binary. If we do not promote and |
1633 | inline these indirect calls before annotation, the profile for |
1634 | these promoted functions will be lost. |
1635 | |
1636 | e.g. foo() --indirect_call--> bar() |
1637 | In profiled binary, the callsite is promoted and inlined, making |
1638 | the profile look like: |
1639 | |
1640 | foo: { |
1641 | loc_foo_1: count_1 |
1642 | bar@loc_foo_2: { |
1643 | loc_bar_1: count_2 |
1644 | loc_bar_2: count_3 |
1645 | } |
1646 | } |
1647 | |
1648 | Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined. |
1649 | If we perform annotation on it, the profile inside bar@loc_foo2 |
1650 | will be wasted. |
1651 | |
1652 | To avoid this, we promote loc_foo_2 and inline the promoted bar |
1653 | function before annotation, so the profile inside bar@loc_foo2 |
1654 | will be useful. */ |
1655 | autofdo::stmt_set promoted_stmts; |
1656 | unsigned int todo = 0; |
1657 | for (int i = 0; i < 10; i++) |
1658 | { |
1659 | if (!flag_value_profile_transformations |
1660 | || !autofdo::afdo_vpt_for_early_inline (promoted_stmts: &promoted_stmts)) |
1661 | break; |
1662 | todo |= early_inline (); |
1663 | } |
1664 | |
1665 | todo |= early_inline (); |
1666 | autofdo::afdo_annotate_cfg (promoted_stmts); |
1667 | compute_function_frequency (); |
1668 | |
1669 | /* Local pure-const may imply need to fixup the cfg. */ |
1670 | todo |= execute_fixup_cfg (); |
1671 | if (todo & TODO_cleanup_cfg) |
1672 | cleanup_tree_cfg (); |
1673 | |
1674 | free_dominance_info (CDI_DOMINATORS); |
1675 | free_dominance_info (CDI_POST_DOMINATORS); |
1676 | cgraph_edge::rebuild_edges (); |
1677 | compute_fn_summary (cgraph_node::get (decl: current_function_decl), true); |
1678 | pop_cfun (); |
1679 | } |
1680 | |
1681 | return 0; |
1682 | } |
1683 | } /* namespace autofdo. */ |
1684 | |
1685 | /* Read the profile from the profile data file. */ |
1686 | |
1687 | void |
1688 | read_autofdo_file (void) |
1689 | { |
1690 | if (auto_profile_file == NULL) |
1691 | auto_profile_file = DEFAULT_AUTO_PROFILE_FILE; |
1692 | |
1693 | autofdo::afdo_profile_info = XNEW (gcov_summary); |
1694 | autofdo::afdo_profile_info->runs = 1; |
1695 | autofdo::afdo_profile_info->sum_max = 0; |
1696 | |
1697 | /* Read the profile from the profile file. */ |
1698 | autofdo::read_profile (); |
1699 | } |
1700 | |
1701 | /* Free the resources. */ |
1702 | |
1703 | void |
1704 | end_auto_profile (void) |
1705 | { |
1706 | delete autofdo::afdo_source_profile; |
1707 | delete autofdo::afdo_string_table; |
1708 | profile_info = NULL; |
1709 | } |
1710 | |
1711 | /* Returns TRUE if EDGE is hot enough to be inlined early. */ |
1712 | |
1713 | bool |
1714 | afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge) |
1715 | { |
1716 | gcov_type count |
1717 | = autofdo::afdo_source_profile->get_callsite_total_count (edge); |
1718 | |
1719 | if (count > 0) |
1720 | { |
1721 | bool is_hot; |
1722 | profile_count pcount = profile_count::from_gcov_type (v: count).afdo (); |
1723 | gcov_summary *saved_profile_info = profile_info; |
1724 | /* At early inline stage, profile_info is not set yet. We need to |
1725 | temporarily set it to afdo_profile_info to calculate hotness. */ |
1726 | profile_info = autofdo::afdo_profile_info; |
1727 | is_hot = maybe_hot_count_p (NULL, pcount); |
1728 | profile_info = saved_profile_info; |
1729 | return is_hot; |
1730 | } |
1731 | |
1732 | return false; |
1733 | } |
1734 | |
1735 | namespace |
1736 | { |
1737 | |
1738 | const pass_data pass_data_ipa_auto_profile = { |
1739 | .type: SIMPLE_IPA_PASS, .name: "afdo" , /* name */ |
1740 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
1741 | .tv_id: TV_IPA_AUTOFDO, /* tv_id */ |
1742 | .properties_required: 0, /* properties_required */ |
1743 | .properties_provided: 0, /* properties_provided */ |
1744 | .properties_destroyed: 0, /* properties_destroyed */ |
1745 | .todo_flags_start: 0, /* todo_flags_start */ |
1746 | .todo_flags_finish: 0, /* todo_flags_finish */ |
1747 | }; |
1748 | |
1749 | class pass_ipa_auto_profile : public simple_ipa_opt_pass |
1750 | { |
1751 | public: |
1752 | pass_ipa_auto_profile (gcc::context *ctxt) |
1753 | : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt) |
1754 | { |
1755 | } |
1756 | |
1757 | /* opt_pass methods: */ |
1758 | bool |
1759 | gate (function *) final override |
1760 | { |
1761 | return flag_auto_profile; |
1762 | } |
1763 | unsigned int |
1764 | execute (function *) final override |
1765 | { |
1766 | return autofdo::auto_profile (); |
1767 | } |
1768 | }; // class pass_ipa_auto_profile |
1769 | |
1770 | } // anon namespace |
1771 | |
1772 | simple_ipa_opt_pass * |
1773 | make_pass_ipa_auto_profile (gcc::context *ctxt) |
1774 | { |
1775 | return new pass_ipa_auto_profile (ctxt); |
1776 | } |
1777 | |